Merge branch 'linux-linaro-lsk' into linux-linaro-lsk-android
[firefly-linux-kernel-4.4.55.git] / fs / btrfs / qgroup.c
index b44124dd2370ea726e66a099400163c342d8fa12..9d49c586995a18e2aaca7aed94c84b6abd735a2c 100644 (file)
 #include "locking.h"
 #include "ulist.h"
 #include "backref.h"
+#include "extent_io.h"
 
 /* TODO XXX FIXME
  *  - subvol delete -> delete when ref goes to 0? delete limits also?
  *  - reorganize keys
  *  - compressed
  *  - sync
- *  - rescan
  *  - copy also limits on subvol creation
  *  - limit
  *  - caches fuer ulists
@@ -98,7 +98,15 @@ struct btrfs_qgroup_list {
        struct btrfs_qgroup *member;
 };
 
-/* must be called with qgroup_lock held */
+struct qgroup_rescan {
+       struct btrfs_work       work;
+       struct btrfs_fs_info    *fs_info;
+};
+
+static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
+                               struct qgroup_rescan *qscan);
+
+/* must be called with qgroup_ioctl_lock held */
 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
                                           u64 qgroupid)
 {
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
                        }
                        fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
                                                                          ptr);
-                       /* FIXME read scan element */
+                       fs_info->qgroup_rescan_progress.objectid =
+                                       btrfs_qgroup_status_rescan(l, ptr);
+                       if (fs_info->qgroup_flags &
+                           BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+                               struct qgroup_rescan *qscan =
+                                       kmalloc(sizeof(*qscan), GFP_NOFS);
+                               if (!qscan) {
+                                       ret = -ENOMEM;
+                                       goto out;
+                               }
+                               fs_info->qgroup_rescan_progress.type = 0;
+                               fs_info->qgroup_rescan_progress.offset = 0;
+                               qgroup_rescan_start(fs_info, qscan);
+                       }
                        goto next1;
                }
 
@@ -420,8 +441,6 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
                qgroup = rb_entry(n, struct btrfs_qgroup, node);
                rb_erase(n, &fs_info->qgroup_tree);
 
-               WARN_ON(!list_empty(&qgroup->dirty));
-
                while (!list_empty(&qgroup->groups)) {
                        list = list_first_entry(&qgroup->groups,
                                                struct btrfs_qgroup_list,
@@ -721,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
        ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
        btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
        btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
-       /* XXX scan */
+       btrfs_set_qgroup_status_rescan(l, ptr,
+                               fs_info->qgroup_rescan_progress.objectid);
 
        btrfs_mark_buffer_dirty(l);
 
@@ -783,19 +803,21 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
                       struct btrfs_fs_info *fs_info)
 {
        struct btrfs_root *quota_root;
+       struct btrfs_root *tree_root = fs_info->tree_root;
        struct btrfs_path *path = NULL;
        struct btrfs_qgroup_status_item *ptr;
        struct extent_buffer *leaf;
        struct btrfs_key key;
+       struct btrfs_key found_key;
+       struct btrfs_qgroup *qgroup = NULL;
        int ret = 0;
+       int slot;
 
-       spin_lock(&fs_info->qgroup_lock);
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (fs_info->quota_root) {
                fs_info->pending_quota_state = 1;
-               spin_unlock(&fs_info->qgroup_lock);
                goto out;
        }
-       spin_unlock(&fs_info->qgroup_lock);
 
        /*
         * initially create the quota tree
@@ -830,10 +852,57 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
        fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
                                BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
-       btrfs_set_qgroup_status_scan(leaf, ptr, 0);
+       btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
 
        btrfs_mark_buffer_dirty(leaf);
 
+       key.objectid = 0;
+       key.type = BTRFS_ROOT_REF_KEY;
+       key.offset = 0;
+
+       btrfs_release_path(path);
+       ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
+       if (ret > 0)
+               goto out_add_root;
+       if (ret < 0)
+               goto out_free_path;
+
+
+       while (1) {
+               slot = path->slots[0];
+               leaf = path->nodes[0];
+               btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+               if (found_key.type == BTRFS_ROOT_REF_KEY) {
+                       ret = add_qgroup_item(trans, quota_root,
+                                             found_key.offset);
+                       if (ret)
+                               goto out_free_path;
+
+                       qgroup = add_qgroup_rb(fs_info, found_key.offset);
+                       if (IS_ERR(qgroup)) {
+                               ret = PTR_ERR(qgroup);
+                               goto out_free_path;
+                       }
+               }
+               ret = btrfs_next_item(tree_root, path);
+               if (ret < 0)
+                       goto out_free_path;
+               if (ret)
+                       break;
+       }
+
+out_add_root:
+       btrfs_release_path(path);
+       ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
+       if (ret)
+               goto out_free_path;
+
+       qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
+       if (IS_ERR(qgroup)) {
+               ret = PTR_ERR(qgroup);
+               goto out_free_path;
+       }
        spin_lock(&fs_info->qgroup_lock);
        fs_info->quota_root = quota_root;
        fs_info->pending_quota_state = 1;
@@ -847,6 +916,7 @@ out_free_root:
                kfree(quota_root);
        }
 out:
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
 }
 
@@ -857,11 +927,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
        struct btrfs_root *quota_root;
        int ret = 0;
 
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
+       if (!fs_info->quota_root)
+               goto out;
        spin_lock(&fs_info->qgroup_lock);
-       if (!fs_info->quota_root) {
-               spin_unlock(&fs_info->qgroup_lock);
-               return 0;
-       }
        fs_info->quota_enabled = 0;
        fs_info->pending_quota_state = 0;
        quota_root = fs_info->quota_root;
@@ -869,8 +938,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
        btrfs_free_qgroup_config(fs_info);
        spin_unlock(&fs_info->qgroup_lock);
 
-       if (!quota_root)
-               return -EINVAL;
+       if (!quota_root) {
+               ret = -EINVAL;
+               goto out;
+       }
 
        ret = btrfs_clean_quota_tree(trans, quota_root);
        if (ret)
@@ -891,39 +962,62 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
        free_extent_buffer(quota_root->commit_root);
        kfree(quota_root);
 out:
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
 }
 
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
+static void qgroup_dirty(struct btrfs_fs_info *fs_info,
+                        struct btrfs_qgroup *qgroup)
 {
-       /* FIXME */
-       return 0;
+       if (list_empty(&qgroup->dirty))
+               list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 }
 
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
 {
        struct btrfs_root *quota_root;
+       struct btrfs_qgroup *parent;
+       struct btrfs_qgroup *member;
+       struct btrfs_qgroup_list *list;
        int ret = 0;
 
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
-       if (!quota_root)
-               return -EINVAL;
+       if (!quota_root) {
+               ret = -EINVAL;
+               goto out;
+       }
+       member = find_qgroup_rb(fs_info, src);
+       parent = find_qgroup_rb(fs_info, dst);
+       if (!member || !parent) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       /* first check whether such a qgroup relation already exists */
+       list_for_each_entry(list, &member->groups, next_group) {
+               if (list->group == parent) {
+                       ret = -EEXIST;
+                       goto out;
+               }
+       }
 
        ret = add_qgroup_relation_item(trans, quota_root, src, dst);
        if (ret)
-               return ret;
+               goto out;
 
        ret = add_qgroup_relation_item(trans, quota_root, dst, src);
        if (ret) {
                del_qgroup_relation_item(trans, quota_root, src, dst);
-               return ret;
+               goto out;
        }
 
        spin_lock(&fs_info->qgroup_lock);
        ret = add_relation_rb(quota_root->fs_info, src, dst);
        spin_unlock(&fs_info->qgroup_lock);
-
+out:
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
 }
 
@@ -931,13 +1025,34 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
 {
        struct btrfs_root *quota_root;
+       struct btrfs_qgroup *parent;
+       struct btrfs_qgroup *member;
+       struct btrfs_qgroup_list *list;
        int ret = 0;
        int err;
 
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
-       if (!quota_root)
-               return -EINVAL;
+       if (!quota_root) {
+               ret = -EINVAL;
+               goto out;
+       }
 
+       member = find_qgroup_rb(fs_info, src);
+       parent = find_qgroup_rb(fs_info, dst);
+       if (!member || !parent) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       /* first check that such a qgroup relation actually exists */
+       list_for_each_entry(list, &member->groups, next_group) {
+               if (list->group == parent)
+                       goto exist;
+       }
+       ret = -ENOENT;
+       goto out;
+exist:
        ret = del_qgroup_relation_item(trans, quota_root, src, dst);
        err = del_qgroup_relation_item(trans, quota_root, dst, src);
        if (err && !ret)
@@ -945,9 +1060,9 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
 
        spin_lock(&fs_info->qgroup_lock);
        del_relation_rb(fs_info, src, dst);
-
        spin_unlock(&fs_info->qgroup_lock);
-
+out:
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
 }
 
@@ -958,11 +1073,21 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
        struct btrfs_qgroup *qgroup;
        int ret = 0;
 
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
-       if (!quota_root)
-               return -EINVAL;
+       if (!quota_root) {
+               ret = -EINVAL;
+               goto out;
+       }
+       qgroup = find_qgroup_rb(fs_info, qgroupid);
+       if (qgroup) {
+               ret = -EEXIST;
+               goto out;
+       }
 
        ret = add_qgroup_item(trans, quota_root, qgroupid);
+       if (ret)
+               goto out;
 
        spin_lock(&fs_info->qgroup_lock);
        qgroup = add_qgroup_rb(fs_info, qgroupid);
@@ -970,7 +1095,8 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
 
        if (IS_ERR(qgroup))
                ret = PTR_ERR(qgroup);
-
+out:
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
 }
 
@@ -981,27 +1107,32 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
        struct btrfs_qgroup *qgroup;
        int ret = 0;
 
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
-       if (!quota_root)
-               return -EINVAL;
+       if (!quota_root) {
+               ret = -EINVAL;
+               goto out;
+       }
 
-       /* check if there are no relations to this qgroup */
-       spin_lock(&fs_info->qgroup_lock);
        qgroup = find_qgroup_rb(fs_info, qgroupid);
-       if (qgroup) {
-               if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
-                       spin_unlock(&fs_info->qgroup_lock);
-                       return -EBUSY;
+       if (!qgroup) {
+               ret = -ENOENT;
+               goto out;
+       } else {
+               /* check if there are no relations to this qgroup */
+               if (!list_empty(&qgroup->groups) ||
+                   !list_empty(&qgroup->members)) {
+                       ret = -EBUSY;
+                       goto out;
                }
        }
-       spin_unlock(&fs_info->qgroup_lock);
-
        ret = del_qgroup_item(trans, quota_root, qgroupid);
 
        spin_lock(&fs_info->qgroup_lock);
        del_qgroup_rb(quota_root->fs_info, qgroupid);
        spin_unlock(&fs_info->qgroup_lock);
-
+out:
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
 }
 
@@ -1009,13 +1140,22 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
                       struct btrfs_fs_info *fs_info, u64 qgroupid,
                       struct btrfs_qgroup_limit *limit)
 {
-       struct btrfs_root *quota_root = fs_info->quota_root;
+       struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
        int ret = 0;
 
-       if (!quota_root)
-               return -EINVAL;
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
+       quota_root = fs_info->quota_root;
+       if (!quota_root) {
+               ret = -EINVAL;
+               goto out;
+       }
 
+       qgroup = find_qgroup_rb(fs_info, qgroupid);
+       if (!qgroup) {
+               ret = -ENOENT;
+               goto out;
+       }
        ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
                                       limit->flags, limit->max_rfer,
                                       limit->max_excl, limit->rsv_rfer,
@@ -1027,31 +1167,17 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
        }
 
        spin_lock(&fs_info->qgroup_lock);
-
-       qgroup = find_qgroup_rb(fs_info, qgroupid);
-       if (!qgroup) {
-               ret = -ENOENT;
-               goto unlock;
-       }
        qgroup->lim_flags = limit->flags;
        qgroup->max_rfer = limit->max_rfer;
        qgroup->max_excl = limit->max_excl;
        qgroup->rsv_rfer = limit->rsv_rfer;
        qgroup->rsv_excl = limit->rsv_excl;
-
-unlock:
        spin_unlock(&fs_info->qgroup_lock);
-
+out:
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
 }
 
-static void qgroup_dirty(struct btrfs_fs_info *fs_info,
-                        struct btrfs_qgroup *qgroup)
-{
-       if (list_empty(&qgroup->dirty))
-               list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
-}
-
 /*
  * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
  * the modification into a list that's later used by btrfs_end_transaction to
@@ -1075,6 +1201,144 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
+                                   struct ulist *roots, struct ulist *tmp,
+                                   u64 seq)
+{
+       struct ulist_node *unode;
+       struct ulist_iterator uiter;
+       struct ulist_node *tmp_unode;
+       struct ulist_iterator tmp_uiter;
+       struct btrfs_qgroup *qg;
+       int ret;
+
+       ULIST_ITER_INIT(&uiter);
+       while ((unode = ulist_next(roots, &uiter))) {
+               qg = find_qgroup_rb(fs_info, unode->val);
+               if (!qg)
+                       continue;
+
+               ulist_reinit(tmp);
+                                               /* XXX id not needed */
+               ret = ulist_add(tmp, qg->qgroupid,
+                               (u64)(uintptr_t)qg, GFP_ATOMIC);
+               if (ret < 0)
+                       return ret;
+               ULIST_ITER_INIT(&tmp_uiter);
+               while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+                       struct btrfs_qgroup_list *glist;
+
+                       qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
+                       if (qg->refcnt < seq)
+                               qg->refcnt = seq + 1;
+                       else
+                               ++qg->refcnt;
+
+                       list_for_each_entry(glist, &qg->groups, next_group) {
+                               ret = ulist_add(tmp, glist->group->qgroupid,
+                                               (u64)(uintptr_t)glist->group,
+                                               GFP_ATOMIC);
+                               if (ret < 0)
+                                       return ret;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
+                                   struct ulist *roots, struct ulist *tmp,
+                                   u64 seq, int sgn, u64 num_bytes,
+                                   struct btrfs_qgroup *qgroup)
+{
+       struct ulist_node *unode;
+       struct ulist_iterator uiter;
+       struct btrfs_qgroup *qg;
+       struct btrfs_qgroup_list *glist;
+       int ret;
+
+       ulist_reinit(tmp);
+       ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+       if (ret < 0)
+               return ret;
+
+       ULIST_ITER_INIT(&uiter);
+       while ((unode = ulist_next(tmp, &uiter))) {
+               qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+               if (qg->refcnt < seq) {
+                       /* not visited by step 1 */
+                       qg->rfer += sgn * num_bytes;
+                       qg->rfer_cmpr += sgn * num_bytes;
+                       if (roots->nnodes == 0) {
+                               qg->excl += sgn * num_bytes;
+                               qg->excl_cmpr += sgn * num_bytes;
+                       }
+                       qgroup_dirty(fs_info, qg);
+               }
+               WARN_ON(qg->tag >= seq);
+               qg->tag = seq;
+
+               list_for_each_entry(glist, &qg->groups, next_group) {
+                       ret = ulist_add(tmp, glist->group->qgroupid,
+                                       (uintptr_t)glist->group, GFP_ATOMIC);
+                       if (ret < 0)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
+static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
+                                   struct ulist *roots, struct ulist *tmp,
+                                   u64 seq, int sgn, u64 num_bytes)
+{
+       struct ulist_node *unode;
+       struct ulist_iterator uiter;
+       struct btrfs_qgroup *qg;
+       struct ulist_node *tmp_unode;
+       struct ulist_iterator tmp_uiter;
+       int ret;
+
+       ULIST_ITER_INIT(&uiter);
+       while ((unode = ulist_next(roots, &uiter))) {
+               qg = find_qgroup_rb(fs_info, unode->val);
+               if (!qg)
+                       continue;
+
+               ulist_reinit(tmp);
+               ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
+               if (ret < 0)
+                       return ret;
+
+               ULIST_ITER_INIT(&tmp_uiter);
+               while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+                       struct btrfs_qgroup_list *glist;
+
+                       qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
+                       if (qg->tag == seq)
+                               continue;
+
+                       if (qg->refcnt - seq == roots->nnodes) {
+                               qg->excl -= sgn * num_bytes;
+                               qg->excl_cmpr -= sgn * num_bytes;
+                               qgroup_dirty(fs_info, qg);
+                       }
+
+                       list_for_each_entry(glist, &qg->groups, next_group) {
+                               ret = ulist_add(tmp, glist->group->qgroupid,
+                                               (uintptr_t)glist->group,
+                                               GFP_ATOMIC);
+                               if (ret < 0)
+                                       return ret;
+                       }
+               }
+       }
+
+       return 0;
+}
+
 /*
  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
  * from the fs. First, all roots referencing the extent are searched, and
@@ -1090,10 +1354,8 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
        struct btrfs_root *quota_root;
        u64 ref_root;
        struct btrfs_qgroup *qgroup;
-       struct ulist_node *unode;
        struct ulist *roots = NULL;
        struct ulist *tmp = NULL;
-       struct ulist_iterator uiter;
        u64 seq;
        int ret = 0;
        int sgn;
@@ -1132,9 +1394,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
        case BTRFS_ADD_DELAYED_REF:
        case BTRFS_ADD_DELAYED_EXTENT:
                sgn = 1;
+               seq = btrfs_tree_mod_seq_prev(node->seq);
                break;
        case BTRFS_DROP_DELAYED_REF:
                sgn = -1;
+               seq = node->seq;
                break;
        case BTRFS_UPDATE_DELAYED_HEAD:
                return 0;
@@ -1142,20 +1406,37 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
                BUG();
        }
 
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+               if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+                       mutex_unlock(&fs_info->qgroup_rescan_lock);
+                       return 0;
+               }
+       }
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
+
        /*
         * the delayed ref sequence number we pass depends on the direction of
-        * the operation. for add operations, we pass (node->seq - 1) to skip
+        * the operation. for add operations, we pass
+        * btrfs_tree_mod_seq_prev(node->seq) to skip
         * the delayed ref's current sequence number, because we need the state
         * of the tree before the add operation. for delete operations, we pass
         * (node->seq) to include the delayed ref's current sequence number,
         * because we need the state of the tree after the delete operation.
         */
-       ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
-                                  sgn > 0 ? node->seq - 1 : node->seq, &roots);
+       ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
        if (ret < 0)
                return ret;
 
+       mutex_lock(&fs_info->qgroup_rescan_lock);
        spin_lock(&fs_info->qgroup_lock);
+       if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+               if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+                       ret = 0;
+                       goto unlock;
+               }
+       }
+
        quota_root = fs_info->quota_root;
        if (!quota_root)
                goto unlock;
@@ -1175,106 +1456,29 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
        seq = fs_info->qgroup_seq;
        fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
 
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(roots, &uiter))) {
-               struct ulist_node *tmp_unode;
-               struct ulist_iterator tmp_uiter;
-               struct btrfs_qgroup *qg;
-
-               qg = find_qgroup_rb(fs_info, unode->val);
-               if (!qg)
-                       continue;
-
-               ulist_reinit(tmp);
-                                               /* XXX id not needed */
-               ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
-               ULIST_ITER_INIT(&tmp_uiter);
-               while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
-                       struct btrfs_qgroup_list *glist;
-
-                       qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
-                       if (qg->refcnt < seq)
-                               qg->refcnt = seq + 1;
-                       else
-                               ++qg->refcnt;
-
-                       list_for_each_entry(glist, &qg->groups, next_group) {
-                               ulist_add(tmp, glist->group->qgroupid,
-                                         (u64)(uintptr_t)glist->group,
-                                         GFP_ATOMIC);
-                       }
-               }
-       }
+       ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+       if (ret)
+               goto unlock;
 
        /*
         * step 2: walk from the new root
         */
-       ulist_reinit(tmp);
-       ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(tmp, &uiter))) {
-               struct btrfs_qgroup *qg;
-               struct btrfs_qgroup_list *glist;
-
-               qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
-               if (qg->refcnt < seq) {
-                       /* not visited by step 1 */
-                       qg->rfer += sgn * node->num_bytes;
-                       qg->rfer_cmpr += sgn * node->num_bytes;
-                       if (roots->nnodes == 0) {
-                               qg->excl += sgn * node->num_bytes;
-                               qg->excl_cmpr += sgn * node->num_bytes;
-                       }
-                       qgroup_dirty(fs_info, qg);
-               }
-               WARN_ON(qg->tag >= seq);
-               qg->tag = seq;
-
-               list_for_each_entry(glist, &qg->groups, next_group) {
-                       ulist_add(tmp, glist->group->qgroupid,
-                                 (uintptr_t)glist->group, GFP_ATOMIC);
-               }
-       }
+       ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn,
+                                      node->num_bytes, qgroup);
+       if (ret)
+               goto unlock;
 
        /*
         * step 3: walk again from old refs
         */
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(roots, &uiter))) {
-               struct btrfs_qgroup *qg;
-               struct ulist_node *tmp_unode;
-               struct ulist_iterator tmp_uiter;
-
-               qg = find_qgroup_rb(fs_info, unode->val);
-               if (!qg)
-                       continue;
-
-               ulist_reinit(tmp);
-               ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
-               ULIST_ITER_INIT(&tmp_uiter);
-               while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
-                       struct btrfs_qgroup_list *glist;
-
-                       qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
-                       if (qg->tag == seq)
-                               continue;
-
-                       if (qg->refcnt - seq == roots->nnodes) {
-                               qg->excl -= sgn * node->num_bytes;
-                               qg->excl_cmpr -= sgn * node->num_bytes;
-                               qgroup_dirty(fs_info, qg);
-                       }
+       ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn,
+                                      node->num_bytes);
+       if (ret)
+               goto unlock;
 
-                       list_for_each_entry(glist, &qg->groups, next_group) {
-                               ulist_add(tmp, glist->group->qgroupid,
-                                         (uintptr_t)glist->group,
-                                         GFP_ATOMIC);
-                       }
-               }
-       }
-       ret = 0;
 unlock:
        spin_unlock(&fs_info->qgroup_lock);
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
        ulist_free(roots);
        ulist_free(tmp);
 
@@ -1289,10 +1493,14 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 {
        struct btrfs_root *quota_root = fs_info->quota_root;
        int ret = 0;
+       int start_rescan_worker = 0;
 
        if (!quota_root)
                goto out;
 
+       if (!fs_info->quota_enabled && fs_info->pending_quota_state)
+               start_rescan_worker = 1;
+
        fs_info->quota_enabled = fs_info->pending_quota_state;
 
        spin_lock(&fs_info->qgroup_lock);
@@ -1318,6 +1526,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
        if (ret)
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 
+       if (!ret && start_rescan_worker) {
+               ret = btrfs_qgroup_rescan(fs_info);
+               if (ret)
+                       pr_err("btrfs: start rescan quota failed: %d\n", ret);
+               ret = 0;
+       }
+
 out:
 
        return ret;
@@ -1338,12 +1553,30 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
        struct btrfs_qgroup *srcgroup;
        struct btrfs_qgroup *dstgroup;
        u32 level_size = 0;
+       u64 nums;
 
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
        if (!fs_info->quota_enabled)
-               return 0;
+               goto out;
 
-       if (!quota_root)
-               return -EINVAL;
+       if (!quota_root) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (inherit) {
+               i_qgroups = (u64 *)(inherit + 1);
+               nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
+                      2 * inherit->num_excl_copies;
+               for (i = 0; i < nums; ++i) {
+                       srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
+                       if (!srcgroup) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       ++i_qgroups;
+               }
+       }
 
        /*
         * create a tracking group for the subvol itself
@@ -1470,6 +1703,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
 unlock:
        spin_unlock(&fs_info->qgroup_lock);
 out:
+       mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
 }
 
@@ -1514,7 +1748,10 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
                ret = -ENOMEM;
                goto out;
        }
-       ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+       ret = ulist_add(ulist, qgroup->qgroupid,
+                       (uintptr_t)qgroup, GFP_ATOMIC);
+       if (ret < 0)
+               goto out;
        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(ulist, &uiter))) {
                struct btrfs_qgroup *qg;
@@ -1523,25 +1760,27 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
                qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
 
                if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
-                   qg->reserved + qg->rfer + num_bytes >
+                   qg->reserved + (s64)qg->rfer + num_bytes >
                    qg->max_rfer) {
                        ret = -EDQUOT;
                        goto out;
                }
 
                if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
-                   qg->reserved + qg->excl + num_bytes >
+                   qg->reserved + (s64)qg->excl + num_bytes >
                    qg->max_excl) {
                        ret = -EDQUOT;
                        goto out;
                }
 
                list_for_each_entry(glist, &qg->groups, next_group) {
-                       ulist_add(ulist, glist->group->qgroupid,
-                                 (uintptr_t)glist->group, GFP_ATOMIC);
+                       ret = ulist_add(ulist, glist->group->qgroupid,
+                                       (uintptr_t)glist->group, GFP_ATOMIC);
+                       if (ret < 0)
+                               goto out;
                }
        }
-
+       ret = 0;
        /*
         * no limits exceeded, now record the reservation into all qgroups
         */
@@ -1570,6 +1809,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        u64 ref_root = root->root_key.objectid;
+       int ret = 0;
 
        if (!is_fstree(ref_root))
                return;
@@ -1592,7 +1832,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
                btrfs_std_error(fs_info, -ENOMEM);
                goto out;
        }
-       ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+       ret = ulist_add(ulist, qgroup->qgroupid,
+                       (uintptr_t)qgroup, GFP_ATOMIC);
+       if (ret < 0)
+               goto out;
        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(ulist, &uiter))) {
                struct btrfs_qgroup *qg;
@@ -1603,8 +1846,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
                qg->reserved -= num_bytes;
 
                list_for_each_entry(glist, &qg->groups, next_group) {
-                       ulist_add(ulist, glist->group->qgroupid,
-                                 (uintptr_t)glist->group, GFP_ATOMIC);
+                       ret = ulist_add(ulist, glist->group->qgroupid,
+                                       (uintptr_t)glist->group, GFP_ATOMIC);
+                       if (ret < 0)
+                               goto out;
                }
        }
 
@@ -1617,8 +1862,265 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 {
        if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
                return;
-       printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n",
+       pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
                trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
-               trans->delayed_ref_elem.seq);
+               (u32)(trans->delayed_ref_elem.seq >> 32),
+               (u32)trans->delayed_ref_elem.seq);
        BUG();
 }
+
+/*
+ * Scan one leaf of the extent tree, starting at the saved rescan progress
+ * key, and account every EXTENT_ITEM in it to the qgroups of all roots that
+ * were found to reference the extent.
+ *
+ * returns < 0 on error, 0 when more leaves are to be scanned.
+ * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ * NOTE(review): a non-zero "done" value is whatever
+ * btrfs_search_slot_for_read() returned; nothing in this function produces
+ * 2 by itself - confirm that value can actually occur.
+ */
+static int
+qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
+                  struct btrfs_trans_handle *trans, struct ulist *tmp,
+                  struct extent_buffer *scratch_leaf)
+{
+       struct btrfs_key found;
+       struct btrfs_fs_info *fs_info = qscan->fs_info;
+       struct ulist *roots = NULL;
+       struct ulist_node *unode;
+       struct ulist_iterator uiter;
+       struct seq_list tree_mod_seq_elem = {};
+       u64 seq;
+       int slot;
+       int ret;
+
+       path->leave_spinning = 1;
+       /*
+        * qgroup_rescan_lock is held while the progress key is read by the
+        * search and advanced below.
+        */
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       ret = btrfs_search_slot_for_read(fs_info->extent_root,
+                                        &fs_info->qgroup_rescan_progress,
+                                        path, 1, 0);
+
+       pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
+                (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
+                fs_info->qgroup_rescan_progress.type,
+                (unsigned long long)fs_info->qgroup_rescan_progress.offset,
+                ret);
+
+       if (ret) {
+               /*
+                * The rescan is about to end, we will not be scanning any
+                * further blocks. We cannot unset the RESCAN flag here, because
+                * we want to commit the transaction if everything went well.
+                * To make the live accounting work in this phase, we set our
+                * scan progress pointer such that every real extent objectid
+                * will be smaller.
+                */
+               fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+               btrfs_release_path(path);
+               mutex_unlock(&fs_info->qgroup_rescan_lock);
+               return ret;
+       }
+
+       /*
+        * Advance the progress key past the last item of this leaf, so the
+        * next call resumes behind it.
+        */
+       btrfs_item_key_to_cpu(path->nodes[0], &found,
+                             btrfs_header_nritems(path->nodes[0]) - 1);
+       fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
+
+       btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+       /*
+        * Keep a private copy of the leaf so the path and the lock can be
+        * dropped before the accounting loop.
+        * NOTE(review): this copies only the struct extent_buffer itself, not
+        * anything it points to - confirm the copy stays usable after
+        * btrfs_release_path().
+        */
+       memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
+       slot = path->slots[0];
+       btrfs_release_path(path);
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+       for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
+               btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
+               /* only extent items are accounted, everything else is skipped */
+               if (found.type != BTRFS_EXTENT_ITEM_KEY)
+                       continue;
+               ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
+                                          tree_mod_seq_elem.seq, &roots);
+               /*
+                * NOTE(review): roots is not freed on this path - presumably
+                * it is not allocated when the lookup fails; verify.
+                */
+               if (ret < 0)
+                       goto out;
+               spin_lock(&fs_info->qgroup_lock);
+               seq = fs_info->qgroup_seq;
+               fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
+
+               ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+               if (ret) {
+                       spin_unlock(&fs_info->qgroup_lock);
+                       ulist_free(roots);
+                       goto out;
+               }
+
+               /*
+                * step2 of btrfs_qgroup_account_ref works from a single root,
+                * we're doing all at once here.
+                */
+               ulist_reinit(tmp);
+               ULIST_ITER_INIT(&uiter);
+               while ((unode = ulist_next(roots, &uiter))) {
+                       struct btrfs_qgroup *qg;
+
+                       /* roots without a matching qgroup are ignored */
+                       qg = find_qgroup_rb(fs_info, unode->val);
+                       if (!qg)
+                               continue;
+
+                       ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
+                                       GFP_ATOMIC);
+                       if (ret < 0) {
+                               spin_unlock(&fs_info->qgroup_lock);
+                               ulist_free(roots);
+                               goto out;
+                       }
+               }
+
+               /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
+               ULIST_ITER_INIT(&uiter);
+               while ((unode = ulist_next(tmp, &uiter))) {
+                       struct btrfs_qgroup *qg;
+                       struct btrfs_qgroup_list *glist;
+
+                       qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
+                       /* found.offset is presumably the extent size - confirm */
+                       qg->rfer += found.offset;
+                       qg->rfer_cmpr += found.offset;
+                       WARN_ON(qg->tag >= seq);
+                       /*
+                        * The extent also counts as exclusive when refcnt is
+                        * exactly roots->nnodes above seq; presumably step1
+                        * bumps refcnt once per root reaching this qgroup -
+                        * confirm against qgroup_account_ref_step1().
+                        */
+                       if (qg->refcnt - seq == roots->nnodes) {
+                               qg->excl += found.offset;
+                               qg->excl_cmpr += found.offset;
+                       }
+                       qgroup_dirty(fs_info, qg);
+
+                       /*
+                        * Append every group listed in qg->groups to tmp,
+                        * presumably so this walk reaches them as well.
+                        */
+                       list_for_each_entry(glist, &qg->groups, next_group) {
+                               ret = ulist_add(tmp, glist->group->qgroupid,
+                                               (uintptr_t)glist->group,
+                                               GFP_ATOMIC);
+                               if (ret < 0) {
+                                       spin_unlock(&fs_info->qgroup_lock);
+                                       ulist_free(roots);
+                                       goto out;
+                               }
+                       }
+               }
+
+               spin_unlock(&fs_info->qgroup_lock);
+               ulist_free(roots);
+               ret = 0;
+       }
+
+out:
+       /* pairs with btrfs_get_tree_mod_seq() above */
+       btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+
+       return ret;
+}
+
+/*
+ * Work function that performs a qgroup rescan.
+ *
+ * Starts a transaction per iteration and hands it to qgroup_rescan_leaf()
+ * until that returns non-zero. A positive result commits the transaction,
+ * anything else just ends it. A failed commit is reported as a failed scan,
+ * because the scan results were not committed.
+ *
+ * Frees the qgroup_rescan that embeds @work, clears the RESCAN flag and
+ * updates FLAG_INCONSISTENT according to the outcome.
+ */
+static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+{
+       struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
+                                                  work);
+       struct btrfs_path *path;
+       struct btrfs_trans_handle *trans = NULL;
+       struct btrfs_fs_info *fs_info = qscan->fs_info;
+       struct ulist *tmp = NULL;
+       struct extent_buffer *scratch_leaf = NULL;
+       int err = -ENOMEM;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               goto out;
+       tmp = ulist_alloc(GFP_NOFS);
+       if (!tmp)
+               goto out;
+       scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
+       if (!scratch_leaf)
+               goto out;
+
+       err = 0;
+       while (!err) {
+               trans = btrfs_start_transaction(fs_info->fs_root, 0);
+               if (IS_ERR(trans)) {
+                       err = PTR_ERR(trans);
+                       break;
+               }
+               if (!fs_info->quota_enabled) {
+                       /* quota_enabled is clear: stop, reported as failure */
+                       err = -EINTR;
+               } else {
+                       err = qgroup_rescan_leaf(qscan, path, trans,
+                                                tmp, scratch_leaf);
+               }
+               if (err > 0) {
+                       int commit_err;
+
+                       /* do not report success if the commit failed */
+                       commit_err = btrfs_commit_transaction(trans,
+                                                       fs_info->fs_root);
+                       if (commit_err < 0)
+                               err = commit_err;
+               } else {
+                       btrfs_end_transaction(trans, fs_info->fs_root);
+               }
+       }
+
+out:
+       /* fs_info was read from qscan above, so qscan can be freed here */
+       kfree(scratch_leaf);
+       ulist_free(tmp);
+       btrfs_free_path(path);
+       kfree(qscan);
+
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+
+       /*
+        * NOTE(review): only the value 2 clears FLAG_INCONSISTENT; confirm
+        * which callee can produce it.
+        */
+       if (err == 2 &&
+           fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
+               fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+       } else if (err < 0) {
+               fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+       }
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+       if (err >= 0) {
+               pr_info("btrfs: qgroup scan completed%s\n",
+                       err == 2 ? " (inconsistency flag cleared)" : "");
+       } else {
+               pr_err("btrfs: qgroup scan failed with %d\n", err);
+       }
+}
+
+/*
+ * Initialise the work item embedded in @qscan and queue it on the qgroup
+ * rescan workers of @fs_info.
+ */
+static void
+qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
+{
+       struct btrfs_work *work = &qscan->work;
+
+       qscan->fs_info = fs_info;
+
+       memset(work, 0, sizeof(*work));
+       work->func = btrfs_qgroup_rescan_worker;
+
+       pr_info("btrfs: qgroup scan started\n");
+       btrfs_queue_worker(&fs_info->qgroup_rescan_workers, work);
+}
+
+/*
+ * Reset all qgroup counters and queue a rescan worker.
+ *
+ * Returns -ENOMEM if the work item cannot be allocated, -EINPROGRESS if the
+ * RESCAN flag is already set, -EINVAL if the ON flag is not set, and 0 once
+ * the worker has been queued.
+ */
+int
+btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
+{
+       struct qgroup_rescan *qscan;
+       struct rb_node *pos;
+       int ret = 0;
+
+       qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
+       if (!qscan)
+               return -ENOMEM;
+
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       spin_lock(&fs_info->qgroup_lock);
+
+       if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+               ret = -EINPROGRESS;
+       else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+               ret = -EINVAL;
+
+       if (!ret) {
+               fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+               memset(&fs_info->qgroup_rescan_progress, 0,
+                      sizeof(fs_info->qgroup_rescan_progress));
+
+               /* start from scratch: zero the counters of every qgroup */
+               pos = rb_first(&fs_info->qgroup_tree);
+               while (pos) {
+                       struct btrfs_qgroup *qg;
+
+                       qg = rb_entry(pos, struct btrfs_qgroup, node);
+                       qg->rfer = 0;
+                       qg->rfer_cmpr = 0;
+                       qg->excl = 0;
+                       qg->excl_cmpr = 0;
+                       pos = rb_next(pos);
+               }
+       }
+
+       spin_unlock(&fs_info->qgroup_lock);
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+       if (ret) {
+               /* nothing was queued, so the work item is still ours to free */
+               kfree(qscan);
+               return ret;
+       }
+
+       qgroup_rescan_start(fs_info, qscan);
+
+       return 0;
+}