fs/btrfs/qgroup.c
1 /*
2  * Copyright (C) 2011 STRATO.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include <linux/workqueue.h>
26 #include <linux/btrfs.h>
27
28 #include "ctree.h"
29 #include "transaction.h"
30 #include "disk-io.h"
31 #include "locking.h"
32 #include "ulist.h"
33 #include "backref.h"
34 #include "extent_io.h"
35 #include "qgroup.h"
36
37 /* TODO XXX FIXME
38  *  - subvol delete -> delete when ref goes to 0? delete limits also?
39  *  - reorganize keys
40  *  - compressed
41  *  - sync
42  *  - copy also limits on subvol creation
43  *  - limit
44  - caches for ulists
45  *  - performance benchmarks
46  *  - check all ioctl parameters
47  */
48
49 /*
50  * one struct for each qgroup, organized in fs_info->qgroup_tree.
51  */
52 struct btrfs_qgroup {
53         u64 qgroupid;
54
55         /*
56          * state
57          */
58         u64 rfer;       /* referenced */
59         u64 rfer_cmpr;  /* referenced compressed */
60         u64 excl;       /* exclusive */
61         u64 excl_cmpr;  /* exclusive compressed */
62
63         /*
64          * limits
65          */
66         u64 lim_flags;  /* which limits are set */
67         u64 max_rfer;
68         u64 max_excl;
69         u64 rsv_rfer;
70         u64 rsv_excl;
71
72         /*
73          * reservation tracking
74          */
75         u64 reserved;
76
77         /*
78          * lists
79          */
80         struct list_head groups;  /* groups this group is member of */
81         struct list_head members; /* groups that are members of this group */
82         struct list_head dirty;   /* dirty groups */
83         struct rb_node node;      /* tree of qgroups */
84
85         /*
86          * temp variables for accounting operations
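         * (only meaningful while qgroup_lock is held during an accounting
         * run; see qgroup_calc_old_refcnt() and qgroup_calc_new_refcnt())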
87          */
88         u64 old_refcnt;
89         u64 new_refcnt;
90 };
91
92 /*
93  * glue structure to represent the relations between qgroups.
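 *
 * One instance is linked into both the member's ->groups list (via
 * next_group) and the parent's ->members list (via next_member), so a
 * relation can be found and unlinked from either end.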
94  */
95 struct btrfs_qgroup_list {
96         struct list_head next_group;
97         struct list_head next_member;
98         struct btrfs_qgroup *group;
99         struct btrfs_qgroup *member;
100 };
101
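/*
 * Helpers to stash btrfs_qgroup pointers in the u64 aux member of ulist
 * nodes during the accounting walks below.
 */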
102 #define ptr_to_u64(x) ((u64)(uintptr_t)x)
103 #define u64_to_ptr(x) ((struct btrfs_qgroup *)(uintptr_t)x)
104
105 static int
106 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
107                    int init_flags);
108 static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
109
110 /* must be called with qgroup_ioctl_lock held */
111 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
112                                            u64 qgroupid)
113 {
114         struct rb_node *n = fs_info->qgroup_tree.rb_node;
115         struct btrfs_qgroup *qgroup;
116
117         while (n) {
118                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
119                 if (qgroup->qgroupid < qgroupid)
120                         n = n->rb_left;
121                 else if (qgroup->qgroupid > qgroupid)
122                         n = n->rb_right;
123                 else
124                         return qgroup;
125         }
126         return NULL;
127 }
128
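/*
 * Note: find_qgroup_rb() and add_qgroup_rb() both order larger qgroupids
 * toward the left of the tree.  The direction is unusual but harmless,
 * as long as lookup and insert agree on it.
 */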
129 /* must be called with qgroup_lock held */
130 static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
131                                           u64 qgroupid)
132 {
133         struct rb_node **p = &fs_info->qgroup_tree.rb_node;
134         struct rb_node *parent = NULL;
135         struct btrfs_qgroup *qgroup;
136
137         while (*p) {
138                 parent = *p;
139                 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
140
141                 if (qgroup->qgroupid < qgroupid)
142                         p = &(*p)->rb_left;
143                 else if (qgroup->qgroupid > qgroupid)
144                         p = &(*p)->rb_right;
145                 else
146                         return qgroup;
147         }
148
149         qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
150         if (!qgroup)
151                 return ERR_PTR(-ENOMEM);
152
153         qgroup->qgroupid = qgroupid;
154         INIT_LIST_HEAD(&qgroup->groups);
155         INIT_LIST_HEAD(&qgroup->members);
156         INIT_LIST_HEAD(&qgroup->dirty);
157
158         rb_link_node(&qgroup->node, parent, p);
159         rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
160
161         return qgroup;
162 }
163
164 static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
165 {
166         struct btrfs_qgroup_list *list;
167
168         list_del(&qgroup->dirty);
169         while (!list_empty(&qgroup->groups)) {
170                 list = list_first_entry(&qgroup->groups,
171                                         struct btrfs_qgroup_list, next_group);
172                 list_del(&list->next_group);
173                 list_del(&list->next_member);
174                 kfree(list);
175         }
176
177         while (!list_empty(&qgroup->members)) {
178                 list = list_first_entry(&qgroup->members,
179                                         struct btrfs_qgroup_list, next_member);
180                 list_del(&list->next_group);
181                 list_del(&list->next_member);
182                 kfree(list);
183         }
184         kfree(qgroup);
185 }
186
187 /* must be called with qgroup_lock held */
188 static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
189 {
190         struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
191
192         if (!qgroup)
193                 return -ENOENT;
194
195         rb_erase(&qgroup->node, &fs_info->qgroup_tree);
196         __del_qgroup_rb(qgroup);
197         return 0;
198 }
199
200 /* must be called with qgroup_lock held */
201 static int add_relation_rb(struct btrfs_fs_info *fs_info,
202                            u64 memberid, u64 parentid)
203 {
204         struct btrfs_qgroup *member;
205         struct btrfs_qgroup *parent;
206         struct btrfs_qgroup_list *list;
207
208         member = find_qgroup_rb(fs_info, memberid);
209         parent = find_qgroup_rb(fs_info, parentid);
210         if (!member || !parent)
211                 return -ENOENT;
212
213         list = kzalloc(sizeof(*list), GFP_ATOMIC);
214         if (!list)
215                 return -ENOMEM;
216
217         list->group = parent;
218         list->member = member;
219         list_add_tail(&list->next_group, &member->groups);
220         list_add_tail(&list->next_member, &parent->members);
221
222         return 0;
223 }
224
225 /* must be called with qgroup_lock held */
226 static int del_relation_rb(struct btrfs_fs_info *fs_info,
227                            u64 memberid, u64 parentid)
228 {
229         struct btrfs_qgroup *member;
230         struct btrfs_qgroup *parent;
231         struct btrfs_qgroup_list *list;
232
233         member = find_qgroup_rb(fs_info, memberid);
234         parent = find_qgroup_rb(fs_info, parentid);
235         if (!member || !parent)
236                 return -ENOENT;
237
238         list_for_each_entry(list, &member->groups, next_group) {
239                 if (list->group == parent) {
240                         list_del(&list->next_group);
241                         list_del(&list->next_member);
242                         kfree(list);
243                         return 0;
244                 }
245         }
246         return -ENOENT;
247 }
248
249 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
250 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
251                                u64 rfer, u64 excl)
252 {
253         struct btrfs_qgroup *qgroup;
254
255         qgroup = find_qgroup_rb(fs_info, qgroupid);
256         if (!qgroup)
257                 return -EINVAL;
258         if (qgroup->rfer != rfer || qgroup->excl != excl)
259                 return -EINVAL;
260         return 0;
261 }
262 #endif
263
264 /*
265  * The full config is read in one go; this is only called from open_ctree().
266  * It doesn't use any locking, as at this point we're still single-threaded.
267  */
268 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
269 {
270         struct btrfs_key key;
271         struct btrfs_key found_key;
272         struct btrfs_root *quota_root = fs_info->quota_root;
273         struct btrfs_path *path = NULL;
274         struct extent_buffer *l;
275         int slot;
276         int ret = 0;
277         u64 flags = 0;
278         u64 rescan_progress = 0;
279
280         if (!fs_info->quota_enabled)
281                 return 0;
282
283         fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
284         if (!fs_info->qgroup_ulist) {
285                 ret = -ENOMEM;
286                 goto out;
287         }
288
289         path = btrfs_alloc_path();
290         if (!path) {
291                 ret = -ENOMEM;
292                 goto out;
293         }
294
295         /* default this to quota off, in case no status key is found */
296         fs_info->qgroup_flags = 0;
297
298         /*
299          * pass 1: read status, all qgroup infos and limits
300          */
301         key.objectid = 0;
302         key.type = 0;
303         key.offset = 0;
304         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
305         if (ret)
306                 goto out;
307
308         while (1) {
309                 struct btrfs_qgroup *qgroup;
310
311                 slot = path->slots[0];
312                 l = path->nodes[0];
313                 btrfs_item_key_to_cpu(l, &found_key, slot);
314
315                 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
316                         struct btrfs_qgroup_status_item *ptr;
317
318                         ptr = btrfs_item_ptr(l, slot,
319                                              struct btrfs_qgroup_status_item);
320
321                         if (btrfs_qgroup_status_version(l, ptr) !=
322                             BTRFS_QGROUP_STATUS_VERSION) {
323                                 btrfs_err(fs_info,
324                                  "old qgroup version, quota disabled");
325                                 goto out;
326                         }
327                         if (btrfs_qgroup_status_generation(l, ptr) !=
328                             fs_info->generation) {
329                                 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
330                                 btrfs_err(fs_info,
331                                         "qgroup generation mismatch, "
332                                         "marked as inconsistent");
333                         }
334                         fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
335                                                                           ptr);
336                         rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
337                         goto next1;
338                 }
339
340                 if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
341                     found_key.type != BTRFS_QGROUP_LIMIT_KEY)
342                         goto next1;
343
344                 qgroup = find_qgroup_rb(fs_info, found_key.offset);
345                 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
346                     (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
347                         btrfs_err(fs_info, "inconsistent qgroup config");
348                         flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
349                 }
350                 if (!qgroup) {
351                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
352                         if (IS_ERR(qgroup)) {
353                                 ret = PTR_ERR(qgroup);
354                                 goto out;
355                         }
356                 }
357                 switch (found_key.type) {
358                 case BTRFS_QGROUP_INFO_KEY: {
359                         struct btrfs_qgroup_info_item *ptr;
360
361                         ptr = btrfs_item_ptr(l, slot,
362                                              struct btrfs_qgroup_info_item);
363                         qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
364                         qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
365                         qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
366                         qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
367                         /* generation currently unused */
368                         break;
369                 }
370                 case BTRFS_QGROUP_LIMIT_KEY: {
371                         struct btrfs_qgroup_limit_item *ptr;
372
373                         ptr = btrfs_item_ptr(l, slot,
374                                              struct btrfs_qgroup_limit_item);
375                         qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
376                         qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
377                         qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
378                         qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
379                         qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
380                         break;
381                 }
382                 }
383 next1:
384                 ret = btrfs_next_item(quota_root, path);
385                 if (ret < 0)
386                         goto out;
387                 if (ret)
388                         break;
389         }
390         btrfs_release_path(path);
391
392         /*
393          * pass 2: read all qgroup relations
394          */
395         key.objectid = 0;
396         key.type = BTRFS_QGROUP_RELATION_KEY;
397         key.offset = 0;
398         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
399         if (ret)
400                 goto out;
401         while (1) {
402                 slot = path->slots[0];
403                 l = path->nodes[0];
404                 btrfs_item_key_to_cpu(l, &found_key, slot);
405
406                 if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
407                         goto next2;
408
409                 if (found_key.objectid > found_key.offset) {
410                         /* parent <- member, not needed to build config */
411                         /* FIXME should we omit the key completely? */
412                         goto next2;
413                 }
414
415                 ret = add_relation_rb(fs_info, found_key.objectid,
416                                       found_key.offset);
417                 if (ret == -ENOENT) {
418                         btrfs_warn(fs_info,
419                                 "orphan qgroup relation 0x%llx->0x%llx",
420                                 found_key.objectid, found_key.offset);
421                         ret = 0;        /* ignore the error */
422                 }
423                 if (ret)
424                         goto out;
425 next2:
426                 ret = btrfs_next_item(quota_root, path);
427                 if (ret < 0)
428                         goto out;
429                 if (ret)
430                         break;
431         }
432 out:
433         fs_info->qgroup_flags |= flags;
434         if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
435                 fs_info->quota_enabled = 0;
436                 fs_info->pending_quota_state = 0;
437         } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
438                    ret >= 0) {
439                 ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
440         }
441         btrfs_free_path(path);
442
443         if (ret < 0) {
444                 ulist_free(fs_info->qgroup_ulist);
445                 fs_info->qgroup_ulist = NULL;
446                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
447         }
448
449         return ret < 0 ? ret : 0;
450 }
451
452 /*
453  * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
454  * the first two are in single-threaded paths. And for the third one, we have
455  * set quota_root to be null with qgroup_lock held before, so it is safe to
456  * clean up the in-memory structures without qgroup_lock held.
457  */
458 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
459 {
460         struct rb_node *n;
461         struct btrfs_qgroup *qgroup;
462
463         while ((n = rb_first(&fs_info->qgroup_tree))) {
464                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
465                 rb_erase(n, &fs_info->qgroup_tree);
466                 __del_qgroup_rb(qgroup);
467         }
468         /*
469          * we call btrfs_free_qgroup_config() when unmounting the
470          * filesystem and when disabling quota, so we set qgroup_ulist
471          * to NULL here to avoid a double free.
472          */
473         ulist_free(fs_info->qgroup_ulist);
474         fs_info->qgroup_ulist = NULL;
475 }
476
477 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
478                                     struct btrfs_root *quota_root,
479                                     u64 src, u64 dst)
480 {
481         int ret;
482         struct btrfs_path *path;
483         struct btrfs_key key;
484
485         path = btrfs_alloc_path();
486         if (!path)
487                 return -ENOMEM;
488
489         key.objectid = src;
490         key.type = BTRFS_QGROUP_RELATION_KEY;
491         key.offset = dst;
492
493         ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
494
495         btrfs_mark_buffer_dirty(path->nodes[0]);
496
497         btrfs_free_path(path);
498         return ret;
499 }
500
501 static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
502                                     struct btrfs_root *quota_root,
503                                     u64 src, u64 dst)
504 {
505         int ret;
506         struct btrfs_path *path;
507         struct btrfs_key key;
508
509         path = btrfs_alloc_path();
510         if (!path)
511                 return -ENOMEM;
512
513         key.objectid = src;
514         key.type = BTRFS_QGROUP_RELATION_KEY;
515         key.offset = dst;
516
517         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
518         if (ret < 0)
519                 goto out;
520
521         if (ret > 0) {
522                 ret = -ENOENT;
523                 goto out;
524         }
525
526         ret = btrfs_del_item(trans, quota_root, path);
527 out:
528         btrfs_free_path(path);
529         return ret;
530 }
531
532 static int add_qgroup_item(struct btrfs_trans_handle *trans,
533                            struct btrfs_root *quota_root, u64 qgroupid)
534 {
535         int ret;
536         struct btrfs_path *path;
537         struct btrfs_qgroup_info_item *qgroup_info;
538         struct btrfs_qgroup_limit_item *qgroup_limit;
539         struct extent_buffer *leaf;
540         struct btrfs_key key;
541
542 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
543         if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &quota_root->state)))
544                 return 0;
545 #endif
546         path = btrfs_alloc_path();
547         if (!path)
548                 return -ENOMEM;
549
550         key.objectid = 0;
551         key.type = BTRFS_QGROUP_INFO_KEY;
552         key.offset = qgroupid;
553
554         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
555                                       sizeof(*qgroup_info));
556         if (ret)
557                 goto out;
558
559         leaf = path->nodes[0];
560         qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
561                                  struct btrfs_qgroup_info_item);
562         btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
563         btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
564         btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
565         btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
566         btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
567
568         btrfs_mark_buffer_dirty(leaf);
569
570         btrfs_release_path(path);
571
572         key.type = BTRFS_QGROUP_LIMIT_KEY;
573         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
574                                       sizeof(*qgroup_limit));
575         if (ret)
576                 goto out;
577
578         leaf = path->nodes[0];
579         qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
580                                   struct btrfs_qgroup_limit_item);
581         btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
582         btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
583         btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
584         btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
585         btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
586
587         btrfs_mark_buffer_dirty(leaf);
588
589         ret = 0;
590 out:
591         btrfs_free_path(path);
592         return ret;
593 }
594
595 static int del_qgroup_item(struct btrfs_trans_handle *trans,
596                            struct btrfs_root *quota_root, u64 qgroupid)
597 {
598         int ret;
599         struct btrfs_path *path;
600         struct btrfs_key key;
601
602         path = btrfs_alloc_path();
603         if (!path)
604                 return -ENOMEM;
605
606         key.objectid = 0;
607         key.type = BTRFS_QGROUP_INFO_KEY;
608         key.offset = qgroupid;
609         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
610         if (ret < 0)
611                 goto out;
612
613         if (ret > 0) {
614                 ret = -ENOENT;
615                 goto out;
616         }
617
618         ret = btrfs_del_item(trans, quota_root, path);
619         if (ret)
620                 goto out;
621
622         btrfs_release_path(path);
623
624         key.type = BTRFS_QGROUP_LIMIT_KEY;
625         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
626         if (ret < 0)
627                 goto out;
628
629         if (ret > 0) {
630                 ret = -ENOENT;
631                 goto out;
632         }
633
634         ret = btrfs_del_item(trans, quota_root, path);
635
636 out:
637         btrfs_free_path(path);
638         return ret;
639 }
640
641 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
642                                     struct btrfs_root *root, u64 qgroupid,
643                                     u64 flags, u64 max_rfer, u64 max_excl,
644                                     u64 rsv_rfer, u64 rsv_excl)
645 {
646         struct btrfs_path *path;
647         struct btrfs_key key;
648         struct extent_buffer *l;
649         struct btrfs_qgroup_limit_item *qgroup_limit;
650         int ret;
651         int slot;
652
653         key.objectid = 0;
654         key.type = BTRFS_QGROUP_LIMIT_KEY;
655         key.offset = qgroupid;
656
657         path = btrfs_alloc_path();
658         if (!path)
659                 return -ENOMEM;
660
661         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
662         if (ret > 0)
663                 ret = -ENOENT;
664
665         if (ret)
666                 goto out;
667
668         l = path->nodes[0];
669         slot = path->slots[0];
670         qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
671         btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
672         btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
673         btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
674         btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
675         btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
676
677         btrfs_mark_buffer_dirty(l);
678
679 out:
680         btrfs_free_path(path);
681         return ret;
682 }
683
684 static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
685                                    struct btrfs_root *root,
686                                    struct btrfs_qgroup *qgroup)
687 {
688         struct btrfs_path *path;
689         struct btrfs_key key;
690         struct extent_buffer *l;
691         struct btrfs_qgroup_info_item *qgroup_info;
692         int ret;
693         int slot;
694
695 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
696         if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
697                 return 0;
698 #endif
699         key.objectid = 0;
700         key.type = BTRFS_QGROUP_INFO_KEY;
701         key.offset = qgroup->qgroupid;
702
703         path = btrfs_alloc_path();
704         if (!path)
705                 return -ENOMEM;
706
707         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
708         if (ret > 0)
709                 ret = -ENOENT;
710
711         if (ret)
712                 goto out;
713
714         l = path->nodes[0];
715         slot = path->slots[0];
716         qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
717         btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
718         btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
719         btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
720         btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
721         btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
722
723         btrfs_mark_buffer_dirty(l);
724
725 out:
726         btrfs_free_path(path);
727         return ret;
728 }
729
730 static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
731                                      struct btrfs_fs_info *fs_info,
732                                      struct btrfs_root *root)
733 {
734         struct btrfs_path *path;
735         struct btrfs_key key;
736         struct extent_buffer *l;
737         struct btrfs_qgroup_status_item *ptr;
738         int ret;
739         int slot;
740
741         key.objectid = 0;
742         key.type = BTRFS_QGROUP_STATUS_KEY;
743         key.offset = 0;
744
745         path = btrfs_alloc_path();
746         if (!path)
747                 return -ENOMEM;
748
749         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
750         if (ret > 0)
751                 ret = -ENOENT;
752
753         if (ret)
754                 goto out;
755
756         l = path->nodes[0];
757         slot = path->slots[0];
758         ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
759         btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
760         btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
761         btrfs_set_qgroup_status_rescan(l, ptr,
762                                 fs_info->qgroup_rescan_progress.objectid);
763
764         btrfs_mark_buffer_dirty(l);
765
766 out:
767         btrfs_free_path(path);
768         return ret;
769 }
770
771 /*
772  * called with qgroup_lock held
773  */
774 static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
775                                   struct btrfs_root *root)
776 {
777         struct btrfs_path *path;
778         struct btrfs_key key;
779         struct extent_buffer *leaf = NULL;
780         int ret;
781         int nr = 0;
782
783         path = btrfs_alloc_path();
784         if (!path)
785                 return -ENOMEM;
786
787         path->leave_spinning = 1;
788
789         key.objectid = 0;
790         key.offset = 0;
791         key.type = 0;
792
793         while (1) {
794                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
795                 if (ret < 0)
796                         goto out;
797                 leaf = path->nodes[0];
798                 nr = btrfs_header_nritems(leaf);
799                 if (!nr)
800                         break;
801                 /*
802                  * delete the leaves one by one
803                  * since the whole tree is going
804                  * to be deleted.
805                  */
806                 path->slots[0] = 0;
807                 ret = btrfs_del_items(trans, root, path, 0, nr);
808                 if (ret)
809                         goto out;
810
811                 btrfs_release_path(path);
812         }
813         ret = 0;
814 out:
815         root->fs_info->pending_quota_state = 0;
816         btrfs_free_path(path);
817         return ret;
818 }
819
820 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
821                        struct btrfs_fs_info *fs_info)
822 {
823         struct btrfs_root *quota_root;
824         struct btrfs_root *tree_root = fs_info->tree_root;
825         struct btrfs_path *path = NULL;
826         struct btrfs_qgroup_status_item *ptr;
827         struct extent_buffer *leaf;
828         struct btrfs_key key;
829         struct btrfs_key found_key;
830         struct btrfs_qgroup *qgroup = NULL;
831         int ret = 0;
832         int slot;
833
834         mutex_lock(&fs_info->qgroup_ioctl_lock);
835         if (fs_info->quota_root) {
836                 fs_info->pending_quota_state = 1;
837                 goto out;
838         }
839
840         fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
841         if (!fs_info->qgroup_ulist) {
842                 ret = -ENOMEM;
843                 goto out;
844         }
845
846         /*
847          * initially create the quota tree
848          */
849         quota_root = btrfs_create_tree(trans, fs_info,
850                                        BTRFS_QUOTA_TREE_OBJECTID);
851         if (IS_ERR(quota_root)) {
852                 ret = PTR_ERR(quota_root);
853                 goto out;
854         }
855
856         path = btrfs_alloc_path();
857         if (!path) {
858                 ret = -ENOMEM;
859                 goto out_free_root;
860         }
861
862         key.objectid = 0;
863         key.type = BTRFS_QGROUP_STATUS_KEY;
864         key.offset = 0;
865
866         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
867                                       sizeof(*ptr));
868         if (ret)
869                 goto out_free_path;
870
871         leaf = path->nodes[0];
872         ptr = btrfs_item_ptr(leaf, path->slots[0],
873                                  struct btrfs_qgroup_status_item);
874         btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
875         btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
876         fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
877                                 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
878         btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
879         btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
880
881         btrfs_mark_buffer_dirty(leaf);
882
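        /*
         * Create a level-0 qgroup for every subvolume that already
         * exists, by scanning the ROOT_REF items in the tree of tree
         * roots (the offset of each ROOT_REF key is a subvolume id).
         */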
883         key.objectid = 0;
884         key.type = BTRFS_ROOT_REF_KEY;
885         key.offset = 0;
886
887         btrfs_release_path(path);
888         ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
889         if (ret > 0)
890                 goto out_add_root;
891         if (ret < 0)
892                 goto out_free_path;
893
894
895         while (1) {
896                 slot = path->slots[0];
897                 leaf = path->nodes[0];
898                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
899
900                 if (found_key.type == BTRFS_ROOT_REF_KEY) {
901                         ret = add_qgroup_item(trans, quota_root,
902                                               found_key.offset);
903                         if (ret)
904                                 goto out_free_path;
905
906                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
907                         if (IS_ERR(qgroup)) {
908                                 ret = PTR_ERR(qgroup);
909                                 goto out_free_path;
910                         }
911                 }
912                 ret = btrfs_next_item(tree_root, path);
913                 if (ret < 0)
914                         goto out_free_path;
915                 if (ret)
916                         break;
917         }
918
919 out_add_root:
920         btrfs_release_path(path);
921         ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
922         if (ret)
923                 goto out_free_path;
924
925         qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
926         if (IS_ERR(qgroup)) {
927                 ret = PTR_ERR(qgroup);
928                 goto out_free_path;
929         }
930         spin_lock(&fs_info->qgroup_lock);
931         fs_info->quota_root = quota_root;
932         fs_info->pending_quota_state = 1;
933         spin_unlock(&fs_info->qgroup_lock);
934 out_free_path:
935         btrfs_free_path(path);
936 out_free_root:
937         if (ret) {
938                 free_extent_buffer(quota_root->node);
939                 free_extent_buffer(quota_root->commit_root);
940                 kfree(quota_root);
941         }
942 out:
943         if (ret) {
944                 ulist_free(fs_info->qgroup_ulist);
945                 fs_info->qgroup_ulist = NULL;
946         }
947         mutex_unlock(&fs_info->qgroup_ioctl_lock);
948         return ret;
949 }
950
951 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
952                         struct btrfs_fs_info *fs_info)
953 {
954         struct btrfs_root *tree_root = fs_info->tree_root;
955         struct btrfs_root *quota_root;
956         int ret = 0;
957
958         mutex_lock(&fs_info->qgroup_ioctl_lock);
959         if (!fs_info->quota_root)
960                 goto out;
961         spin_lock(&fs_info->qgroup_lock);
962         fs_info->quota_enabled = 0;
963         fs_info->pending_quota_state = 0;
964         quota_root = fs_info->quota_root;
965         fs_info->quota_root = NULL;
966         spin_unlock(&fs_info->qgroup_lock);
967
968         btrfs_free_qgroup_config(fs_info);
969
970         ret = btrfs_clean_quota_tree(trans, quota_root);
971         if (ret)
972                 goto out;
973
974         ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
975         if (ret)
976                 goto out;
977
978         list_del(&quota_root->dirty_list);
979
980         btrfs_tree_lock(quota_root->node);
981         clean_tree_block(trans, tree_root, quota_root->node);
982         btrfs_tree_unlock(quota_root->node);
983         btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
984
985         free_extent_buffer(quota_root->node);
986         free_extent_buffer(quota_root->commit_root);
987         kfree(quota_root);
988 out:
989         mutex_unlock(&fs_info->qgroup_ioctl_lock);
990         return ret;
991 }
992
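/*
 * Mark a qgroup so that its in-memory counters get written back to its
 * info item on disk: dirty qgroups collect on fs_info->dirty_qgroups and
 * are flushed via update_qgroup_info_item() when qgroups are run at
 * transaction commit (see btrfs_run_qgroups()).
 */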
993 static void qgroup_dirty(struct btrfs_fs_info *fs_info,
994                          struct btrfs_qgroup *qgroup)
995 {
996         if (list_empty(&qgroup->dirty))
997                 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
998 }
999
1000 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
1001                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1002 {
1003         struct btrfs_root *quota_root;
1004         struct btrfs_qgroup *parent;
1005         struct btrfs_qgroup *member;
1006         struct btrfs_qgroup_list *list;
1007         int ret = 0;
1008
1009         mutex_lock(&fs_info->qgroup_ioctl_lock);
1010         quota_root = fs_info->quota_root;
1011         if (!quota_root) {
1012                 ret = -EINVAL;
1013                 goto out;
1014         }
1015         member = find_qgroup_rb(fs_info, src);
1016         parent = find_qgroup_rb(fs_info, dst);
1017         if (!member || !parent) {
1018                 ret = -EINVAL;
1019                 goto out;
1020         }
1021
1022         /* check if such a qgroup relation already exists */
1023         list_for_each_entry(list, &member->groups, next_group) {
1024                 if (list->group == parent) {
1025                         ret = -EEXIST;
1026                         goto out;
1027                 }
1028         }
1029
1030         ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1031         if (ret)
1032                 goto out;
1033
1034         ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1035         if (ret) {
1036                 del_qgroup_relation_item(trans, quota_root, src, dst);
1037                 goto out;
1038         }
1039
1040         spin_lock(&fs_info->qgroup_lock);
1041         ret = add_relation_rb(quota_root->fs_info, src, dst);
1042         spin_unlock(&fs_info->qgroup_lock);
1043 out:
1044         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1045         return ret;
1046 }
1047
1048 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1049                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1050 {
1051         struct btrfs_root *quota_root;
1052         struct btrfs_qgroup *parent;
1053         struct btrfs_qgroup *member;
1054         struct btrfs_qgroup_list *list;
1055         int ret = 0;
1056         int err;
1057
1058         mutex_lock(&fs_info->qgroup_ioctl_lock);
1059         quota_root = fs_info->quota_root;
1060         if (!quota_root) {
1061                 ret = -EINVAL;
1062                 goto out;
1063         }
1064
1065         member = find_qgroup_rb(fs_info, src);
1066         parent = find_qgroup_rb(fs_info, dst);
1067         if (!member || !parent) {
1068                 ret = -EINVAL;
1069                 goto out;
1070         }
1071
1072         /* check if such a qgroup relation already exists */
1073         list_for_each_entry(list, &member->groups, next_group) {
1074                 if (list->group == parent)
1075                         goto exist;
1076         }
1077         ret = -ENOENT;
1078         goto out;
1079 exist:
1080         ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1081         err = del_qgroup_relation_item(trans, quota_root, dst, src);
1082         if (err && !ret)
1083                 ret = err;
1084
1085         spin_lock(&fs_info->qgroup_lock);
1086         del_relation_rb(fs_info, src, dst);
1087         spin_unlock(&fs_info->qgroup_lock);
1088 out:
1089         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1090         return ret;
1091 }
1092
1093 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1094                         struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1095 {
1096         struct btrfs_root *quota_root;
1097         struct btrfs_qgroup *qgroup;
1098         int ret = 0;
1099
1100         mutex_lock(&fs_info->qgroup_ioctl_lock);
1101         quota_root = fs_info->quota_root;
1102         if (!quota_root) {
1103                 ret = -EINVAL;
1104                 goto out;
1105         }
1106         qgroup = find_qgroup_rb(fs_info, qgroupid);
1107         if (qgroup) {
1108                 ret = -EEXIST;
1109                 goto out;
1110         }
1111
1112         ret = add_qgroup_item(trans, quota_root, qgroupid);
1113         if (ret)
1114                 goto out;
1115
1116         spin_lock(&fs_info->qgroup_lock);
1117         qgroup = add_qgroup_rb(fs_info, qgroupid);
1118         spin_unlock(&fs_info->qgroup_lock);
1119
1120         if (IS_ERR(qgroup))
1121                 ret = PTR_ERR(qgroup);
1122 out:
1123         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1124         return ret;
1125 }
1126
1127 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1128                         struct btrfs_fs_info *fs_info, u64 qgroupid)
1129 {
1130         struct btrfs_root *quota_root;
1131         struct btrfs_qgroup *qgroup;
1132         int ret = 0;
1133
1134         mutex_lock(&fs_info->qgroup_ioctl_lock);
1135         quota_root = fs_info->quota_root;
1136         if (!quota_root) {
1137                 ret = -EINVAL;
1138                 goto out;
1139         }
1140
1141         qgroup = find_qgroup_rb(fs_info, qgroupid);
1142         if (!qgroup) {
1143                 ret = -ENOENT;
1144                 goto out;
1145         } else {
1146                 /* check if there are no relations to this qgroup */
1147                 if (!list_empty(&qgroup->groups) ||
1148                     !list_empty(&qgroup->members)) {
1149                         ret = -EBUSY;
1150                         goto out;
1151                 }
1152         }
1153         ret = del_qgroup_item(trans, quota_root, qgroupid);
1154
1155         spin_lock(&fs_info->qgroup_lock);
1156         del_qgroup_rb(quota_root->fs_info, qgroupid);
1157         spin_unlock(&fs_info->qgroup_lock);
1158 out:
1159         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1160         return ret;
1161 }
1162
1163 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
1164                        struct btrfs_fs_info *fs_info, u64 qgroupid,
1165                        struct btrfs_qgroup_limit *limit)
1166 {
1167         struct btrfs_root *quota_root;
1168         struct btrfs_qgroup *qgroup;
1169         int ret = 0;
1170
1171         mutex_lock(&fs_info->qgroup_ioctl_lock);
1172         quota_root = fs_info->quota_root;
1173         if (!quota_root) {
1174                 ret = -EINVAL;
1175                 goto out;
1176         }
1177
1178         qgroup = find_qgroup_rb(fs_info, qgroupid);
1179         if (!qgroup) {
1180                 ret = -ENOENT;
1181                 goto out;
1182         }
1183         ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
1184                                        limit->flags, limit->max_rfer,
1185                                        limit->max_excl, limit->rsv_rfer,
1186                                        limit->rsv_excl);
1187         if (ret) {
1188                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1189                 btrfs_info(fs_info, "unable to update quota limit for %llu",
1190                        qgroupid);
1191         }
1192
1193         spin_lock(&fs_info->qgroup_lock);
1194         qgroup->lim_flags = limit->flags;
1195         qgroup->max_rfer = limit->max_rfer;
1196         qgroup->max_excl = limit->max_excl;
1197         qgroup->rsv_rfer = limit->rsv_rfer;
1198         qgroup->rsv_excl = limit->rsv_excl;
1199         spin_unlock(&fs_info->qgroup_lock);
1200 out:
1201         mutex_unlock(&fs_info->qgroup_ioctl_lock);
1202         return ret;
1203 }
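
/*
 * Total order for the qgroup_op_tree: operations sort by bytenr first and
 * by sequence number second, so all operations against the same extent
 * sit next to each other in the tree in the order they were recorded,
 * which is what qgroup_account_deleted_refs() relies on when walking
 * forward with rb_next().
 */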
1204 static int comp_oper(struct btrfs_qgroup_operation *oper1,
1205                      struct btrfs_qgroup_operation *oper2)
1206 {
1207         if (oper1->bytenr < oper2->bytenr)
1208                 return -1;
1209         if (oper1->bytenr > oper2->bytenr)
1210                 return 1;
1211         if (oper1->seq < oper2->seq)
1212                 return -1;
1213         if (oper1->seq > oper2->seq)
1214                 return 1;
1215         if (oper1->ref_root < oper2->ref_root)
1216                 return -1;
1217         if (oper1->ref_root > oper2->ref_root)
1218                 return 1;
1219         if (oper1->type < oper2->type)
1220                 return -1;
1221         if (oper1->type > oper2->type)
1222                 return 1;
1223         return 0;
1224 }
1225
1226 static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
1227                               struct btrfs_qgroup_operation *oper)
1228 {
1229         struct rb_node **p;
1230         struct rb_node *parent = NULL;
1231         struct btrfs_qgroup_operation *cur;
1232         int cmp;
1233
1234         spin_lock(&fs_info->qgroup_op_lock);
1235         p = &fs_info->qgroup_op_tree.rb_node;
1236         while (*p) {
1237                 parent = *p;
1238                 cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
1239                 cmp = comp_oper(cur, oper);
1240                 if (cmp < 0) {
1241                         p = &(*p)->rb_right;
1242                 } else if (cmp) {
1243                         p = &(*p)->rb_left;
1244                 } else {
1245                         spin_unlock(&fs_info->qgroup_op_lock);
1246                         return -EEXIST;
1247                 }
1248         }
1249         rb_link_node(&oper->n, parent, p);
1250         rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
1251         spin_unlock(&fs_info->qgroup_op_lock);
1252         return 0;
1253 }
1254
1255 /*
1256  * Record a quota operation for processing later on.
1257  * @trans: the transaction we are adding the delayed op to.
1258  * @fs_info: the fs_info for this fs.
1259  * @ref_root: the root of the reference we are acting on.
1260  * @bytenr: the bytenr we are acting on.
1261  * @num_bytes: the number of bytes in the reference.
1262  * @type: the type of operation this is.
1263  * @mod_seq: do we need to get a sequence number for looking up roots.
1264  *
1265  * We just add it to our trans qgroup_ref_list and carry on and process these
1266  * operations in order at some later point.  If the reference root isn't a fs
1267  * root then we don't bother with doing anything.
1268  *
1269  * MUST BE HOLDING THE REF LOCK.
1270  */
1271 int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1272                             struct btrfs_fs_info *fs_info, u64 ref_root,
1273                             u64 bytenr, u64 num_bytes,
1274                             enum btrfs_qgroup_operation_type type, int mod_seq)
1275 {
1276         struct btrfs_qgroup_operation *oper;
1277         int ret;
1278
1279         if (!is_fstree(ref_root) || !fs_info->quota_enabled)
1280                 return 0;
1281
1282         oper = kmalloc(sizeof(*oper), GFP_NOFS);
1283         if (!oper)
1284                 return -ENOMEM;
1285
1286         oper->ref_root = ref_root;
1287         oper->bytenr = bytenr;
1288         oper->num_bytes = num_bytes;
1289         oper->type = type;
1290         oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
1291         INIT_LIST_HEAD(&oper->elem.list);
1292         oper->elem.seq = 0;
1293         ret = insert_qgroup_oper(fs_info, oper);
1294         if (ret) {
1295                 /* Shouldn't happen so have an assert for developers */
1296                 ASSERT(0);
1297                 kfree(oper);
1298                 return ret;
1299         }
1300         list_add_tail(&oper->list, &trans->qgroup_ref_list);
1301
1302         if (mod_seq)
1303                 btrfs_get_tree_mod_seq(fs_info, &oper->elem);
1304
1305         return 0;
1306 }
1307
1308 /*
1309  * The easy accounting, if we are adding/removing the only ref for an extent
1310  * then this qgroup and all of the parent qgroups get their reference and
1311  * exclusive counts adjusted.
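 *
 * The parent walk below uses a ulist keyed by qgroupid, so even if the
 * qgroup hierarchy contains diamonds each parent is visited only once.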
1312  */
1313 static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1314                                   struct btrfs_qgroup_operation *oper)
1315 {
1316         struct btrfs_qgroup *qgroup;
1317         struct ulist *tmp;
1318         struct btrfs_qgroup_list *glist;
1319         struct ulist_node *unode;
1320         struct ulist_iterator uiter;
1321         int sign = 0;
1322         int ret = 0;
1323
1324         tmp = ulist_alloc(GFP_NOFS);
1325         if (!tmp)
1326                 return -ENOMEM;
1327
1328         spin_lock(&fs_info->qgroup_lock);
1329         if (!fs_info->quota_root)
1330                 goto out;
1331         qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1332         if (!qgroup)
1333                 goto out;
1334         switch (oper->type) {
1335         case BTRFS_QGROUP_OPER_ADD_EXCL:
1336                 sign = 1;
1337                 break;
1338         case BTRFS_QGROUP_OPER_SUB_EXCL:
1339                 sign = -1;
1340                 break;
1341         default:
1342                 ASSERT(0);
1343         }
1344         qgroup->rfer += sign * oper->num_bytes;
1345         qgroup->rfer_cmpr += sign * oper->num_bytes;
1346
1347         WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
1348         qgroup->excl += sign * oper->num_bytes;
1349         qgroup->excl_cmpr += sign * oper->num_bytes;
1350
1351         qgroup_dirty(fs_info, qgroup);
1352
1353         /* Get all of the parent groups that contain this qgroup */
1354         list_for_each_entry(glist, &qgroup->groups, next_group) {
1355                 ret = ulist_add(tmp, glist->group->qgroupid,
1356                                 ptr_to_u64(glist->group), GFP_ATOMIC);
1357                 if (ret < 0)
1358                         goto out;
1359         }
1360
1361         /* Iterate all of the parents and adjust their reference counts */
1362         ULIST_ITER_INIT(&uiter);
1363         while ((unode = ulist_next(tmp, &uiter))) {
1364                 qgroup = u64_to_ptr(unode->aux);
1365                 qgroup->rfer += sign * oper->num_bytes;
1366                 qgroup->rfer_cmpr += sign * oper->num_bytes;
1367                 qgroup->excl += sign * oper->num_bytes;
1368                 if (sign < 0)
1369                         WARN_ON(qgroup->excl < oper->num_bytes);
1370                 qgroup->excl_cmpr += sign * oper->num_bytes;
1371                 qgroup_dirty(fs_info, qgroup);
1372
1373                 /* Add any parents of the parents */
1374                 list_for_each_entry(glist, &qgroup->groups, next_group) {
1375                         ret = ulist_add(tmp, glist->group->qgroupid,
1376                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1377                         if (ret < 0)
1378                                 goto out;
1379                 }
1380         }
1381         ret = 0;
1382 out:
1383         spin_unlock(&fs_info->qgroup_lock);
1384         ulist_free(tmp);
1385         return ret;
1386 }
1387
1388 /*
1389  * Walk all of the roots that pointed to our bytenr and adjust their refcnts
1390  * properly.
1391  */
1392 static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
1393                                   u64 root_to_skip, struct ulist *tmp,
1394                                   struct ulist *roots, struct ulist *qgroups,
1395                                   u64 seq, int *old_roots, int rescan)
1396 {
1397         struct ulist_node *unode;
1398         struct ulist_iterator uiter;
1399         struct ulist_node *tmp_unode;
1400         struct ulist_iterator tmp_uiter;
1401         struct btrfs_qgroup *qg;
1402         int ret;
1403
1404         ULIST_ITER_INIT(&uiter);
1405         while ((unode = ulist_next(roots, &uiter))) {
1406                 /* We don't count our current root here */
1407                 if (unode->val == root_to_skip)
1408                         continue;
1409                 qg = find_qgroup_rb(fs_info, unode->val);
1410                 if (!qg)
1411                         continue;
1412                 /*
1413                  * We could have a pending removal of this same ref so we may
1414                  * not have actually found our ref root when doing
1415                  * btrfs_find_all_roots, so we need to keep track of how many
1416                  * old roots we find in case we removed ours and added a
1417                  * different one at the same time.  I don't think this could
1418                  * happen in practice but that sort of thinking leads to pain
1419                  * and suffering and to the dark side.
1420                  */
1421                 (*old_roots)++;
1422
1423                 ulist_reinit(tmp);
1424                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1425                                 GFP_ATOMIC);
1426                 if (ret < 0)
1427                         return ret;
1428                 ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg), GFP_ATOMIC);
1429                 if (ret < 0)
1430                         return ret;
1431                 ULIST_ITER_INIT(&tmp_uiter);
1432                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
1433                         struct btrfs_qgroup_list *glist;
1434
1435                         qg = u64_to_ptr(tmp_unode->aux);
1436                         /*
1437                          * We use this sequence number to keep from having to
1438                          * run the whole list and 0 out the refcnt every time.
1439          * We basically use the sequence as the known 0 count and
1440          * then add 1 every time we see a qgroup.  This is how we
1441                          * get how many of the roots actually point up to the
1442                          * upper level qgroups in order to determine exclusive
1443                          * counts.
1444                          *
1445                          * For rescan we want to set old_refcnt to seq so our
1446                          * exclusive calculations end up correct.
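                         *
                         * E.g. with seq == 4, after three roots have been
                         * counted the refcnt goes 4 -> 5 -> 6 -> 7, i.e.
                         * (refcnt - seq) is the number of roots seen.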
1447                          */
1448                         if (rescan)
1449                                 qg->old_refcnt = seq;
1450                         else if (qg->old_refcnt < seq)
1451                                 qg->old_refcnt = seq + 1;
1452                         else
1453                                 qg->old_refcnt++;
1454
1455                         if (qg->new_refcnt < seq)
1456                                 qg->new_refcnt = seq + 1;
1457                         else
1458                                 qg->new_refcnt++;
1459                         list_for_each_entry(glist, &qg->groups, next_group) {
1460                                 ret = ulist_add(qgroups, glist->group->qgroupid,
1461                                                 ptr_to_u64(glist->group),
1462                                                 GFP_ATOMIC);
1463                                 if (ret < 0)
1464                                         return ret;
1465                                 ret = ulist_add(tmp, glist->group->qgroupid,
1466                                                 ptr_to_u64(glist->group),
1467                                                 GFP_ATOMIC);
1468                                 if (ret < 0)
1469                                         return ret;
1470                         }
1471                 }
1472         }
1473         return 0;
1474 }
1475
1476 /*
1477  * We need to walk forward in our operation tree and account for any roots that
1478  * were deleted after we made this operation.
1479  */
1480 static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
1481                                        struct btrfs_qgroup_operation *oper,
1482                                        struct ulist *tmp,
1483                                        struct ulist *qgroups, u64 seq,
1484                                        int *old_roots)
1485 {
1486         struct ulist_node *unode;
1487         struct ulist_iterator uiter;
1488         struct btrfs_qgroup *qg;
1489         struct btrfs_qgroup_operation *tmp_oper;
1490         struct rb_node *n;
1491         int ret;
1492
1493         ulist_reinit(tmp);
1494
1495         /*
1496          * We only walk forward in the tree since we're only interested in
1497          * removals that happened _after_ our operation.
1498          */
1499         spin_lock(&fs_info->qgroup_op_lock);
1500         n = rb_next(&oper->n);
1501         spin_unlock(&fs_info->qgroup_op_lock);
1502         if (!n)
1503                 return 0;
1504         tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1505         while (tmp_oper->bytenr == oper->bytenr) {
1506                 /*
1507                  * If it's not a removal we don't care, additions work out
1508                  * properly with our refcnt tracking.
1509                  */
1510                 if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
1511                     tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
1512                         goto next;
1513                 qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
1514                 if (!qg)
1515                         goto next;
1516                 ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
1517                                 GFP_ATOMIC);
1518                 if (ret) {
1519                         if (ret < 0)
1520                                 return ret;
1521                         /*
1522                          * We only want to increase old_roots if this qgroup is
1523                          * not already in the list of qgroups.  If it is already
1524                          * there then that means it must have been re-added or
1525                          * the delete will be discarded because we had an
1526                          * existing ref that we haven't looked up yet.  In this
1527                          * case we don't want to increase old_roots.  So if ret
1528                          * == 1 then we know that this is the first time we've
1529                          * seen this qgroup and we can bump the old_roots.
1530                          */
1531                         (*old_roots)++;
1532                         ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
1533                                         GFP_ATOMIC);
1534                         if (ret < 0)
1535                                 return ret;
1536                 }
1537 next:
1538                 spin_lock(&fs_info->qgroup_op_lock);
1539                 n = rb_next(&tmp_oper->n);
1540                 spin_unlock(&fs_info->qgroup_op_lock);
1541                 if (!n)
1542                         break;
1543                 tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
1544         }
1545
1546         /* Ok now process the qgroups we found */
1547         ULIST_ITER_INIT(&uiter);
1548         while ((unode = ulist_next(tmp, &uiter))) {
1549                 struct btrfs_qgroup_list *glist;
1550
1551                 qg = u64_to_ptr(unode->aux);
1552                 if (qg->old_refcnt < seq)
1553                         qg->old_refcnt = seq + 1;
1554                 else
1555                         qg->old_refcnt++;
1556                 if (qg->new_refcnt < seq)
1557                         qg->new_refcnt = seq + 1;
1558                 else
1559                         qg->new_refcnt++;
1560                 list_for_each_entry(glist, &qg->groups, next_group) {
1561                         ret = ulist_add(qgroups, glist->group->qgroupid,
1562                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1563                         if (ret < 0)
1564                                 return ret;
1565                         ret = ulist_add(tmp, glist->group->qgroupid,
1566                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1567                         if (ret < 0)
1568                                 return ret;
1569                 }
1570         }
1571         return 0;
1572 }
1573
1574 /* Add refcnt for the newly added reference. */
1575 static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
1576                                   struct btrfs_qgroup_operation *oper,
1577                                   struct btrfs_qgroup *qgroup,
1578                                   struct ulist *tmp, struct ulist *qgroups,
1579                                   u64 seq)
1580 {
1581         struct ulist_node *unode;
1582         struct ulist_iterator uiter;
1583         struct btrfs_qgroup *qg;
1584         int ret;
1585
1586         ulist_reinit(tmp);
1587         ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
1588                         GFP_ATOMIC);
1589         if (ret < 0)
1590                 return ret;
1591         ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
1592                         GFP_ATOMIC);
1593         if (ret < 0)
1594                 return ret;
1595         ULIST_ITER_INIT(&uiter);
1596         while ((unode = ulist_next(tmp, &uiter))) {
1597                 struct btrfs_qgroup_list *glist;
1598
1599                 qg = u64_to_ptr(unode->aux);
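                     /*
                      * An added ref exists only in the new state and a removed
                      * ref only in the old state, so bump whichever count this
                      * operation contributes to.
                      */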
1600                 if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1601                         if (qg->new_refcnt < seq)
1602                                 qg->new_refcnt = seq + 1;
1603                         else
1604                                 qg->new_refcnt++;
1605                 } else {
1606                         if (qg->old_refcnt < seq)
1607                                 qg->old_refcnt = seq + 1;
1608                         else
1609                                 qg->old_refcnt++;
1610                 }
1611                 list_for_each_entry(glist, &qg->groups, next_group) {
1612                         ret = ulist_add(tmp, glist->group->qgroupid,
1613                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1614                         if (ret < 0)
1615                                 return ret;
1616                         ret = ulist_add(qgroups, glist->group->qgroupid,
1617                                         ptr_to_u64(glist->group), GFP_ATOMIC);
1618                         if (ret < 0)
1619                                 return ret;
1620                 }
1621         }
1622         return 0;
1623 }
1624
1625 /*
1626  * This adjusts the counters for all referenced qgroups if need be.
1627  */
1628 static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
1629                                   u64 root_to_skip, u64 num_bytes,
1630                                   struct ulist *qgroups, u64 seq,
1631                                   int old_roots, int new_roots, int rescan)
1632 {
1633         struct ulist_node *unode;
1634         struct ulist_iterator uiter;
1635         struct btrfs_qgroup *qg;
1636         u64 cur_new_count, cur_old_count;
1637
1638         ULIST_ITER_INIT(&uiter);
1639         while ((unode = ulist_next(qgroups, &uiter))) {
1640                 bool dirty = false;
1641
1642                 qg = u64_to_ptr(unode->aux);
1643                 /*
1644                  * Wasn't referenced before but is now, add to the reference
1645                  * counters.
1646                  */
1647                 if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
1648                         qg->rfer += num_bytes;
1649                         qg->rfer_cmpr += num_bytes;
1650                         dirty = true;
1651                 }
1652
1653                 /*
1654                  * Was referenced before but isn't now, subtract from the
1655                  * reference counters.
1656                  */
1657                 if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
1658                         qg->rfer -= num_bytes;
1659                         qg->rfer_cmpr -= num_bytes;
1660                         dirty = true;
1661                 }
1662
1663                 if (qg->old_refcnt < seq)
1664                         cur_old_count = 0;
1665                 else
1666                         cur_old_count = qg->old_refcnt - seq;
1667                 if (qg->new_refcnt < seq)
1668                         cur_new_count = 0;
1669                 else
1670                         cur_new_count = qg->new_refcnt - seq;
1671
1672                 /*
1673                  * If our refcount was the same as the roots previously but our
1674                  * new count isn't the same as the number of roots now then we
1675                  * went from having an exclusive reference on this range to not.
1676                  */
1677                 if (old_roots && cur_old_count == old_roots &&
1678                     (cur_new_count != new_roots || new_roots == 0)) {
1679                         WARN_ON(cur_new_count != new_roots && new_roots == 0);
1680                         qg->excl -= num_bytes;
1681                         qg->excl_cmpr -= num_bytes;
1682                         dirty = true;
1683                 }
1684
1685                 /*
1686                  * If we didn't reference all the roots before but now we do we
1687                  * have an exclusive reference to this range.
1688                  */
1689                 if ((!old_roots || cur_old_count != old_roots) &&
1690                     cur_new_count == new_roots) {
1691                         qg->excl += num_bytes;
1692                         qg->excl_cmpr += num_bytes;
1693                         dirty = true;
1694                 }
1695
1696                 if (dirty)
1697                         qgroup_dirty(fs_info, qg);
1698         }
1699         return 0;
1700 }
1701
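     /*
      * A worked example of the rules above (values made up for illustration):
      * take seq = 4, num_bytes = 4096, an extent referenced only by a root
      * under qgroup A (old_roots = 1), and an addition of a second root under
      * qgroup B (new_roots = 2).
      *
      * For A both refcnts are seq + 1, so cur_old_count = cur_new_count = 1:
      * rfer is unchanged, but cur_old_count == old_roots while cur_new_count
      * != new_roots, so A loses exclusivity and excl shrinks by 4096.
      *
      * For B old_refcnt <= seq and new_refcnt = seq + 1: it is newly
      * referenced, so rfer grows by 4096, and since cur_new_count (1) !=
      * new_roots (2) it gains no exclusive bytes.
      */
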
1702 /*
1703  * If we removed a data extent and there were other references for that bytenr
1704  * then we need to lookup all referenced roots to make sure we still don't
1705  * reference this bytenr.  If we do then we can just discard this operation.
1706  */
1707 static int check_existing_refs(struct btrfs_trans_handle *trans,
1708                                struct btrfs_fs_info *fs_info,
1709                                struct btrfs_qgroup_operation *oper)
1710 {
1711         struct ulist *roots = NULL;
1712         struct ulist_node *unode;
1713         struct ulist_iterator uiter;
1714         int ret = 0;
1715
1716         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1717                                    oper->elem.seq, &roots);
1718         if (ret < 0)
1719                 return ret;
1720         ret = 0;
1721
1722         ULIST_ITER_INIT(&uiter);
1723         while ((unode = ulist_next(roots, &uiter))) {
1724                 if (unode->val == oper->ref_root) {
1725                         ret = 1;
1726                         break;
1727                 }
1728         }
1729         ulist_free(roots);
1730         btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1731
1732         return ret;
1733 }
1734
1735 /*
1736  * If we share a reference across multiple roots then we may need to adjust
1737  * various qgroups referenced and exclusive counters.  The basic premise is this:
1738  *
1739  * 1) We have seq to represent a 0 count.  Instead of looping through all of the
1740  * qgroups and resetting their refcount to 0 we just constantly bump this
1741  * sequence number to act as the base reference count.  This means that if
1742  * anybody is equal to or below this sequence they were never referenced.  We
1743  * jack this sequence up by the number of roots we found each time in order to
1744  * make sure we don't have any overlap.
1745  *
1746  * 2) We first search all the roots that reference the area _except_ the root
1747  * we're acting on currently.  This makes up the old_refcnt of all the qgroups
1748  * before.
1749  *
1750  * 3) We walk all of the qgroups referenced by the root we are currently acting
1751  * on, and will either adjust old_refcnt in the case of a removal or the
1752  * new_refcnt in the case of an addition.
1753  *
1754  * 4) Finally we walk all the qgroups that are referenced by this range
1755  * including the root we are acting on currently.  We will adjust the counters
1756  * based on the number of roots we had and will have after this operation.
1757  *
1758  * Take this example as an illustration
1759  *
1760  *                      [qgroup 1/0]
1761  *                   /         |          \
1762  *              [qg 0/0]   [qg 0/1]     [qg 0/2]
1763  *                 \          |            /
1764  *                [        extent           ]
1765  *
1766  * Say we are adding a reference that is covered by qg 0/0.  The first step
1767  * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
1768  * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
1769  * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
1770  * new_refcnt, bringing it to 3.  We then walk through all of the qgroups
1771  * and notice that the old refcnt for qg 0/0 is less than the new refcnt,
1772  * so we added a reference and thus must add the size to the referenced
1773  * bytes.  Everything else is the same so nothing else changes.
1774  */
1775 static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
1776                                     struct btrfs_fs_info *fs_info,
1777                                     struct btrfs_qgroup_operation *oper)
1778 {
1779         struct ulist *roots = NULL;
1780         struct ulist *qgroups, *tmp;
1781         struct btrfs_qgroup *qgroup;
1782         struct seq_list elem = {};
1783         u64 seq;
1784         int old_roots = 0;
1785         int new_roots = 0;
1786         int ret = 0;
1787
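             /*
              * A non-zero elem.seq means a tree mod log sequence was recorded
              * when this delete was queued; if check_existing_refs() finds the
              * root still referencing the bytenr through another ref, this
              * operation is a no-op and can be dropped.
              */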
1788         if (oper->elem.seq) {
1789                 ret = check_existing_refs(trans, fs_info, oper);
1790                 if (ret < 0)
1791                         return ret;
1792                 if (ret)
1793                         return 0;
1794         }
1795
1796         qgroups = ulist_alloc(GFP_NOFS);
1797         if (!qgroups)
1798                 return -ENOMEM;
1799
1800         tmp = ulist_alloc(GFP_NOFS);
1801         if (!tmp) {
1802                 ulist_free(qgroups);
                     return -ENOMEM;
             }
1803
1804         btrfs_get_tree_mod_seq(fs_info, &elem);
1805         ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
1806                                    &roots);
1807         btrfs_put_tree_mod_seq(fs_info, &elem);
1808         if (ret < 0) {
1809                 ulist_free(qgroups);
1810                 ulist_free(tmp);
1811                 return ret;
1812         }
1813         spin_lock(&fs_info->qgroup_lock);
1814         qgroup = find_qgroup_rb(fs_info, oper->ref_root);
1815         if (!qgroup)
1816                 goto out;
1817         seq = fs_info->qgroup_seq;
1818
1819         /*
1820          * So roots is the list of all the roots currently pointing at the
1821          * bytenr, including the ref we are adding if we are adding, or not if
1822          * we are removing a ref.  So we pass in the ref_root to skip that root
1823          * in our calculations.  We set old_refcnt and new_refcnt because we
1824          * don't know what everything looked like before, and it doesn't
1825          * matter except...
1826          */
1827         ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
1828                                      seq, &old_roots, 0);
1829         if (ret < 0)
1830                 goto out;
1831
1832         /*
1833          * Now adjust the refcounts of the qgroups that care about this
1834          * reference, either the old_count in the case of removal or new_count
1835          * in the case of an addition.
1836          */
1837         ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
1838                                      seq);
1839         if (ret < 0)
1840                 goto out;
1841
1842         /*
1843          * ...in the case of removals.  If we had a removal before we got around
1844          * to processing this operation then we need to find that operation and
1845          * count its references as if they still existed so we don't end up
1846          * screwing up the exclusive counts.  Then whenever we go to process the
1847          * delete everything will be consistent and we can account for whatever
1848          * exclusive changes need to be made there.  We also have to pass in
1849          * old_roots so we have an accurate count of the roots as it pertains to
1850          * this operation's view of the world.
1851          */
1852         ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
1853                                           &old_roots);
1854         if (ret < 0)
1855                 goto out;
1856
1857         /*
1858          * If we are adding our root we need to adjust up the number of roots,
1859          * otherwise old_roots is already the number of roots we want.
1860          */
1861         if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
1862                 new_roots = old_roots + 1;
1863         } else {
1864                 new_roots = old_roots;
1865                 old_roots++;
1866         }
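             /*
              * The counts used this round run no higher than seq + old_roots + 1
              * (reached in the add case), so advancing the global sequence by
              * old_roots + 1 leaves every count at or below the new base, i.e.
              * reading as zero in the next accounting round.
              */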
1867         fs_info->qgroup_seq += old_roots + 1;
1868
1869
1870         /*
1871          * And now the magic happens, bless Arne for having a pretty elegant
1872          * solution for this.
1873          */
1874         qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
1875                                qgroups, seq, old_roots, new_roots, 0);
1876 out:
1877         spin_unlock(&fs_info->qgroup_lock);
1878         ulist_free(qgroups);
1879         ulist_free(roots);
1880         ulist_free(tmp);
1881         return ret;
1882 }
1883
1884 /*
1885  * btrfs_qgroup_account is called for every ref that is added to or deleted
1886  * from the fs. First, all roots referencing the extent are searched, and
1887  * then the space is accounted to the different roots accordingly. The
1888  * accounting algorithm works in 3 steps documented inline.
1889  */
1890 static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
1891                                 struct btrfs_fs_info *fs_info,
1892                                 struct btrfs_qgroup_operation *oper)
1893 {
1894         int ret = 0;
1895
1896         if (!fs_info->quota_enabled)
1897                 return 0;
1898
1899         BUG_ON(!fs_info->quota_root);
1900
1901         mutex_lock(&fs_info->qgroup_rescan_lock);
1902         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
1903                 if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
1904                         mutex_unlock(&fs_info->qgroup_rescan_lock);
1905                         return 0;
1906                 }
1907         }
1908         mutex_unlock(&fs_info->qgroup_rescan_lock);
1909
1910         ASSERT(is_fstree(oper->ref_root));
1911
1912         switch (oper->type) {
1913         case BTRFS_QGROUP_OPER_ADD_EXCL:
1914         case BTRFS_QGROUP_OPER_SUB_EXCL:
1915                 ret = qgroup_excl_accounting(fs_info, oper);
1916                 break;
1917         case BTRFS_QGROUP_OPER_ADD_SHARED:
1918         case BTRFS_QGROUP_OPER_SUB_SHARED:
1919                 ret = qgroup_shared_accounting(trans, fs_info, oper);
1920                 break;
1921         default:
1922                 ASSERT(0);
1923         }
1924         return ret;
1925 }
1926
1927 /*
1928  * Needs to be called every time we run delayed refs, even if there is an
1929  * error, in order to clean up outstanding operations.
1930  */
1931 int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
1932                                     struct btrfs_fs_info *fs_info)
1933 {
1934         struct btrfs_qgroup_operation *oper;
1935         int ret = 0;
1936
1937         while (!list_empty(&trans->qgroup_ref_list)) {
1938                 oper = list_first_entry(&trans->qgroup_ref_list,
1939                                         struct btrfs_qgroup_operation, list);
1940                 list_del_init(&oper->list);
1941                 if (!ret && !trans->aborted)
1942                         ret = btrfs_qgroup_account(trans, fs_info, oper);
1943                 spin_lock(&fs_info->qgroup_op_lock);
1944                 rb_erase(&oper->n, &fs_info->qgroup_op_tree);
1945                 spin_unlock(&fs_info->qgroup_op_lock);
1946                 btrfs_put_tree_mod_seq(fs_info, &oper->elem);
1947                 kfree(oper);
1948         }
1949         return ret;
1950 }
1951
1952 /*
1953  * called from commit_transaction. Writes all changed qgroups to disk.
1954  */
1955 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
1956                       struct btrfs_fs_info *fs_info)
1957 {
1958         struct btrfs_root *quota_root = fs_info->quota_root;
1959         int ret = 0;
1960         int start_rescan_worker = 0;
1961
1962         if (!quota_root)
1963                 goto out;
1964
1965         if (!fs_info->quota_enabled && fs_info->pending_quota_state)
1966                 start_rescan_worker = 1;
1967
1968         fs_info->quota_enabled = fs_info->pending_quota_state;
1969
1970         spin_lock(&fs_info->qgroup_lock);
1971         while (!list_empty(&fs_info->dirty_qgroups)) {
1972                 struct btrfs_qgroup *qgroup;
1973                 qgroup = list_first_entry(&fs_info->dirty_qgroups,
1974                                           struct btrfs_qgroup, dirty);
1975                 list_del_init(&qgroup->dirty);
1976                 spin_unlock(&fs_info->qgroup_lock);
1977                 ret = update_qgroup_info_item(trans, quota_root, qgroup);
1978                 if (ret)
1979                         fs_info->qgroup_flags |=
1980                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1981                 spin_lock(&fs_info->qgroup_lock);
1982         }
1983         if (fs_info->quota_enabled)
1984                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
1985         else
1986                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1987         spin_unlock(&fs_info->qgroup_lock);
1988
1989         ret = update_qgroup_status_item(trans, fs_info, quota_root);
1990         if (ret)
1991                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1992
1993         if (!ret && start_rescan_worker) {
1994                 ret = qgroup_rescan_init(fs_info, 0, 1);
1995                 if (!ret) {
1996                         qgroup_rescan_zero_tracking(fs_info);
1997                         btrfs_queue_work(fs_info->qgroup_rescan_workers,
1998                                          &fs_info->qgroup_rescan_work);
1999                 }
2000                 ret = 0;
2001         }
2002
2003 out:
2004
2005         return ret;
2006 }
2007
2008 /*
2009  * copy the accounting information between qgroups. This is necessary when a
2010  * snapshot or a subvolume is created.
2011  */
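     /*
      * The qgroupids involved travel in a flat u64 array placed directly
      * behind the inherit struct; a sketch of the layout as the code below
      * consumes it:
      *
      *   num_qgroups ids | num_ref_copies (src, dst) pairs |
      *                     num_excl_copies (src, dst) pairs
      *
      * hence nums = num_qgroups + 2 * num_ref_copies + 2 * num_excl_copies
      * in the validation loop.
      */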
2012 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2013                          struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
2014                          struct btrfs_qgroup_inherit *inherit)
2015 {
2016         int ret = 0;
2017         int i;
2018         u64 *i_qgroups;
2019         struct btrfs_root *quota_root = fs_info->quota_root;
2020         struct btrfs_qgroup *srcgroup;
2021         struct btrfs_qgroup *dstgroup;
2022         u32 level_size = 0;
2023         u64 nums;
2024
2025         mutex_lock(&fs_info->qgroup_ioctl_lock);
2026         if (!fs_info->quota_enabled)
2027                 goto out;
2028
2029         if (!quota_root) {
2030                 ret = -EINVAL;
2031                 goto out;
2032         }
2033
2034         if (inherit) {
2035                 i_qgroups = (u64 *)(inherit + 1);
2036                 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2037                        2 * inherit->num_excl_copies;
2038                 for (i = 0; i < nums; ++i) {
2039                         srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
2040                         if (!srcgroup) {
2041                                 ret = -EINVAL;
2042                                 goto out;
2043                         }
2044                         ++i_qgroups;
2045                 }
2046         }
2047
2048         /*
2049          * create a tracking group for the subvol itself
2050          */
2051         ret = add_qgroup_item(trans, quota_root, objectid);
2052         if (ret)
2053                 goto out;
2054
2055         if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2056                 ret = update_qgroup_limit_item(trans, quota_root, objectid,
2057                                                inherit->lim.flags,
2058                                                inherit->lim.max_rfer,
2059                                                inherit->lim.max_excl,
2060                                                inherit->lim.rsv_rfer,
2061                                                inherit->lim.rsv_excl);
2062                 if (ret)
2063                         goto out;
2064         }
2065
2066         if (srcid) {
2067                 struct btrfs_root *srcroot;
2068                 struct btrfs_key srckey;
2069                 int srcroot_level;
2070
2071                 srckey.objectid = srcid;
2072                 srckey.type = BTRFS_ROOT_ITEM_KEY;
2073                 srckey.offset = (u64)-1;
2074                 srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
2075                 if (IS_ERR(srcroot)) {
2076                         ret = PTR_ERR(srcroot);
2077                         goto out;
2078                 }
2079
2080                 rcu_read_lock();
2081                 srcroot_level = btrfs_header_level(srcroot->node);
2082                 level_size = btrfs_level_size(srcroot, srcroot_level);
2083                 rcu_read_unlock();
2084         }
2085
2086         /*
2087          * add qgroup to all inherited groups
2088          */
2089         if (inherit) {
2090                 i_qgroups = (u64 *)(inherit + 1);
2091                 for (i = 0; i < inherit->num_qgroups; ++i) {
2092                         ret = add_qgroup_relation_item(trans, quota_root,
2093                                                        objectid, *i_qgroups);
2094                         if (ret)
2095                                 goto out;
2096                         ret = add_qgroup_relation_item(trans, quota_root,
2097                                                        *i_qgroups, objectid);
2098                         if (ret)
2099                                 goto out;
2100                         ++i_qgroups;
2101                 }
2102         }
2103
2104
2105         spin_lock(&fs_info->qgroup_lock);
2106
2107         dstgroup = add_qgroup_rb(fs_info, objectid);
2108         if (IS_ERR(dstgroup)) {
2109                 ret = PTR_ERR(dstgroup);
2110                 goto unlock;
2111         }
2112
2113         if (srcid) {
2114                 srcgroup = find_qgroup_rb(fs_info, srcid);
2115                 if (!srcgroup)
2116                         goto unlock;
2117
2118                 /*
2119                  * We call inherit after we clone the root in order to make sure
2120                  * our counts don't go crazy, so at this point the only
2121                  * difference between the two roots should be the root node.
2122                  */
2123                 dstgroup->rfer = srcgroup->rfer;
2124                 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
2125                 dstgroup->excl = level_size;
2126                 dstgroup->excl_cmpr = level_size;
2127                 srcgroup->excl = level_size;
2128                 srcgroup->excl_cmpr = level_size;
2129                 qgroup_dirty(fs_info, dstgroup);
2130                 qgroup_dirty(fs_info, srcgroup);
2131         }
2132
2133         if (!inherit)
2134                 goto unlock;
2135
2136         i_qgroups = (u64 *)(inherit + 1);
2137         for (i = 0; i < inherit->num_qgroups; ++i) {
2138                 ret = add_relation_rb(quota_root->fs_info, objectid,
2139                                       *i_qgroups);
2140                 if (ret)
2141                         goto unlock;
2142                 ++i_qgroups;
2143         }
2144
2145         for (i = 0; i < inherit->num_ref_copies; ++i) {
2146                 struct btrfs_qgroup *src;
2147                 struct btrfs_qgroup *dst;
2148
2149                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2150                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2151
2152                 if (!src || !dst) {
2153                         ret = -EINVAL;
2154                         goto unlock;
2155                 }
2156
2157                 dst->rfer = src->rfer - level_size;
2158                 dst->rfer_cmpr = src->rfer_cmpr - level_size;
2159                 i_qgroups += 2;
2160         }
2161         for (i = 0; i < inherit->num_excl_copies; ++i) {
2162                 struct btrfs_qgroup *src;
2163                 struct btrfs_qgroup *dst;
2164
2165                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
2166                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
2167
2168                 if (!src || !dst) {
2169                         ret = -EINVAL;
2170                         goto unlock;
2171                 }
2172
2173                 dst->excl = src->excl + level_size;
2174                 dst->excl_cmpr = src->excl_cmpr + level_size;
2175                 i_qgroups += 2;
2176         }
2177
2178 unlock:
2179         spin_unlock(&fs_info->qgroup_lock);
2180 out:
2181         mutex_unlock(&fs_info->qgroup_ioctl_lock);
2182         return ret;
2183 }
2184
2185 /*
2186  * reserve some space for a qgroup and all its parents. The reservation takes
2187  * place with start_transaction or dealloc_reserve, similar to ENOSPC
2188  * accounting. If not enough space is available, EDQUOT is returned.
2189  * We assume that the requested space is new for all qgroups.
2190  */
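     /*
      * For example (sizes made up for illustration): with max_rfer = 1 MiB,
      * rfer = 900 KiB and reserved = 100 KiB, a further 64 KiB request fails
      * the check
      *
      *   reserved + rfer + num_bytes > max_rfer
      *
      * for this qgroup (or for any of its parents) and -EDQUOT is returned
      * before any counter is touched.
      */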
2191 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
2192 {
2193         struct btrfs_root *quota_root;
2194         struct btrfs_qgroup *qgroup;
2195         struct btrfs_fs_info *fs_info = root->fs_info;
2196         u64 ref_root = root->root_key.objectid;
2197         int ret = 0;
2198         struct ulist_node *unode;
2199         struct ulist_iterator uiter;
2200
2201         if (!is_fstree(ref_root))
2202                 return 0;
2203
2204         if (num_bytes == 0)
2205                 return 0;
2206
2207         spin_lock(&fs_info->qgroup_lock);
2208         quota_root = fs_info->quota_root;
2209         if (!quota_root)
2210                 goto out;
2211
2212         qgroup = find_qgroup_rb(fs_info, ref_root);
2213         if (!qgroup)
2214                 goto out;
2215
2216         /*
2217          * in a first step, we check for all affected qgroups whether any
2218          * limits would be exceeded
2219          */
2220         ulist_reinit(fs_info->qgroup_ulist);
2221         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2222                         (uintptr_t)qgroup, GFP_ATOMIC);
2223         if (ret < 0)
2224                 goto out;
2225         ULIST_ITER_INIT(&uiter);
2226         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2227                 struct btrfs_qgroup *qg;
2228                 struct btrfs_qgroup_list *glist;
2229
2230                 qg = u64_to_ptr(unode->aux);
2231
2232                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
2233                     qg->reserved + (s64)qg->rfer + num_bytes >
2234                     qg->max_rfer) {
2235                         ret = -EDQUOT;
2236                         goto out;
2237                 }
2238
2239                 if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
2240                     qg->reserved + (s64)qg->excl + num_bytes >
2241                     qg->max_excl) {
2242                         ret = -EDQUOT;
2243                         goto out;
2244                 }
2245
2246                 list_for_each_entry(glist, &qg->groups, next_group) {
2247                         ret = ulist_add(fs_info->qgroup_ulist,
2248                                         glist->group->qgroupid,
2249                                         (uintptr_t)glist->group, GFP_ATOMIC);
2250                         if (ret < 0)
2251                                 goto out;
2252                 }
2253         }
2254         ret = 0;
2255         /*
2256          * no limits exceeded, now record the reservation into all qgroups
2257          */
2258         ULIST_ITER_INIT(&uiter);
2259         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2260                 struct btrfs_qgroup *qg;
2261
2262                 qg = u64_to_ptr(unode->aux);
2263
2264                 qg->reserved += num_bytes;
2265         }
2266
2267 out:
2268         spin_unlock(&fs_info->qgroup_lock);
2269         return ret;
2270 }
2271
2272 void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
2273 {
2274         struct btrfs_root *quota_root;
2275         struct btrfs_qgroup *qgroup;
2276         struct btrfs_fs_info *fs_info = root->fs_info;
2277         struct ulist_node *unode;
2278         struct ulist_iterator uiter;
2279         u64 ref_root = root->root_key.objectid;
2280         int ret = 0;
2281
2282         if (!is_fstree(ref_root))
2283                 return;
2284
2285         if (num_bytes == 0)
2286                 return;
2287
2288         spin_lock(&fs_info->qgroup_lock);
2289
2290         quota_root = fs_info->quota_root;
2291         if (!quota_root)
2292                 goto out;
2293
2294         qgroup = find_qgroup_rb(fs_info, ref_root);
2295         if (!qgroup)
2296                 goto out;
2297
2298         ulist_reinit(fs_info->qgroup_ulist);
2299         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
2300                         (uintptr_t)qgroup, GFP_ATOMIC);
2301         if (ret < 0)
2302                 goto out;
2303         ULIST_ITER_INIT(&uiter);
2304         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
2305                 struct btrfs_qgroup *qg;
2306                 struct btrfs_qgroup_list *glist;
2307
2308                 qg = u64_to_ptr(unode->aux);
2309
2310                 qg->reserved -= num_bytes;
2311
2312                 list_for_each_entry(glist, &qg->groups, next_group) {
2313                         ret = ulist_add(fs_info->qgroup_ulist,
2314                                         glist->group->qgroupid,
2315                                         (uintptr_t)glist->group, GFP_ATOMIC);
2316                         if (ret < 0)
2317                                 goto out;
2318                 }
2319         }
2320
2321 out:
2322         spin_unlock(&fs_info->qgroup_lock);
2323 }
2324
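     /*
      * btrfs_qgroup_free() is the inverse of btrfs_qgroup_reserve(): it walks
      * the same hierarchy and hands num_bytes back to every qgroup's reserved
      * counter.  A minimal sketch of how a caller might pair the two
      * (hypothetical caller, not taken from this file):
      *
      *   ret = btrfs_qgroup_reserve(root, nbytes);
      *   if (ret)
      *           return ret;                     (typically -EDQUOT)
      *   ret = do_the_write(...);                (hypothetical helper)
      *   if (ret)
      *           btrfs_qgroup_free(root, nbytes);
      *   return ret;
      */
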
2325 void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
2326 {
2327         if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
2328                 return;
2329         btrfs_err(trans->root->fs_info,
2330                 "qgroups not uptodate in trans handle %p: list is%s empty, "
2331                 "seq is %#x.%x",
2332                 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
2333                 (u32)(trans->delayed_ref_elem.seq >> 32),
2334                 (u32)trans->delayed_ref_elem.seq);
2335         BUG();
2336 }
2337
2338 /*
2339  * returns < 0 on error, 0 when more leaves are to be scanned.
2340  * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
2341  */
2342 static int
2343 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
2344                    struct btrfs_trans_handle *trans, struct ulist *qgroups,
2345                    struct ulist *tmp, struct extent_buffer *scratch_leaf)
2346 {
2347         struct btrfs_key found;
2348         struct ulist *roots = NULL;
2349         struct seq_list tree_mod_seq_elem = {};
2350         u64 num_bytes;
2351         u64 seq;
2352         int new_roots;
2353         int slot;
2354         int ret;
2355
2356         path->leave_spinning = 1;
2357         mutex_lock(&fs_info->qgroup_rescan_lock);
2358         ret = btrfs_search_slot_for_read(fs_info->extent_root,
2359                                          &fs_info->qgroup_rescan_progress,
2360                                          path, 1, 0);
2361
2362         pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
2363                  fs_info->qgroup_rescan_progress.objectid,
2364                  fs_info->qgroup_rescan_progress.type,
2365                  fs_info->qgroup_rescan_progress.offset, ret);
2366
2367         if (ret) {
2368                 /*
2369                  * The rescan is about to end, we will not be scanning any
2370                  * further blocks. We cannot unset the RESCAN flag here, because
2371                  * we want to commit the transaction if everything went well.
2372                  * To make the live accounting work in this phase, we set our
2373                  * scan progress pointer such that every real extent objectid
2374                  * will be smaller.
2375                  */
2376                 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
2377                 btrfs_release_path(path);
2378                 mutex_unlock(&fs_info->qgroup_rescan_lock);
2379                 return ret;
2380         }
2381
2382         btrfs_item_key_to_cpu(path->nodes[0], &found,
2383                               btrfs_header_nritems(path->nodes[0]) - 1);
2384         fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
2385
2386         btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2387         memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
2388         slot = path->slots[0];
2389         btrfs_release_path(path);
2390         mutex_unlock(&fs_info->qgroup_rescan_lock);
2391
2392         for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
2393                 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
2394                 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
2395                     found.type != BTRFS_METADATA_ITEM_KEY)
2396                         continue;
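                     /*
                      * For a METADATA_ITEM_KEY the key offset holds the tree
                      * level, not a byte count, so a metadata extent always
                      * accounts for exactly one tree block.
                      */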
2397                 if (found.type == BTRFS_METADATA_ITEM_KEY)
2398                         num_bytes = fs_info->extent_root->leafsize;
2399                 else
2400                         num_bytes = found.offset;
2401
2402                 ulist_reinit(qgroups);
2403                 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
2404                                            &roots);
2405                 if (ret < 0)
2406                         goto out;
2407                 spin_lock(&fs_info->qgroup_lock);
2408                 seq = fs_info->qgroup_seq;
2409                 fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
2410
2411                 new_roots = 0;
2412                 ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
2413                                              seq, &new_roots, 1);
2414                 if (ret < 0) {
2415                         spin_unlock(&fs_info->qgroup_lock);
2416                         ulist_free(roots);
2417                         goto out;
2418                 }
2419
2420                 ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
2421                                              seq, 0, new_roots, 1);
2422                 if (ret < 0) {
2423                         spin_unlock(&fs_info->qgroup_lock);
2424                         ulist_free(roots);
2425                         goto out;
2426                 }
2427                 spin_unlock(&fs_info->qgroup_lock);
2428                 ulist_free(roots);
2429         }
2430 out:
2431         btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
2432
2433         return ret;
2434 }
2435
2436 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
2437 {
2438         struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
2439                                                      qgroup_rescan_work);
2440         struct btrfs_path *path;
2441         struct btrfs_trans_handle *trans = NULL;
2442         struct ulist *tmp = NULL, *qgroups = NULL;
2443         struct extent_buffer *scratch_leaf = NULL;
2444         int err = -ENOMEM;
2445
2446         path = btrfs_alloc_path();
2447         if (!path)
2448                 goto out;
2449         qgroups = ulist_alloc(GFP_NOFS);
2450         if (!qgroups)
2451                 goto out;
2452         tmp = ulist_alloc(GFP_NOFS);
2453         if (!tmp)
2454                 goto out;
2455         scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
2456         if (!scratch_leaf)
2457                 goto out;
2458
2459         err = 0;
2460         while (!err) {
2461                 trans = btrfs_start_transaction(fs_info->fs_root, 0);
2462                 if (IS_ERR(trans)) {
2463                         err = PTR_ERR(trans);
2464                         break;
2465                 }
2466                 if (!fs_info->quota_enabled) {
2467                         err = -EINTR;
2468                 } else {
2469                         err = qgroup_rescan_leaf(fs_info, path, trans,
2470                                                  qgroups, tmp, scratch_leaf);
2471                 }
2472                 if (err > 0)
2473                         btrfs_commit_transaction(trans, fs_info->fs_root);
2474                 else
2475                         btrfs_end_transaction(trans, fs_info->fs_root);
2476         }
2477
2478 out:
2479         kfree(scratch_leaf);
2480         ulist_free(qgroups);
2481         ulist_free(tmp);
2482         btrfs_free_path(path);
2483
2484         mutex_lock(&fs_info->qgroup_rescan_lock);
2485         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2486
2487         if (err == 2 &&
2488             fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
2489                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2490         } else if (err < 0) {
2491                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
2492         }
2493         mutex_unlock(&fs_info->qgroup_rescan_lock);
2494
2495         if (err >= 0) {
2496                 btrfs_info(fs_info, "qgroup scan completed%s",
2497                         err == 2 ? " (inconsistency flag cleared)" : "");
2498         } else {
2499                 btrfs_err(fs_info, "qgroup scan failed with %d", err);
2500         }
2501
2502         complete_all(&fs_info->qgroup_rescan_completion);
2503 }
2504
2505 /*
2506  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
2507  * memory required for the rescan context.
2508  */
2509 static int
2510 qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
2511                    int init_flags)
2512 {
2513         int ret = 0;
2514
2515         if (!init_flags &&
2516             (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
2517              !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
2518                 ret = -EINVAL;
2519                 goto err;
2520         }
2521
2522         mutex_lock(&fs_info->qgroup_rescan_lock);
2523         spin_lock(&fs_info->qgroup_lock);
2524
2525         if (init_flags) {
2526                 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2527                         ret = -EINPROGRESS;
2528                 else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
2529                         ret = -EINVAL;
2530
2531                 if (ret) {
2532                         spin_unlock(&fs_info->qgroup_lock);
2533                         mutex_unlock(&fs_info->qgroup_rescan_lock);
2534                         goto err;
2535                 }
2536
2537                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2538         }
2539
2540         memset(&fs_info->qgroup_rescan_progress, 0,
2541                 sizeof(fs_info->qgroup_rescan_progress));
2542         fs_info->qgroup_rescan_progress.objectid = progress_objectid;
2543
2544         spin_unlock(&fs_info->qgroup_lock);
2545         mutex_unlock(&fs_info->qgroup_rescan_lock);
2546
2547         init_completion(&fs_info->qgroup_rescan_completion);
2548
2549         memset(&fs_info->qgroup_rescan_work, 0,
2550                sizeof(fs_info->qgroup_rescan_work));
2551         btrfs_init_work(&fs_info->qgroup_rescan_work,
2552                         btrfs_qgroup_rescan_worker, NULL, NULL);
2553
2554         if (ret) {
2555 err:
2556                 btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
2557                 return ret;
2558         }
2559
2560         return 0;
2561 }
2562
2563 static void
2564 qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2565 {
2566         struct rb_node *n;
2567         struct btrfs_qgroup *qgroup;
2568
2569         spin_lock(&fs_info->qgroup_lock);
2570         /* clear all current qgroup tracking information */
2571         for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2572                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2573                 qgroup->rfer = 0;
2574                 qgroup->rfer_cmpr = 0;
2575                 qgroup->excl = 0;
2576                 qgroup->excl_cmpr = 0;
2577         }
2578         spin_unlock(&fs_info->qgroup_lock);
2579 }
2580
2581 int
2582 btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
2583 {
2584         int ret = 0;
2585         struct btrfs_trans_handle *trans;
2586
2587         ret = qgroup_rescan_init(fs_info, 0, 1);
2588         if (ret)
2589                 return ret;
2590
2591         /*
2592          * We have set the rescan_progress to 0, which means no more
2593          * delayed refs will be accounted by btrfs_qgroup_account.
2594          * However, btrfs_qgroup_account may be running right after its call
2595          * to btrfs_find_all_roots, in which case it would still do the
2596          * accounting.
2597          * To solve this, we're committing the transaction, which will
2598          * ensure we run all delayed refs and only after that, we are
2599          * going to clear all tracking information for a clean start.
2600          */
2601
2602         trans = btrfs_join_transaction(fs_info->fs_root);
2603         if (IS_ERR(trans)) {
2604                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2605                 return PTR_ERR(trans);
2606         }
2607         ret = btrfs_commit_transaction(trans, fs_info->fs_root);
2608         if (ret) {
2609                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2610                 return ret;
2611         }
2612
2613         qgroup_rescan_zero_tracking(fs_info);
2614
2615         btrfs_queue_work(fs_info->qgroup_rescan_workers,
2616                          &fs_info->qgroup_rescan_work);
2617
2618         return 0;
2619 }
2620
2621 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2622 {
2623         int running;
2624         int ret = 0;
2625
2626         mutex_lock(&fs_info->qgroup_rescan_lock);
2627         spin_lock(&fs_info->qgroup_lock);
2628         running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2629         spin_unlock(&fs_info->qgroup_lock);
2630         mutex_unlock(&fs_info->qgroup_rescan_lock);
2631
2632         if (running)
2633                 ret = wait_for_completion_interruptible(
2634                                         &fs_info->qgroup_rescan_completion);
2635
2636         return ret;
2637 }
2638
2639 /*
2640  * this is only called from open_ctree where we're still single-threaded, thus
2641  * locking is omitted here.
2642  */
2643 void
2644 btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
2645 {
2646         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
2647                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
2648                                  &fs_info->qgroup_rescan_work);
2649 }