fs/btrfs/delayed-ref.c
/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include "ctree.h"
#include "delayed-ref.h"
#include "transaction.h"

struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
 * delayed back reference update tracking.  For subvolume trees
 * we queue up extent allocations and backref maintenance for
 * delayed processing.  This avoids deep call chains where we
 * add extents in the middle of btrfs_search_slot, and it allows
 * us to buffer up frequently modified backrefs in an rb tree instead
 * of hammering updates on the extent allocation tree.
 */

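/*
 * All of this state lives in the per-transaction rb tree at
 * trans->transaction->delayed_refs.root.  The tree holds both head nodes
 * (one per extent bytenr, carrying the running ref_mod total and any
 * pending extent_op) and the individual tree/data ref nodes describing
 * the backref changes queued against that extent; the slab caches above
 * back those allocations.
 */
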
/*
 * compare two delayed tree backrefs with same bytenr and type
 */
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
                          struct btrfs_delayed_tree_ref *ref1)
{
        if (ref1->root < ref2->root)
                return -1;
        if (ref1->root > ref2->root)
                return 1;
        if (ref1->parent < ref2->parent)
                return -1;
        if (ref1->parent > ref2->parent)
                return 1;
        return 0;
}

/*
 * compare two delayed data backrefs with same bytenr and type
 */
static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
                          struct btrfs_delayed_data_ref *ref1)
{
        if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
                if (ref1->root < ref2->root)
                        return -1;
                if (ref1->root > ref2->root)
                        return 1;
                if (ref1->objectid < ref2->objectid)
                        return -1;
                if (ref1->objectid > ref2->objectid)
                        return 1;
                if (ref1->offset < ref2->offset)
                        return -1;
                if (ref1->offset > ref2->offset)
                        return 1;
        } else {
                if (ref1->parent < ref2->parent)
                        return -1;
                if (ref1->parent > ref2->parent)
                        return 1;
        }
        return 0;
}

/*
 * entries in the rb tree are ordered by the byte number of the extent,
 * type of the delayed backrefs and content of delayed backrefs.
 */
static int comp_entry(struct btrfs_delayed_ref_node *ref2,
                      struct btrfs_delayed_ref_node *ref1,
                      bool compare_seq)
{
        if (ref1->bytenr < ref2->bytenr)
                return -1;
        if (ref1->bytenr > ref2->bytenr)
                return 1;
        if (ref1->is_head && ref2->is_head)
                return 0;
        if (ref2->is_head)
                return -1;
        if (ref1->is_head)
                return 1;
        if (ref1->type < ref2->type)
                return -1;
        if (ref1->type > ref2->type)
                return 1;
        /* merging of sequenced refs is not allowed */
        if (compare_seq) {
                if (ref1->seq < ref2->seq)
                        return -1;
                if (ref1->seq > ref2->seq)
                        return 1;
        }
        if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
            ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
                return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
                                      btrfs_delayed_node_to_tree_ref(ref1));
        } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
                   ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
                return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
                                      btrfs_delayed_node_to_data_ref(ref1));
        }
        BUG();
        return 0;
}

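/*
 * One consequence of comp_entry() above: within a single bytenr the head
 * node sorts after all of that extent's individual ref nodes.  That is
 * why find_ref_head() walks right past non-head entries, and why
 * btrfs_merge_delayed_refs() can reach every ref for an extent by walking
 * rb_prev() from the head.  Passing compare_seq == 0 lets the merging code
 * treat refs that differ only in seq as equal.
 */
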
/*
 * insert a new ref into the rbtree.  This returns the existing ref that
 * compares equal to the new one (same bytenr, type and backref content),
 * or NULL if the new node was properly inserted.
 */
static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
                                                  struct rb_node *node)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_node *entry;
        struct btrfs_delayed_ref_node *ins;
        int cmp;

        ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
        while (*p) {
                parent_node = *p;
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
                                 rb_node);

                cmp = comp_entry(entry, ins, 1);
                if (cmp < 0)
                        p = &(*p)->rb_left;
                else if (cmp > 0)
                        p = &(*p)->rb_right;
                else
                        return entry;
        }

        rb_link_node(node, parent_node, p);
        rb_insert_color(node, root);
        return NULL;
}

/*
 * find a head entry based on bytenr. This returns the delayed ref
 * head if it was able to find one, or NULL if nothing was in that spot.
 * If return_bigger is given, the next bigger entry is returned if no exact
 * match is found.
 */
static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
                                  u64 bytenr,
                                  struct btrfs_delayed_ref_node **last,
                                  int return_bigger)
{
        struct rb_node *n;
        struct btrfs_delayed_ref_node *entry;
        int cmp = 0;

again:
        n = root->rb_node;
        entry = NULL;
        while (n) {
                entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
                WARN_ON(!entry->in_tree);
                if (last)
                        *last = entry;

                if (bytenr < entry->bytenr)
                        cmp = -1;
                else if (bytenr > entry->bytenr)
                        cmp = 1;
                else if (!btrfs_delayed_ref_is_head(entry))
                        cmp = 1;
                else
                        cmp = 0;

                if (cmp < 0)
                        n = n->rb_left;
                else if (cmp > 0)
                        n = n->rb_right;
                else
                        return entry;
        }
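        /*
         * No exact match.  If the caller asked for the next bigger entry
         * and the last node we looked at was still smaller than bytenr
         * (cmp > 0), step forward from it -- wrapping around to the first
         * entry in the tree if necessary -- and search again so that we
         * return the head node of that next extent rather than one of its
         * ref nodes.
         */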
        if (entry && return_bigger) {
                if (cmp > 0) {
                        n = rb_next(&entry->rb_node);
                        if (!n)
                                n = rb_first(root);
                        entry = rb_entry(n, struct btrfs_delayed_ref_node,
                                         rb_node);
                        bytenr = entry->bytenr;
                        return_bigger = 0;
                        goto again;
                }
                return entry;
        }
        return NULL;
}

int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
                           struct btrfs_delayed_ref_head *head)
{
        struct btrfs_delayed_ref_root *delayed_refs;

        delayed_refs = &trans->transaction->delayed_refs;
        assert_spin_locked(&delayed_refs->lock);
        if (mutex_trylock(&head->mutex))
                return 0;

        atomic_inc(&head->node.refs);
        spin_unlock(&delayed_refs->lock);

        mutex_lock(&head->mutex);
        spin_lock(&delayed_refs->lock);
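        /*
         * We had to drop delayed_refs->lock to block on the mutex, so the
         * head may have been run and removed from the tree in the
         * meantime.  If so, return -EAGAIN so the caller goes back and
         * re-searches instead of using this (now stale) head.
         */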
        if (!head->node.in_tree) {
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref(&head->node);
                return -EAGAIN;
        }
        btrfs_put_delayed_ref(&head->node);
        return 0;
}

static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                                    struct btrfs_delayed_ref_root *delayed_refs,
                                    struct btrfs_delayed_ref_node *ref)
{
        rb_erase(&ref->rb_node, &delayed_refs->root);
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
        delayed_refs->num_entries--;
        if (trans->delayed_ref_updates)
                trans->delayed_ref_updates--;
}

static int merge_ref(struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_root *delayed_refs,
                     struct btrfs_delayed_ref_node *ref, u64 seq)
{
        struct rb_node *node;
        int merged = 0;
        int mod = 0;
        int done = 0;

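        /*
         * Walk backwards from @ref over the other delayed refs queued for
         * the same bytenr (they all sort just before the head).  Stop at a
         * different extent, or at refs still pinned by an active tree mod
         * log sequence.  Refs that are identical apart from seq get folded
         * together: the same action means the ref_mods are summed, opposite
         * actions cancel, and whichever side started with the larger
         * ref_mod survives.  If everything cancels out, the surviving ref
         * is dropped as well.
         */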
        node = rb_prev(&ref->rb_node);
        while (node) {
                struct btrfs_delayed_ref_node *next;

                next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
                node = rb_prev(node);
                if (next->bytenr != ref->bytenr)
                        break;
                if (seq && next->seq >= seq)
                        break;
                if (comp_entry(ref, next, 0))
                        continue;

                if (ref->action == next->action) {
                        mod = next->ref_mod;
                } else {
                        if (ref->ref_mod < next->ref_mod) {
                                struct btrfs_delayed_ref_node *tmp;

                                tmp = ref;
                                ref = next;
                                next = tmp;
                                done = 1;
                        }
                        mod = -next->ref_mod;
                }

                merged++;
                drop_delayed_ref(trans, delayed_refs, next);
                ref->ref_mod += mod;
                if (ref->ref_mod == 0) {
                        drop_delayed_ref(trans, delayed_refs, ref);
                        break;
                } else {
                        /*
                         * You can't have multiples of the same ref on a tree
                         * block.
                         */
                        WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
                }

                if (done)
                        break;
                node = rb_prev(&ref->rb_node);
        }

        return merged;
}

void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info,
                              struct btrfs_delayed_ref_root *delayed_refs,
                              struct btrfs_delayed_ref_head *head)
{
        struct rb_node *node;
        u64 seq = 0;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                struct seq_list *elem;

                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                seq = elem->seq;
        }
        spin_unlock(&fs_info->tree_mod_seq_lock);

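        /*
         * seq is now the lowest sequence number still registered on the
         * tree mod log list (or 0 if nobody is using it).  Refs with a seq
         * at or above that value are still needed by those users, so the
         * walk below leaves them untouched.
         */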
        node = rb_prev(&head->node.rb_node);
        while (node) {
                struct btrfs_delayed_ref_node *ref;

                ref = rb_entry(node, struct btrfs_delayed_ref_node,
                               rb_node);
                if (ref->bytenr != head->node.bytenr)
                        break;

                /* We can't merge refs that are outside of our seq count */
                if (seq && ref->seq >= seq)
                        break;
                if (merge_ref(trans, delayed_refs, ref, seq))
                        node = rb_prev(&head->node.rb_node);
                else
                        node = rb_prev(node);
        }
}

int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
                            u64 seq)
{
        struct seq_list *elem;
        int ret = 0;

        spin_lock(&fs_info->tree_mod_seq_lock);
        if (!list_empty(&fs_info->tree_mod_seq_list)) {
                elem = list_first_entry(&fs_info->tree_mod_seq_list,
                                        struct seq_list, list);
                if (seq >= elem->seq) {
                        pr_debug("holding back delayed_ref %llu, lowest is "
                                 "%llu (%p)\n", seq, elem->seq, delayed_refs);
                        ret = 1;
                }
        }

        spin_unlock(&fs_info->tree_mod_seq_lock);
        return ret;
}

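/*
 * Fill @cluster with delayed ref heads whose bytenrs are close together,
 * starting at (or just after) @start and remembering the position in
 * delayed_refs->run_delayed_start so the next call can resume from there.
 * At most 32 heads are collected per call.  Returns 0 if at least one head
 * was added to the cluster and 1 if the tree held nothing more to do.
 *
 * The extent tree code consumes this list when it runs delayed refs at
 * transaction commit time; that side is not in this file, so treat this as
 * a summary of the intended usage rather than a description of a caller
 * here.
 */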
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
                           struct list_head *cluster, u64 start)
{
        int count = 0;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct rb_node *node;
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_head *head;

        delayed_refs = &trans->transaction->delayed_refs;
        if (start == 0) {
                node = rb_first(&delayed_refs->root);
        } else {
                ref = NULL;
                find_ref_head(&delayed_refs->root, start + 1, &ref, 1);
                if (ref) {
                        node = &ref->rb_node;
                } else
                        node = rb_first(&delayed_refs->root);
        }
again:
        while (node && count < 32) {
                ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
                if (btrfs_delayed_ref_is_head(ref)) {
                        head = btrfs_delayed_node_to_head(ref);
                        if (list_empty(&head->cluster)) {
                                list_add_tail(&head->cluster, cluster);
                                delayed_refs->run_delayed_start =
                                        head->node.bytenr;
                                count++;

                                WARN_ON(delayed_refs->num_heads_ready == 0);
                                delayed_refs->num_heads_ready--;
                        } else if (count) {
                                /* the goal of the clustering is to find extents
                                 * that are likely to end up in the same extent
                                 * leaf on disk.  So, we don't want them spread
                                 * all over the tree.  Stop now if we've hit
                                 * a head that was already in use
                                 */
                                break;
                        }
                }
                node = rb_next(node);
        }
        if (count) {
                return 0;
        } else if (start) {
                /*
                 * we've gone to the end of the rbtree without finding any
                 * clusters.  start from the beginning and try again
                 */
                start = 0;
                node = rb_first(&delayed_refs->root);
                goto again;
        }
        return 1;
}

void btrfs_release_ref_cluster(struct list_head *cluster)
{
        struct list_head *pos, *q;

        list_for_each_safe(pos, q, cluster)
                list_del_init(pos);
}

/*
 * helper function to update an extent delayed ref in the
 * rbtree.  existing and update must both have the same
 * bytenr and parent
 *
 * This may free existing if the update cancels out whatever
 * operation it was doing.
 */
static noinline void
update_existing_ref(struct btrfs_trans_handle *trans,
                    struct btrfs_delayed_ref_root *delayed_refs,
                    struct btrfs_delayed_ref_node *existing,
                    struct btrfs_delayed_ref_node *update)
{
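        /*
         * A tree block can never carry more than one copy of the same
         * backref, so finding a second identical ref for a tree block (or
         * a decrement that does not bring the count back to zero) means
         * the caller messed up; that is what the WARN_ONs below check for.
         */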
        if (update->action != existing->action) {
                /*
                 * this is effectively undoing either an add or a
                 * drop.  We decrement the ref_mod, and if it goes
                 * down to zero we just delete the entry without
                 * ever changing the extent allocation tree.
                 */
                existing->ref_mod--;
                if (existing->ref_mod == 0)
                        drop_delayed_ref(trans, delayed_refs, existing);
                else
                        WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                                existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
        } else {
                WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
                        existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
                /*
                 * the action on the existing ref matches
                 * the action on the ref we're trying to add.
                 * Bump the ref_mod by one so the backref that
                 * is eventually added/removed has the correct
                 * reference count
                 */
                existing->ref_mod += update->ref_mod;
        }
}

/*
 * helper function to update the accounting in the head ref
 * existing and update must have the same bytenr
 */
static noinline void
update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
                         struct btrfs_delayed_ref_node *update)
{
        struct btrfs_delayed_ref_head *existing_ref;
        struct btrfs_delayed_ref_head *ref;

        existing_ref = btrfs_delayed_node_to_head(existing);
        ref = btrfs_delayed_node_to_head(update);
        BUG_ON(existing_ref->is_data != ref->is_data);

        if (ref->must_insert_reserved) {
                /* if the extent was freed and then
                 * reallocated before the delayed ref
                 * entries were processed, we can end up
                 * with an existing head ref without
                 * the must_insert_reserved flag set.
                 * Set it again here
                 */
                existing_ref->must_insert_reserved = ref->must_insert_reserved;

                /*
                 * update the num_bytes so we make sure the accounting
                 * is done correctly
                 */
                existing->num_bytes = update->num_bytes;

        }

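        /*
         * Fold the new extent_op into whatever extent_op is already queued
         * on the head.  Key updates and flag updates are independent
         * requests, so the surviving extent_op can end up carrying both.
         */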
        if (ref->extent_op) {
                if (!existing_ref->extent_op) {
                        existing_ref->extent_op = ref->extent_op;
                } else {
                        if (ref->extent_op->update_key) {
                                memcpy(&existing_ref->extent_op->key,
                                       &ref->extent_op->key,
                                       sizeof(ref->extent_op->key));
                                existing_ref->extent_op->update_key = 1;
                        }
                        if (ref->extent_op->update_flags) {
                                existing_ref->extent_op->flags_to_set |=
                                        ref->extent_op->flags_to_set;
                                existing_ref->extent_op->update_flags = 1;
                        }
                        btrfs_free_delayed_extent_op(ref->extent_op);
                }
        }
        /*
         * update the reference mod on the head to reflect this new operation
         */
        existing->ref_mod += update->ref_mod;
}

/*
 * helper function to actually insert a head node into the rbtree.
 * this does all the dirty work in terms of maintaining the correct
 * overall modification count.
 */
static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                                        struct btrfs_trans_handle *trans,
                                        struct btrfs_delayed_ref_node *ref,
                                        u64 bytenr, u64 num_bytes,
                                        int action, int is_data)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
        int count_mod = 1;
        int must_insert_reserved = 0;

        /*
         * the head node stores the sum of all the mods, so dropping a ref
         * should drop the sum in the head node by one.
         */
        if (action == BTRFS_UPDATE_DELAYED_HEAD)
                count_mod = 0;
        else if (action == BTRFS_DROP_DELAYED_REF)
                count_mod = -1;

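        /*
         * So the head starts at +1 for an add, -1 for a drop and 0 for a
         * plain head update; later operations on the same bytenr are
         * folded in by update_existing_head_ref().  For example, one add
         * followed by two drops leaves the head with ref_mod == -1, the
         * net change applied when the delayed refs are finally run.
         */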
        /*
         * BTRFS_ADD_DELAYED_EXTENT means that we need to update
         * the reserved accounting when the extent is finally added, or
         * if a later modification deletes the delayed ref without ever
         * inserting the extent into the extent allocation tree.
         * ref->must_insert_reserved is the flag used to record
         * that accounting mods are required.
         *
         * Once we record must_insert_reserved, switch the action to
         * BTRFS_ADD_DELAYED_REF because other special casing is not required.
         */
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                must_insert_reserved = 1;
        else
                must_insert_reserved = 0;

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = count_mod;
        ref->type = 0;
        ref->action = 0;
        ref->is_head = 1;
        ref->in_tree = 1;
        ref->seq = 0;

        head_ref = btrfs_delayed_node_to_head(ref);
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;

        INIT_LIST_HEAD(&head_ref->cluster);
        mutex_init(&head_ref->mutex);

        trace_btrfs_delayed_ref_head(ref, head_ref, action);

        existing = tree_insert(&delayed_refs->root, &ref->rb_node);

        if (existing) {
                update_existing_head_ref(existing, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
        } else {
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                delayed_refs->num_entries++;
                trans->delayed_ref_updates++;
        }
}

/*
 * helper to insert a delayed tree ref into the rbtree.
 */
static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                                         struct btrfs_trans_handle *trans,
                                         struct btrfs_delayed_ref_node *ref,
                                         u64 bytenr, u64 num_bytes, u64 parent,
                                         u64 ref_root, int level, int action,
                                         int for_cow)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_tree_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;

        if (need_ref_seq(for_cow, ref_root))
                seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
        ref->seq = seq;

        full_ref = btrfs_delayed_node_to_tree_ref(ref);
        full_ref->parent = parent;
        full_ref->root = ref_root;
        if (parent)
                ref->type = BTRFS_SHARED_BLOCK_REF_KEY;
        else
                ref->type = BTRFS_TREE_BLOCK_REF_KEY;
        full_ref->level = level;

        trace_btrfs_delayed_tree_ref(ref, full_ref, action);

        existing = tree_insert(&delayed_refs->root, &ref->rb_node);

        if (existing) {
                update_existing_ref(trans, delayed_refs, existing, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
        } else {
                delayed_refs->num_entries++;
                trans->delayed_ref_updates++;
        }
}

/*
 * helper to insert a delayed data ref into the rbtree.
 */
static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                                         struct btrfs_trans_handle *trans,
                                         struct btrfs_delayed_ref_node *ref,
                                         u64 bytenr, u64 num_bytes, u64 parent,
                                         u64 ref_root, u64 owner, u64 offset,
                                         int action, int for_cow)
{
        struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_data_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;

        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;

        delayed_refs = &trans->transaction->delayed_refs;

        /* first set the basic ref node struct up */
        atomic_set(&ref->refs, 1);
        ref->bytenr = bytenr;
        ref->num_bytes = num_bytes;
        ref->ref_mod = 1;
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;

        if (need_ref_seq(for_cow, ref_root))
                seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
        ref->seq = seq;

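        /*
         * owner and offset only participate in the sort for the non-shared
         * EXTENT_DATA_REF case (see comp_data_refs() above); shared data
         * refs are keyed by parent alone.  In btrfs these correspond to the
         * inode objectid and file offset of the referencing extent item.
         */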
        full_ref = btrfs_delayed_node_to_data_ref(ref);
        full_ref->parent = parent;
        full_ref->root = ref_root;
        if (parent)
                ref->type = BTRFS_SHARED_DATA_REF_KEY;
        else
                ref->type = BTRFS_EXTENT_DATA_REF_KEY;

        full_ref->objectid = owner;
        full_ref->offset = offset;

        trace_btrfs_delayed_data_ref(ref, full_ref, action);

        existing = tree_insert(&delayed_refs->root, &ref->rb_node);

        if (existing) {
                update_existing_ref(trans, delayed_refs, existing, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
                 */
                kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
        } else {
                delayed_refs->num_entries++;
                trans->delayed_ref_updates++;
        }
}

/*
 * add a delayed tree ref.  This does all of the accounting required
 * to make sure the delayed ref is eventually processed before this
 * transaction commits.
 */
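/*
 * Illustrative call only (this is not a call site from this file): code
 * that adds a reference to a tree block it just COWed would do something
 * along the lines of
 *
 *      ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
 *                                       buf->len, parent, root_objectid,
 *                                       level, BTRFS_ADD_DELAYED_REF,
 *                                       NULL, for_cow);
 *
 * where buf is the extent_buffer for the block; the head and tree ref
 * nodes are allocated here and queued under delayed_refs->lock.
 */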
int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes, u64 parent,
                               u64 ref_root, int level, int action,
                               struct btrfs_delayed_extent_op *extent_op,
                               int for_cow)
{
        struct btrfs_delayed_tree_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        BUG_ON(extent_op && extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
                return -ENOMEM;
        }

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                                   num_bytes, action, 0);

        add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, level, action,
                                   for_cow);
        spin_unlock(&delayed_refs->lock);
        if (need_ref_seq(for_cow, ref_root))
                btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

        return 0;
}

/*
 * add a delayed data ref. it's similar to btrfs_add_delayed_tree_ref.
 */
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
                               u64 owner, u64 offset, int action,
                               struct btrfs_delayed_extent_op *extent_op,
                               int for_cow)
{
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        BUG_ON(extent_op && !extent_op->is_data);
        ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
        if (!ref)
                return -ENOMEM;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref) {
                kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
                return -ENOMEM;
        }

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        /*
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
        add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                                   num_bytes, action, 1);

        add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, owner, offset,
                                   action, for_cow);
        spin_unlock(&delayed_refs->lock);
        if (need_ref_seq(for_cow, ref_root))
                btrfs_qgroup_record_ref(trans, &ref->node, extent_op);

        return 0;
}

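/*
 * record a flags/key update for an extent without changing its reference
 * count: only a head node is queued, with BTRFS_UPDATE_DELAYED_HEAD so
 * that add_delayed_ref_head() uses a count_mod of zero.
 */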
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
                                struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes,
                                struct btrfs_delayed_extent_op *extent_op)
{
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref)
                return -ENOMEM;

        head_ref->extent_op = extent_op;

        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);

        add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
                                   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
                                   extent_op->is_data);

        spin_unlock(&delayed_refs->lock);
        return 0;
}

/*
 * this does a simple search for the head node for a given extent.
 * It must be called with the delayed ref spinlock held, and it returns
 * the head node if one was found, or NULL if not.
 */
struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
{
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_root *delayed_refs;

        delayed_refs = &trans->transaction->delayed_refs;
        ref = find_ref_head(&delayed_refs->root, bytenr, NULL, 0);
        if (ref)
                return btrfs_delayed_node_to_head(ref);
        return NULL;
}

void btrfs_delayed_ref_exit(void)
{
        if (btrfs_delayed_ref_head_cachep)
                kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
        if (btrfs_delayed_tree_ref_cachep)
                kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
        if (btrfs_delayed_data_ref_cachep)
                kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
        if (btrfs_delayed_extent_op_cachep)
                kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}

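/*
 * create the slab caches used by the delayed ref code.  On failure we fall
 * back to btrfs_delayed_ref_exit(), which copes with a partially created
 * set thanks to the NULL checks above.  These two routines are presumably
 * wired into the btrfs module init/exit paths; that code is not part of
 * this file.
 */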
int btrfs_delayed_ref_init(void)
{
        btrfs_delayed_ref_head_cachep = kmem_cache_create(
                                "btrfs_delayed_ref_head",
                                sizeof(struct btrfs_delayed_ref_head), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_ref_head_cachep)
                goto fail;

        btrfs_delayed_tree_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_tree_ref",
                                sizeof(struct btrfs_delayed_tree_ref), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_tree_ref_cachep)
                goto fail;

        btrfs_delayed_data_ref_cachep = kmem_cache_create(
                                "btrfs_delayed_data_ref",
                                sizeof(struct btrfs_delayed_data_ref), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_data_ref_cachep)
                goto fail;

        btrfs_delayed_extent_op_cachep = kmem_cache_create(
                                "btrfs_delayed_extent_op",
                                sizeof(struct btrfs_delayed_extent_op), 0,
                                SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!btrfs_delayed_extent_op_cachep)
                goto fail;

        return 0;
fail:
        btrfs_delayed_ref_exit();
        return -ENOMEM;
}