Btrfs: rewrite btrfs_trim_block_group()
fs/btrfs/free-space-cache.c
1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30
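/*
 * One bitmap entry carries a single page worth of bits, so it describes
 * BITS_PER_BITMAP * ctl->unit bytes of a block group.  MAX_CACHE_BYTES_PER_GIG
 * is the rough ceiling on cache memory we allow per 1GiB of block group
 * space; recalculate_thresholds() uses it to size the extent entry threshold.
 */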
31 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35                            struct btrfs_free_space *info);
36
37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38                                                struct btrfs_path *path,
39                                                u64 offset)
40 {
41         struct btrfs_key key;
42         struct btrfs_key location;
43         struct btrfs_disk_key disk_key;
44         struct btrfs_free_space_header *header;
45         struct extent_buffer *leaf;
46         struct inode *inode = NULL;
47         int ret;
48
49         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
50         key.offset = offset;
51         key.type = 0;
52
53         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54         if (ret < 0)
55                 return ERR_PTR(ret);
56         if (ret > 0) {
57                 btrfs_release_path(path);
58                 return ERR_PTR(-ENOENT);
59         }
60
61         leaf = path->nodes[0];
62         header = btrfs_item_ptr(leaf, path->slots[0],
63                                 struct btrfs_free_space_header);
64         btrfs_free_space_key(leaf, header, &disk_key);
65         btrfs_disk_key_to_cpu(&location, &disk_key);
66         btrfs_release_path(path);
67
68         inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69         if (!inode)
70                 return ERR_PTR(-ENOENT);
71         if (IS_ERR(inode))
72                 return inode;
73         if (is_bad_inode(inode)) {
74                 iput(inode);
75                 return ERR_PTR(-ENOENT);
76         }
77
78         inode->i_mapping->flags &= ~__GFP_FS;
79
80         return inode;
81 }
82
83 struct inode *lookup_free_space_inode(struct btrfs_root *root,
84                                       struct btrfs_block_group_cache
85                                       *block_group, struct btrfs_path *path)
86 {
87         struct inode *inode = NULL;
88         u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
89
90         spin_lock(&block_group->lock);
91         if (block_group->inode)
92                 inode = igrab(block_group->inode);
93         spin_unlock(&block_group->lock);
94         if (inode)
95                 return inode;
96
97         inode = __lookup_free_space_inode(root, path,
98                                           block_group->key.objectid);
99         if (IS_ERR(inode))
100                 return inode;
101
102         spin_lock(&block_group->lock);
103         if ((BTRFS_I(inode)->flags & flags) != flags) {
104                 printk(KERN_INFO "Old style space inode found, converting.\n");
105                 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
106                         BTRFS_INODE_NODATACOW;
107                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
108         }
109
110         if (!block_group->iref) {
111                 block_group->inode = igrab(inode);
112                 block_group->iref = 1;
113         }
114         spin_unlock(&block_group->lock);
115
116         return inode;
117 }
118
119 int __create_free_space_inode(struct btrfs_root *root,
120                               struct btrfs_trans_handle *trans,
121                               struct btrfs_path *path, u64 ino, u64 offset)
122 {
123         struct btrfs_key key;
124         struct btrfs_disk_key disk_key;
125         struct btrfs_free_space_header *header;
126         struct btrfs_inode_item *inode_item;
127         struct extent_buffer *leaf;
128         u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
129         int ret;
130
131         ret = btrfs_insert_empty_inode(trans, root, path, ino);
132         if (ret)
133                 return ret;
134
135         /* We inline crc's for the free disk space cache */
136         if (ino != BTRFS_FREE_INO_OBJECTID)
137                 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
138
139         leaf = path->nodes[0];
140         inode_item = btrfs_item_ptr(leaf, path->slots[0],
141                                     struct btrfs_inode_item);
142         btrfs_item_key(leaf, &disk_key, path->slots[0]);
143         memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
144                              sizeof(*inode_item));
145         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
146         btrfs_set_inode_size(leaf, inode_item, 0);
147         btrfs_set_inode_nbytes(leaf, inode_item, 0);
148         btrfs_set_inode_uid(leaf, inode_item, 0);
149         btrfs_set_inode_gid(leaf, inode_item, 0);
150         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
151         btrfs_set_inode_flags(leaf, inode_item, flags);
152         btrfs_set_inode_nlink(leaf, inode_item, 1);
153         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
154         btrfs_set_inode_block_group(leaf, inode_item, offset);
155         btrfs_mark_buffer_dirty(leaf);
156         btrfs_release_path(path);
157
158         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
159         key.offset = offset;
160         key.type = 0;
161
162         ret = btrfs_insert_empty_item(trans, root, path, &key,
163                                       sizeof(struct btrfs_free_space_header));
164         if (ret < 0) {
165                 btrfs_release_path(path);
166                 return ret;
167         }
168         leaf = path->nodes[0];
169         header = btrfs_item_ptr(leaf, path->slots[0],
170                                 struct btrfs_free_space_header);
171         memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
172         btrfs_set_free_space_key(leaf, header, &disk_key);
173         btrfs_mark_buffer_dirty(leaf);
174         btrfs_release_path(path);
175
176         return 0;
177 }
178
179 int create_free_space_inode(struct btrfs_root *root,
180                             struct btrfs_trans_handle *trans,
181                             struct btrfs_block_group_cache *block_group,
182                             struct btrfs_path *path)
183 {
184         int ret;
185         u64 ino;
186
187         ret = btrfs_find_free_objectid(root, &ino);
188         if (ret < 0)
189                 return ret;
190
191         return __create_free_space_inode(root, trans, path, ino,
192                                          block_group->key.objectid);
193 }
194
195 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
196                                     struct btrfs_trans_handle *trans,
197                                     struct btrfs_path *path,
198                                     struct inode *inode)
199 {
200         struct btrfs_block_rsv *rsv;
201         u64 needed_bytes;
202         loff_t oldsize;
203         int ret = 0;
204
205         rsv = trans->block_rsv;
206         trans->block_rsv = &root->fs_info->global_block_rsv;
207
208         /* 1 for slack space, 1 for updating the inode */
209         needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
210                 btrfs_calc_trans_metadata_size(root, 1);
211
212         spin_lock(&trans->block_rsv->lock);
213         if (trans->block_rsv->reserved < needed_bytes) {
214                 spin_unlock(&trans->block_rsv->lock);
215                 trans->block_rsv = rsv;
216                 return -ENOSPC;
217         }
218         spin_unlock(&trans->block_rsv->lock);
219
220         oldsize = i_size_read(inode);
221         btrfs_i_size_write(inode, 0);
222         truncate_pagecache(inode, oldsize, 0);
223
224         /*
225          * We don't need an orphan item because truncating the free space cache
226          * will never be split across transactions.
227          */
228         ret = btrfs_truncate_inode_items(trans, root, inode,
229                                          0, BTRFS_EXTENT_DATA_KEY);
230
231         if (ret) {
232                 trans->block_rsv = rsv;
233                 WARN_ON(1);
234                 return ret;
235         }
236
237         ret = btrfs_update_inode(trans, root, inode);
238         trans->block_rsv = rsv;
239
240         return ret;
241 }
242
243 static int readahead_cache(struct inode *inode)
244 {
245         struct file_ra_state *ra;
246         unsigned long last_index;
247
248         ra = kzalloc(sizeof(*ra), GFP_NOFS);
249         if (!ra)
250                 return -ENOMEM;
251
252         file_ra_state_init(ra, inode->i_mapping);
253         last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
254
255         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
256
257         kfree(ra);
258
259         return 0;
260 }
261
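/*
 * io_ctl walks the pages of the space cache inode one at a time while the
 * cache is read or written.  'pages' holds all of the inode's locked pages,
 * 'cur' and 'orig' point into the page that is currently kmapped, 'size' is
 * the room left in that page and 'index' is the next page to map.  When
 * 'check_crcs' is set (every cache except the free-ino cache), the first
 * page begins with one crc32c per page followed by the cache generation;
 * io_ctl_set_crc() and io_ctl_check_crc() maintain those checksums.
 */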
262 struct io_ctl {
263         void *cur, *orig;
264         struct page *page;
265         struct page **pages;
266         struct btrfs_root *root;
267         unsigned long size;
268         int index;
269         int num_pages;
270         unsigned check_crcs:1;
271 };
272
273 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
274                        struct btrfs_root *root)
275 {
276         memset(io_ctl, 0, sizeof(struct io_ctl));
277         io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
278                 PAGE_CACHE_SHIFT;
279         io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
280                                 GFP_NOFS);
281         if (!io_ctl->pages)
282                 return -ENOMEM;
283         io_ctl->root = root;
284         if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
285                 io_ctl->check_crcs = 1;
286         return 0;
287 }
288
289 static void io_ctl_free(struct io_ctl *io_ctl)
290 {
291         kfree(io_ctl->pages);
292 }
293
294 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
295 {
296         if (io_ctl->cur) {
297                 kunmap(io_ctl->page);
298                 io_ctl->cur = NULL;
299                 io_ctl->orig = NULL;
300         }
301 }
302
303 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
304 {
305         WARN_ON(io_ctl->cur);
306         BUG_ON(io_ctl->index >= io_ctl->num_pages);
307         io_ctl->page = io_ctl->pages[io_ctl->index++];
308         io_ctl->cur = kmap(io_ctl->page);
309         io_ctl->orig = io_ctl->cur;
310         io_ctl->size = PAGE_CACHE_SIZE;
311         if (clear)
312                 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
313 }
314
315 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
316 {
317         int i;
318
319         io_ctl_unmap_page(io_ctl);
320
321         for (i = 0; i < io_ctl->num_pages; i++) {
322                 if (io_ctl->pages[i]) {
323                         ClearPageChecked(io_ctl->pages[i]);
324                         unlock_page(io_ctl->pages[i]);
325                         page_cache_release(io_ctl->pages[i]);
326                 }
327         }
328 }
329
330 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
331                                 int uptodate)
332 {
333         struct page *page;
334         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
335         int i;
336
337         for (i = 0; i < io_ctl->num_pages; i++) {
338                 page = find_or_create_page(inode->i_mapping, i, mask);
339                 if (!page) {
340                         io_ctl_drop_pages(io_ctl);
341                         return -ENOMEM;
342                 }
343                 io_ctl->pages[i] = page;
344                 if (uptodate && !PageUptodate(page)) {
345                         btrfs_readpage(NULL, page);
346                         lock_page(page);
347                         if (!PageUptodate(page)) {
348                                 printk(KERN_ERR "btrfs: error reading free "
349                                        "space cache\n");
350                                 io_ctl_drop_pages(io_ctl);
351                                 return -EIO;
352                         }
353                 }
354         }
355
356         for (i = 0; i < io_ctl->num_pages; i++) {
357                 clear_page_dirty_for_io(io_ctl->pages[i]);
358                 set_page_extent_mapped(io_ctl->pages[i]);
359         }
360
361         return 0;
362 }
363
364 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
365 {
366         u64 *val;
367
368         io_ctl_map_page(io_ctl, 1);
369
370         /*
371          * Skip the csum areas.  If we don't check crcs then we just have a
372          * 64bit chunk at the front of the first page.
373          */
374         if (io_ctl->check_crcs) {
375                 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
376                 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
377         } else {
378                 io_ctl->cur += sizeof(u64);
379                 io_ctl->size -= sizeof(u64) * 2;
380         }
381
382         val = io_ctl->cur;
383         *val = cpu_to_le64(generation);
384         io_ctl->cur += sizeof(u64);
385 }
386
387 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
388 {
389         u64 *gen;
390
391         /*
392          * Skip the crc area.  If we don't check crcs then we just have a 64bit
393          * chunk at the front of the first page.
394          */
395         if (io_ctl->check_crcs) {
396                 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
397                 io_ctl->size -= sizeof(u64) +
398                         (sizeof(u32) * io_ctl->num_pages);
399         } else {
400                 io_ctl->cur += sizeof(u64);
401                 io_ctl->size -= sizeof(u64) * 2;
402         }
403
404         gen = io_ctl->cur;
405         if (le64_to_cpu(*gen) != generation) {
406                 printk_ratelimited(KERN_ERR "btrfs: space cache generation "
407                                    "(%Lu) does not match inode (%Lu)\n", *gen,
408                                    generation);
409                 io_ctl_unmap_page(io_ctl);
410                 return -EIO;
411         }
412         io_ctl->cur += sizeof(u64);
413         return 0;
414 }
415
416 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
417 {
418         u32 *tmp;
419         u32 crc = ~(u32)0;
420         unsigned offset = 0;
421
422         if (!io_ctl->check_crcs) {
423                 io_ctl_unmap_page(io_ctl);
424                 return;
425         }
426
427         if (index == 0)
428                 offset = sizeof(u32) * io_ctl->num_pages;
429
430         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
431                               PAGE_CACHE_SIZE - offset);
432         btrfs_csum_final(crc, (char *)&crc);
433         io_ctl_unmap_page(io_ctl);
434         tmp = kmap(io_ctl->pages[0]);
435         tmp += index;
436         *tmp = crc;
437         kunmap(io_ctl->pages[0]);
438 }
439
440 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
441 {
442         u32 *tmp, val;
443         u32 crc = ~(u32)0;
444         unsigned offset = 0;
445
446         if (!io_ctl->check_crcs) {
447                 io_ctl_map_page(io_ctl, 0);
448                 return 0;
449         }
450
451         if (index == 0)
452                 offset = sizeof(u32) * io_ctl->num_pages;
453
454         tmp = kmap(io_ctl->pages[0]);
455         tmp += index;
456         val = *tmp;
457         kunmap(io_ctl->pages[0]);
458
459         io_ctl_map_page(io_ctl, 0);
460         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
461                               PAGE_CACHE_SIZE - offset);
462         btrfs_csum_final(crc, (char *)&crc);
463         if (val != crc) {
464                 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
465                                    "space cache\n");
466                 io_ctl_unmap_page(io_ctl);
467                 return -EIO;
468         }
469
470         return 0;
471 }
472
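/*
 * Entries are packed one after another as struct btrfs_free_space_entry
 * records (offset, bytes, type), moving on to the next page whenever the
 * current one cannot hold another full record.  A bitmap entry only stores
 * its header here; the page sized bitmap payload is written later, aligned
 * to the start of a page, by io_ctl_add_bitmap() once all entries are out.
 */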
473 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
474                             void *bitmap)
475 {
476         struct btrfs_free_space_entry *entry;
477
478         if (!io_ctl->cur)
479                 return -ENOSPC;
480
481         entry = io_ctl->cur;
482         entry->offset = cpu_to_le64(offset);
483         entry->bytes = cpu_to_le64(bytes);
484         entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
485                 BTRFS_FREE_SPACE_EXTENT;
486         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
487         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
488
489         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
490                 return 0;
491
492         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
493
494         /* No more pages to map */
495         if (io_ctl->index >= io_ctl->num_pages)
496                 return 0;
497
498         /* map the next page */
499         io_ctl_map_page(io_ctl, 1);
500         return 0;
501 }
502
503 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
504 {
505         if (!io_ctl->cur)
506                 return -ENOSPC;
507
508         /*
509          * If we aren't at the start of the current page, unmap this one and
510          * map the next one if there is any left.
511          */
512         if (io_ctl->cur != io_ctl->orig) {
513                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
514                 if (io_ctl->index >= io_ctl->num_pages)
515                         return -ENOSPC;
516                 io_ctl_map_page(io_ctl, 0);
517         }
518
519         memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
520         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
521         if (io_ctl->index < io_ctl->num_pages)
522                 io_ctl_map_page(io_ctl, 0);
523         return 0;
524 }
525
526 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
527 {
528         /*
529          * If we're not on the boundary we know we've modified the page and we
530          * need to crc the page.
531          */
532         if (io_ctl->cur != io_ctl->orig)
533                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
534         else
535                 io_ctl_unmap_page(io_ctl);
536
537         while (io_ctl->index < io_ctl->num_pages) {
538                 io_ctl_map_page(io_ctl, 1);
539                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
540         }
541 }
542
543 static int io_ctl_read_entry(struct io_ctl *io_ctl,
544                             struct btrfs_free_space *entry, u8 *type)
545 {
546         struct btrfs_free_space_entry *e;
547         int ret;
548
549         if (!io_ctl->cur) {
550                 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
551                 if (ret)
552                         return ret;
553         }
554
555         e = io_ctl->cur;
556         entry->offset = le64_to_cpu(e->offset);
557         entry->bytes = le64_to_cpu(e->bytes);
558         *type = e->type;
559         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
560         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
561
562         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
563                 return 0;
564
565         io_ctl_unmap_page(io_ctl);
566
567         return 0;
568 }
569
570 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
571                               struct btrfs_free_space *entry)
572 {
573         int ret;
574
575         ret = io_ctl_check_crc(io_ctl, io_ctl->index);
576         if (ret)
577                 return ret;
578
579         memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
580         io_ctl_unmap_page(io_ctl);
581
582         return 0;
583 }
584
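/*
 * Load the on-disk cache for 'offset' into 'ctl': read the free space header
 * item for the entry/bitmap counts and the generation, refuse the cache if
 * the generation does not match the inode, then walk the cache pages
 * verifying checksums, linking the extent and bitmap entries into the rbtree
 * and finally reading the bitmap payloads that follow the entries.  Returns
 * 1 if the cache was loaded, 0 if there was nothing usable and a negative
 * value on error.
 */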
585 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
586                             struct btrfs_free_space_ctl *ctl,
587                             struct btrfs_path *path, u64 offset)
588 {
589         struct btrfs_free_space_header *header;
590         struct extent_buffer *leaf;
591         struct io_ctl io_ctl;
592         struct btrfs_key key;
593         struct btrfs_free_space *e, *n;
594         struct list_head bitmaps;
595         u64 num_entries;
596         u64 num_bitmaps;
597         u64 generation;
598         u8 type;
599         int ret = 0;
600
601         INIT_LIST_HEAD(&bitmaps);
602
603         /* Nothing in the space cache, goodbye */
604         if (!i_size_read(inode))
605                 return 0;
606
607         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
608         key.offset = offset;
609         key.type = 0;
610
611         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
612         if (ret < 0)
613                 return 0;
614         else if (ret > 0) {
615                 btrfs_release_path(path);
616                 return 0;
617         }
618
619         ret = -1;
620
621         leaf = path->nodes[0];
622         header = btrfs_item_ptr(leaf, path->slots[0],
623                                 struct btrfs_free_space_header);
624         num_entries = btrfs_free_space_entries(leaf, header);
625         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
626         generation = btrfs_free_space_generation(leaf, header);
627         btrfs_release_path(path);
628
629         if (BTRFS_I(inode)->generation != generation) {
630                 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
631                        " not match free space cache generation (%llu)\n",
632                        (unsigned long long)BTRFS_I(inode)->generation,
633                        (unsigned long long)generation);
634                 return 0;
635         }
636
637         if (!num_entries)
638                 return 0;
639
640         ret = io_ctl_init(&io_ctl, inode, root);
641         if (ret)
642                 return ret;
643
644         ret = readahead_cache(inode);
645         if (ret)
646                 goto out;
647
648         ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
649         if (ret)
650                 goto out;
651
652         ret = io_ctl_check_crc(&io_ctl, 0);
653         if (ret)
654                 goto free_cache;
655
656         ret = io_ctl_check_generation(&io_ctl, generation);
657         if (ret)
658                 goto free_cache;
659
660         while (num_entries) {
661                 e = kmem_cache_zalloc(btrfs_free_space_cachep,
662                                       GFP_NOFS);
663                 if (!e)
664                         goto free_cache;
665
666                 ret = io_ctl_read_entry(&io_ctl, e, &type);
667                 if (ret) {
668                         kmem_cache_free(btrfs_free_space_cachep, e);
669                         goto free_cache;
670                 }
671
672                 if (!e->bytes) {
673                         kmem_cache_free(btrfs_free_space_cachep, e);
674                         goto free_cache;
675                 }
676
677                 if (type == BTRFS_FREE_SPACE_EXTENT) {
678                         spin_lock(&ctl->tree_lock);
679                         ret = link_free_space(ctl, e);
680                         spin_unlock(&ctl->tree_lock);
681                         if (ret) {
682                                 printk(KERN_ERR "Duplicate entries in "
683                                        "free space cache, dumping\n");
684                                 kmem_cache_free(btrfs_free_space_cachep, e);
685                                 goto free_cache;
686                         }
687                 } else {
688                         BUG_ON(!num_bitmaps);
689                         num_bitmaps--;
690                         e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
691                         if (!e->bitmap) {
692                                 kmem_cache_free(
693                                         btrfs_free_space_cachep, e);
694                                 goto free_cache;
695                         }
696                         spin_lock(&ctl->tree_lock);
697                         ret = link_free_space(ctl, e);
698                         ctl->total_bitmaps++;
699                         ctl->op->recalc_thresholds(ctl);
700                         spin_unlock(&ctl->tree_lock);
701                         if (ret) {
702                                 printk(KERN_ERR "Duplicate entries in "
703                                        "free space cache, dumping\n");
704                                 kmem_cache_free(btrfs_free_space_cachep, e);
705                                 goto free_cache;
706                         }
707                         list_add_tail(&e->list, &bitmaps);
708                 }
709
710                 num_entries--;
711         }
712
713         io_ctl_unmap_page(&io_ctl);
714
715         /*
716          * Bitmap payloads are stored after all of the entries in the cache
717          * file, so we only read them once every entry has been added.
718          */
719         list_for_each_entry_safe(e, n, &bitmaps, list) {
720                 list_del_init(&e->list);
721                 ret = io_ctl_read_bitmap(&io_ctl, e);
722                 if (ret)
723                         goto free_cache;
724         }
725
726         io_ctl_drop_pages(&io_ctl);
727         ret = 1;
728 out:
729         io_ctl_free(&io_ctl);
730         return ret;
731 free_cache:
732         io_ctl_drop_pages(&io_ctl);
733         __btrfs_remove_free_space_cache(ctl);
734         goto out;
735 }
736
737 int load_free_space_cache(struct btrfs_fs_info *fs_info,
738                           struct btrfs_block_group_cache *block_group)
739 {
740         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
741         struct btrfs_root *root = fs_info->tree_root;
742         struct inode *inode;
743         struct btrfs_path *path;
744         int ret = 0;
745         bool matched;
746         u64 used = btrfs_block_group_used(&block_group->item);
747
748         /*
749          * If we're unmounting then just return, since this does a search on the
750          * normal root and not the commit root and we could deadlock.
751          */
752         if (btrfs_fs_closing(fs_info))
753                 return 0;
754
755         /*
756          * If this block group has been marked to be cleared for one reason or
757          * another then we can't trust the on disk cache, so just return.
758          */
759         spin_lock(&block_group->lock);
760         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
761                 spin_unlock(&block_group->lock);
762                 return 0;
763         }
764         spin_unlock(&block_group->lock);
765
766         path = btrfs_alloc_path();
767         if (!path)
768                 return 0;
769
770         inode = lookup_free_space_inode(root, block_group, path);
771         if (IS_ERR(inode)) {
772                 btrfs_free_path(path);
773                 return 0;
774         }
775
776         /* We may have converted the inode and made the cache invalid. */
777         spin_lock(&block_group->lock);
778         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
779                 spin_unlock(&block_group->lock);
780                 goto out;
781         }
782         spin_unlock(&block_group->lock);
783
784         ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
785                                       path, block_group->key.objectid);
786         btrfs_free_path(path);
787         if (ret <= 0)
788                 goto out;
789
790         spin_lock(&ctl->tree_lock);
791         matched = (ctl->free_space == (block_group->key.offset - used -
792                                        block_group->bytes_super));
793         spin_unlock(&ctl->tree_lock);
794
795         if (!matched) {
796                 __btrfs_remove_free_space_cache(ctl);
797                 printk(KERN_ERR "block group %llu has the wrong amount of free "
798                        "space\n", block_group->key.objectid);
799                 ret = -1;
800         }
801 out:
802         if (ret < 0) {
803                 /* This cache is bogus, make sure it gets cleared */
804                 spin_lock(&block_group->lock);
805                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
806                 spin_unlock(&block_group->lock);
807                 ret = 0;
808
809                 printk(KERN_ERR "btrfs: failed to load free space cache "
810                        "for block group %llu\n", block_group->key.objectid);
811         }
812
813         iput(inode);
814         return ret;
815 }
816
817 /**
818  * __btrfs_write_out_cache - write out cached info to an inode
819  * @root: the root the inode belongs to
820  * @ctl: the free space cache we are going to write out
821  * @block_group: the block_group for this cache, if it belongs to a block_group
822  * @trans: the trans handle
823  * @path: the path to use
824  * @offset: the offset for the key we'll insert
825  *
826  * This function writes out a free space cache struct to disk for quick recovery
827  * on mount.  This will return 0 if it was successful in writing the cache out,
828  * and -1 if it was not.
829  */
830 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
831                             struct btrfs_free_space_ctl *ctl,
832                             struct btrfs_block_group_cache *block_group,
833                             struct btrfs_trans_handle *trans,
834                             struct btrfs_path *path, u64 offset)
835 {
836         struct btrfs_free_space_header *header;
837         struct extent_buffer *leaf;
838         struct rb_node *node;
839         struct list_head *pos, *n;
840         struct extent_state *cached_state = NULL;
841         struct btrfs_free_cluster *cluster = NULL;
842         struct extent_io_tree *unpin = NULL;
843         struct io_ctl io_ctl;
844         struct list_head bitmap_list;
845         struct btrfs_key key;
846         u64 start, extent_start, extent_end, len;
847         int entries = 0;
848         int bitmaps = 0;
849         int ret;
850         int err = -1;
851
852         INIT_LIST_HEAD(&bitmap_list);
853
854         if (!i_size_read(inode))
855                 return -1;
856
857         ret = io_ctl_init(&io_ctl, inode, root);
858         if (ret)
859                 return -1;
860
861         /* Get the cluster for this block_group if it exists */
862         if (block_group && !list_empty(&block_group->cluster_list))
863                 cluster = list_entry(block_group->cluster_list.next,
864                                      struct btrfs_free_cluster,
865                                      block_group_list);
866
867         /* Lock all pages first so we can lock the extent safely. */
868         io_ctl_prepare_pages(&io_ctl, inode, 0);
869
870         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
871                          0, &cached_state, GFP_NOFS);
872
873         node = rb_first(&ctl->free_space_offset);
874         if (!node && cluster) {
875                 node = rb_first(&cluster->root);
876                 cluster = NULL;
877         }
878
879         /* Make sure we can fit our crcs into the first page */
880         if (io_ctl.check_crcs &&
881             (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
882                 WARN_ON(1);
883                 goto out_nospc;
884         }
885
886         io_ctl_set_generation(&io_ctl, trans->transid);
887
888         /* Write out the extent entries */
889         while (node) {
890                 struct btrfs_free_space *e;
891
892                 e = rb_entry(node, struct btrfs_free_space, offset_index);
893                 entries++;
894
895                 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
896                                        e->bitmap);
897                 if (ret)
898                         goto out_nospc;
899
900                 if (e->bitmap) {
901                         list_add_tail(&e->list, &bitmap_list);
902                         bitmaps++;
903                 }
904                 node = rb_next(node);
905                 if (!node && cluster) {
906                         node = rb_first(&cluster->root);
907                         cluster = NULL;
908                 }
909         }
910
911         /*
912          * We want to add any pinned extents to our free space cache
913          * so we don't leak the space
914          */
915
916         /*
917          * We shouldn't have switched the pinned extents yet so this is the
918          * right one
919          */
920         unpin = root->fs_info->pinned_extents;
921
922         if (block_group)
923                 start = block_group->key.objectid;
924
925         while (block_group && (start < block_group->key.objectid +
926                                block_group->key.offset)) {
927                 ret = find_first_extent_bit(unpin, start,
928                                             &extent_start, &extent_end,
929                                             EXTENT_DIRTY);
930                 if (ret) {
931                         ret = 0;
932                         break;
933                 }
934
935                 /* This pinned extent is out of our range */
936                 if (extent_start >= block_group->key.objectid +
937                     block_group->key.offset)
938                         break;
939
940                 extent_start = max(extent_start, start);
941                 extent_end = min(block_group->key.objectid +
942                                  block_group->key.offset, extent_end + 1);
943                 len = extent_end - extent_start;
944
945                 entries++;
946                 ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
947                 if (ret)
948                         goto out_nospc;
949
950                 start = extent_end;
951         }
952
953         /* Write out the bitmaps */
954         list_for_each_safe(pos, n, &bitmap_list) {
955                 struct btrfs_free_space *entry =
956                         list_entry(pos, struct btrfs_free_space, list);
957
958                 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
959                 if (ret)
960                         goto out_nospc;
961                 list_del_init(&entry->list);
962         }
963
964         /* Zero out the rest of the pages just to make sure */
965         io_ctl_zero_remaining_pages(&io_ctl);
966
967         ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
968                                 0, i_size_read(inode), &cached_state);
969         io_ctl_drop_pages(&io_ctl);
970         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
971                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
972
973         if (ret)
974                 goto out;
975
976
977         ret = filemap_write_and_wait(inode->i_mapping);
978         if (ret)
979                 goto out;
980
981         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
982         key.offset = offset;
983         key.type = 0;
984
985         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
986         if (ret < 0) {
987                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
988                                  EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
989                                  GFP_NOFS);
990                 goto out;
991         }
992         leaf = path->nodes[0];
993         if (ret > 0) {
994                 struct btrfs_key found_key;
995                 BUG_ON(!path->slots[0]);
996                 path->slots[0]--;
997                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
998                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
999                     found_key.offset != offset) {
1000                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1001                                          inode->i_size - 1,
1002                                          EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1003                                          NULL, GFP_NOFS);
1004                         btrfs_release_path(path);
1005                         goto out;
1006                 }
1007         }
1008
1009         BTRFS_I(inode)->generation = trans->transid;
1010         header = btrfs_item_ptr(leaf, path->slots[0],
1011                                 struct btrfs_free_space_header);
1012         btrfs_set_free_space_entries(leaf, header, entries);
1013         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1014         btrfs_set_free_space_generation(leaf, header, trans->transid);
1015         btrfs_mark_buffer_dirty(leaf);
1016         btrfs_release_path(path);
1017
1018         err = 0;
1019 out:
1020         io_ctl_free(&io_ctl);
1021         if (err) {
1022                 invalidate_inode_pages2(inode->i_mapping);
1023                 BTRFS_I(inode)->generation = 0;
1024         }
1025         btrfs_update_inode(trans, root, inode);
1026         return err;
1027
1028 out_nospc:
1029         list_for_each_safe(pos, n, &bitmap_list) {
1030                 struct btrfs_free_space *entry =
1031                         list_entry(pos, struct btrfs_free_space, list);
1032                 list_del_init(&entry->list);
1033         }
1034         io_ctl_drop_pages(&io_ctl);
1035         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1036                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1037         goto out;
1038 }
1039
1040 int btrfs_write_out_cache(struct btrfs_root *root,
1041                           struct btrfs_trans_handle *trans,
1042                           struct btrfs_block_group_cache *block_group,
1043                           struct btrfs_path *path)
1044 {
1045         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1046         struct inode *inode;
1047         int ret = 0;
1048
1049         root = root->fs_info->tree_root;
1050
1051         spin_lock(&block_group->lock);
1052         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1053                 spin_unlock(&block_group->lock);
1054                 return 0;
1055         }
1056         spin_unlock(&block_group->lock);
1057
1058         inode = lookup_free_space_inode(root, block_group, path);
1059         if (IS_ERR(inode))
1060                 return 0;
1061
1062         ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1063                                       path, block_group->key.objectid);
1064         if (ret) {
1065                 spin_lock(&block_group->lock);
1066                 block_group->disk_cache_state = BTRFS_DC_ERROR;
1067                 spin_unlock(&block_group->lock);
1068                 ret = 0;
1069 #ifdef DEBUG
1070                 printk(KERN_ERR "btrfs: failed to write free space cache "
1071                        "for block group %llu\n", block_group->key.objectid);
1072 #endif
1073         }
1074
1075         iput(inode);
1076         return ret;
1077 }
1078
1079 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1080                                           u64 offset)
1081 {
1082         BUG_ON(offset < bitmap_start);
1083         offset -= bitmap_start;
1084         return (unsigned long)(div_u64(offset, unit));
1085 }
1086
1087 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1088 {
1089         return (unsigned long)(div_u64(bytes, unit));
1090 }
1091
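/* round 'offset' down to the start of the bitmap that covers it */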
1092 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1093                                    u64 offset)
1094 {
1095         u64 bitmap_start;
1096         u64 bytes_per_bitmap;
1097
1098         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1099         bitmap_start = offset - ctl->start;
1100         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1101         bitmap_start *= bytes_per_bitmap;
1102         bitmap_start += ctl->start;
1103
1104         return bitmap_start;
1105 }
1106
1107 static int tree_insert_offset(struct rb_root *root, u64 offset,
1108                               struct rb_node *node, int bitmap)
1109 {
1110         struct rb_node **p = &root->rb_node;
1111         struct rb_node *parent = NULL;
1112         struct btrfs_free_space *info;
1113
1114         while (*p) {
1115                 parent = *p;
1116                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1117
1118                 if (offset < info->offset) {
1119                         p = &(*p)->rb_left;
1120                 } else if (offset > info->offset) {
1121                         p = &(*p)->rb_right;
1122                 } else {
1123                         /*
1124                          * we could have a bitmap entry and an extent entry
1125                          * share the same offset.  If this is the case, we want
1126                          * the extent entry to always be found first if we do a
1127                          * linear search through the tree, since we want to have
1128                          * the quickest allocation time, and allocating from an
1129                          * extent is faster than allocating from a bitmap.  So
1130                          * if we're inserting a bitmap and we find an entry at
1131                          * this offset, we want to go right, or after this entry
1132                          * logically.  If we are inserting an extent and we've
1133                          * found a bitmap, we want to go left, or before
1134                          * logically.
1135                          */
1136                         if (bitmap) {
1137                                 if (info->bitmap) {
1138                                         WARN_ON_ONCE(1);
1139                                         return -EEXIST;
1140                                 }
1141                                 p = &(*p)->rb_right;
1142                         } else {
1143                                 if (!info->bitmap) {
1144                                         WARN_ON_ONCE(1);
1145                                         return -EEXIST;
1146                                 }
1147                                 p = &(*p)->rb_left;
1148                         }
1149                 }
1150         }
1151
1152         rb_link_node(node, parent, p);
1153         rb_insert_color(node, root);
1154
1155         return 0;
1156 }
1157
1158 /*
1159  * searches the tree for the given offset.
1160  *
1161  * fuzzy - If this is set, then we are trying to make an allocation, and we just
1162  * want a section that is at least 'bytes' in size and starts at or after the
1163  * given offset.
1164  */
1165 static struct btrfs_free_space *
1166 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1167                    u64 offset, int bitmap_only, int fuzzy)
1168 {
1169         struct rb_node *n = ctl->free_space_offset.rb_node;
1170         struct btrfs_free_space *entry, *prev = NULL;
1171
1172         /* find entry that is closest to the 'offset' */
1173         while (1) {
1174                 if (!n) {
1175                         entry = NULL;
1176                         break;
1177                 }
1178
1179                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1180                 prev = entry;
1181
1182                 if (offset < entry->offset)
1183                         n = n->rb_left;
1184                 else if (offset > entry->offset)
1185                         n = n->rb_right;
1186                 else
1187                         break;
1188         }
1189
1190         if (bitmap_only) {
1191                 if (!entry)
1192                         return NULL;
1193                 if (entry->bitmap)
1194                         return entry;
1195
1196                 /*
1197                  * bitmap entry and extent entry may share same offset,
1198                  * in that case, bitmap entry comes after extent entry.
1199                  */
1200                 n = rb_next(n);
1201                 if (!n)
1202                         return NULL;
1203                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1204                 if (entry->offset != offset)
1205                         return NULL;
1206
1207                 WARN_ON(!entry->bitmap);
1208                 return entry;
1209         } else if (entry) {
1210                 if (entry->bitmap) {
1211                         /*
1212                          * if previous extent entry covers the offset,
1213                          * we should return it instead of the bitmap entry
1214                          */
1215                         n = &entry->offset_index;
1216                         while (1) {
1217                                 n = rb_prev(n);
1218                                 if (!n)
1219                                         break;
1220                                 prev = rb_entry(n, struct btrfs_free_space,
1221                                                 offset_index);
1222                                 if (!prev->bitmap) {
1223                                         if (prev->offset + prev->bytes > offset)
1224                                                 entry = prev;
1225                                         break;
1226                                 }
1227                         }
1228                 }
1229                 return entry;
1230         }
1231
1232         if (!prev)
1233                 return NULL;
1234
1235         /* find last entry before the 'offset' */
1236         entry = prev;
1237         if (entry->offset > offset) {
1238                 n = rb_prev(&entry->offset_index);
1239                 if (n) {
1240                         entry = rb_entry(n, struct btrfs_free_space,
1241                                         offset_index);
1242                         BUG_ON(entry->offset > offset);
1243                 } else {
1244                         if (fuzzy)
1245                                 return entry;
1246                         else
1247                                 return NULL;
1248                 }
1249         }
1250
1251         if (entry->bitmap) {
1252                 n = &entry->offset_index;
1253                 while (1) {
1254                         n = rb_prev(n);
1255                         if (!n)
1256                                 break;
1257                         prev = rb_entry(n, struct btrfs_free_space,
1258                                         offset_index);
1259                         if (!prev->bitmap) {
1260                                 if (prev->offset + prev->bytes > offset)
1261                                         return prev;
1262                                 break;
1263                         }
1264                 }
1265                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1266                         return entry;
1267         } else if (entry->offset + entry->bytes > offset)
1268                 return entry;
1269
1270         if (!fuzzy)
1271                 return NULL;
1272
1273         while (1) {
1274                 if (entry->bitmap) {
1275                         if (entry->offset + BITS_PER_BITMAP *
1276                             ctl->unit > offset)
1277                                 break;
1278                 } else {
1279                         if (entry->offset + entry->bytes > offset)
1280                                 break;
1281                 }
1282
1283                 n = rb_next(&entry->offset_index);
1284                 if (!n)
1285                         return NULL;
1286                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1287         }
1288         return entry;
1289 }
1290
1291 static inline void
1292 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1293                     struct btrfs_free_space *info)
1294 {
1295         rb_erase(&info->offset_index, &ctl->free_space_offset);
1296         ctl->free_extents--;
1297 }
1298
1299 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1300                               struct btrfs_free_space *info)
1301 {
1302         __unlink_free_space(ctl, info);
1303         ctl->free_space -= info->bytes;
1304 }
1305
1306 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1307                            struct btrfs_free_space *info)
1308 {
1309         int ret = 0;
1310
1311         BUG_ON(!info->bitmap && !info->bytes);
1312         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1313                                  &info->offset_index, (info->bitmap != NULL));
1314         if (ret)
1315                 return ret;
1316
1317         ctl->free_space += info->bytes;
1318         ctl->free_extents++;
1319         return ret;
1320 }
1321
1322 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1323 {
1324         struct btrfs_block_group_cache *block_group = ctl->private;
1325         u64 max_bytes;
1326         u64 bitmap_bytes;
1327         u64 extent_bytes;
1328         u64 size = block_group->key.offset;
1329         u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1330         int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1331
1332         BUG_ON(ctl->total_bitmaps > max_bitmaps);
1333
1334         /*
1335          * The goal is to keep the total amount of memory used per 1gb of space
1336          * at or below 32k, so we need to adjust how much memory we allow to be
1337          * used by extent based free space tracking
1338          */
1339         if (size < 1024 * 1024 * 1024)
1340                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1341         else
1342                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1343                         div64_u64(size, 1024 * 1024 * 1024);
1344
1345         /*
1346          * we want to account for 1 more bitmap than what we have so we can make
1347          * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1348          * we add more bitmaps.
1349          */
1350         bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1351
1352         if (bitmap_bytes >= max_bytes) {
1353                 ctl->extents_thresh = 0;
1354                 return;
1355         }
1356
1357         /*
1358          * we want the extent entry threshold to be based on whatever is left
1359          * over after the bitmaps, capped at 1/2 the max bytes we can have.
1360          */
1361         extent_bytes = max_bytes - bitmap_bytes;
1362         extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1363
1364         ctl->extents_thresh =
1365                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1366 }
1367
1368 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1369                                        struct btrfs_free_space *info,
1370                                        u64 offset, u64 bytes)
1371 {
1372         unsigned long start, count;
1373
1374         start = offset_to_bit(info->offset, ctl->unit, offset);
1375         count = bytes_to_bits(bytes, ctl->unit);
1376         BUG_ON(start + count > BITS_PER_BITMAP);
1377
1378         bitmap_clear(info->bitmap, start, count);
1379
1380         info->bytes -= bytes;
1381 }
1382
1383 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1384                               struct btrfs_free_space *info, u64 offset,
1385                               u64 bytes)
1386 {
1387         __bitmap_clear_bits(ctl, info, offset, bytes);
1388         ctl->free_space -= bytes;
1389 }
1390
1391 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1392                             struct btrfs_free_space *info, u64 offset,
1393                             u64 bytes)
1394 {
1395         unsigned long start, count;
1396
1397         start = offset_to_bit(info->offset, ctl->unit, offset);
1398         count = bytes_to_bits(bytes, ctl->unit);
1399         BUG_ON(start + count > BITS_PER_BITMAP);
1400
1401         bitmap_set(info->bitmap, start, count);
1402
1403         info->bytes += bytes;
1404         ctl->free_space += bytes;
1405 }
1406
1407 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1408                          struct btrfs_free_space *bitmap_info, u64 *offset,
1409                          u64 *bytes)
1410 {
1411         unsigned long found_bits = 0;
1412         unsigned long bits, i;
1413         unsigned long next_zero;
1414
1415         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1416                           max_t(u64, *offset, bitmap_info->offset));
1417         bits = bytes_to_bits(*bytes, ctl->unit);
1418
1419         for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1420              i < BITS_PER_BITMAP;
1421              i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1422                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1423                                                BITS_PER_BITMAP, i);
1424                 if ((next_zero - i) >= bits) {
1425                         found_bits = next_zero - i;
1426                         break;
1427                 }
1428                 i = next_zero;
1429         }
1430
1431         if (found_bits) {
1432                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1433                 *bytes = (u64)(found_bits) * ctl->unit;
1434                 return 0;
1435         }
1436
1437         return -1;
1438 }
1439
1440 static struct btrfs_free_space *
1441 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1442 {
1443         struct btrfs_free_space *entry;
1444         struct rb_node *node;
1445         int ret;
1446
1447         if (!ctl->free_space_offset.rb_node)
1448                 return NULL;
1449
1450         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1451         if (!entry)
1452                 return NULL;
1453
1454         for (node = &entry->offset_index; node; node = rb_next(node)) {
1455                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1456                 if (entry->bytes < *bytes)
1457                         continue;
1458
1459                 if (entry->bitmap) {
1460                         ret = search_bitmap(ctl, entry, offset, bytes);
1461                         if (!ret)
1462                                 return entry;
1463                         continue;
1464                 }
1465
1466                 *offset = entry->offset;
1467                 *bytes = entry->bytes;
1468                 return entry;
1469         }
1470
1471         return NULL;
1472 }
1473
1474 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1475                            struct btrfs_free_space *info, u64 offset)
1476 {
1477         info->offset = offset_to_bitmap(ctl, offset);
1478         info->bytes = 0;
1479         INIT_LIST_HEAD(&info->list);
1480         link_free_space(ctl, info);
1481         ctl->total_bitmaps++;
1482
1483         ctl->op->recalc_thresholds(ctl);
1484 }
1485
1486 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1487                         struct btrfs_free_space *bitmap_info)
1488 {
1489         unlink_free_space(ctl, bitmap_info);
1490         kfree(bitmap_info->bitmap);
1491         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1492         ctl->total_bitmaps--;
1493         ctl->op->recalc_thresholds(ctl);
1494 }
1495
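/*
 * Clear *bytes worth of space out of this bitmap, starting at *offset.  If
 * the range extends past the end of the bitmap, *offset and *bytes are
 * advanced and the next bitmap is processed.  Returns 0 on success,
 * -EAGAIN if the caller needs to retry the lookup from the top, or -EINVAL
 * if the remaining space could not be found at all.
 */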
1496 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1497                               struct btrfs_free_space *bitmap_info,
1498                               u64 *offset, u64 *bytes)
1499 {
1500         u64 end;
1501         u64 search_start, search_bytes;
1502         int ret;
1503
1504 again:
1505         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1506
1507         /*
1508          * XXX - this can go away after a few releases.
1509          *
1510          * since the only user of btrfs_remove_free_space is the tree logging
1511          * stuff, and the only way to test that is under crash conditions, we
1512          * want to have this debug stuff here just in case something's not
1513          * working.  Search the bitmap for the space we are trying to use to
1514          * make sure it's actually there.  If it's not there then we need to
1515          * stop because something has gone wrong.
1516          */
1517         search_start = *offset;
1518         search_bytes = *bytes;
1519         search_bytes = min(search_bytes, end - search_start + 1);
1520         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1521         BUG_ON(ret < 0 || search_start != *offset);
1522
1523         if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1524                 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1525                 *bytes -= end - *offset + 1;
1526                 *offset = end + 1;
1527         } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1528                 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1529                 *bytes = 0;
1530         }
1531
1532         if (*bytes) {
1533                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1534                 if (!bitmap_info->bytes)
1535                         free_bitmap(ctl, bitmap_info);
1536
1537                 /*
1538                  * no entry after this bitmap, but we still have bytes to
1539                  * remove, so something has gone wrong.
1540                  */
1541                 if (!next)
1542                         return -EINVAL;
1543
1544                 bitmap_info = rb_entry(next, struct btrfs_free_space,
1545                                        offset_index);
1546
1547                 /*
1548                  * if the next entry isn't a bitmap we need to return to let the
1549                  * extent stuff do its work.
1550                  */
1551                 if (!bitmap_info->bitmap)
1552                         return -EAGAIN;
1553
1554                 /*
1555                  * Ok the next item is a bitmap, but it may not actually hold
1556                  * the information for the rest of this free space stuff, so
1557                  * look for it, and if we don't find it return so we can try
1558                  * everything over again.
1559                  */
1560                 search_start = *offset;
1561                 search_bytes = *bytes;
1562                 ret = search_bitmap(ctl, bitmap_info, &search_start,
1563                                     &search_bytes);
1564                 if (ret < 0 || search_start != *offset)
1565                         return -EAGAIN;
1566
1567                 goto again;
1568         } else if (!bitmap_info->bytes)
1569                 free_bitmap(ctl, bitmap_info);
1570
1571         return 0;
1572 }
1573
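/*
 * Record as much of [offset, offset + bytes) as fits inside this bitmap
 * and return the number of bytes that were actually set.
 */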
1574 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1575                                struct btrfs_free_space *info, u64 offset,
1576                                u64 bytes)
1577 {
1578         u64 bytes_to_set = 0;
1579         u64 end;
1580
1581         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1582
1583         bytes_to_set = min(end - offset, bytes);
1584
1585         bitmap_set_bits(ctl, info, offset, bytes_to_set);
1586
1587         return bytes_to_set;
1588
1589 }
1590
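/*
 * Decide whether a new free space entry should be folded into a bitmap
 * instead of being stored as a plain extent entry.
 */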
1591 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1592                       struct btrfs_free_space *info)
1593 {
1594         struct btrfs_block_group_cache *block_group = ctl->private;
1595
1596         /*
1597          * If we are below the extents threshold then we can add this as an
1598          * extent, and don't have to deal with the bitmap
1599          */
1600         if (ctl->free_extents < ctl->extents_thresh) {
1601                 /*
1602                  * If this block group has some small extents we don't want to
1603                  * use up all of our free slots in the cache with them, we want
1604                  * to reserve them for larger extents, however if we have plenty
1605                  * of cache left then go ahead and add them, no sense in adding
1606                  * the overhead of a bitmap if we don't have to.
1607                  */
1608                 if (info->bytes <= block_group->sectorsize * 4) {
1609                         if (ctl->free_extents * 2 <= ctl->extents_thresh)
1610                                 return false;
1611                 } else {
1612                         return false;
1613                 }
1614         }
1615
1616         /*
1617          * some block groups are so tiny they can't be enveloped by a bitmap, so
1618          * don't even bother to create a bitmap for this
1619          */
1620         if (BITS_PER_BITMAP * block_group->sectorsize >
1621             block_group->key.offset)
1622                 return false;
1623
1624         return true;
1625 }
1626
1627 static struct btrfs_free_space_op free_space_op = {
1628         .recalc_thresholds      = recalculate_thresholds,
1629         .use_bitmap             = use_bitmap,
1630 };
1631
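/*
 * Try to store the free space described by 'info' in a bitmap: first in a
 * bitmap owned by the allocation cluster, then in an existing bitmap in
 * the tree, and finally in a newly allocated bitmap.  Returns 1 if the
 * space was added to a bitmap, 0 if bitmaps should not be used for this
 * entry, or a negative errno on allocation failure.
 */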
1632 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1633                               struct btrfs_free_space *info)
1634 {
1635         struct btrfs_free_space *bitmap_info;
1636         struct btrfs_block_group_cache *block_group = NULL;
1637         int added = 0;
1638         u64 bytes, offset, bytes_added;
1639         int ret;
1640
1641         bytes = info->bytes;
1642         offset = info->offset;
1643
1644         if (!ctl->op->use_bitmap(ctl, info))
1645                 return 0;
1646
1647         if (ctl->op == &free_space_op)
1648                 block_group = ctl->private;
1649 again:
1650         /*
1651          * Since we link bitmaps right into the cluster, we need to see if we
1652          * have a cluster here; if so, and it has our bitmap, we need to add
1653          * the free space to that bitmap.
1654          */
1655         if (block_group && !list_empty(&block_group->cluster_list)) {
1656                 struct btrfs_free_cluster *cluster;
1657                 struct rb_node *node;
1658                 struct btrfs_free_space *entry;
1659
1660                 cluster = list_entry(block_group->cluster_list.next,
1661                                      struct btrfs_free_cluster,
1662                                      block_group_list);
1663                 spin_lock(&cluster->lock);
1664                 node = rb_first(&cluster->root);
1665                 if (!node) {
1666                         spin_unlock(&cluster->lock);
1667                         goto no_cluster_bitmap;
1668                 }
1669
1670                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1671                 if (!entry->bitmap) {
1672                         spin_unlock(&cluster->lock);
1673                         goto no_cluster_bitmap;
1674                 }
1675
1676                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1677                         bytes_added = add_bytes_to_bitmap(ctl, entry,
1678                                                           offset, bytes);
1679                         bytes -= bytes_added;
1680                         offset += bytes_added;
1681                 }
1682                 spin_unlock(&cluster->lock);
1683                 if (!bytes) {
1684                         ret = 1;
1685                         goto out;
1686                 }
1687         }
1688
1689 no_cluster_bitmap:
1690         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1691                                          1, 0);
1692         if (!bitmap_info) {
1693                 BUG_ON(added);
1694                 goto new_bitmap;
1695         }
1696
1697         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1698         bytes -= bytes_added;
1699         offset += bytes_added;
1700         added = 0;
1701
1702         if (!bytes) {
1703                 ret = 1;
1704                 goto out;
1705         } else
1706                 goto again;
1707
1708 new_bitmap:
1709         if (info && info->bitmap) {
1710                 add_new_bitmap(ctl, info, offset);
1711                 added = 1;
1712                 info = NULL;
1713                 goto again;
1714         } else {
1715                 spin_unlock(&ctl->tree_lock);
1716
1717                 /* no pre-allocated info, allocate a new one */
1718                 if (!info) {
1719                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
1720                                                  GFP_NOFS);
1721                         if (!info) {
1722                                 spin_lock(&ctl->tree_lock);
1723                                 ret = -ENOMEM;
1724                                 goto out;
1725                         }
1726                 }
1727
1728                 /* allocate the bitmap */
1729                 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1730                 spin_lock(&ctl->tree_lock);
1731                 if (!info->bitmap) {
1732                         ret = -ENOMEM;
1733                         goto out;
1734                 }
1735                 goto again;
1736         }
1737
1738 out:
1739         if (info) {
1740                 if (info->bitmap)
1741                         kfree(info->bitmap);
1742                 kmem_cache_free(btrfs_free_space_cachep, info);
1743         }
1744
1745         return ret;
1746 }
1747
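/*
 * Merge the new entry with any extent entries directly adjacent to it in
 * the tree, absorbing their bytes into 'info'.  Bitmap neighbors are left
 * alone.  Returns true if anything was merged.
 */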
1748 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1749                           struct btrfs_free_space *info, bool update_stat)
1750 {
1751         struct btrfs_free_space *left_info;
1752         struct btrfs_free_space *right_info;
1753         bool merged = false;
1754         u64 offset = info->offset;
1755         u64 bytes = info->bytes;
1756
1757         /*
1758          * first we want to see if there is free space adjacent to the range we
1759          * are adding; if there is, remove that struct and add a new one to
1760          * cover the entire range
1761          */
1762         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1763         if (right_info && rb_prev(&right_info->offset_index))
1764                 left_info = rb_entry(rb_prev(&right_info->offset_index),
1765                                      struct btrfs_free_space, offset_index);
1766         else
1767                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1768
1769         if (right_info && !right_info->bitmap) {
1770                 if (update_stat)
1771                         unlink_free_space(ctl, right_info);
1772                 else
1773                         __unlink_free_space(ctl, right_info);
1774                 info->bytes += right_info->bytes;
1775                 kmem_cache_free(btrfs_free_space_cachep, right_info);
1776                 merged = true;
1777         }
1778
1779         if (left_info && !left_info->bitmap &&
1780             left_info->offset + left_info->bytes == offset) {
1781                 if (update_stat)
1782                         unlink_free_space(ctl, left_info);
1783                 else
1784                         __unlink_free_space(ctl, left_info);
1785                 info->offset = left_info->offset;
1786                 info->bytes += left_info->bytes;
1787                 kmem_cache_free(btrfs_free_space_cachep, left_info);
1788                 merged = true;
1789         }
1790
1791         return merged;
1792 }
1793
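/*
 * Add [offset, offset + bytes) to the free space cache, merging with
 * adjacent extent entries when possible and falling back to a bitmap when
 * the extent threshold has been reached.  Returns 0 on success or a
 * negative errno on failure.
 */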
1794 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1795                            u64 offset, u64 bytes)
1796 {
1797         struct btrfs_free_space *info;
1798         int ret = 0;
1799
1800         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1801         if (!info)
1802                 return -ENOMEM;
1803
1804         info->offset = offset;
1805         info->bytes = bytes;
1806
1807         spin_lock(&ctl->tree_lock);
1808
1809         if (try_merge_free_space(ctl, info, true))
1810                 goto link;
1811
1812         /*
1813          * If there was no extent directly to the left or right of this new
1814          * extent then we know we're going to have to allocate a new extent,
1815          * so before we do that see if we need to drop this into a bitmap.
1816          */
1817         ret = insert_into_bitmap(ctl, info);
1818         if (ret < 0) {
1819                 goto out;
1820         } else if (ret) {
1821                 ret = 0;
1822                 goto out;
1823         }
1824 link:
1825         ret = link_free_space(ctl, info);
1826         if (ret)
1827                 kmem_cache_free(btrfs_free_space_cachep, info);
1828 out:
1829         spin_unlock(&ctl->tree_lock);
1830
1831         if (ret) {
1832                 printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
1833                 BUG_ON(ret == -EEXIST);
1834         }
1835
1836         return ret;
1837 }
1838
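/*
 * Remove [offset, offset + bytes) from the free space cache of this block
 * group.  The range may be covered by extent entries, bitmap entries or a
 * mix of both; extents are split or shrunk as needed and bitmaps have the
 * matching bits cleared.
 */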
1839 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1840                             u64 offset, u64 bytes)
1841 {
1842         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1843         struct btrfs_free_space *info;
1844         struct btrfs_free_space *next_info = NULL;
1845         int ret = 0;
1846
1847         spin_lock(&ctl->tree_lock);
1848
1849 again:
1850         info = tree_search_offset(ctl, offset, 0, 0);
1851         if (!info) {
1852                 /*
1853                  * oops, we didn't find an extent that matched the space we
1854                  * wanted to remove, so look for a bitmap instead
1855                  */
1856                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1857                                           1, 0);
1858                 if (!info) {
1859                         /* the tree logging code might be calling us before we
1860                          * have fully loaded the free space rbtree for this
1861                          * block group.  So it is possible the entry won't
1862                          * be in the rbtree yet at all.  The caching code
1863                          * will make sure not to put it in the rbtree if
1864                          * the logging code has pinned it.
1865                          */
1866                         goto out_lock;
1867                 }
1868         }
1869
1870         if (info->bytes < bytes && rb_next(&info->offset_index)) {
1871                 u64 end;
1872                 next_info = rb_entry(rb_next(&info->offset_index),
1873                                              struct btrfs_free_space,
1874                                              offset_index);
1875
1876                 if (next_info->bitmap)
1877                         end = next_info->offset +
1878                               BITS_PER_BITMAP * ctl->unit - 1;
1879                 else
1880                         end = next_info->offset + next_info->bytes;
1881
1882                 if (next_info->bytes < bytes ||
1883                     next_info->offset > offset || offset > end) {
1884                         printk(KERN_CRIT "Found free space at %llu, size %llu,"
1885                               " trying to use %llu\n",
1886                               (unsigned long long)info->offset,
1887                               (unsigned long long)info->bytes,
1888                               (unsigned long long)bytes);
1889                         WARN_ON(1);
1890                         ret = -EINVAL;
1891                         goto out_lock;
1892                 }
1893
1894                 info = next_info;
1895         }
1896
1897         if (info->bytes == bytes) {
1898                 unlink_free_space(ctl, info);
1899                 if (info->bitmap) {
1900                         kfree(info->bitmap);
1901                         ctl->total_bitmaps--;
1902                 }
1903                 kmem_cache_free(btrfs_free_space_cachep, info);
1904                 ret = 0;
1905                 goto out_lock;
1906         }
1907
1908         if (!info->bitmap && info->offset == offset) {
1909                 unlink_free_space(ctl, info);
1910                 info->offset += bytes;
1911                 info->bytes -= bytes;
1912                 ret = link_free_space(ctl, info);
1913                 WARN_ON(ret);
1914                 goto out_lock;
1915         }
1916
1917         if (!info->bitmap && info->offset <= offset &&
1918             info->offset + info->bytes >= offset + bytes) {
1919                 u64 old_start = info->offset;
1920                 /*
1921                  * we're freeing space in the middle of the info,
1922                  * this can happen during tree log replay
1923                  *
1924                  * first unlink the old info and then
1925                  * insert it again after the hole we're creating
1926                  */
1927                 unlink_free_space(ctl, info);
1928                 if (offset + bytes < info->offset + info->bytes) {
1929                         u64 old_end = info->offset + info->bytes;
1930
1931                         info->offset = offset + bytes;
1932                         info->bytes = old_end - info->offset;
1933                         ret = link_free_space(ctl, info);
1934                         WARN_ON(ret);
1935                         if (ret)
1936                                 goto out_lock;
1937                 } else {
1938                         /* the hole we're creating ends at the end
1939                          * of the info struct, just free the info
1940                          */
1941                         kmem_cache_free(btrfs_free_space_cachep, info);
1942                 }
1943                 spin_unlock(&ctl->tree_lock);
1944
1945                 /* step two, insert a new info struct to cover
1946                  * anything before the hole
1947                  */
1948                 ret = btrfs_add_free_space(block_group, old_start,
1949                                            offset - old_start);
1950                 WARN_ON(ret);
1951                 goto out;
1952         }
1953
1954         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1955         if (ret == -EAGAIN)
1956                 goto again;
1957         BUG_ON(ret);
1958 out_lock:
1959         spin_unlock(&ctl->tree_lock);
1960 out:
1961         return ret;
1962 }
1963
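/*
 * Debugging helper: print every free space entry in this block group and
 * count how many of them are at least 'bytes' in size.
 */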
1964 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1965                            u64 bytes)
1966 {
1967         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1968         struct btrfs_free_space *info;
1969         struct rb_node *n;
1970         int count = 0;
1971
1972         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1973                 info = rb_entry(n, struct btrfs_free_space, offset_index);
1974                 if (info->bytes >= bytes)
1975                         count++;
1976                 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1977                        (unsigned long long)info->offset,
1978                        (unsigned long long)info->bytes,
1979                        (info->bitmap) ? "yes" : "no");
1980         }
1981         printk(KERN_INFO "block group has cluster?: %s\n",
1982                list_empty(&block_group->cluster_list) ? "no" : "yes");
1983         printk(KERN_INFO "%d blocks of free space at or bigger than %llu bytes\n",
1984                count, (unsigned long long)bytes);
1985 }
1986
1987 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1988 {
1989         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1990
1991         spin_lock_init(&ctl->tree_lock);
1992         ctl->unit = block_group->sectorsize;
1993         ctl->start = block_group->key.objectid;
1994         ctl->private = block_group;
1995         ctl->op = &free_space_op;
1996
1997         /*
1998          * we only want to have 32k of ram per block group for keeping
1999          * track of free space, and if we pass 1/2 of that we want to
2000          * start converting things over to using bitmaps
2001          */
2002         ctl->extents_thresh = ((1024 * 32) / 2) /
2003                                 sizeof(struct btrfs_free_space);
2004 }
2005
2006 /*
2007  * for a given cluster, put all of its extents back into the free
2008  * space cache.  If the block group passed doesn't match the block group
2009  * pointed to by the cluster, someone else raced in and freed the
2010  * cluster already.  In that case, we just return without changing anything
2011  */
2012 static int
2013 __btrfs_return_cluster_to_free_space(
2014                              struct btrfs_block_group_cache *block_group,
2015                              struct btrfs_free_cluster *cluster)
2016 {
2017         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2018         struct btrfs_free_space *entry;
2019         struct rb_node *node;
2020
2021         spin_lock(&cluster->lock);
2022         if (cluster->block_group != block_group)
2023                 goto out;
2024
2025         cluster->block_group = NULL;
2026         cluster->window_start = 0;
2027         list_del_init(&cluster->block_group_list);
2028
2029         node = rb_first(&cluster->root);
2030         while (node) {
2031                 bool bitmap;
2032
2033                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2034                 node = rb_next(&entry->offset_index);
2035                 rb_erase(&entry->offset_index, &cluster->root);
2036
2037                 bitmap = (entry->bitmap != NULL);
2038                 if (!bitmap)
2039                         try_merge_free_space(ctl, entry, false);
2040                 tree_insert_offset(&ctl->free_space_offset,
2041                                    entry->offset, &entry->offset_index, bitmap);
2042         }
2043         cluster->root = RB_ROOT;
2044
2045 out:
2046         spin_unlock(&cluster->lock);
2047         btrfs_put_block_group(block_group);
2048         return 0;
2049 }
2050
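/*
 * Free every entry in the free space tree.  The caller must hold
 * ctl->tree_lock; the lock is dropped briefly whenever a reschedule is
 * needed.
 */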
2051 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
2052 {
2053         struct btrfs_free_space *info;
2054         struct rb_node *node;
2055
2056         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2057                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2058                 if (!info->bitmap) {
2059                         unlink_free_space(ctl, info);
2060                         kmem_cache_free(btrfs_free_space_cachep, info);
2061                 } else {
2062                         free_bitmap(ctl, info);
2063                 }
2064                 if (need_resched()) {
2065                         spin_unlock(&ctl->tree_lock);
2066                         cond_resched();
2067                         spin_lock(&ctl->tree_lock);
2068                 }
2069         }
2070 }
2071
2072 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2073 {
2074         spin_lock(&ctl->tree_lock);
2075         __btrfs_remove_free_space_cache_locked(ctl);
2076         spin_unlock(&ctl->tree_lock);
2077 }
2078
2079 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2080 {
2081         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2082         struct btrfs_free_cluster *cluster;
2083         struct list_head *head;
2084
2085         spin_lock(&ctl->tree_lock);
2086         while ((head = block_group->cluster_list.next) !=
2087                &block_group->cluster_list) {
2088                 cluster = list_entry(head, struct btrfs_free_cluster,
2089                                      block_group_list);
2090
2091                 WARN_ON(cluster->block_group != block_group);
2092                 __btrfs_return_cluster_to_free_space(block_group, cluster);
2093                 if (need_resched()) {
2094                         spin_unlock(&ctl->tree_lock);
2095                         cond_resched();
2096                         spin_lock(&ctl->tree_lock);
2097                 }
2098         }
2099         __btrfs_remove_free_space_cache_locked(ctl);
2100         spin_unlock(&ctl->tree_lock);
2101
2102 }
2103
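/*
 * Find and remove 'bytes' of free space from this block group, searching
 * for a slot of at least bytes + empty_size.  Returns the logical start
 * offset of the space handed out, or 0 if nothing large enough was found.
 */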
2104 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2105                                u64 offset, u64 bytes, u64 empty_size)
2106 {
2107         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2108         struct btrfs_free_space *entry = NULL;
2109         u64 bytes_search = bytes + empty_size;
2110         u64 ret = 0;
2111
2112         spin_lock(&ctl->tree_lock);
2113         entry = find_free_space(ctl, &offset, &bytes_search);
2114         if (!entry)
2115                 goto out;
2116
2117         ret = offset;
2118         if (entry->bitmap) {
2119                 bitmap_clear_bits(ctl, entry, offset, bytes);
2120                 if (!entry->bytes)
2121                         free_bitmap(ctl, entry);
2122         } else {
2123                 unlink_free_space(ctl, entry);
2124                 entry->offset += bytes;
2125                 entry->bytes -= bytes;
2126                 if (!entry->bytes)
2127                         kmem_cache_free(btrfs_free_space_cachep, entry);
2128                 else
2129                         link_free_space(ctl, entry);
2130         }
2131
2132 out:
2133         spin_unlock(&ctl->tree_lock);
2134
2135         return ret;
2136 }
2137
2138 /*
2139  * given a cluster, put all of its extents back into the free space
2140  * cache.  If a block group is passed, this function will only free
2141  * a cluster that belongs to the passed block group.
2142  *
2143  * Otherwise, it'll get a reference on the block group pointed to by the
2144  * cluster and remove the cluster from it.
2145  */
2146 int btrfs_return_cluster_to_free_space(
2147                                struct btrfs_block_group_cache *block_group,
2148                                struct btrfs_free_cluster *cluster)
2149 {
2150         struct btrfs_free_space_ctl *ctl;
2151         int ret;
2152
2153         /* first, get a safe pointer to the block group */
2154         spin_lock(&cluster->lock);
2155         if (!block_group) {
2156                 block_group = cluster->block_group;
2157                 if (!block_group) {
2158                         spin_unlock(&cluster->lock);
2159                         return 0;
2160                 }
2161         } else if (cluster->block_group != block_group) {
2162                 /* someone else has already freed it, don't redo their work */
2163                 spin_unlock(&cluster->lock);
2164                 return 0;
2165         }
2166         atomic_inc(&block_group->count);
2167         spin_unlock(&cluster->lock);
2168
2169         ctl = block_group->free_space_ctl;
2170
2171         /* now return any extents the cluster had on it */
2172         spin_lock(&ctl->tree_lock);
2173         ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2174         spin_unlock(&ctl->tree_lock);
2175
2176         /* finally drop our ref */
2177         btrfs_put_block_group(block_group);
2178         return ret;
2179 }
2180
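/*
 * Carve 'bytes' out of a bitmap entry that belongs to a cluster.  Returns
 * the logical offset of the allocation, or 0 if the bitmap does not have
 * a large enough run at or after min_start.
 */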
2181 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2182                                    struct btrfs_free_cluster *cluster,
2183                                    struct btrfs_free_space *entry,
2184                                    u64 bytes, u64 min_start)
2185 {
2186         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2187         int err;
2188         u64 search_start;
2189         u64 search_bytes;
2190         u64 ret = 0;
2191
2192         search_start = min_start;
2193         search_bytes = bytes;
2194
2195         err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2196         if (err)
2197                 return 0;
2198
2199         ret = search_start;
2200         __bitmap_clear_bits(ctl, entry, ret, bytes);
2201
2202         return ret;
2203 }
2204
2205 /*
2206  * given a cluster, try to allocate 'bytes' from it.  Returns 0
2207  * if it couldn't find anything suitably large, or a logical disk offset
2208  * if things worked out
2209  */
2210 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2211                              struct btrfs_free_cluster *cluster, u64 bytes,
2212                              u64 min_start)
2213 {
2214         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2215         struct btrfs_free_space *entry = NULL;
2216         struct rb_node *node;
2217         u64 ret = 0;
2218
2219         spin_lock(&cluster->lock);
2220         if (bytes > cluster->max_size)
2221                 goto out;
2222
2223         if (cluster->block_group != block_group)
2224                 goto out;
2225
2226         node = rb_first(&cluster->root);
2227         if (!node)
2228                 goto out;
2229
2230         entry = rb_entry(node, struct btrfs_free_space, offset_index);
2231         while (1) {
2232                 if (entry->bytes < bytes ||
2233                     (!entry->bitmap && entry->offset < min_start)) {
2234                         node = rb_next(&entry->offset_index);
2235                         if (!node)
2236                                 break;
2237                         entry = rb_entry(node, struct btrfs_free_space,
2238                                          offset_index);
2239                         continue;
2240                 }
2241
2242                 if (entry->bitmap) {
2243                         ret = btrfs_alloc_from_bitmap(block_group,
2244                                                       cluster, entry, bytes,
2245                                                       min_start);
2246                         if (ret == 0) {
2247                                 node = rb_next(&entry->offset_index);
2248                                 if (!node)
2249                                         break;
2250                                 entry = rb_entry(node, struct btrfs_free_space,
2251                                                  offset_index);
2252                                 continue;
2253                         }
2254                 } else {
2255                         ret = entry->offset;
2256
2257                         entry->offset += bytes;
2258                         entry->bytes -= bytes;
2259                 }
2260
2261                 if (entry->bytes == 0)
2262                         rb_erase(&entry->offset_index, &cluster->root);
2263                 break;
2264         }
2265 out:
2266         spin_unlock(&cluster->lock);
2267
2268         if (!ret)
2269                 return 0;
2270
2271         spin_lock(&ctl->tree_lock);
2272
2273         ctl->free_space -= bytes;
2274         if (entry->bytes == 0) {
2275                 ctl->free_extents--;
2276                 if (entry->bitmap) {
2277                         kfree(entry->bitmap);
2278                         ctl->total_bitmaps--;
2279                         ctl->op->recalc_thresholds(ctl);
2280                 }
2281                 kmem_cache_free(btrfs_free_space_cachep, entry);
2282         }
2283
2284         spin_unlock(&ctl->tree_lock);
2285
2286         return ret;
2287 }
2288
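/*
 * Try to build the cluster out of a single bitmap entry: look for a window
 * of set bits totalling at least min_bytes, made up of runs no smaller
 * than 'bytes'.  On success the entry is moved from the free space tree
 * into the cluster's rbtree and 0 is returned, otherwise -ENOSPC.
 */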
2289 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2290                                 struct btrfs_free_space *entry,
2291                                 struct btrfs_free_cluster *cluster,
2292                                 u64 offset, u64 bytes, u64 min_bytes)
2293 {
2294         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2295         unsigned long next_zero;
2296         unsigned long i;
2297         unsigned long search_bits;
2298         unsigned long total_bits;
2299         unsigned long found_bits;
2300         unsigned long start = 0;
2301         unsigned long total_found = 0;
2302         int ret;
2303         bool found = false;
2304
2305         i = offset_to_bit(entry->offset, block_group->sectorsize,
2306                           max_t(u64, offset, entry->offset));
2307         search_bits = bytes_to_bits(bytes, block_group->sectorsize);
2308         total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2309
2310 again:
2311         found_bits = 0;
2312         for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2313              i < BITS_PER_BITMAP;
2314              i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2315                 next_zero = find_next_zero_bit(entry->bitmap,
2316                                                BITS_PER_BITMAP, i);
2317                 if (next_zero - i >= search_bits) {
2318                         found_bits = next_zero - i;
2319                         break;
2320                 }
2321                 i = next_zero;
2322         }
2323
2324         if (!found_bits)
2325                 return -ENOSPC;
2326
2327         if (!found) {
2328                 start = i;
2329                 cluster->max_size = 0;
2330                 found = true;
2331         }
2332
2333         total_found += found_bits;
2334
2335         if (cluster->max_size < found_bits * block_group->sectorsize)
2336                 cluster->max_size = found_bits * block_group->sectorsize;
2337
2338         if (total_found < total_bits) {
2339                 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
2340                 if (i - start > total_bits * 2) {
2341                         total_found = 0;
2342                         cluster->max_size = 0;
2343                         found = false;
2344                 }
2345                 goto again;
2346         }
2347
2348         cluster->window_start = start * block_group->sectorsize +
2349                 entry->offset;
2350         rb_erase(&entry->offset_index, &ctl->free_space_offset);
2351         ret = tree_insert_offset(&cluster->root, entry->offset,
2352                                  &entry->offset_index, 1);
2353         BUG_ON(ret);
2354
2355         return 0;
2356 }
2357
2358 /*
2359  * This searches the block group for just extents to fill the cluster with.
2360  */
2361 static noinline int
2362 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2363                         struct btrfs_free_cluster *cluster,
2364                         struct list_head *bitmaps, u64 offset, u64 bytes,
2365                         u64 min_bytes)
2366 {
2367         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2368         struct btrfs_free_space *first = NULL;
2369         struct btrfs_free_space *entry = NULL;
2370         struct btrfs_free_space *prev = NULL;
2371         struct btrfs_free_space *last;
2372         struct rb_node *node;
2373         u64 window_start;
2374         u64 window_free;
2375         u64 max_extent;
2376         u64 max_gap = 128 * 1024;
2377
2378         entry = tree_search_offset(ctl, offset, 0, 1);
2379         if (!entry)
2380                 return -ENOSPC;
2381
2382         /*
2383          * We don't want bitmaps, so just move along until we find a normal
2384          * extent entry.
2385          */
2386         while (entry->bitmap) {
2387                 if (list_empty(&entry->list))
2388                         list_add_tail(&entry->list, bitmaps);
2389                 node = rb_next(&entry->offset_index);
2390                 if (!node)
2391                         return -ENOSPC;
2392                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2393         }
2394
2395         window_start = entry->offset;
2396         window_free = entry->bytes;
2397         max_extent = entry->bytes;
2398         first = entry;
2399         last = entry;
2400         prev = entry;
2401
2402         while (window_free <= min_bytes) {
2403                 node = rb_next(&entry->offset_index);
2404                 if (!node)
2405                         return -ENOSPC;
2406                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2407
2408                 if (entry->bitmap) {
2409                         if (list_empty(&entry->list))
2410                                 list_add_tail(&entry->list, bitmaps);
2411                         continue;
2412                 }
2413
2414                 /*
2415                  * the gap is too big, or we haven't filled the empty size
2416                  * and the window is very large.  reset and try again
2417                  */
2418                 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2419                     entry->offset - window_start > (min_bytes * 2)) {
2420                         first = entry;
2421                         window_start = entry->offset;
2422                         window_free = entry->bytes;
2423                         last = entry;
2424                         max_extent = entry->bytes;
2425                 } else {
2426                         last = entry;
2427                         window_free += entry->bytes;
2428                         if (entry->bytes > max_extent)
2429                                 max_extent = entry->bytes;
2430                 }
2431                 prev = entry;
2432         }
2433
2434         cluster->window_start = first->offset;
2435
2436         node = &first->offset_index;
2437
2438         /*
2439          * now we've found our entries, pull them out of the free space
2440          * cache and put them into the cluster rbtree
2441          */
2442         do {
2443                 int ret;
2444
2445                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2446                 node = rb_next(&entry->offset_index);
2447                 if (entry->bitmap)
2448                         continue;
2449
2450                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2451                 ret = tree_insert_offset(&cluster->root, entry->offset,
2452                                          &entry->offset_index, 0);
2453                 BUG_ON(ret);
2454         } while (node && entry != last);
2455
2456         cluster->max_size = max_extent;
2457
2458         return 0;
2459 }
2460
2461 /*
2462  * This specifically looks for bitmaps that may work in the cluster, we assume
2463  * that we have already failed to find extents that will work.
2464  */
2465 static noinline int
2466 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2467                      struct btrfs_free_cluster *cluster,
2468                      struct list_head *bitmaps, u64 offset, u64 bytes,
2469                      u64 min_bytes)
2470 {
2471         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2472         struct btrfs_free_space *entry;
2473         int ret = -ENOSPC;
2474         u64 bitmap_offset = offset_to_bitmap(ctl, offset);
2475
2476         if (ctl->total_bitmaps == 0)
2477                 return -ENOSPC;
2478
2479         /*
2480          * The bitmap that covers offset won't be in the list unless offset
2481          * is just its start offset.
2482          */
2483         entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2484         if (entry->offset != bitmap_offset) {
2485                 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2486                 if (entry && list_empty(&entry->list))
2487                         list_add(&entry->list, bitmaps);
2488         }
2489
2490         list_for_each_entry(entry, bitmaps, list) {
2491                 if (entry->bytes < min_bytes)
2492                         continue;
2493                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2494                                            bytes, min_bytes);
2495                 if (!ret)
2496                         return 0;
2497         }
2498
2499         /*
2500          * The bitmaps list has all the bitmaps that record free space
2501          * starting after offset, so no more search is required.
2502          */
2503         return -ENOSPC;
2504 }
2505
2506 /*
2507  * here we try to find a cluster of blocks in a block group.  The goal
2508  * is to find at least bytes free and up to empty_size + bytes free.
2509  * We might not find them all in one contiguous area.
2510  *
2511  * returns zero and sets up cluster if things worked out, otherwise
2512  * it returns -ENOSPC
2513  */
2514 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2515                              struct btrfs_root *root,
2516                              struct btrfs_block_group_cache *block_group,
2517                              struct btrfs_free_cluster *cluster,
2518                              u64 offset, u64 bytes, u64 empty_size)
2519 {
2520         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2521         struct btrfs_free_space *entry, *tmp;
2522         LIST_HEAD(bitmaps);
2523         u64 min_bytes;
2524         int ret;
2525
2526         /* for metadata, allow allocations with more holes */
2527         if (btrfs_test_opt(root, SSD_SPREAD)) {
2528                 min_bytes = bytes + empty_size;
2529         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2530                 /*
2531                  * we want to do larger allocations when we are
2532                  * flushing out the delayed refs; it helps prevent
2533                  * making more work as we go along.
2534                  */
2535                 if (trans->transaction->delayed_refs.flushing)
2536                         min_bytes = max(bytes, (bytes + empty_size) >> 1);
2537                 else
2538                         min_bytes = max(bytes, (bytes + empty_size) >> 4);
2539         } else
2540                 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2541
2542         spin_lock(&ctl->tree_lock);
2543
2544         /*
2545          * If we know we don't have enough space to make a cluster, don't even
2546          * bother doing all the work to try and find one.
2547          */
2548         if (ctl->free_space < min_bytes) {
2549                 spin_unlock(&ctl->tree_lock);
2550                 return -ENOSPC;
2551         }
2552
2553         spin_lock(&cluster->lock);
2554
2555         /* someone already found a cluster, hooray */
2556         if (cluster->block_group) {
2557                 ret = 0;
2558                 goto out;
2559         }
2560
2561         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2562                                       bytes, min_bytes);
2563         if (ret)
2564                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2565                                            offset, bytes, min_bytes);
2566
2567         /* Clear our temporary list */
2568         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2569                 list_del_init(&entry->list);
2570
2571         if (!ret) {
2572                 atomic_inc(&block_group->count);
2573                 list_add_tail(&cluster->block_group_list,
2574                               &block_group->cluster_list);
2575                 cluster->block_group = block_group;
2576         }
2577 out:
2578         spin_unlock(&cluster->lock);
2579         spin_unlock(&ctl->tree_lock);
2580
2581         return ret;
2582 }
2583
2584 /*
2585  * simple code to zero out a cluster
2586  */
2587 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2588 {
2589         spin_lock_init(&cluster->lock);
2590         spin_lock_init(&cluster->refill_lock);
2591         cluster->root = RB_ROOT;
2592         cluster->max_size = 0;
2593         INIT_LIST_HEAD(&cluster->block_group_list);
2594         cluster->block_group = NULL;
2595 }
2596
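/*
 * Discard [start, start + bytes) on disk and hand the reserved range back
 * to the free space cache afterwards.  While the discard is running the
 * range is accounted as reserved in the block group so the space counters
 * stay consistent.
 */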
2597 static int do_trimming(struct btrfs_block_group_cache *block_group,
2598                        u64 *total_trimmed, u64 start, u64 bytes,
2599                        u64 reserved_start, u64 reserved_bytes)
2600 {
2601         struct btrfs_space_info *space_info = block_group->space_info;
2602         struct btrfs_fs_info *fs_info = block_group->fs_info;
2603         int ret;
2604         int update = 0;
2605         u64 trimmed = 0;
2606
2607         spin_lock(&space_info->lock);
2608         spin_lock(&block_group->lock);
2609         if (!block_group->ro) {
2610                 block_group->reserved += reserved_bytes;
2611                 space_info->bytes_reserved += reserved_bytes;
2612                 update = 1;
2613         }
2614         spin_unlock(&block_group->lock);
2615         spin_unlock(&space_info->lock);
2616
2617         ret = btrfs_error_discard_extent(fs_info->extent_root,
2618                                          start, bytes, &trimmed);
2619         if (!ret)
2620                 *total_trimmed += trimmed;
2621
2622         btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
2623
2624         if (update) {
2625                 spin_lock(&space_info->lock);
2626                 spin_lock(&block_group->lock);
2627                 if (block_group->ro)
2628                         space_info->bytes_readonly += reserved_bytes;
2629                 block_group->reserved -= reserved_bytes;
2630                 space_info->bytes_reserved -= reserved_bytes;
2631                 spin_unlock(&space_info->lock);
2632                 spin_unlock(&block_group->lock);
2633         }
2634
2635         return ret;
2636 }
2637
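/*
 * Trim the plain extent entries that intersect [start, end).  Each
 * candidate extent is pulled out of the free space cache, discarded via
 * do_trimming() and then re-added, so allocators cannot grab it while the
 * discard is in flight.
 */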
2638 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
2639                           u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2640 {
2641         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2642         struct btrfs_free_space *entry;
2643         struct rb_node *node;
2644         int ret = 0;
2645         u64 extent_start;
2646         u64 extent_bytes;
2647         u64 bytes;
2648
2649         while (start < end) {
2650                 spin_lock(&ctl->tree_lock);
2651
2652                 if (ctl->free_space < minlen) {
2653                         spin_unlock(&ctl->tree_lock);
2654                         break;
2655                 }
2656
2657                 entry = tree_search_offset(ctl, start, 0, 1);
2658                 if (!entry) {
2659                         spin_unlock(&ctl->tree_lock);
2660                         break;
2661                 }
2662
2663                 /* skip bitmaps */
2664                 while (entry->bitmap) {
2665                         node = rb_next(&entry->offset_index);
2666                         if (!node) {
2667                                 spin_unlock(&ctl->tree_lock);
2668                                 goto out;
2669                         }
2670                         entry = rb_entry(node, struct btrfs_free_space,
2671                                          offset_index);
2672                 }
2673
2674                 if (entry->offset >= end) {
2675                         spin_unlock(&ctl->tree_lock);
2676                         break;
2677                 }
2678
2679                 extent_start = entry->offset;
2680                 extent_bytes = entry->bytes;
2681                 start = max(start, extent_start);
2682                 bytes = min(extent_start + extent_bytes, end) - start;
2683                 if (bytes < minlen) {
2684                         spin_unlock(&ctl->tree_lock);
2685                         goto next;
2686                 }
2687
2688                 unlink_free_space(ctl, entry);
2689                 kmem_cache_free(btrfs_free_space_cachep, entry);
2690
2691                 spin_unlock(&ctl->tree_lock);
2692
2693                 ret = do_trimming(block_group, total_trimmed, start, bytes,
2694                                   extent_start, extent_bytes);
2695                 if (ret)
2696                         break;
2697 next:
2698                 start += bytes;
2699
2700                 if (fatal_signal_pending(current)) {
2701                         ret = -ERESTARTSYS;
2702                         break;
2703                 }
2704
2705                 cond_resched();
2706         }
2707 out:
2708         return ret;
2709 }
2710
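/*
 * Trim the free space recorded in bitmap entries between start and end,
 * walking one BITS_PER_BITMAP sized window at a time and discarding each
 * run of free bits that is at least minlen bytes long.
 */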
2711 static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
2712                         u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2713 {
2714         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2715         struct btrfs_free_space *entry;
2716         int ret = 0;
2717         int ret2;
2718         u64 bytes;
2719         u64 offset = offset_to_bitmap(ctl, start);
2720
2721         while (offset < end) {
2722                 bool next_bitmap = false;
2723
2724                 spin_lock(&ctl->tree_lock);
2725
2726                 if (ctl->free_space < minlen) {
2727                         spin_unlock(&ctl->tree_lock);
2728                         break;
2729                 }
2730
2731                 entry = tree_search_offset(ctl, offset, 1, 0);
2732                 if (!entry) {
2733                         spin_unlock(&ctl->tree_lock);
2734                         next_bitmap = true;
2735                         goto next;
2736                 }
2737
2738                 bytes = minlen;
2739                 ret2 = search_bitmap(ctl, entry, &start, &bytes);
2740                 if (ret2 || start >= end) {
2741                         spin_unlock(&ctl->tree_lock);
2742                         next_bitmap = true;
2743                         goto next;
2744                 }
2745
2746                 bytes = min(bytes, end - start);
2747                 if (bytes < minlen) {
2748                         spin_unlock(&ctl->tree_lock);
2749                         goto next;
2750                 }
2751
2752                 bitmap_clear_bits(ctl, entry, start, bytes);
2753                 if (entry->bytes == 0)
2754                         free_bitmap(ctl, entry);
2755
2756                 spin_unlock(&ctl->tree_lock);
2757
2758                 ret = do_trimming(block_group, total_trimmed, start, bytes,
2759                                   start, bytes);
2760                 if (ret)
2761                         break;
2762 next:
2763                 if (next_bitmap) {
2764                         offset += BITS_PER_BITMAP * ctl->unit;
2765                 } else {
2766                         start += bytes;
2767                         if (start >= offset + BITS_PER_BITMAP * ctl->unit)
2768                                 offset += BITS_PER_BITMAP * ctl->unit;
2769                 }
2770
2771                 if (fatal_signal_pending(current)) {
2772                         ret = -ERESTARTSYS;
2773                         break;
2774                 }
2775
2776                 cond_resched();
2777         }
2778
2779         return ret;
2780 }
2781
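/*
 * Trim all free space in the given range of this block group: first the
 * extent entries, then the bitmap entries.  *trimmed returns the total
 * number of bytes actually discarded.
 */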
2782 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2783                            u64 *trimmed, u64 start, u64 end, u64 minlen)
2784 {
2785         int ret;
2786
2787         *trimmed = 0;
2788
2789         ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
2790         if (ret)
2791                 return ret;
2792
2793         ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
2794
2795         return ret;
2796 }
2797
2798 /*
2799  * Find the left-most item in the cache tree, and then return the
2800  * smallest inode number in the item.
2801  *
2802  * Note: the returned inode number may not be the smallest one in
2803  * the tree, if the left-most item is a bitmap.
2804  */
2805 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2806 {
2807         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2808         struct btrfs_free_space *entry = NULL;
2809         u64 ino = 0;
2810
2811         spin_lock(&ctl->tree_lock);
2812
2813         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2814                 goto out;
2815
2816         entry = rb_entry(rb_first(&ctl->free_space_offset),
2817                          struct btrfs_free_space, offset_index);
2818
2819         if (!entry->bitmap) {
2820                 ino = entry->offset;
2821
2822                 unlink_free_space(ctl, entry);
2823                 entry->offset++;
2824                 entry->bytes--;
2825                 if (!entry->bytes)
2826                         kmem_cache_free(btrfs_free_space_cachep, entry);
2827                 else
2828                         link_free_space(ctl, entry);
2829         } else {
2830                 u64 offset = 0;
2831                 u64 count = 1;
2832                 int ret;
2833
2834                 ret = search_bitmap(ctl, entry, &offset, &count);
2835                 BUG_ON(ret);
2836
2837                 ino = offset;
2838                 bitmap_clear_bits(ctl, entry, offset, 1);
2839                 if (entry->bytes == 0)
2840                         free_bitmap(ctl, entry);
2841         }
2842 out:
2843         spin_unlock(&ctl->tree_lock);
2844
2845         return ino;
2846 }
2847
2848 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2849                                     struct btrfs_path *path)
2850 {
2851         struct inode *inode = NULL;
2852
2853         spin_lock(&root->cache_lock);
2854         if (root->cache_inode)
2855                 inode = igrab(root->cache_inode);
2856         spin_unlock(&root->cache_lock);
2857         if (inode)
2858                 return inode;
2859
2860         inode = __lookup_free_space_inode(root, path, 0);
2861         if (IS_ERR(inode))
2862                 return inode;
2863
2864         spin_lock(&root->cache_lock);
2865         if (!btrfs_fs_closing(root->fs_info))
2866                 root->cache_inode = igrab(inode);
2867         spin_unlock(&root->cache_lock);
2868
2869         return inode;
2870 }
2871
2872 int create_free_ino_inode(struct btrfs_root *root,
2873                           struct btrfs_trans_handle *trans,
2874                           struct btrfs_path *path)
2875 {
2876         return __create_free_space_inode(root, trans, path,
2877                                          BTRFS_FREE_INO_OBJECTID, 0);
2878 }
2879
2880 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2881 {
2882         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2883         struct btrfs_path *path;
2884         struct inode *inode;
2885         int ret = 0;
2886         u64 root_gen = btrfs_root_generation(&root->root_item);
2887
2888         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2889                 return 0;
2890
2891         /*
2892          * If we're unmounting then just return, since this does a search on the
2893          * normal root and not the commit root and we could deadlock.
2894          */
2895         if (btrfs_fs_closing(fs_info))
2896                 return 0;
2897
2898         path = btrfs_alloc_path();
2899         if (!path)
2900                 return 0;
2901
2902         inode = lookup_free_ino_inode(root, path);
2903         if (IS_ERR(inode))
2904                 goto out;
2905
2906         if (root_gen != BTRFS_I(inode)->generation)
2907                 goto out_put;
2908
2909         ret = __load_free_space_cache(root, inode, ctl, path, 0);
2910
2911         if (ret < 0)
2912                 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2913                        "root %llu\n", root->root_key.objectid);
2914 out_put:
2915         iput(inode);
2916 out:
2917         btrfs_free_path(path);
2918         return ret;
2919 }
2920
2921 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2922                               struct btrfs_trans_handle *trans,
2923                               struct btrfs_path *path)
2924 {
2925         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2926         struct inode *inode;
2927         int ret;
2928
2929         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2930                 return 0;
2931
2932         inode = lookup_free_ino_inode(root, path);
2933         if (IS_ERR(inode))
2934                 return 0;
2935
2936         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2937         if (ret) {
2938                 btrfs_delalloc_release_metadata(inode, inode->i_size);
2939 #ifdef DEBUG
2940                 printk(KERN_ERR "btrfs: failed to write free ino cache "
2941                        "for root %llu\n", root->root_key.objectid);
2942 #endif
2943         }
2944
2945         iput(inode);
2946         return ret;
2947 }