Btrfs: Add backing store, memory management
[firefly-linux-kernel-4.4.55.git] / fs / btrfs / ctree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7
8 static inline void init_path(struct ctree_path *p)
9 {
10         memset(p, 0, sizeof(*p));
11 }
12
13 static void release_path(struct ctree_root *root, struct ctree_path *p)
14 {
15         int i;
16         for (i = 0; i < MAX_LEVEL; i++) {
17                 if (!p->nodes[i])
18                         break;
19                 tree_block_release(root, p->nodes[i]);
20         }
21 }
22
23 static inline unsigned int leaf_data_end(struct leaf *leaf)
24 {
25         unsigned int nr = leaf->header.nritems;
26         if (nr == 0)
27                 return ARRAY_SIZE(leaf->data);
28         return leaf->items[nr-1].offset;
29 }
30
31 static inline int leaf_free_space(struct leaf *leaf)
32 {
33         int data_end = leaf_data_end(leaf);
34         int nritems = leaf->header.nritems;
35         char *items_end = (char *)(leaf->items + nritems + 1);
36         return (char *)(leaf->data + data_end) - (char *)items_end;
37 }
38
39 int comp_keys(struct key *k1, struct key *k2)
40 {
41         if (k1->objectid > k2->objectid)
42                 return 1;
43         if (k1->objectid < k2->objectid)
44                 return -1;
45         if (k1->flags > k2->flags)
46                 return 1;
47         if (k1->flags < k2->flags)
48                 return -1;
49         if (k1->offset > k2->offset)
50                 return 1;
51         if (k1->offset < k2->offset)
52                 return -1;
53         return 0;
54 }
55 int generic_bin_search(char *p, int item_size, struct key *key,
56                        int max, int *slot)
57 {
58         int low = 0;
59         int high = max;
60         int mid;
61         int ret;
62         struct key *tmp;
63
64         while(low < high) {
65                 mid = (low + high) / 2;
66                 tmp = (struct key *)(p + mid * item_size);
67                 ret = comp_keys(tmp, key);
68
69                 if (ret < 0)
70                         low = mid + 1;
71                 else if (ret > 0)
72                         high = mid;
73                 else {
74                         *slot = mid;
75                         return 0;
76                 }
77         }
78         *slot = low;
79         return 1;
80 }
81
82 int bin_search(struct node *c, struct key *key, int *slot)
83 {
84         if (is_leaf(c->header.flags)) {
85                 struct leaf *l = (struct leaf *)c;
86                 return generic_bin_search((void *)l->items, sizeof(struct item),
87                                           key, c->header.nritems, slot);
88         } else {
89                 return generic_bin_search((void *)c->keys, sizeof(struct key),
90                                           key, c->header.nritems, slot);
91         }
92         return -1;
93 }
94
95 int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p)
96 {
97         struct tree_buffer *b = root->node;
98         struct node *c;
99
100         int slot;
101         int ret;
102         int level;
103         b->count++;
104         while (b) {
105                 c = &b->node;
106                 level = node_level(c->header.flags);
107                 p->nodes[level] = b;
108                 ret = bin_search(c, key, &slot);
109                 if (!is_leaf(c->header.flags)) {
110                         if (ret && slot > 0)
111                                 slot -= 1;
112                         p->slots[level] = slot;
113                         b = read_tree_block(root, c->blockptrs[slot]);
114                         continue;
115                 } else {
116                         p->slots[level] = slot;
117                         return ret;
118                 }
119         }
120         return -1;
121 }
122
123 static void fixup_low_keys(struct ctree_root *root,
124                            struct ctree_path *path, struct key *key,
125                            int level)
126 {
127         int i;
128         /* adjust the pointers going up the tree */
129         for (i = level; i < MAX_LEVEL; i++) {
130                 struct node *t;
131                 int tslot = path->slots[i];
132                 if (!path->nodes[i])
133                         break;
134                 t = &path->nodes[i]->node;
135                 memcpy(t->keys + tslot, key, sizeof(*key));
136                 write_tree_block(root, path->nodes[i]);
137                 if (tslot != 0)
138                         break;
139         }
140 }
141
142 int __insert_ptr(struct ctree_root *root,
143                 struct ctree_path *path, struct key *key,
144                 u64 blocknr, int slot, int level)
145 {
146         struct node *c;
147         struct node *lower;
148         struct key *lower_key;
149         int nritems;
150         /* need a new root */
151         if (!path->nodes[level]) {
152                 struct tree_buffer *t;
153                 t = alloc_free_block(root);
154                 c = &t->node;
155                 memset(c, 0, sizeof(c));
156                 c->header.nritems = 2;
157                 c->header.flags = node_level(level);
158                 c->header.blocknr = t->blocknr;
159                 lower = &path->nodes[level-1]->node;
160                 if (is_leaf(lower->header.flags))
161                         lower_key = &((struct leaf *)lower)->items[0].key;
162                 else
163                         lower_key = lower->keys;
164                 memcpy(c->keys, lower_key, sizeof(struct key));
165                 memcpy(c->keys + 1, key, sizeof(struct key));
166                 c->blockptrs[0] = path->nodes[level-1]->blocknr;
167                 c->blockptrs[1] = blocknr;
168                 /* the path has an extra ref to root->node */
169                 tree_block_release(root, root->node);
170                 root->node = t;
171                 t->count++;
172                 write_tree_block(root, t);
173                 path->nodes[level] = t;
174                 path->slots[level] = 0;
175                 if (c->keys[1].objectid == 0)
176                         BUG();
177                 return 0;
178         }
179         lower = &path->nodes[level]->node;
180         nritems = lower->header.nritems;
181         if (slot > nritems)
182                 BUG();
183         if (nritems == NODEPTRS_PER_BLOCK)
184                 BUG();
185         if (slot != nritems) {
186                 memmove(lower->keys + slot + 1, lower->keys + slot,
187                         (nritems - slot) * sizeof(struct key));
188                 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
189                         (nritems - slot) * sizeof(u64));
190         }
191         memcpy(lower->keys + slot, key, sizeof(struct key));
192         lower->blockptrs[slot] = blocknr;
193         lower->header.nritems++;
194         if (lower->keys[1].objectid == 0)
195                         BUG();
196         write_tree_block(root, path->nodes[level]);
197         return 0;
198 }
199
200 int push_node_left(struct ctree_root *root, struct ctree_path *path, int level)
201 {
202         int slot;
203         struct node *left;
204         struct node *right;
205         int push_items = 0;
206         int left_nritems;
207         int right_nritems;
208         struct tree_buffer *t;
209         struct tree_buffer *right_buf;
210
211         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
212                 return 1;
213         slot = path->slots[level + 1];
214         if (slot == 0)
215                 return 1;
216
217         t = read_tree_block(root,
218                             path->nodes[level + 1]->node.blockptrs[slot - 1]);
219         left = &t->node;
220         right_buf = path->nodes[level];
221         right = &right_buf->node;
222         left_nritems = left->header.nritems;
223         right_nritems = right->header.nritems;
224         push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
225         if (push_items <= 0) {
226                 tree_block_release(root, t);
227                 return 1;
228         }
229
230         if (right_nritems < push_items)
231                 push_items = right_nritems;
232         memcpy(left->keys + left_nritems, right->keys,
233                 push_items * sizeof(struct key));
234         memcpy(left->blockptrs + left_nritems, right->blockptrs,
235                 push_items * sizeof(u64));
236         memmove(right->keys, right->keys + push_items,
237                 (right_nritems - push_items) * sizeof(struct key));
238         memmove(right->blockptrs, right->blockptrs + push_items,
239                 (right_nritems - push_items) * sizeof(u64));
240         right->header.nritems -= push_items;
241         left->header.nritems += push_items;
242
243         /* adjust the pointers going up the tree */
244         fixup_low_keys(root, path, right->keys, level + 1);
245
246         write_tree_block(root, t);
247         write_tree_block(root, right_buf);
248
249         /* then fixup the leaf pointer in the path */
250         if (path->slots[level] < push_items) {
251                 path->slots[level] += left_nritems;
252                 tree_block_release(root, path->nodes[level]);
253                 path->nodes[level] = t;
254                 path->slots[level + 1] -= 1;
255         } else {
256                 path->slots[level] -= push_items;
257                 tree_block_release(root, t);
258         }
259         return 0;
260 }
261
262 int push_node_right(struct ctree_root *root, struct ctree_path *path, int level)
263 {
264         int slot;
265         struct tree_buffer *t;
266         struct tree_buffer *src_buffer;
267         struct node *dst;
268         struct node *src;
269         int push_items = 0;
270         int dst_nritems;
271         int src_nritems;
272
273         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
274                 return 1;
275         slot = path->slots[level + 1];
276         if (slot == NODEPTRS_PER_BLOCK - 1)
277                 return 1;
278
279         if (slot >= path->nodes[level + 1]->node.header.nritems -1)
280                 return 1;
281
282         t = read_tree_block(root,
283                             path->nodes[level + 1]->node.blockptrs[slot + 1]);
284         dst = &t->node;
285         src_buffer = path->nodes[level];
286         src = &src_buffer->node;
287         dst_nritems = dst->header.nritems;
288         src_nritems = src->header.nritems;
289         push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
290         if (push_items <= 0) {
291                 tree_block_release(root, t);
292                 return 1;
293         }
294
295         if (src_nritems < push_items)
296                 push_items = src_nritems;
297         memmove(dst->keys + push_items, dst->keys,
298                 dst_nritems * sizeof(struct key));
299         memcpy(dst->keys, src->keys + src_nritems - push_items,
300                 push_items * sizeof(struct key));
301
302         memmove(dst->blockptrs + push_items, dst->blockptrs,
303                 dst_nritems * sizeof(u64));
304         memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
305                 push_items * sizeof(u64));
306
307         src->header.nritems -= push_items;
308         dst->header.nritems += push_items;
309
310         /* adjust the pointers going up the tree */
311         memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
312                 dst->keys, sizeof(struct key));
313
314         write_tree_block(root, path->nodes[level + 1]);
315         write_tree_block(root, t);
316         write_tree_block(root, src_buffer);
317
318         /* then fixup the leaf pointer in the path */
319         if (path->slots[level] >= src->header.nritems) {
320                 path->slots[level] -= src->header.nritems;
321                 tree_block_release(root, path->nodes[level]);
322                 path->nodes[level] = t;
323                 path->slots[level + 1] += 1;
324         } else {
325                 tree_block_release(root, t);
326         }
327         return 0;
328 }
329
330 int insert_ptr(struct ctree_root *root,
331                 struct ctree_path *path, struct key *key,
332                 u64 blocknr, int level)
333 {
334         struct tree_buffer *t = path->nodes[level];
335         struct node *c = &path->nodes[level]->node;
336         struct node *b;
337         struct tree_buffer *b_buffer;
338         struct tree_buffer *bal[MAX_LEVEL];
339         int bal_level = level;
340         int mid;
341         int bal_start = -1;
342
343         memset(bal, 0, ARRAY_SIZE(bal));
344         while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
345                 c = &t->node;
346                 if (push_node_left(root, path,
347                    node_level(c->header.flags)) == 0)
348                         break;
349                 if (push_node_right(root, path,
350                    node_level(c->header.flags)) == 0)
351                         break;
352                 bal_start = bal_level;
353                 if (bal_level == MAX_LEVEL - 1)
354                         BUG();
355                 b_buffer = alloc_free_block(root);
356                 b = &b_buffer->node;
357                 b->header.flags = c->header.flags;
358                 b->header.blocknr = b_buffer->blocknr;
359                 mid = (c->header.nritems + 1) / 2;
360                 memcpy(b->keys, c->keys + mid,
361                         (c->header.nritems - mid) * sizeof(struct key));
362                 memcpy(b->blockptrs, c->blockptrs + mid,
363                         (c->header.nritems - mid) * sizeof(u64));
364                 b->header.nritems = c->header.nritems - mid;
365                 c->header.nritems = mid;
366
367                 write_tree_block(root, t);
368                 write_tree_block(root, b_buffer);
369
370                 bal[bal_level] = b_buffer;
371                 if (bal_level == MAX_LEVEL - 1)
372                         break;
373                 bal_level += 1;
374                 t = path->nodes[bal_level];
375         }
376         while(bal_start > 0) {
377                 b_buffer = bal[bal_start];
378                 c = &path->nodes[bal_start]->node;
379                 __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr,
380                                 path->slots[bal_start + 1] + 1, bal_start + 1);
381                 if (path->slots[bal_start] >= c->header.nritems) {
382                         path->slots[bal_start] -= c->header.nritems;
383                         tree_block_release(root, path->nodes[bal_start]);
384                         path->nodes[bal_start] = b_buffer;
385                         path->slots[bal_start + 1] += 1;
386                 } else {
387                         tree_block_release(root, b_buffer);
388                 }
389                 bal_start--;
390                 if (!bal[bal_start])
391                         break;
392         }
393         return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1,
394                             level);
395 }
396
397 int leaf_space_used(struct leaf *l, int start, int nr)
398 {
399         int data_len;
400         int end = start + nr - 1;
401
402         if (!nr)
403                 return 0;
404         data_len = l->items[start].offset + l->items[start].size;
405         data_len = data_len - l->items[end].offset;
406         data_len += sizeof(struct item) * nr;
407         return data_len;
408 }
409
410 int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
411                    int data_size)
412 {
413         struct tree_buffer *right_buf = path->nodes[0];
414         struct leaf *right = &right_buf->leaf;
415         struct tree_buffer *t;
416         struct leaf *left;
417         int slot;
418         int i;
419         int free_space;
420         int push_space = 0;
421         int push_items = 0;
422         struct item *item;
423         int old_left_nritems;
424
425         slot = path->slots[1];
426         if (slot == 0) {
427                 return 1;
428         }
429         if (!path->nodes[1]) {
430                 return 1;
431         }
432         t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
433         left = &t->leaf;
434         free_space = leaf_free_space(left);
435         if (free_space < data_size + sizeof(struct item)) {
436                 tree_block_release(root, t);
437                 return 1;
438         }
439         for (i = 0; i < right->header.nritems; i++) {
440                 item = right->items + i;
441                 if (path->slots[0] == i)
442                         push_space += data_size + sizeof(*item);
443                 if (item->size + sizeof(*item) + push_space > free_space)
444                         break;
445                 push_items++;
446                 push_space += item->size + sizeof(*item);
447         }
448         if (push_items == 0) {
449                 tree_block_release(root, t);
450                 return 1;
451         }
452         /* push data from right to left */
453         memcpy(left->items + left->header.nritems,
454                 right->items, push_items * sizeof(struct item));
455         push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
456         memcpy(left->data + leaf_data_end(left) - push_space,
457                 right->data + right->items[push_items - 1].offset,
458                 push_space);
459         old_left_nritems = left->header.nritems;
460         BUG_ON(old_left_nritems < 0);
461
462         for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
463                 left->items[i].offset -= LEAF_DATA_SIZE -
464                         left->items[old_left_nritems -1].offset;
465         }
466         left->header.nritems += push_items;
467
468         /* fixup right node */
469         push_space = right->items[push_items-1].offset - leaf_data_end(right);
470         memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
471                 leaf_data_end(right), push_space);
472         memmove(right->items, right->items + push_items,
473                 (right->header.nritems - push_items) * sizeof(struct item));
474         right->header.nritems -= push_items;
475         push_space = LEAF_DATA_SIZE;
476
477         for (i = 0; i < right->header.nritems; i++) {
478                 right->items[i].offset = push_space - right->items[i].size;
479                 push_space = right->items[i].offset;
480         }
481
482         write_tree_block(root, t);
483         write_tree_block(root, right_buf);
484
485         fixup_low_keys(root, path, &right->items[0].key, 1);
486
487         /* then fixup the leaf pointer in the path */
488         if (path->slots[0] < push_items) {
489                 path->slots[0] += old_left_nritems;
490                 tree_block_release(root, path->nodes[0]);
491                 path->nodes[0] = t;
492                 path->slots[1] -= 1;
493         } else {
494                 tree_block_release(root, t);
495                 path->slots[0] -= push_items;
496         }
497         BUG_ON(path->slots[0] < 0);
498         return 0;
499 }
500
501 int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size)
502 {
503         struct tree_buffer *l_buf = path->nodes[0];
504         struct leaf *l = &l_buf->leaf;
505         int nritems;
506         int mid;
507         int slot;
508         struct leaf *right;
509         struct tree_buffer *right_buffer;
510         int space_needed = data_size + sizeof(struct item);
511         int data_copy_size;
512         int rt_data_off;
513         int i;
514         int ret;
515
516         if (push_leaf_left(root, path, data_size) == 0) {
517                 l_buf = path->nodes[0];
518                 l = &l_buf->leaf;
519                 if (leaf_free_space(l) >= sizeof(struct item) + data_size)
520                         return 0;
521         }
522         slot = path->slots[0];
523         nritems = l->header.nritems;
524         mid = (nritems + 1)/ 2;
525
526         right_buffer = alloc_free_block(root);
527         BUG_ON(!right_buffer);
528         BUG_ON(mid == nritems);
529         right = &right_buffer->leaf;
530         memset(right, 0, sizeof(*right));
531         if (mid <= slot) {
532                 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
533                         LEAF_DATA_SIZE)
534                         BUG();
535         } else {
536                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
537                         LEAF_DATA_SIZE)
538                         BUG();
539         }
540         right->header.nritems = nritems - mid;
541         right->header.blocknr = right_buffer->blocknr;
542         right->header.flags = node_level(0);
543         data_copy_size = l->items[mid].offset + l->items[mid].size -
544                          leaf_data_end(l);
545         memcpy(right->items, l->items + mid,
546                (nritems - mid) * sizeof(struct item));
547         memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
548                l->data + leaf_data_end(l), data_copy_size);
549         rt_data_off = LEAF_DATA_SIZE -
550                      (l->items[mid].offset + l->items[mid].size);
551         for (i = 0; i < right->header.nritems; i++) {
552                 right->items[i].offset += rt_data_off;
553         }
554         l->header.nritems = mid;
555         ret = insert_ptr(root, path, &right->items[0].key,
556                           right_buffer->blocknr, 1);
557
558         write_tree_block(root, right_buffer);
559         write_tree_block(root, l_buf);
560
561         BUG_ON(path->slots[0] != slot);
562         if (mid <= slot) {
563                 tree_block_release(root, path->nodes[0]);
564                 path->nodes[0] = right_buffer;
565                 path->slots[0] -= mid;
566                 path->slots[1] += 1;
567         } else
568                 tree_block_release(root, right_buffer);
569         BUG_ON(path->slots[0] < 0);
570         return ret;
571 }
572
573 int insert_item(struct ctree_root *root, struct key *key,
574                           void *data, int data_size)
575 {
576         int ret;
577         int slot;
578         int slot_orig;
579         struct leaf *leaf;
580         struct tree_buffer *leaf_buf;
581         unsigned int nritems;
582         unsigned int data_end;
583         struct ctree_path path;
584
585         if (!root->node) {
586                 struct tree_buffer *t;
587                 t = alloc_free_block(root);
588                 BUG_ON(!t);
589                 t->node.header.nritems = 0;
590                 t->node.header.flags = node_level(0);
591                 t->node.header.blocknr = t->blocknr;
592                 root->node = t;
593                 write_tree_block(root, t);
594         }
595         init_path(&path);
596         ret = search_slot(root, key, &path);
597         if (ret == 0) {
598                 release_path(root, &path);
599                 return -EEXIST;
600         }
601
602         slot_orig = path.slots[0];
603         leaf_buf = path.nodes[0];
604         leaf = &leaf_buf->leaf;
605         if (leaf_free_space(leaf) <  sizeof(struct item) + data_size) {
606                 split_leaf(root, &path, data_size);
607                 leaf_buf = path.nodes[0];
608                 leaf = &path.nodes[0]->leaf;
609         }
610         nritems = leaf->header.nritems;
611         data_end = leaf_data_end(leaf);
612
613         if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
614                 BUG();
615
616         slot = path.slots[0];
617         BUG_ON(slot < 0);
618         if (slot == 0)
619                 fixup_low_keys(root, &path, key, 1);
620         if (slot != nritems) {
621                 int i;
622                 unsigned int old_data = leaf->items[slot].offset +
623                                         leaf->items[slot].size;
624
625                 /*
626                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
627                  */
628                 /* first correct the data pointers */
629                 for (i = slot; i < nritems; i++)
630                         leaf->items[i].offset -= data_size;
631
632                 /* shift the items */
633                 memmove(leaf->items + slot + 1, leaf->items + slot,
634                         (nritems - slot) * sizeof(struct item));
635
636                 /* shift the data */
637                 memmove(leaf->data + data_end - data_size, leaf->data +
638                         data_end, old_data - data_end);
639                 data_end = old_data;
640         }
641         memcpy(&leaf->items[slot].key, key, sizeof(struct key));
642         leaf->items[slot].offset = data_end - data_size;
643         leaf->items[slot].size = data_size;
644         memcpy(leaf->data + data_end - data_size, data, data_size);
645         leaf->header.nritems += 1;
646         write_tree_block(root, leaf_buf);
647         if (leaf_free_space(leaf) < 0)
648                 BUG();
649         release_path(root, &path);
650         return 0;
651 }
652
653 int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
654 {
655         int slot;
656         struct tree_buffer *t;
657         struct node *node;
658         int nritems;
659
660         while(1) {
661                 t = path->nodes[level];
662                 if (!t)
663                         break;
664                 node = &t->node;
665                 slot = path->slots[level];
666                 nritems = node->header.nritems;
667
668                 if (slot != nritems -1) {
669                         memmove(node->keys + slot, node->keys + slot + 1,
670                                 sizeof(struct key) * (nritems - slot - 1));
671                         memmove(node->blockptrs + slot,
672                                 node->blockptrs + slot + 1,
673                                 sizeof(u64) * (nritems - slot - 1));
674                 }
675                 node->header.nritems--;
676                 write_tree_block(root, t);
677                 if (node->header.nritems != 0) {
678                         int tslot;
679                         if (slot == 0)
680                                 fixup_low_keys(root, path, node->keys,
681                                                level + 1);
682                         tslot = path->slots[level+1];
683                         t->count++;
684                         push_node_left(root, path, level);
685                         if (node->header.nritems) {
686                                 push_node_right(root, path, level);
687                         }
688                         if (node->header.nritems) {
689                                 tree_block_release(root, t);
690                                 break;
691                         }
692                         tree_block_release(root, t);
693                         path->slots[level+1] = tslot;
694                 }
695                 if (t == root->node) {
696                         /* just turn the root into a leaf and break */
697                         root->node->node.header.flags = node_level(0);
698                         write_tree_block(root, t);
699                         break;
700                 }
701                 level++;
702                 if (!path->nodes[level])
703                         BUG();
704         }
705         return 0;
706 }
707
708 int del_item(struct ctree_root *root, struct ctree_path *path)
709 {
710         int slot;
711         struct leaf *leaf;
712         struct tree_buffer *leaf_buf;
713         int doff;
714         int dsize;
715
716         leaf_buf = path->nodes[0];
717         leaf = &leaf_buf->leaf;
718         slot = path->slots[0];
719         doff = leaf->items[slot].offset;
720         dsize = leaf->items[slot].size;
721
722         if (slot != leaf->header.nritems - 1) {
723                 int i;
724                 int data_end = leaf_data_end(leaf);
725                 memmove(leaf->data + data_end + dsize,
726                         leaf->data + data_end,
727                         doff - data_end);
728                 for (i = slot + 1; i < leaf->header.nritems; i++)
729                         leaf->items[i].offset += dsize;
730                 memmove(leaf->items + slot, leaf->items + slot + 1,
731                         sizeof(struct item) *
732                         (leaf->header.nritems - slot - 1));
733         }
734         leaf->header.nritems -= 1;
735         if (leaf->header.nritems == 0) {
736                 if (leaf_buf == root->node) {
737                         leaf->header.flags = node_level(0);
738                         write_tree_block(root, leaf_buf);
739                 } else
740                         del_ptr(root, path, 1);
741         } else {
742                 if (slot == 0)
743                         fixup_low_keys(root, path, &leaf->items[0].key, 1);
744                 write_tree_block(root, leaf_buf);
745                 if (leaf_space_used(leaf, 0, leaf->header.nritems) <
746                     LEAF_DATA_SIZE / 4) {
747                         /* push_leaf_left fixes the path.
748                          * make sure the path still points to our leaf
749                          * for possible call to del_ptr below
750                          */
751                         slot = path->slots[1];
752                         leaf_buf->count++;
753                         push_leaf_left(root, path, 1);
754                         if (leaf->header.nritems == 0) {
755                                 path->slots[1] = slot;
756                                 del_ptr(root, path, 1);
757                         }
758                         tree_block_release(root, leaf_buf);
759                 }
760         }
761         return 0;
762 }
763
764 void print_leaf(struct leaf *l)
765 {
766         int i;
767         int nr = l->header.nritems;
768         struct item *item;
769         printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr,
770                leaf_free_space(l));
771         fflush(stdout);
772         for (i = 0 ; i < nr ; i++) {
773                 item = l->items + i;
774                 printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n",
775                         i,
776                         item->key.objectid, item->key.flags, item->key.offset,
777                         item->offset, item->size);
778                 fflush(stdout);
779                 printf("\t\titem data %.*s\n", item->size, l->data+item->offset);
780                 fflush(stdout);
781         }
782 }
783 void print_tree(struct ctree_root *root, struct tree_buffer *t)
784 {
785         int i;
786         int nr;
787         struct node *c;
788
789         if (!t)
790                 return;
791         c = &t->node;
792         nr = c->header.nritems;
793         if (c->header.blocknr != t->blocknr)
794                 BUG();
795         if (is_leaf(c->header.flags)) {
796                 print_leaf((struct leaf *)c);
797                 return;
798         }
799         printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr,
800                 node_level(c->header.flags), c->header.nritems,
801                 NODEPTRS_PER_BLOCK - c->header.nritems);
802         fflush(stdout);
803         for (i = 0; i < nr; i++) {
804                 printf("\tkey %d (%lu %u %lu) block %lu\n",
805                        i,
806                        c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset,
807                        c->blockptrs[i]);
808                 fflush(stdout);
809         }
810         for (i = 0; i < nr; i++) {
811                 struct tree_buffer *next_buf = read_tree_block(root,
812                                                             c->blockptrs[i]);
813                 struct node *next = &next_buf->node;
814                 if (is_leaf(next->header.flags) &&
815                     node_level(c->header.flags) != 1)
816                         BUG();
817                 if (node_level(next->header.flags) !=
818                         node_level(c->header.flags) - 1)
819                         BUG();
820                 print_tree(root, next_buf);
821                 tree_block_release(root, next_buf);
822         }
823
824 }
825
826 /* for testing only */
827 int next_key(int i, int max_key) {
828         return rand() % max_key;
829         // return i;
830 }
831
832 int main() {
833         struct ctree_root *root;
834         struct key ins;
835         struct key last = { (u64)-1, 0, 0};
836         char *buf;
837         int i;
838         int num;
839         int ret;
840         int run_size = 1000000;
841         int max_key = 100000000;
842         int tree_size = 0;
843         struct ctree_path path;
844
845         radix_tree_init();
846
847
848         root = open_ctree("dbfile");
849
850         srand(55);
851         for (i = 0; i < run_size; i++) {
852                 buf = malloc(64);
853                 num = next_key(i, max_key);
854                 // num = i;
855                 sprintf(buf, "string-%d", num);
856                 // printf("insert %d\n", num);
857                 ins.objectid = num;
858                 ins.offset = 0;
859                 ins.flags = 0;
860                 ret = insert_item(root, &ins, buf, strlen(buf));
861                 if (!ret)
862                         tree_size++;
863         }
864         close_ctree(root);
865         root = open_ctree("dbfile");
866         printf("starting search\n");
867         srand(55);
868         for (i = 0; i < run_size; i++) {
869                 num = next_key(i, max_key);
870                 ins.objectid = num;
871                 init_path(&path);
872                 ret = search_slot(root, &ins, &path);
873                 if (ret) {
874                         print_tree(root, root->node);
875                         printf("unable to find %d\n", num);
876                         exit(1);
877                 }
878                 release_path(root, &path);
879         }
880         close_ctree(root);
881         root = open_ctree("dbfile");
882         printf("node %p level %d total ptrs %d free spc %lu\n", root->node,
883                 node_level(root->node->node.header.flags),
884                 root->node->node.header.nritems,
885                 NODEPTRS_PER_BLOCK - root->node->node.header.nritems);
886         printf("all searches good, deleting some items\n");
887         i = 0;
888         srand(55);
889         for (i = 0 ; i < run_size/4; i++) {
890                 num = next_key(i, max_key);
891                 ins.objectid = num;
892                 init_path(&path);
893                 ret = search_slot(root, &ins, &path);
894                 if (ret)
895                         continue;
896                 ret = del_item(root, &path);
897                 if (ret != 0)
898                         BUG();
899                 release_path(root, &path);
900                 tree_size--;
901         }
902         srand(128);
903         for (i = 0; i < run_size; i++) {
904                 buf = malloc(64);
905                 num = next_key(i, max_key);
906                 sprintf(buf, "string-%d", num);
907                 ins.objectid = num;
908                 ret = insert_item(root, &ins, buf, strlen(buf));
909                 if (!ret)
910                         tree_size++;
911         }
912         close_ctree(root);
913         root = open_ctree("dbfile");
914         printf("starting search2\n");
915         srand(128);
916         for (i = 0; i < run_size; i++) {
917                 num = next_key(i, max_key);
918                 ins.objectid = num;
919                 init_path(&path);
920                 ret = search_slot(root, &ins, &path);
921                 if (ret) {
922                         print_tree(root, root->node);
923                         printf("unable to find %d\n", num);
924                         exit(1);
925                 }
926                 release_path(root, &path);
927         }
928         printf("starting big long delete run\n");
929         while(root->node && root->node->node.header.nritems > 0) {
930                 struct leaf *leaf;
931                 int slot;
932                 ins.objectid = (u64)-1;
933                 init_path(&path);
934                 ret = search_slot(root, &ins, &path);
935                 if (ret == 0)
936                         BUG();
937
938                 leaf = &path.nodes[0]->leaf;
939                 slot = path.slots[0];
940                 if (slot != leaf->header.nritems)
941                         BUG();
942                 while(path.slots[0] > 0) {
943                         path.slots[0] -= 1;
944                         slot = path.slots[0];
945                         leaf = &path.nodes[0]->leaf;
946
947                         if (comp_keys(&last, &leaf->items[slot].key) <= 0)
948                                 BUG();
949                         memcpy(&last, &leaf->items[slot].key, sizeof(last));
950                         ret = del_item(root, &path);
951                         if (ret != 0) {
952                                 printf("del_item returned %d\n", ret);
953                                 BUG();
954                         }
955                         tree_size--;
956                 }
957                 release_path(root, &path);
958         }
959         close_ctree(root);
960         printf("tree size is now %d\n", tree_size);
961         return 0;
962 }