2 * Eddie Kohler, Yandong Mao, Robert Morris
3 * Copyright (c) 2012-2014 President and Fellows of Harvard College
4 * Copyright (c) 2012-2014 Massachusetts Institute of Technology
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, subject to the conditions
9 * listed in the Masstree LICENSE file. These conditions include: you must
10 * preserve this copyright notice, and you cannot mention the copyright
11 * holders in advertising related to the Software without their permission.
12 * The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This
13 * notice is a summary of the Masstree LICENSE file; the license in that file
16 #include "masstree.hh"
17 #include "masstree_key.hh"
18 #include "masstree_struct.hh"
19 #include "masstree_tcursor.hh"
20 #include "masstree_get.hh"
21 #include "masstree_insert.hh"
22 #include "masstree_split.hh"
23 #include "masstree_remove.hh"
24 #include "masstree_scan.hh"
25 #include "masstree_print.hh"
26 #include "query_masstree.hh"
27 #include "string_slice.hh"
28 #include "kpermuter.hh"
30 #include "stringbag.hh"
36 static uint64_t heightcounts[300], fillcounts[100];
39 static void treestats1(node_base<P>* n, unsigned height) {
43 assert(height < arraysize(heightcounts));
46 leaf<P> *lf = (leaf<P> *)n;
47 typename leaf<P>::permuter_type perm = lf->permutation_;
48 for (int idx = 0; idx < perm.size(); ++idx) {
50 typename leaf<P>::leafvalue_type lv = lf->lv_[p];
51 if (!lv || !lf->is_layer(p))
52 heightcounts[height] ++;
54 node_base<P> *in = lv.layer()->unsplit_ancestor();
55 treestats1(in, height + 1);
59 internode<P> *in = (internode<P> *) n;
60 for (int i = 0; i <= n->size(); ++i)
62 treestats1(in->child_[i], height + 1);
64 assert((size_t) n->size() < arraysize(fillcounts));
65 fillcounts[n->size()] += 1;
69 void query_table<P>::stats(FILE* f) {
70 memset(heightcounts, 0, sizeof(heightcounts));
71 memset(fillcounts, 0, sizeof(fillcounts));
72 treestats1(table_.root(), 0);
73 fprintf(f, " heights:");
74 for (unsigned i = 0; i < arraysize(heightcounts); ++i)
76 fprintf(f, " %d=%" PRIu64, i, heightcounts[i]);
77 fprintf(f, "\n node counts:");
78 for (unsigned i = 0; i < arraysize(fillcounts); ++i)
80 fprintf(f, " %d=%" PRIu64, i, fillcounts[i]);
85 static void json_stats1(node_base<P>* n, lcdf::Json& j, int layer, int depth,
90 else if (n->isleaf()) {
91 leaf<P>* lf = static_cast<leaf<P>*>(n);
92 // track number of leaves by depth and size
93 j[&"l1_node_by_depth"[!layer * 3]][depth] += 1;
94 j[&"l1_leaf_by_depth"[!layer * 3]][depth] += 1;
95 j[&"l1_leaf_by_size"[!layer * 3]][lf->size()] += 1;
97 // per-key information
98 typename leaf<P>::permuter_type perm(lf->permutation_);
100 size_t active_ksuf_len = 0;
101 for (int i = 0; i < perm.size(); ++i)
102 if (lf->is_layer(perm[i])) {
103 lcdf::Json x = j["l1_size"];
105 json_stats1(lf->lv_[perm[i]].layer(), j, layer + 1, 0, ti);
106 j["l1_size_sum"] += j["l1_size"].to_i();
111 int l = sizeof(typename P::ikey_type) * layer
112 + lf->keylenx_[perm[i]];
113 if (lf->has_ksuf(perm[i])) {
114 size_t ksuf_len = lf->ksuf(perm[i]).len;
116 active_ksuf_len += ksuf_len;
119 j["key_by_length"][l] += 1;
123 j["key_by_layer"][layer] += n;
125 // key suffix information
126 if (lf->allocated_size() != lf->min_allocated_size()
127 && lf->ksuf_external()) {
128 j["overridden_ksuf"] += 1;
129 j["overridden_ksuf_capacity"] += lf->allocated_size() - lf->min_allocated_size();
131 if (lf->ksuf_capacity()) {
133 j["ksuf_capacity"] += lf->ksuf_capacity();
134 j["ksuf_len"] += active_ksuf_len;
135 j["ksuf_by_layer"][layer] += 1;
136 if (!active_ksuf_len) {
137 j["unused_ksuf_capacity"] += lf->ksuf_capacity();
138 j["unused_ksuf_by_layer"][layer] += 1;
139 if (lf->ksuf_external())
140 j["unused_ksuf_external"] += 1;
142 j["used_ksuf_by_layer"][layer] += 1;
145 internode<P> *in = static_cast<internode<P> *>(n);
146 for (int i = 0; i <= n->size(); ++i)
148 json_stats1(in->child_[i], j, layer, depth + 1, ti);
149 j[&"l1_node_by_depth"[!layer * 3]][depth] += 1;
153 template <typename P>
154 void query_table<P>::json_stats(lcdf::Json& j, threadinfo& ti)
160 const char* jarrays[] = {
161 "node_by_depth", "leaf_by_depth", "leaf_by_size",
162 "l1_node_by_depth", "l1_leaf_by_depth", "l1_leaf_by_size",
163 "key_by_layer", "key_by_length",
164 "ksuf_by_layer", "unused_ksuf_by_layer", "used_ksuf_by_layer"
166 for (const char** x = jarrays; x != jarrays + sizeof(jarrays) / sizeof(*jarrays); ++x)
167 j[*x] = Json::make_array();
169 json_stats1(table_.root(), j, 0, 0, ti);
172 for (const char** x = jarrays; x != jarrays + sizeof(jarrays) / sizeof(*jarrays); ++x) {
174 for (Json* it = a.array_data(); it != a.end_array_data(); ++it)
176 *it = Json((size_t) 0);
182 template <typename N>
183 static Str findpv(N *n, int pvi, int npv)
185 // XXX assumes that most keys differ in the low bytes
186 // XXX will fail badly if all keys have the same prefix
187 // XXX not clear what to do then
188 int nbranch = 1, branchid = 0;
189 typedef typename N::internode_type internode_type;
190 typedef typename N::leaf_type leaf_type;
192 n = n->unsplit_ancestor();
195 typename N::nodeversion_type v = n->stable();
196 int size = n->size() + !n->isleaf();
200 int total_nkeys_estimate = nbranch * size;
201 int first_pv_in_node = branchid * size;
202 int pv_offset = pvi * total_nkeys_estimate / npv - first_pv_in_node;
204 if (!n->isleaf() && total_nkeys_estimate < npv) {
205 internode_type *in = static_cast<internode_type *>(n);
206 pv_offset = std::min(std::max(pv_offset, 0), size - 1);
207 N *next = in->child_[pv_offset];
208 if (!n->has_changed(v)) {
209 nbranch = total_nkeys_estimate;
210 branchid = first_pv_in_node + pv_offset;
216 pv_offset = std::min(std::max(pv_offset, 0), size - 1 - !n->isleaf());
217 typename N::ikey_type ikey0;
219 leaf_type *l = static_cast<leaf_type *>(n);
220 typename leaf_type::permuter_type perm = l->permutation();
221 ikey0 = l->ikey0_[perm[pv_offset]];
223 internode_type *in = static_cast<internode_type *>(n);
224 ikey0 = in->ikey0_[pv_offset];
227 if (!n->has_changed(v)) {
228 char *x = (char *) malloc(sizeof(ikey0));
229 int len = string_slice<typename N::ikey_type>::unparse_comparable(x, sizeof(ikey0), ikey0);
235 // findpivots should allocate memory for pv[i]->s, which will be
236 // freed by the caller.
237 template <typename P>
238 void query_table<P>::findpivots(Str *pv, int npv) const
240 pv[0].assign(NULL, 0);
241 char *cmaxk = (char *)malloc(MASSTREE_MAXKEYLEN);
242 memset(cmaxk, 255, MASSTREE_MAXKEYLEN);
243 pv[npv - 1].assign(cmaxk, MASSTREE_MAXKEYLEN);
244 for (int i = 1; i < npv - 1; i++)
245 pv[i] = findpv(table_.root(), i, npv - 1);
250 const char * const *vbegin_, * const *vend_;
255 scan_tester(const char * const *vbegin, const char * const *vend,
256 bool reverse = false)
257 : vbegin_(vbegin), vend_(vend), keylen_(0), reverse_(reverse),
260 memset(key_, 255, sizeof(key_));
261 keylen_ = sizeof(key_);
264 template <typename SS, typename K>
265 void visit_leaf(const SS&, const K&, threadinfo&) {
267 bool visit_value(Str key, row_type*, threadinfo&) {
268 memcpy(key_, key.s, key.len);
270 const char *pos = (reverse_ ? vend_[-1] : vbegin_[0]);
271 if ((int) strlen(pos) != key.len || memcmp(pos, key.s, key.len) != 0) {
272 fprintf(stderr, "%sscan encountered %.*s, expected %s\n", reverse_ ? "r" : "", key.len, key.s, pos);
273 assert((int) strlen(pos) == key.len && memcmp(pos, key.s, key.len) == 0);
275 fprintf(stderr, "%sscan %.*s\n", reverse_ ? "r" : "", key.len, key.s);
276 (reverse_ ? --vend_ : ++vbegin_);
278 return vbegin_ != vend_;
280 template <typename T>
281 int scan(T& table, threadinfo& ti) {
282 return table.table().scan(Str(key_, keylen_), first_, *this, ti);
284 template <typename T>
285 int rscan(T& table, threadinfo& ti) {
286 return table.table().rscan(Str(key_, keylen_), first_, *this, ti);
291 template <typename P>
292 void query_table<P>::test(threadinfo& ti) {
297 const char * const values[] = {
298 "", "0", "1", "10", "100000000", // 0-4
299 "1000000001", "1000000002", "2", "20", "200000000", // 5-9
300 "aaaaaaaaaaaaaaaaaaaaaaaaaa", // 10
301 "aaaaaaaaaaaaaaabbbb", "aaaaaaaaaaaaaaabbbc", "aaaaaaaaaxaaaaabbbc", "b", "c", "d", "e", "f", "g", "h", "i", "j",
302 "kkkkkkkk\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" "a",
303 "kkkkkkkk\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" "b",
306 const char * const *end_values = values + arraysize(values);
307 const char *values_copy[arraysize(values)];
308 memcpy(values_copy, values, sizeof(values));
310 for (int i = arraysize(values); i > 0; --i) {
312 q.run_replace(t.table(), Str(values_copy[x]), Str(values_copy[x]), ti);
313 values_copy[x] = values_copy[i - 1];
319 scan_tester scanner(values, values + 3);
320 while (scanner.scan(t, ti)) {
321 scanner.vend_ = std::min(scanner.vend_ + 3, end_values);
322 fprintf(stderr, "-scanbreak-\n");
325 scanner = scan_tester(values, values + 8);
326 while (scanner.scan(t, ti)) {
327 scanner.vend_ = std::min(scanner.vend_ + 8, end_values);
328 fprintf(stderr, "-scanbreak-\n");
331 scanner = scan_tester(values + 10, values + 11);
332 int r = t.table_.scan(Str(values[10]), true, scanner, ti);
333 always_assert(r == 1);
335 scanner = scan_tester(values + 10, values + 11);
336 r = t.table_.scan(Str(values[10] + 1), true, scanner, ti);
337 always_assert(r == 1);
339 scanner = scan_tester(values + 11, values + 12);
340 r = t.table_.scan(Str(values[10]), false, scanner, ti);
341 always_assert(r == 1);
343 scanner = scan_tester(values + 10, values + 11);
344 r = t.table_.scan(Str("aaaaaaaaaaaaaaaaaaaaaaaaaZ"), true, scanner, ti);
345 always_assert(r == 1);
347 scanner = scan_tester(values + 11, values + 12);
348 r = t.table_.scan(Str(values[11]), true, scanner, ti);
349 always_assert(r == 1);
351 scanner = scan_tester(values + 12, values + 13);
352 r = t.table_.scan(Str(values[11]), false, scanner, ti);
353 always_assert(r == 1);
356 scanner = scan_tester(end_values - 3, end_values, true);
357 while (scanner.rscan(t, ti)) {
358 scanner.vbegin_ = std::max(scanner.vbegin_ - 3, (const char * const *) values);
359 fprintf(stderr, "-scanbreak-\n");
362 scanner = scan_tester(end_values - 2, end_values, true);
363 r = scanner.rscan(t, ti);
364 always_assert(r == 2);
365 scanner.vbegin_ = std::max(scanner.vbegin_ - 2, (const char * const *) values);
366 fprintf(stderr, "-scanbreak-\n");
367 r = scanner.rscan(t, ti);
368 always_assert(r == 2);
370 scanner = scan_tester(end_values - 8, end_values, true);
371 while (scanner.rscan(t, ti)) {
372 scanner.vbegin_ = std::max(scanner.vbegin_ - 8, (const char * const *) values);
373 fprintf(stderr, "-scanbreak-\n");
376 scanner = scan_tester(values + 10, values + 11);
377 r = t.table_.rscan(Str(values[10]), true, scanner, ti);
378 always_assert(r == 1);
380 scanner = scan_tester(values + 10, values + 11);
381 r = t.table_.rscan(Str("aaaaaaaaaaaaaaaaaaaaaaaaab"), true, scanner, ti);
382 always_assert(r == 1);
384 scanner = scan_tester(values + 9, values + 10);
385 r = t.table_.rscan(Str(values[10]), false, scanner, ti);
386 always_assert(r == 1);
388 scanner = scan_tester(values + 10, values + 11);
389 r = t.table_.rscan(Str("aaaaaaaaaaaaaaaaaaaaaaaaab"), true, scanner, ti);
390 always_assert(r == 1);
392 scanner = scan_tester(values + 11, values + 12);
393 r = t.table_.rscan(Str(values[11]), true, scanner, ti);
394 always_assert(r == 1);
396 scanner = scan_tester(values + 10, values + 11);
397 r = t.table_.rscan(Str(values[11]), false, scanner, ti);
398 always_assert(r == 1);
402 t.findpivots(pv, 10);
403 for (int i = 0; i < 10; ++i) {
404 fprintf(stderr, "%d >%.*s<\n", i, std::min(pv[i].len, 10), pv[i].s);
405 free((char *)pv[i].s);
408 for (int i = 0; i < 4; ++i) {
409 fprintf(stderr, "%d >%.*s<\n", i, std::min(pv[i].len, 10), pv[i].s);
410 free((char *)pv[i].s);
416 template <typename P>
417 void query_table<P>::print(FILE *f, int indent) const {
418 table_.print(f, indent);
421 template class basic_table<default_table::param_type>;
422 template class query_table<default_table::param_type>;