1 #ifndef _NDB_BASE_TXN_BTREE_H_
2 #define _NDB_BASE_TXN_BTREE_H_
4 #include "btree_choice.h"
8 #include "ndb_type_traits.h"
12 #include <type_traits>
15 // each Transaction implementation should specialize this for special
16 // behavior- the default implementation is just nops
17 template <template <typename> class Transaction>
18 struct base_txn_btree_handler {
19 static inline void on_construct() {} // called when initializing
20 static const bool has_background_task = false;
23 template <template <typename> class Transaction, typename P>
24 class base_txn_btree {
27 typedef transaction_base::tid_t tid_t;
28 typedef transaction_base::size_type size_type;
29 typedef transaction_base::string_type string_type;
30 typedef concurrent_btree::string_type keystring_type;
32 base_txn_btree(size_type value_size_hint = 128,
33 bool mostly_append = false,
34 const std::string &name = "<unknown>")
35 : value_size_hint(value_size_hint),
37 been_destructed(false)
39 base_txn_btree_handler<Transaction>::on_construct();
51 return underlying_btree.size();
55 get_value_size_hint() const
57 return value_size_hint;
61 set_value_size_hint(size_type value_size_hint)
63 this->value_size_hint = value_size_hint;
67 underlying_btree.print();
71 * only call when you are sure there are no concurrent modifications on the
72 * tree. is neither threadsafe nor transactional
74 * Note that when you call unsafe_purge(), this txn_btree becomes
75 * completely invalidated and un-usable. Any further operations
76 * (other than calling the destructor) are undefined
78 std::map<std::string, uint64_t> unsafe_purge(bool dump_stats = false);
82 struct purge_tree_walker : public concurrent_btree::tree_walk_callback {
83 virtual void on_node_begin(const typename concurrent_btree::node_opaque_t *n);
84 virtual void on_node_success();
85 virtual void on_node_failure();
86 #ifdef TXN_BTREE_DUMP_PURGE_STATS
88 : purge_stats_nodes(0),
89 purge_stats_nosuffix_nodes(0) {}
90 std::map<size_t, size_t> purge_stats_tuple_record_size_counts; // just the record
91 std::map<size_t, size_t> purge_stats_tuple_alloc_size_counts; // includes overhead
92 //std::map<size_t, size_t> purge_stats_tuple_chain_counts;
93 std::vector<uint16_t> purge_stats_nkeys_node;
94 size_t purge_stats_nodes;
95 size_t purge_stats_nosuffix_nodes;
97 std::map<std::string, uint64_t>
100 std::map<std::string, uint64_t> ret;
102 for (std::vector<uint16_t>::iterator it = purge_stats_nkeys_node.begin();
103 it != purge_stats_nkeys_node.end(); ++it)
105 const double avg_nkeys_node = double(v)/double(purge_stats_nkeys_node.size());
106 const double avg_fill_factor = avg_nkeys_node/double(concurrent_btree::NKeysPerNode);
107 std::cerr << "btree node stats" << std::endl;
108 std::cerr << " avg_nkeys_node: " << avg_nkeys_node << std::endl;
109 std::cerr << " avg_fill_factor: " << avg_fill_factor << std::endl;
110 std::cerr << " num_nodes: " << purge_stats_nodes << std::endl;
111 std::cerr << " num_nosuffix_nodes: " << purge_stats_nosuffix_nodes << std::endl;
112 std::cerr << "record size stats (nbytes => count)" << std::endl;
113 for (std::map<size_t, size_t>::iterator it = purge_stats_tuple_record_size_counts.begin();
114 it != purge_stats_tuple_record_size_counts.end(); ++it)
115 std::cerr << " " << it->first << " => " << it->second << std::endl;
116 std::cerr << "alloc size stats (nbytes => count)" << std::endl;
117 for (std::map<size_t, size_t>::iterator it = purge_stats_tuple_alloc_size_counts.begin();
118 it != purge_stats_tuple_alloc_size_counts.end(); ++it)
119 std::cerr << " " << (it->first + sizeof(dbtuple)) << " => " << it->second << std::endl;
120 //std::cerr << "chain stats (length => count)" << std::endl;
121 //for (std::map<size_t, size_t>::iterator it = purge_stats_tuple_chain_counts.begin();
122 // it != purge_stats_tuple_chain_counts.end(); ++it) {
123 // std::cerr << " " << it->first << " => " << it->second << std::endl;
124 // ret["chain_" + std::to_string(it->first)] += it->second;
126 //std::cerr << "deleted recored stats" << std::endl;
127 //std::cerr << " logically_removed (total): " << (purge_stats_tuple_logically_removed_no_mark + purge_stats_tuple_logically_removed_with_mark) << std::endl;
128 //std::cerr << " logically_removed_no_mark: " << purge_stats_tuple_logically_removed_no_mark << std::endl;
129 //std::cerr << " logically_removed_with_mark: " << purge_stats_tuple_logically_removed_with_mark << std::endl;
135 std::vector< std::pair<typename concurrent_btree::value_type, bool> > spec_values;
140 // readers are placed here so they can be shared amongst
141 // derived implementations
143 template <typename Traits, typename Callback,
144 typename KeyReader, typename ValueReader>
145 struct txn_search_range_callback : public concurrent_btree::low_level_search_range_callback {
146 constexpr txn_search_range_callback(
147 Transaction<Traits> *t,
148 Callback *caller_callback,
149 KeyReader *key_reader,
150 ValueReader *value_reader)
151 : t(t), caller_callback(caller_callback),
152 key_reader(key_reader), value_reader(value_reader) {}
154 virtual void on_resp_node(const typename concurrent_btree::node_opaque_t *n, uint64_t version);
155 virtual bool invoke(const typename concurrent_btree::string_type &k, typename concurrent_btree::value_type v,
156 const typename concurrent_btree::node_opaque_t *n, uint64_t version);
159 Transaction<Traits> *const t;
160 Callback *const caller_callback;
161 KeyReader *const key_reader;
162 ValueReader *const value_reader;
165 template <typename Traits, typename ValueReader>
167 do_search(Transaction<Traits> &t,
168 const typename P::Key &k,
169 ValueReader &value_reader);
171 template <typename Traits, typename Callback,
172 typename KeyReader, typename ValueReader>
174 do_search_range_call(Transaction<Traits> &t,
175 const typename P::Key &lower,
176 const typename P::Key *upper,
178 KeyReader &key_reader,
179 ValueReader &value_reader);
181 template <typename Traits, typename Callback,
182 typename KeyReader, typename ValueReader>
184 do_rsearch_range_call(Transaction<Traits> &t,
185 const typename P::Key &upper,
186 const typename P::Key *lower,
188 KeyReader &key_reader,
189 ValueReader &value_reader);
191 // expect_new indicates if we expect the record to not exist in the tree-
192 // is just a hint that affects perf, not correctness. remove is put with nullptr
195 // NOTE: both key and value are expected to be stable values already
196 template <typename Traits>
197 void do_tree_put(Transaction<Traits> &t,
198 const std::string *k,
199 const typename P::Value *v,
200 dbtuple::tuple_writer_t writer,
203 concurrent_btree underlying_btree;
204 size_type value_size_hint;
206 bool been_destructed;
210 STATIC_COUNTER_DECL(scopedperf::tsc_ctr, txn_btree_search_probe0, txn_btree_search_probe0_cg)
211 STATIC_COUNTER_DECL(scopedperf::tsc_ctr, txn_btree_search_probe1, txn_btree_search_probe1_cg)
214 template <template <typename> class Transaction, typename P>
215 template <typename Traits, typename ValueReader>
217 base_txn_btree<Transaction, P>::do_search(
218 Transaction<Traits> &t,
219 const typename P::Key &k,
220 ValueReader &value_reader)
224 typename P::KeyWriter key_writer(&k);
225 const std::string * const key_str =
226 key_writer.fully_materialize(true, t.string_allocator());
228 // search the underlying btree to map k=>(btree_node|tuple)
229 typename concurrent_btree::value_type underlying_v{};
230 concurrent_btree::versioned_node_t search_info;
231 const bool found = this->underlying_btree.search(varkey(*key_str), underlying_v, &search_info);
233 const dbtuple * const tuple = reinterpret_cast<const dbtuple *>(underlying_v);
234 return t.do_tuple_read(tuple, value_reader);
236 // not found, add to absent_set
237 t.do_node_read(search_info.first, search_info.second);
242 template <template <typename> class Transaction, typename P>
243 std::map<std::string, uint64_t>
244 base_txn_btree<Transaction, P>::unsafe_purge(bool dump_stats)
246 ALWAYS_ASSERT(!been_destructed);
247 been_destructed = true;
249 scoped_rcu_region guard;
250 underlying_btree.tree_walk(w);
251 underlying_btree.clear();
252 #ifdef TXN_BTREE_DUMP_PURGE_STATS
254 return std::map<std::string, uint64_t>();
255 return w.dump_stats();
257 return std::map<std::string, uint64_t>();
261 template <template <typename> class Transaction, typename P>
263 base_txn_btree<Transaction, P>::purge_tree_walker::on_node_begin(const typename concurrent_btree::node_opaque_t *n)
265 INVARIANT(spec_values.empty());
266 spec_values = concurrent_btree::ExtractValues(n);
269 template <template <typename> class Transaction, typename P>
271 base_txn_btree<Transaction, P>::purge_tree_walker::on_node_success()
273 for (size_t i = 0; i < spec_values.size(); i++) {
274 dbtuple *tuple = (dbtuple *) spec_values[i].first;
276 #ifdef TXN_BTREE_DUMP_PURGE_STATS
277 // XXX(stephentu): should we also walk the chain?
278 purge_stats_tuple_record_size_counts[tuple->is_deleting() ? 0 : tuple->size]++;
279 purge_stats_tuple_alloc_size_counts[tuple->alloc_size]++;
280 //purge_stats_tuple_chain_counts[tuple->chain_length()]++;
282 if (base_txn_btree_handler<Transaction>::has_background_task) {
283 #ifdef CHECK_INVARIANTS
284 lock_guard<dbtuple> l(tuple, false);
286 if (!tuple->is_deleting()) {
287 INVARIANT(tuple->is_latest());
288 tuple->clear_latest();
289 tuple->mark_deleting();
290 dbtuple::release(tuple);
292 // enqueued already to background gc by the writer of the delete
295 // XXX: this path is probably not right
296 dbtuple::release_no_rcu(tuple);
299 #ifdef TXN_BTREE_DUMP_PURGE_STATS
300 purge_stats_nkeys_node.push_back(spec_values.size());
302 for (size_t i = 0; i < spec_values.size(); i++)
303 if (spec_values[i].second)
305 purge_stats_nosuffix_nodes++;
311 template <template <typename> class Transaction, typename P>
313 base_txn_btree<Transaction, P>::purge_tree_walker::on_node_failure()
318 template <template <typename> class Transaction, typename P>
319 template <typename Traits>
320 void base_txn_btree<Transaction, P>::do_tree_put(
321 Transaction<Traits> &t,
322 const std::string *k,
323 const typename P::Value *v,
324 dbtuple::tuple_writer_t writer,
328 INVARIANT(!expect_new || v); // makes little sense to remove() a key you expect
329 // to not be present, so we assert this doesn't happen
330 // for now [since this would indicate a suboptimality]
333 if (unlikely(t.is_snapshot())) {
334 const transaction_base::abort_reason r = transaction_base::ABORT_REASON_USER;
336 throw transaction_abort_exception(r);
338 dbtuple *px = nullptr;
342 auto ret = t.try_insert_new_tuple(this->underlying_btree, k, v, writer);
343 INVARIANT(!ret.second || ret.first);
344 if (unlikely(ret.second)) {
345 const transaction_base::abort_reason r = transaction_base::ABORT_REASON_WRITE_NODE_INTERFERENCE;
347 throw transaction_abort_exception(r);
355 typename concurrent_btree::value_type bv = 0;
356 if (!this->underlying_btree.search(varkey(*k), bv)) {
357 // XXX(stephentu): if we are removing a key and we can't find it, then we
358 // should just treat this as a read [of an empty-value], instead of
359 // explicitly inserting an empty node...
363 px = reinterpret_cast<dbtuple *>(bv);
367 // add to write set normally, as non-insert
368 t.write_set.emplace_back(px, k, v, writer, &this->underlying_btree, false);
370 // should already exist in write set as insert
371 // (because of try_insert_new_tuple())
373 // too expensive to be a practical check
374 //INVARIANT(t.find_write_set(px) != t.write_set.end());
375 //INVARIANT(t.find_write_set(px)->is_insert());
379 template <template <typename> class Transaction, typename P>
380 template <typename Traits, typename Callback,
381 typename KeyReader, typename ValueReader>
383 base_txn_btree<Transaction, P>
384 ::txn_search_range_callback<Traits, Callback, KeyReader, ValueReader>
386 const typename concurrent_btree::node_opaque_t *n, uint64_t version)
388 VERBOSE(std::cerr << "on_resp_node(): <node=0x" << util::hexify(intptr_t(n))
389 << ", version=" << version << ">" << std::endl);
390 VERBOSE(std::cerr << " " << concurrent_btree::NodeStringify(n) << std::endl);
391 t->do_node_read(n, version);
394 template <template <typename> class Transaction, typename P>
395 template <typename Traits, typename Callback,
396 typename KeyReader, typename ValueReader>
398 base_txn_btree<Transaction, P>
399 ::txn_search_range_callback<Traits, Callback, KeyReader, ValueReader>
401 const typename concurrent_btree::string_type &k, typename concurrent_btree::value_type v,
402 const typename concurrent_btree::node_opaque_t *n, uint64_t version)
405 VERBOSE(std::cerr << "search range k: " << util::hexify(k) << " from <node=0x" << util::hexify(n)
406 << ", version=" << version << ">" << std::endl
407 << " " << *((dbtuple *) v) << std::endl);
408 const dbtuple * const tuple = reinterpret_cast<const dbtuple *>(v);
409 if (t->do_tuple_read(tuple, *value_reader))
410 return caller_callback->invoke(
411 (*key_reader)(k), value_reader->results());
415 template <template <typename> class Transaction, typename P>
416 template <typename Traits, typename Callback,
417 typename KeyReader, typename ValueReader>
419 base_txn_btree<Transaction, P>::do_search_range_call(
420 Transaction<Traits> &t,
421 const typename P::Key &lower,
422 const typename P::Key *upper,
424 KeyReader &key_reader,
425 ValueReader &value_reader)
429 VERBOSE(std::cerr << "txn_btree(0x" << util::hexify(intptr_t(this))
430 << ")::search_range_call [" << util::hexify(lower)
431 << ", " << util::hexify(*upper) << ")" << std::endl);
433 VERBOSE(std::cerr << "txn_btree(0x" << util::hexify(intptr_t(this))
434 << ")::search_range_call [" << util::hexify(lower)
435 << ", +inf)" << std::endl);
437 typename P::KeyWriter lower_key_writer(&lower);
438 const std::string * const lower_str =
439 lower_key_writer.fully_materialize(true, t.string_allocator());
441 typename P::KeyWriter upper_key_writer(upper);
442 const std::string * const upper_str =
443 upper_key_writer.fully_materialize(true, t.string_allocator());
445 if (unlikely(upper_str && *upper_str <= *lower_str))
448 txn_search_range_callback<Traits, Callback, KeyReader, ValueReader> c(
449 &t, &callback, &key_reader, &value_reader);
453 uppervk = varkey(*upper_str);
454 this->underlying_btree.search_range_call(
455 varkey(*lower_str), upper_str ? &uppervk : nullptr,
456 c, t.string_allocator()());
459 template <template <typename> class Transaction, typename P>
460 template <typename Traits, typename Callback,
461 typename KeyReader, typename ValueReader>
463 base_txn_btree<Transaction, P>::do_rsearch_range_call(
464 Transaction<Traits> &t,
465 const typename P::Key &upper,
466 const typename P::Key *lower,
468 KeyReader &key_reader,
469 ValueReader &value_reader)
473 typename P::KeyWriter lower_key_writer(lower);
474 const std::string * const lower_str =
475 lower_key_writer.fully_materialize(true, t.string_allocator());
477 typename P::KeyWriter upper_key_writer(&upper);
478 const std::string * const upper_str =
479 upper_key_writer.fully_materialize(true, t.string_allocator());
481 if (unlikely(lower_str && *upper_str <= *lower_str))
484 txn_search_range_callback<Traits, Callback, KeyReader, ValueReader> c(
485 &t, &callback, &key_reader, &value_reader);
489 lowervk = varkey(*lower_str);
490 this->underlying_btree.rsearch_range_call(
491 varkey(*upper_str), lower_str ? &lowervk : nullptr,
492 c, t.string_allocator()());
495 #endif /* _NDB_BASE_TXN_BTREE_H_ */