8 import multiprocessing as mp
# Number of times each configuration is executed by the main loop
# (`for _ in range(NTRIALS)`); a dry run does a single trial instead of 3.
15 NTRIALS = 1 if DRYRUN else 3
# Allowed values for a grid's 'persist' entry. The main loop interprets them as:
#   PERSIST_REAL - use the log file paths (and their weights) listed for this
#                  machine in MACHINE_CONFIG
#   PERSIST_TEMP - use data<N>.log files under the machine's 'tempprefix',
#                  weighted uniformly
#   PERSIST_NONE - run with no log files and no assignments
17 PERSIST_REAL='persist-real'
18 PERSIST_TEMP='persist-temp'
19 PERSIST_NONE='persist-none'
25 ('/data/scidb/001/2/stephentu/data.log', 1.),
26 ('/data/scidb/001/3/stephentu/data.log', 1.),
28 'tempprefix' : '/tmp',
29 'disable_madv_willneed' : False,
34 ('/f0/stephentu/data.log', 7./24.),
35 ('/f1/stephentu/data.log', 7./24.),
36 ('/f2/stephentu/data.log', 7./24.),
38 'tempprefix' : '/run/shm',
39 'disable_madv_willneed' : True,
45 'tempprefix' : '/run/shm',
46 'disable_madv_willneed' : False,
# CPU count reported by multiprocessing; get_scale_threads() uses it as the
# upper end of the thread-count range.
50 NCPUS = mp.cpu_count()
# Comma-separated mix strings in the format given to the benchmark's
# `--workload-mix` option (TPCC_REALISTIC_MIX is used that way below).
# NOTE(review): presumably one percentage per TPC-C transaction type - confirm
# the ordering against the dbtest option parser.
52 TPCC_STANDARD_MIX='45,43,4,4,4'
53 TPCC_REALISTIC_MIX='39,37,4,10,10'
# Experiment switches. Each KNOB_ENABLE_* flag guards the `if KNOB_...:`
# section of the same name further down, which defines that experiment's grids.
# Experiments that are currently switched on:
55 KNOB_ENABLE_YCSB_SCALE=True
56 KNOB_ENABLE_TPCC_SCALE=True
57 KNOB_ENABLE_TPCC_MULTIPART=True
58 KNOB_ENABLE_TPCC_MULTIPART_SKEW=True
59 KNOB_ENABLE_TPCC_FACTOR_ANALYSIS=True
60 KNOB_ENABLE_TPCC_PERSIST_FACTOR_ANALYSIS=True
61 KNOB_ENABLE_TPCC_RO_SNAPSHOTS=True
# Experiments that are currently switched off:
64 KNOB_ENABLE_TPCC_SCALE_ALLPERSIST=False
65 KNOB_ENABLE_TPCC_SCALE_ALLPERSIST_COMPRESS=False
66 KNOB_ENABLE_TPCC_SCALE_ALLPERSIST_NOFSYNC=False
67 KNOB_ENABLE_TPCC_SCALE_FAKEWRITES=False
68 KNOB_ENABLE_TPCC_SCALE_GC=False
69 KNOB_ENABLE_TPCC_FACTOR_ANALYSIS_1=False
72 prog_suffix= '.masstree' if USE_MASSTREE else '.silotree'
73 return '../%s%s/benchmarks/dbtest' % (tpe, prog_suffix)
77 def get_scale_threads(stride):
78 thds = range(0, NCPUS + 1, stride)
82 ### helpers for log allocation
85 return [e / denom for e in x]
88 return [e * a for e in x]
92 assert len(a) == len(b)
93 return [x - y for x, y in zip(a, b)]
96 return math.sqrt(math.fsum([e * e for e in x]))
99 return math.fsum([abs(e) for e in x])
101 def argcmp(x, comp, predicate):
104 for i in xrange(len(x)):
105 if not predicate(x[i]):
107 if idx is None or comp(x[i], val):
112 raise Exception("no argmin satisfiying predicate")
def argmin(x, predicate=lambda x: True):
  """Run argcmp() over `x` with a strict less-than comparison.

  x         -- indexable sequence to scan
  predicate -- filter applied to each element; elements for which it is
               falsy are not considered (the default accepts everything)

  Returns whatever argcmp() returns for that comparison.
  """
  def is_smaller(candidate, incumbent):
    # argcmp() calls this as comp(x[i], current_value)
    return candidate < incumbent
  return argcmp(x, is_smaller, predicate)
def argmax(x, predicate=lambda x: True):
  """Run argcmp() over `x` with a strict greater-than comparison.

  x         -- indexable sequence to scan
  predicate -- filter applied to each element; elements for which it is
               falsy are not considered (the default accepts everything)

  Returns whatever argcmp() returns for that comparison.
  """
  def is_larger(candidate, incumbent):
    # argcmp() calls this as comp(x[i], current_value)
    return candidate > incumbent
  return argcmp(x, is_larger, predicate)
121 def allocate(nworkers, weights):
122 def score(allocation):
123 #print "score(): allocation=", allocation, "weighted=", normalize(allocation), \
124 # "score=",onenorm(sub(normalize(allocation), weights))
125 return onenorm(sub(normalize(allocation), weights))
127 # assumes weights are normalized
128 approx = map(int, map(math.ceil, scale(weights, nworkers)))
129 diff = sum(approx) - nworkers
133 #print normalize(approx)
135 best, bestValue = None, None
136 for idx in xrange(len(approx)):
142 if bestValue is None or s < bestValue:
143 best, bestValue = cpy, s
144 assert best is not None
151 #print normalize(approx)
153 best, bestValue = None, None
154 for idx in xrange(len(approx)):
158 if bestValue is None or s < bestValue:
159 best, bestValue = cpy, s
160 assert best is not None
164 #print "choice =", approx
165 #print "weights =", weights
166 #print "allocweights=", normalize(approx)
171 ret.append(range(acc, acc + x))
175 if KNOB_ENABLE_YCSB_SCALE:
176 def mk_ycsb_entries(nthds):
179 'name' : 'scale_rmw',
180 'dbs' : ['kvdb', 'ndb-proto1', 'ndb-proto2'],
182 'scale_factors' : [160000],
183 'benchmarks' : ['ycsb'],
184 'bench_opts' : ['--workload-mix 80,0,20,0'],
187 'persist' : [PERSIST_NONE],
188 'numa_memory' : ['%dG' % (40 + 2 * nthds)],
191 THREADS = get_scale_threads(4)
192 for nthds in THREADS:
193 grids += mk_ycsb_entries(nthds)
196 if KNOB_ENABLE_TPCC_SCALE:
197 def mk_grid(name, bench, nthds):
200 'dbs' : ['ndb-proto2'],
202 'scale_factors' : [nthds],
203 'benchmarks' : [bench],
204 'par_load' : [False],
206 'persist' : [PERSIST_REAL, PERSIST_TEMP, PERSIST_NONE],
207 'numa_memory' : ['%dG' % (4 * nthds)],
209 THREADS = get_scale_threads(4)
210 grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
213 # x-axis varies the % multi-partition for new order. hold scale_factor constant @ 28,
214 # nthreads also constant at 28
215 if KNOB_ENABLE_TPCC_MULTIPART:
216 D_RANGE = range(0, 11)
219 'name' : 'multipart:pct',
220 'dbs' : ['ndb-proto2'],
222 'scale_factors': [28],
223 'benchmarks' : ['tpcc'],
225 ['--workload-mix 100,0,0,0,0 --new-order-remote-item-pct %d' % d for d in D_RANGE],
226 'par_load' : [False],
228 'persist' : [PERSIST_NONE],
229 'numa_memory' : ['%dG' % (4 * 28)],
232 'binary' : [binary_path('out-factor-gc')],
233 'name' : 'multipart:pct',
234 'dbs' : ['ndb-proto2'],
236 'scale_factors': [28],
237 'benchmarks' : ['tpcc'],
239 ['--workload-mix 100,0,0,0,0 --new-order-remote-item-pct %d' % d for d in D_RANGE],
240 'par_load' : [False],
242 'persist' : [PERSIST_NONE],
243 'disable_snapshots' : [True],
244 'numa_memory' : ['%dG' % (4 * 28)],
247 'binary' : [binary_path('out-factor-gc')],
248 'name' : 'multipart:pct',
249 'dbs' : ['ndb-proto2'],
251 'scale_factors': [28],
252 'benchmarks' : ['tpcc'],
254 ['--enable-separate-tree-per-partition --workload-mix 100,0,0,0,0 --new-order-remote-item-pct %d' % d for d in D_RANGE],
255 'par_load' : [False],
257 'persist' : [PERSIST_NONE],
258 'disable_snapshots' : [True],
259 'numa_memory' : ['%dG' % (4 * 28)],
262 'name' : 'multipart:pct',
265 'scale_factors': [28],
266 'benchmarks' : ['tpcc'],
268 ['--workload-mix 100,0,0,0,0 --enable-separate-tree-per-partition --enable-partition-locks --new-order-remote-item-pct %d' % d for d in D_RANGE],
269 'par_load' : [False],
271 'persist' : [PERSIST_NONE],
272 'numa_memory' : ['%dG' % (4 * 28)],
276 if KNOB_ENABLE_TPCC_MULTIPART_SKEW:
280 'name' : 'multipart:skew',
281 'dbs' : ['ndb-proto2'],
283 'scale_factors': [4],
284 'benchmarks' : ['tpcc'],
286 '--workload-mix 100,0,0,0,0',
288 'par_load' : [False],
291 'persist' : [PERSIST_NONE],
292 'numa_memory' : ['%dG' % (4 * nthds)],
295 'name' : 'multipart:skew',
296 'dbs' : ['ndb-proto2'],
298 'scale_factors': [4],
299 'benchmarks' : ['tpcc'],
301 '--workload-mix 100,0,0,0,0 --new-order-fast-id-gen'
303 'par_load' : [False],
305 'persist' : [PERSIST_NONE],
306 'numa_memory' : ['%dG' % (4 * nthds)],
311 'name' : 'multipart:skew',
314 'scale_factors': [4],
315 'benchmarks' : ['tpcc'],
317 ['--workload-mix 100,0,0,0,0 --enable-separate-tree-per-partition --enable-partition-locks'],
318 'par_load' : [False],
320 'persist' : [PERSIST_NONE],
321 'numa_memory' : ['%dG' % (4 * 4)],
324 thds = [1,2,4,6,8,10,12,16,20,24,28,32]
325 grids += list(it.chain.from_iterable([mk_grids(t) for t in thds]))
327 if KNOB_ENABLE_TPCC_FACTOR_ANALYSIS:
329 # baseline (jemalloc, no-overwrites, gc, snapshots)
336 'binary' : [binary_path('out-factor-gc-nowriteinplace')],
337 'name' : 'factoranalysis',
338 'dbs' : ['ndb-proto2'],
340 'scale_factors': [28],
341 'benchmarks' : ['tpcc'],
342 'par_load' : [False],
344 'persist' : [PERSIST_NONE],
345 'numa_memory' : [None, '%dG' % (4 * 28)],
348 'binary' : [binary_path('out-factor-gc')],
349 'name' : 'factoranalysis',
350 'dbs' : ['ndb-proto2'],
352 'scale_factors': [28],
353 'benchmarks' : ['tpcc'],
354 'par_load' : [False],
356 'persist' : [PERSIST_NONE],
357 'numa_memory' : ['%dG' % (4 * 28)],
358 'disable_snapshots' : [False],
361 'binary' : [binary_path('out-factor-gc')],
362 'name' : 'factoranalysis',
363 'dbs' : ['ndb-proto2'],
365 'scale_factors': [28],
366 'benchmarks' : ['tpcc'],
367 'bench_opts' : ['--disable-read-only-snapshots'],
368 'par_load' : [False],
370 'persist' : [PERSIST_NONE],
371 'numa_memory' : ['%dG' % (4 * 28)],
372 'disable_snapshots' : [True],
375 'binary' : [binary_path('out-factor-gc')],
376 'name' : 'factoranalysis',
377 'dbs' : ['ndb-proto2'],
379 'scale_factors': [28],
380 'benchmarks' : ['tpcc'],
381 'bench_opts' : ['--disable-read-only-snapshots'],
382 'par_load' : [False],
384 'persist' : [PERSIST_NONE],
385 'numa_memory' : ['%dG' % (4 * 28)],
386 'disable_snapshots' : [True],
387 'disable_gc' : [True],
391 if KNOB_ENABLE_TPCC_FACTOR_ANALYSIS_1:
393 # baseline (jemalloc, no-overwrites, gc, no-snapshots)
400 'binary' : [binary_path('out-factor-gc-nowriteinplace')],
401 'name' : 'factoranalysis',
402 'dbs' : ['ndb-proto2'],
404 'scale_factors': [28],
405 'benchmarks' : ['tpcc'],
406 'bench_opts' : ['--workload-mix %s --disable-read-only-snapshots' % TPCC_REALISTIC_MIX],
407 'par_load' : [False],
409 'persist' : [PERSIST_NONE],
410 'numa_memory' : [None, '%dG' % (4 * 28)],
411 'disable_snapshots': [True],
414 'binary' : [binary_path('out-factor-gc')],
415 'name' : 'factoranalysis',
416 'dbs' : ['ndb-proto2'],
418 'scale_factors': [28],
419 'benchmarks' : ['tpcc'],
420 'bench_opts' : ['--workload-mix %s --disable-read-only-snapshots' % TPCC_REALISTIC_MIX],
421 'par_load' : [False],
423 'persist' : [PERSIST_NONE],
424 'numa_memory' : ['%dG' % (4 * 28)],
425 'disable_snapshots' : [True],
428 'binary' : [binary_path('out-factor-gc')],
429 'name' : 'factoranalysis',
430 'dbs' : ['ndb-proto2'],
432 'scale_factors': [28],
433 'benchmarks' : ['tpcc'],
434 'bench_opts' : ['--workload-mix %s' % TPCC_REALISTIC_MIX],
435 'par_load' : [False],
437 'persist' : [PERSIST_NONE],
438 'numa_memory' : ['%dG' % (4 * 28)],
439 'disable_gc' : [False, True],
443 if KNOB_ENABLE_TPCC_PERSIST_FACTOR_ANALYSIS:
444 # write zero length log records (perfect/fake compression)
445 # lz4-compress buffers
448 'binary' : [binary_path('out-factor-fake-compression')],
449 'name' : 'persistfactoranalysis',
450 'dbs' : ['ndb-proto2'],
452 'scale_factors': [28],
453 'benchmarks' : ['tpcc'],
454 'par_load' : [False],
456 'persist' : [PERSIST_REAL],
457 'numa_memory' : ['%dG' % (4 * 28)],
460 'binary' : [binary_path('out-perf')],
461 'name' : 'persistfactoranalysis',
462 'dbs' : ['ndb-proto2'],
464 'scale_factors': [28],
465 'benchmarks' : ['tpcc'],
466 'par_load' : [False],
468 'persist' : [PERSIST_REAL],
469 'numa_memory' : ['%dG' % (4 * 28)],
470 'log_compress' : [True],
475 # * 50% new order, 50% stock level
476 # * scale factor 8, n-threads 16
477 # * x-axis is --new-order-remote-item-pct from [0, 20, 40, 60, 80, 100]
478 if KNOB_ENABLE_TPCC_RO_SNAPSHOTS:
479 RO_DRANGE = [0, 20, 40, 60, 80, 100]
483 'dbs' : ['ndb-proto2'],
485 'scale_factors': [8],
486 'benchmarks' : ['tpcc'],
487 'bench_opts' : ['--workload-mix 50,0,0,0,50 --new-order-remote-item-pct %d' % d for d in RO_DRANGE],
488 'par_load' : [False],
490 'persist' : [PERSIST_NONE],
491 'numa_memory' : ['%dG' % (4 * 16)],
492 'disable_snapshots' : [False],
496 'binary' : [binary_path('out-factor-gc')],
497 'dbs' : ['ndb-proto2'],
499 'scale_factors': [8],
500 'benchmarks' : ['tpcc'],
501 'bench_opts' : ['--disable-read-only-snapshots --workload-mix 50,0,0,0,50 --new-order-remote-item-pct %d' % d for d in RO_DRANGE],
502 'par_load' : [False],
504 'persist' : [PERSIST_NONE],
505 'numa_memory' : ['%dG' % (4 * 16)],
506 'disable_snapshots' : [True],
510 if KNOB_ENABLE_TPCC_SCALE_ALLPERSIST:
511 def mk_grid(name, bench, nthds):
514 'dbs' : ['ndb-proto2'],
516 'scale_factors' : [nthds],
517 'benchmarks' : [bench],
519 'par_load' : [False],
521 'persist' : [PERSIST_REAL],
522 'numa_memory' : ['%dG' % (4 * nthds)],
524 THREADS = get_scale_threads(4)
525 grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
527 if KNOB_ENABLE_TPCC_SCALE_ALLPERSIST_COMPRESS:
528 def mk_grid(name, bench, nthds):
531 'dbs' : ['ndb-proto2'],
533 'scale_factors' : [nthds],
534 'benchmarks' : [bench],
536 'par_load' : [False],
538 'persist' : [PERSIST_REAL],
539 'numa_memory' : ['%dG' % (4 * nthds)],
540 'log_compress' : [True],
542 THREADS = get_scale_threads(4)
543 grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
545 if KNOB_ENABLE_TPCC_SCALE_ALLPERSIST_NOFSYNC:
546 def mk_grid(name, bench, nthds):
549 'dbs' : ['ndb-proto2'],
551 'scale_factors' : [nthds],
552 'benchmarks' : [bench],
554 'par_load' : [False],
556 'persist' : [PERSIST_REAL],
557 'numa_memory' : ['%dG' % (4 * nthds)],
558 'log_nofsync' : [True],
560 THREADS = get_scale_threads(4)
561 grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
563 if KNOB_ENABLE_TPCC_SCALE_FAKEWRITES:
564 def mk_grid(name, bench, nthds):
567 'dbs' : ['ndb-proto2'],
569 'scale_factors' : [nthds],
570 'benchmarks' : [bench],
572 'par_load' : [False],
574 'persist' : [PERSIST_REAL],
575 'numa_memory' : ['%dG' % (4 * nthds)],
576 'log_fake_writes' : [True],
578 THREADS = get_scale_threads(4)
579 grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
581 if KNOB_ENABLE_TPCC_SCALE_GC:
582 def mk_grid(name, bench, nthds):
585 'dbs' : ['ndb-proto2'],
587 'scale_factors' : [nthds],
588 'benchmarks' : [bench],
590 'par_load' : [False],
592 'persist' : [PERSIST_NONE],
593 'numa_memory' : ['%dG' % (4 * nthds)],
594 'disable_gc' : [False, True],
596 THREADS = get_scale_threads(4)
597 grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
def check_binary_executable(binary):
  """Tell whether `binary` names a regular file the current user may execute.

  binary -- filesystem path to test

  Returns True only when the path is an existing file and os.access()
  grants execute permission on it; False otherwise.
  """
  # Guard first: directories and missing paths are rejected without
  # consulting the permission bits.
  if not os.path.isfile(binary):
    return False
  return os.access(binary, os.X_OK)
602 def run_configuration(
603 binary, disable_madv_willneed,
604 basedir, dbtype, bench, scale_factor, nthreads, bench_opts,
605 par_load, retry_aborted_txn, backoff_aborted_txn, numa_memory, logfiles,
606 assignments, log_fake_writes, log_nofsync, log_compress,
607 disable_gc, disable_snapshots, ntries=5):
608 # Note: assignments is a list of lists of ints
609 assert len(logfiles) == len(assignments)
610 assert not log_fake_writes or len(logfiles)
611 assert not log_nofsync or len(logfiles)
612 assert not log_compress or len(logfiles)
616 '--basedir', basedir,
618 '--num-threads', str(nthreads),
619 '--scale-factor', str(scale_factor),
622 ] + ([] if not bench_opts else ['--bench-opts', bench_opts]) \
623 + ([] if not par_load else ['--parallel-loading']) \
624 + ([] if not retry_aborted_txn else ['--retry-aborted-transactions']) \
625 + ([] if not backoff_aborted_txn else ['--backoff-aborted-transactions']) \
626 + ([] if not numa_memory else ['--numa-memory', numa_memory]) \
627 + ([] if not logfiles else list(it.chain.from_iterable([['--logfile', f] for f in logfiles]))) \
628 + ([] if not assignments else list(it.chain.from_iterable([['--assignment', ','.join(map(str, x))] for x in assignments]))) \
629 + ([] if not log_fake_writes else ['--log-fake-writes']) \
630 + ([] if not log_nofsync else ['--log-nofsync']) \
631 + ([] if not log_compress else ['--log-compress']) \
632 + ([] if not disable_gc else ['--disable-gc']) \
633 + ([] if not disable_snapshots else ['--disable-snapshots'])
634 print >>sys.stderr, '[INFO] running command:'
635 print >>sys.stderr, ('DISABLE_MADV_WILLNEED=1' if disable_madv_willneed else ''), ' '.join([x.replace(' ', r'\ ') for x in args])
637 with open('stderr.log', 'w') as err:
638 env = dict(os.environ)
639 if disable_madv_willneed:
640 env['DISABLE_MADV_WILLNEED'] = '1'
641 p = subprocess.Popen(args, stdin=open('/dev/null', 'r'), stdout=subprocess.PIPE, stderr=err, env=env)
642 print >>sys.stderr, 'pid=', p.pid
645 toks = r.strip().split(' ')
647 assert check_binary_executable(binary)
650 print 'Failure: retcode=', retcode, ', stdout=', r
652 shutil.copyfile('stderr.log', 'stderr.%d.log' % p.pid)
654 return run_configuration(
655 binary, disable_madv_willneed,
656 basedir, dbtype, bench, scale_factor, nthreads, bench_opts,
657 par_load, retry_aborted_txn, backoff_aborted_txn, numa_memory, logfiles,
658 assignments, log_fake_writes, log_nofsync, log_compress,
659 disable_gc, disable_snapshots, ntries - 1)
661 print "Out of tries!"
663 return tuple(map(float, toks))
665 if __name__ == '__main__':
666 (_, basedir, outfile) = sys.argv
668 DEFAULT_BINARY=binary_path('out-perf')
669 # list all the binaries needed
670 binaries = set(it.chain.from_iterable([grid.get('binary', [DEFAULT_BINARY]) for grid in grids]))
672 for binary in binaries:
673 if not check_binary_executable(binary):
674 print >>sys.stderr, '[ERROR] cannot find binary %s' % binary
675 failed.append(binary)
677 r = re.compile(r'out-(.*)\.(masstree|silotree)')
678 print >>sys.stderr, \
679 '[INFO] Try running the following commands in the root source directory:'
680 for binary in failed:
681 folder = binary.split(os.sep)[1]
684 print >>sys.stderr, '[ERROR] bad binary name %s' % binary
686 print >>sys.stderr, 'MASSTREE=%d MODE=%s make -j dbtest' % (1 if m.group(2) == 'masstree' else 0, m.group(1))
689 # iterate over all configs
692 for (binary, db, bench, scale_factor, threads, bench_opts,
693 par_load, retry, backoff, numa_memory, persist,
694 log_fake_writes, log_nofsync, log_compress,
695 disable_gc, disable_snapshots) in it.product(
696 grid.get('binary', [DEFAULT_BINARY]),
697 grid['dbs'], grid['benchmarks'], grid['scale_factors'],
698 grid['threads'], grid.get('bench_opts', ['']), grid['par_load'],
699 grid['retry'], grid.get('backoff', [False]),
700 grid['numa_memory'], grid['persist'],
701 grid.get('log_fake_writes', [False]),
702 grid.get('log_nofsync', [False]),
703 grid.get('log_compress', [False]),
704 grid.get('disable_gc', [False]),
705 grid.get('disable_snapshots', [False])):
706 node = platform.node()
707 disable_madv_willneed = MACHINE_CONFIG[node]['disable_madv_willneed']
710 'disable_madv_willneed' : disable_madv_willneed,
711 'name' : grid['name'],
714 'scale_factor' : scale_factor,
716 'bench_opts' : bench_opts,
717 'par_load' : par_load,
721 'numa_memory' : numa_memory,
722 'log_fake_writes' : log_fake_writes,
723 'log_nofsync' : log_nofsync,
724 'log_compress' : log_compress,
725 'disable_gc' : disable_gc,
726 'disable_snapshots' : disable_snapshots,
728 print >>sys.stderr, '[INFO] running config %s' % (str(config))
729 if persist != PERSIST_NONE:
730 info = MACHINE_CONFIG[node]['logfiles']
731 tempprefix = MACHINE_CONFIG[node]['tempprefix']
733 [x[0] for x in info] if persist == PERSIST_REAL \
734 else [os.path.join(tempprefix, 'data%d.log' % (idx)) for idx in xrange(len(info))]
736 normalize([x[1] for x in info]) if persist == PERSIST_REAL else \
737 normalize([1.0 for _ in info])
738 assignments = allocate(threads, weights)
740 logfiles, assignments = [], []
742 for _ in range(NTRIALS):
743 value = run_configuration(
744 binary, disable_madv_willneed,
745 basedir, db, bench, scale_factor, threads,
746 bench_opts, par_load, retry, backoff, numa_memory,
747 logfiles, assignments, log_fake_writes,
748 log_nofsync, log_compress, disable_gc,
751 results.append((config, values))
753 # write intermediate results
754 with open(outfile + '.py', 'w') as fp:
755 print >>fp, 'RESULTS = %s' % (repr(results))
758 with open(outfile + '.py', 'w') as fp:
759 print >>fp, 'RESULTS = %s' % (repr(results))