silo/benchmarks/runner.py

   1 #!/usr/bin/env python
   2
   3 import itertools as it
   4 import platform
   5 import math
   6 import subprocess
   7 import sys
   8 import multiprocessing as mp
   9 import os
  10 import re
  11
  12 DRYRUN = True
  13 USE_MASSTREE = True
  14
  15 NTRIALS = 1 if DRYRUN else 3
  16
  17 PERSIST_REAL='persist-real'
  18 PERSIST_TEMP='persist-temp'
  19 PERSIST_NONE='persist-none'
  20
  21 MACHINE_CONFIG = {
  22   'modis2' : {
  23       'logfiles' : (
  24           ('data.log', 1.),
  25           ('/data/scidb/001/2/stephentu/data.log', 1.),
  26           ('/data/scidb/001/3/stephentu/data.log', 1.),
  27       ),
  28       'tempprefix' : '/tmp',
  29       'disable_madv_willneed' : False,
  30   },
  31   'istc3' : {
  32       'logfiles' : (
  33           ('data.log', 3./24.),
  34           ('/f0/stephentu/data.log', 7./24.),
  35           ('/f1/stephentu/data.log', 7./24.),
  36           ('/f2/stephentu/data.log', 7./24.),
  37       ),
  38       'tempprefix' : '/run/shm',
  39       'disable_madv_willneed' : True,
  40   },
  41   'istc4' : {
  42       'logfiles' : (
  43           ('data.log', 1.),
  44       ),
  45       'tempprefix' : '/run/shm',
  46       'disable_madv_willneed' : False,
  47   },
  48 }
  49
  50 NCPUS = mp.cpu_count()
  51
  52 TPCC_STANDARD_MIX='45,43,4,4,4'
  53 TPCC_REALISTIC_MIX='39,37,4,10,10'
  54
  55 KNOB_ENABLE_YCSB_SCALE=True
  56 KNOB_ENABLE_TPCC_SCALE=True
  57 KNOB_ENABLE_TPCC_MULTIPART=True
  58 KNOB_ENABLE_TPCC_MULTIPART_SKEW=True
  59 KNOB_ENABLE_TPCC_FACTOR_ANALYSIS=True
  60 KNOB_ENABLE_TPCC_PERSIST_FACTOR_ANALYSIS=True
  61 KNOB_ENABLE_TPCC_RO_SNAPSHOTS=True
  62
  63 ## debugging runs
  64 KNOB_ENABLE_TPCC_SCALE_ALLPERSIST=False
  65 KNOB_ENABLE_TPCC_SCALE_ALLPERSIST_COMPRESS=False
  66 KNOB_ENABLE_TPCC_SCALE_ALLPERSIST_NOFSYNC=False
  67 KNOB_ENABLE_TPCC_SCALE_FAKEWRITES=False
  68 KNOB_ENABLE_TPCC_SCALE_GC=False
  69 KNOB_ENABLE_TPCC_FACTOR_ANALYSIS_1=False
  70
  71 def binary_path(tpe):
  72   prog_suffix= '.masstree' if USE_MASSTREE else '.silotree'
  73   return '../%s%s/benchmarks/dbtest' % (tpe, prog_suffix)
  74
  75 grids = []
  76
  77 def get_scale_threads(stride):
  78   thds = range(0, NCPUS + 1, stride)
  79   thds[0] = 1
  80   return thds
  81
  82 ### helpers for log allocation
  83 def normalize(x):
  84   denom = math.fsum(x)
  85   return [e / denom for e in x]
  86
  87 def scale(x, a):
  88   return [e * a for e in x]
  89
  90 # a - b
  91 def sub(a, b):
  92   assert len(a) == len(b)
  93   return [x - y for x, y in zip(a, b)]
  94
  95 def twonorm(x):
  96   return math.sqrt(math.fsum([e * e for e in x]))
  97
  98 def onenorm(x):
  99   return math.fsum([abs(e) for e in x])
 100
 101 def argcmp(x, comp, predicate):
 102   idx = None
 103   val = None
 104   for i in xrange(len(x)):
 105     if not predicate(x[i]):
 106       continue
 107     if idx is None or comp(x[i], val):
 108       idx = i
 109       val = x[i]
 110   if idx is None:
 111     # couldn't find it
 112     raise Exception("no argmin satisfiying predicate")
 113   return idx
 114
 115 def argmin(x, predicate=lambda x: True):
 116   return argcmp(x, lambda a, b: a < b, predicate)
 117
 118 def argmax(x, predicate=lambda x: True):
 119   return argcmp(x, lambda a, b: a > b, predicate)
 120
 121 def allocate(nworkers, weights):
 122   def score(allocation):
 123     #print "score(): allocation=", allocation, "weighted=", normalize(allocation), \
 124     #    "score=",onenorm(sub(normalize(allocation), weights))
 125     return onenorm(sub(normalize(allocation), weights))
 126
 127   # assumes weights are normalized
 128   approx = map(int, map(math.ceil, scale(weights, nworkers)))
 129   diff = sum(approx) - nworkers
 130   if diff > 0:
 131     #print "OVER"
 132     #print approx
 133     #print normalize(approx)
 134     while diff > 0:
 135       best, bestValue = None, None
 136       for idx in xrange(len(approx)):
 137         if not approx[idx]:
 138           continue
 139         cpy = approx[:]
 140         cpy[idx] -= 1
 141         s = score(cpy)
 142         if bestValue is None or s < bestValue:
 143           best, bestValue = cpy, s
 144       assert best is not None
 145       approx = best
 146       diff -= 1
 147
 148   elif diff < 0:
 149     #print "UNDER"
 150     #print approx
 151     #print normalize(approx)
 152     while diff < 0:
 153       best, bestValue = None, None
 154       for idx in xrange(len(approx)):
 155         cpy = approx[:]
 156         cpy[idx] += 1
 157         s = score(cpy)
 158         if bestValue is None or s < bestValue:
 159           best, bestValue = cpy, s
 160       assert best is not None
 161       approx = best
 162       diff += 1
 163
 164   #print "choice      =", approx
 165   #print "weights     =", weights
 166   #print "allocweights=", normalize(approx)
 167
 168   acc = 0
 169   ret = []
 170   for x in approx:
 171     ret.append(range(acc, acc + x))
 172     acc += x
 173   return ret
 174
 175 if KNOB_ENABLE_YCSB_SCALE:
 176   def mk_ycsb_entries(nthds):
 177     return [
 178       {
 179         'name' : 'scale_rmw',
 180         'dbs' : ['kvdb', 'ndb-proto1', 'ndb-proto2'],
 181         'threads' : [nthds],
 182         'scale_factors' : [160000],
 183         'benchmarks' : ['ycsb'],
 184         'bench_opts' : ['--workload-mix 80,0,20,0'],
 185         'par_load' : [True],
 186         'retry' : [False],
 187         'persist' : [PERSIST_NONE],
 188         'numa_memory' : ['%dG' % (40 + 2 * nthds)],
 189       },
 190     ]
 191   THREADS = get_scale_threads(4)
 192   for nthds in THREADS:
 193     grids += mk_ycsb_entries(nthds)
 194
 195 # exp 2:
 196 if KNOB_ENABLE_TPCC_SCALE:
 197   def mk_grid(name, bench, nthds):
 198     return {
 199       'name' : name,
 200       'dbs' : ['ndb-proto2'],
 201       'threads' : [nthds],
 202       'scale_factors' : [nthds],
 203       'benchmarks' : [bench],
 204       'par_load' : [False],
 205       'retry' : [False],
 206       'persist' : [PERSIST_REAL, PERSIST_TEMP, PERSIST_NONE],
 207       'numa_memory' : ['%dG' % (4 * nthds)],
 208     }
 209   THREADS = get_scale_threads(4)
 210   grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
 211
 212 # exp 3:
 213 #   x-axis varies the % multi-partition for new order. hold scale_factor constant @ 28,
 214 #   nthreads also constant at 28
 215 if KNOB_ENABLE_TPCC_MULTIPART:
 216   D_RANGE = range(0, 11)
 217   grids += [
 218     {
 219       'name' : 'multipart:pct',
 220       'dbs' : ['ndb-proto2'],
 221       'threads' : [28],
 222       'scale_factors': [28],
 223       'benchmarks' : ['tpcc'],
 224       'bench_opts' :
 225           ['--workload-mix 100,0,0,0,0 --new-order-remote-item-pct %d' % d for d in D_RANGE],
 226       'par_load' : [False],
 227       'retry' : [False],
 228       'persist' : [PERSIST_NONE],
 229       'numa_memory' : ['%dG' % (4 * 28)],
 230     },
 231     {
 232       'binary' : [binary_path('out-factor-gc')],
 233       'name' : 'multipart:pct',
 234       'dbs' : ['ndb-proto2'],
 235       'threads' : [28],
 236       'scale_factors': [28],
 237       'benchmarks' : ['tpcc'],
 238       'bench_opts' :
 239           ['--workload-mix 100,0,0,0,0 --new-order-remote-item-pct %d' % d for d in D_RANGE],
 240       'par_load' : [False],
 241       'retry' : [False],
 242       'persist' : [PERSIST_NONE],
 243       'disable_snapshots' : [True],
 244       'numa_memory' : ['%dG' % (4 * 28)],
 245     },
 246     {
 247       'binary' : [binary_path('out-factor-gc')],
 248       'name' : 'multipart:pct',
 249       'dbs' : ['ndb-proto2'],
 250       'threads' : [28],
 251       'scale_factors': [28],
 252       'benchmarks' : ['tpcc'],
 253       'bench_opts' :
 254           ['--enable-separate-tree-per-partition --workload-mix 100,0,0,0,0 --new-order-remote-item-pct %d' % d for d in D_RANGE],
 255       'par_load' : [False],
 256       'retry' : [False],
 257       'persist' : [PERSIST_NONE],
 258       'disable_snapshots' : [True],
 259       'numa_memory' : ['%dG' % (4 * 28)],
 260     },
 261     {
 262       'name' : 'multipart:pct',
 263       'dbs' : ['kvdb-st'],
 264       'threads' : [28],
 265       'scale_factors': [28],
 266       'benchmarks' : ['tpcc'],
 267       'bench_opts' :
 268         ['--workload-mix 100,0,0,0,0 --enable-separate-tree-per-partition --enable-partition-locks --new-order-remote-item-pct %d' % d for d in D_RANGE],
 269       'par_load' : [False],
 270       'retry' : [False],
 271       'persist' : [PERSIST_NONE],
 272       'numa_memory' : ['%dG' % (4 * 28)],
 273     },
 274   ]
 275
 276 if KNOB_ENABLE_TPCC_MULTIPART_SKEW:
 277   def mk_grids(nthds):
 278     return [
 279       {
 280         'name' : 'multipart:skew',
 281         'dbs' : ['ndb-proto2'],
 282         'threads' : [nthds],
 283         'scale_factors': [4],
 284         'benchmarks' : ['tpcc'],
 285         'bench_opts' : [
 286           '--workload-mix 100,0,0,0,0',
 287         ],
 288         'par_load' : [False],
 289         'retry' : [True],
 290         'backoff' : [True],
 291         'persist' : [PERSIST_NONE],
 292         'numa_memory' : ['%dG' % (4 * nthds)],
 293       },
 294       {
 295         'name' : 'multipart:skew',
 296         'dbs' : ['ndb-proto2'],
 297         'threads' : [nthds],
 298         'scale_factors': [4],
 299         'benchmarks' : ['tpcc'],
 300         'bench_opts' : [
 301           '--workload-mix 100,0,0,0,0 --new-order-fast-id-gen'
 302         ],
 303         'par_load' : [False],
 304         'retry' : [True],
 305         'persist' : [PERSIST_NONE],
 306         'numa_memory' : ['%dG' % (4 * nthds)],
 307       },
 308     ]
 309   grids += [
 310     {
 311       'name' : 'multipart:skew',
 312       'dbs' : ['kvdb-st'],
 313       'threads' : [1],
 314       'scale_factors': [4],
 315       'benchmarks' : ['tpcc'],
 316       'bench_opts' :
 317         ['--workload-mix 100,0,0,0,0 --enable-separate-tree-per-partition --enable-partition-locks'],
 318       'par_load' : [False],
 319       'retry' : [False],
 320       'persist' : [PERSIST_NONE],
 321       'numa_memory' : ['%dG' % (4 * 4)],
 322     },
 323   ]
 324   thds = [1,2,4,6,8,10,12,16,20,24,28,32]
 325   grids += list(it.chain.from_iterable([mk_grids(t) for t in thds]))
 326
 327 if KNOB_ENABLE_TPCC_FACTOR_ANALYSIS:
 328   # order is:
 329   # baseline (jemalloc, no-overwrites, gc, snapshots)
 330   # +allocator
 331   # +insert
 332   # -snapshots
 333   # -gc
 334   grids += [
 335     {
 336       'binary' : [binary_path('out-factor-gc-nowriteinplace')],
 337       'name' : 'factoranalysis',
 338       'dbs' : ['ndb-proto2'],
 339       'threads' : [28],
 340       'scale_factors': [28],
 341       'benchmarks' : ['tpcc'],
 342       'par_load' : [False],
 343       'retry' : [False],
 344       'persist' : [PERSIST_NONE],
 345       'numa_memory' : [None, '%dG' % (4 * 28)],
 346     },
 347     {
 348       'binary' : [binary_path('out-factor-gc')],
 349       'name' : 'factoranalysis',
 350       'dbs' : ['ndb-proto2'],
 351       'threads' : [28],
 352       'scale_factors': [28],
 353       'benchmarks' : ['tpcc'],
 354       'par_load' : [False],
 355       'retry' : [False],
 356       'persist' : [PERSIST_NONE],
 357       'numa_memory' : ['%dG' % (4 * 28)],
 358       'disable_snapshots' : [False],
 359     },
 360     {
 361       'binary' : [binary_path('out-factor-gc')],
 362       'name' : 'factoranalysis',
 363       'dbs' : ['ndb-proto2'],
 364       'threads' : [28],
 365       'scale_factors': [28],
 366       'benchmarks' : ['tpcc'],
 367       'bench_opts' : ['--disable-read-only-snapshots'],
 368       'par_load' : [False],
 369       'retry' : [False],
 370       'persist' : [PERSIST_NONE],
 371       'numa_memory' : ['%dG' % (4 * 28)],
 372       'disable_snapshots' : [True],
 373     },
 374     {
 375       'binary' : [binary_path('out-factor-gc')],
 376       'name' : 'factoranalysis',
 377       'dbs' : ['ndb-proto2'],
 378       'threads' : [28],
 379       'scale_factors': [28],
 380       'benchmarks' : ['tpcc'],
 381       'bench_opts' : ['--disable-read-only-snapshots'],
 382       'par_load' : [False],
 383       'retry' : [False],
 384       'persist' : [PERSIST_NONE],
 385       'numa_memory' : ['%dG' % (4 * 28)],
 386       'disable_snapshots' : [True],
 387       'disable_gc' : [True],
 388     },
 389   ]
 390
 391 if KNOB_ENABLE_TPCC_FACTOR_ANALYSIS_1:
 392   # order is:
 393   # baseline (jemalloc, no-overwrites, gc, no-snapshots)
 394   # +allocator
 395   # +insert
 396   # +snapshots
 397   # -gc
 398   grids += [
 399     {
 400       'binary' : [binary_path('out-factor-gc-nowriteinplace')],
 401       'name' : 'factoranalysis',
 402       'dbs' : ['ndb-proto2'],
 403       'threads' : [28],
 404       'scale_factors': [28],
 405       'benchmarks' : ['tpcc'],
 406       'bench_opts' : ['--workload-mix %s --disable-read-only-snapshots' % TPCC_REALISTIC_MIX],
 407       'par_load' : [False],
 408       'retry' : [False],
 409       'persist' : [PERSIST_NONE],
 410       'numa_memory' : [None, '%dG' % (4 * 28)],
 411       'disable_snapshots': [True],
 412     },
 413     {
 414       'binary' : [binary_path('out-factor-gc')],
 415       'name' : 'factoranalysis',
 416       'dbs' : ['ndb-proto2'],
 417       'threads' : [28],
 418       'scale_factors': [28],
 419       'benchmarks' : ['tpcc'],
 420       'bench_opts' : ['--workload-mix %s --disable-read-only-snapshots' % TPCC_REALISTIC_MIX],
 421       'par_load' : [False],
 422       'retry' : [False],
 423       'persist' : [PERSIST_NONE],
 424       'numa_memory' : ['%dG' % (4 * 28)],
 425       'disable_snapshots' : [True],
 426     },
 427     {
 428       'binary' : [binary_path('out-factor-gc')],
 429       'name' : 'factoranalysis',
 430       'dbs' : ['ndb-proto2'],
 431       'threads' : [28],
 432       'scale_factors': [28],
 433       'benchmarks' : ['tpcc'],
 434       'bench_opts' : ['--workload-mix %s' % TPCC_REALISTIC_MIX],
 435       'par_load' : [False],
 436       'retry' : [False],
 437       'persist' : [PERSIST_NONE],
 438       'numa_memory' : ['%dG' % (4 * 28)],
 439       'disable_gc' : [False, True],
 440     },
 441   ]
 442
 443 if KNOB_ENABLE_TPCC_PERSIST_FACTOR_ANALYSIS:
 444   # write zero length log records (perfect/fake compression)
 445   # lz4-compress buffers
 446   grids += [
 447     {
 448       'binary' : [binary_path('out-factor-fake-compression')],
 449       'name' : 'persistfactoranalysis',
 450       'dbs' : ['ndb-proto2'],
 451       'threads' : [28],
 452       'scale_factors': [28],
 453       'benchmarks' : ['tpcc'],
 454       'par_load' : [False],
 455       'retry' : [False],
 456       'persist' : [PERSIST_REAL],
 457       'numa_memory' : ['%dG' % (4 * 28)],
 458     },
 459     {
 460       'binary' : [binary_path('out-perf')],
 461       'name' : 'persistfactoranalysis',
 462       'dbs' : ['ndb-proto2'],
 463       'threads' : [28],
 464       'scale_factors': [28],
 465       'benchmarks' : ['tpcc'],
 466       'par_load' : [False],
 467       'retry' : [False],
 468       'persist' : [PERSIST_REAL],
 469       'numa_memory' : ['%dG' % (4 * 28)],
 470       'log_compress' : [True],
 471     },
 472   ]
 473
 474 # exp 5:
 475 #  * 50% new order, 50% stock level
 476 #  * scale factor 8, n-threads 16
 477 #  * x-axis is --new-order-remote-item-pct from [0, 20, 40, 60, 80, 100]
 478 if KNOB_ENABLE_TPCC_RO_SNAPSHOTS:
 479   RO_DRANGE = [0, 20, 40, 60, 80, 100]
 480   grids += [
 481     {
 482       'name' : 'readonly',
 483       'dbs' : ['ndb-proto2'],
 484       'threads' : [16],
 485       'scale_factors': [8],
 486       'benchmarks' : ['tpcc'],
 487       'bench_opts' : ['--workload-mix 50,0,0,0,50 --new-order-remote-item-pct %d' % d for d in RO_DRANGE],
 488       'par_load' : [False],
 489       'retry' : [True],
 490       'persist' : [PERSIST_NONE],
 491       'numa_memory' : ['%dG' % (4 * 16)],
 492       'disable_snapshots' : [False],
 493     },
 494     {
 495       'name' : 'readonly',
 496       'binary' : [binary_path('out-factor-gc')],
 497       'dbs' : ['ndb-proto2'],
 498       'threads' : [16],
 499       'scale_factors': [8],
 500       'benchmarks' : ['tpcc'],
 501       'bench_opts' : ['--disable-read-only-snapshots --workload-mix 50,0,0,0,50 --new-order-remote-item-pct %d' % d for d in RO_DRANGE],
 502       'par_load' : [False],
 503       'retry' : [True],
 504       'persist' : [PERSIST_NONE],
 505       'numa_memory' : ['%dG' % (4 * 16)],
 506       'disable_snapshots' : [True],
 507     },
 508   ]
 509
 510 if KNOB_ENABLE_TPCC_SCALE_ALLPERSIST:
 511   def mk_grid(name, bench, nthds):
 512     return {
 513       'name' : name,
 514       'dbs' : ['ndb-proto2'],
 515       'threads' : [nthds],
 516       'scale_factors' : [nthds],
 517       'benchmarks' : [bench],
 518       'bench_opts' : [''],
 519       'par_load' : [False],
 520       'retry' : [False],
 521       'persist' : [PERSIST_REAL],
 522       'numa_memory' : ['%dG' % (4 * nthds)],
 523     }
 524   THREADS = get_scale_threads(4)
 525   grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
 526
 527 if KNOB_ENABLE_TPCC_SCALE_ALLPERSIST_COMPRESS:
 528   def mk_grid(name, bench, nthds):
 529     return {
 530       'name' : name,
 531       'dbs' : ['ndb-proto2'],
 532       'threads' : [nthds],
 533       'scale_factors' : [nthds],
 534       'benchmarks' : [bench],
 535       'bench_opts' : [''],
 536       'par_load' : [False],
 537       'retry' : [False],
 538       'persist' : [PERSIST_REAL],
 539       'numa_memory' : ['%dG' % (4 * nthds)],
 540       'log_compress' : [True],
 541     }
 542   THREADS = get_scale_threads(4)
 543   grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
 544
 545 if KNOB_ENABLE_TPCC_SCALE_ALLPERSIST_NOFSYNC:
 546   def mk_grid(name, bench, nthds):
 547     return {
 548       'name' : name,
 549       'dbs' : ['ndb-proto2'],
 550       'threads' : [nthds],
 551       'scale_factors' : [nthds],
 552       'benchmarks' : [bench],
 553       'bench_opts' : [''],
 554       'par_load' : [False],
 555       'retry' : [False],
 556       'persist' : [PERSIST_REAL],
 557       'numa_memory' : ['%dG' % (4 * nthds)],
 558       'log_nofsync' : [True],
 559     }
 560   THREADS = get_scale_threads(4)
 561   grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
 562
 563 if KNOB_ENABLE_TPCC_SCALE_FAKEWRITES:
 564   def mk_grid(name, bench, nthds):
 565     return {
 566       'name' : name,
 567       'dbs' : ['ndb-proto2'],
 568       'threads' : [nthds],
 569       'scale_factors' : [nthds],
 570       'benchmarks' : [bench],
 571       'bench_opts' : [''],
 572       'par_load' : [False],
 573       'retry' : [False],
 574       'persist' : [PERSIST_REAL],
 575       'numa_memory' : ['%dG' % (4 * nthds)],
 576       'log_fake_writes' : [True],
 577     }
 578   THREADS = get_scale_threads(4)
 579   grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
 580
 581 if KNOB_ENABLE_TPCC_SCALE_GC:
 582   def mk_grid(name, bench, nthds):
 583     return {
 584       'name' : name,
 585       'dbs' : ['ndb-proto2'],
 586       'threads' : [nthds],
 587       'scale_factors' : [nthds],
 588       'benchmarks' : [bench],
 589       'bench_opts' : [''],
 590       'par_load' : [False],
 591       'retry' : [False],
 592       'persist' : [PERSIST_NONE],
 593       'numa_memory' : ['%dG' % (4 * nthds)],
 594       'disable_gc' : [False, True],
 595     }
 596   THREADS = get_scale_threads(4)
 597   grids += [mk_grid('scale_tpcc', 'tpcc', t) for t in THREADS]
 598
 599 def check_binary_executable(binary):
 600   return os.path.isfile(binary) and os.access(binary, os.X_OK)
 601
 602 def run_configuration(
 603     binary, disable_madv_willneed,
 604     basedir, dbtype, bench, scale_factor, nthreads, bench_opts,
 605     par_load, retry_aborted_txn, backoff_aborted_txn, numa_memory, logfiles,
 606     assignments, log_fake_writes, log_nofsync, log_compress,
 607     disable_gc, disable_snapshots, ntries=5):
 608   # Note: assignments is a list of list of ints
 609   assert len(logfiles) == len(assignments)
 610   assert not log_fake_writes or len(logfiles)
 611   assert not log_nofsync or len(logfiles)
 612   assert not log_compress or len(logfiles)
 613   args = [
 614       binary,
 615       '--bench', bench,
 616       '--basedir', basedir,
 617       '--db-type', dbtype,
 618       '--num-threads', str(nthreads),
 619       '--scale-factor', str(scale_factor),
 620       '--txn-flags', '1',
 621       '--runtime', '60',
 622   ] + ([] if not bench_opts else ['--bench-opts', bench_opts]) \
 623     + ([] if not par_load else ['--parallel-loading']) \
 624     + ([] if not retry_aborted_txn else ['--retry-aborted-transactions']) \
 625     + ([] if not backoff_aborted_txn else ['--backoff-aborted-transactions']) \
 626     + ([] if not numa_memory else ['--numa-memory', numa_memory]) \
 627     + ([] if not logfiles else list(it.chain.from_iterable([['--logfile', f] for f in logfiles]))) \
 628     + ([] if not assignments else list(it.chain.from_iterable([['--assignment', ','.join(map(str, x))] for x in assignments]))) \
 629     + ([] if not log_fake_writes else ['--log-fake-writes']) \
 630     + ([] if not log_nofsync else ['--log-nofsync']) \
 631     + ([] if not log_compress else ['--log-compress']) \
 632     + ([] if not disable_gc else ['--disable-gc']) \
 633     + ([] if not disable_snapshots else ['--disable-snapshots'])
 634   print >>sys.stderr, '[INFO] running command:'
 635   print >>sys.stderr, ('DISABLE_MADV_WILLNEED=1' if disable_madv_willneed else ''), ' '.join([x.replace(' ', r'\ ') for x in args])
 636   if not DRYRUN:
 637     with open('stderr.log', 'w') as err:
 638       env = dict(os.environ)
 639       if disable_madv_willneed:
 640         env['DISABLE_MADV_WILLNEED'] = '1'
 641       p = subprocess.Popen(args, stdin=open('/dev/null', 'r'), stdout=subprocess.PIPE, stderr=err, env=env)
 642       print >>sys.stderr, 'pid=', p.pid
 643       r = p.stdout.read()
 644       retcode = p.wait()
 645       toks = r.strip().split(' ')
 646   else:
 647     assert check_binary_executable(binary)
 648     toks = [0,0,0,0,0]
 649   if len(toks) != 5:
 650     print 'Failure: retcode=', retcode, ', stdout=', r
 651     import shutil
 652     shutil.copyfile('stderr.log', 'stderr.%d.log' % p.pid)
 653     if ntries:
 654       return run_configuration(
 655           binary, disable_madv_willneed,
 656           basedir, dbtype, bench, scale_factor, nthreads, bench_opts,
 657           par_load, retry_aborted_txn, backoff_aborted_txn, numa_memory, logfiles,
 658           assignments, log_fake_writes, log_nofsync, log_compress,
 659           disable_gc, disable_snapshots, ntries - 1)
 660     else:
 661       print "Out of tries!"
 662       assert False
 663   return tuple(map(float, toks))
 664
 665 if __name__ == '__main__':
 666   (_, basedir, outfile) = sys.argv
 667
 668   DEFAULT_BINARY=binary_path('out-perf')
 669   # list all the binaries needed
 670   binaries = set(it.chain.from_iterable([grid.get('binary', [DEFAULT_BINARY]) for grid in grids]))
 671   failed = []
 672   for binary in binaries:
 673     if not check_binary_executable(binary):
 674       print >>sys.stderr, '[ERROR] cannot find binary %s' % binary
 675       failed.append(binary)
 676   if failed:
 677     r = re.compile(r'out-(.*)\.(masstree|silotree)')
 678     print >>sys.stderr, \
 679         '[INFO] Try running the following commands in the root source directory:'
 680     for binary in failed:
 681       folder = binary.split(os.sep)[1]
 682       m = r.match(folder)
 683       if not m:
 684         print >>sys.stderr, '[ERROR] bad binary name %s' % binary
 685       else:
 686         print >>sys.stderr, 'MASSTREE=%d MODE=%s make -j dbtest' % (1 if m.group(2) == 'masstree' else 0, m.group(1))
 687     sys.exit(1)
 688
 689   # iterate over all configs
 690   results = []
 691   for grid in grids:
 692     for (binary, db, bench, scale_factor, threads, bench_opts,
 693          par_load, retry, backoff, numa_memory, persist,
 694          log_fake_writes, log_nofsync, log_compress,
 695          disable_gc, disable_snapshots) in it.product(
 696         grid.get('binary', [DEFAULT_BINARY]),
 697         grid['dbs'], grid['benchmarks'], grid['scale_factors'],
 698         grid['threads'], grid.get('bench_opts', ['']), grid['par_load'],
 699         grid['retry'], grid.get('backoff', [False]),
 700         grid['numa_memory'], grid['persist'],
 701         grid.get('log_fake_writes', [False]),
 702         grid.get('log_nofsync', [False]),
 703         grid.get('log_compress', [False]),
 704         grid.get('disable_gc', [False]),
 705         grid.get('disable_snapshots', [False])):
 706       node = platform.node()
 707       disable_madv_willneed = MACHINE_CONFIG[node]['disable_madv_willneed']
 708       config = {
 709         'binary'                : binary,
 710         'disable_madv_willneed' : disable_madv_willneed,
 711         'name'                  : grid['name'],
 712         'db'                    : db,
 713         'bench'                 : bench,
 714         'scale_factor'          : scale_factor,
 715         'threads'               : threads,
 716         'bench_opts'            : bench_opts,
 717         'par_load'              : par_load,
 718         'retry'                 : retry,
 719         'backoff'               : backoff,
 720         'persist'               : persist,
 721         'numa_memory'           : numa_memory,
 722         'log_fake_writes'       : log_fake_writes,
 723         'log_nofsync'           : log_nofsync,
 724         'log_compress'          : log_compress,
 725         'disable_gc'            : disable_gc,
 726         'disable_snapshots'     : disable_snapshots,
 727       }
 728       print >>sys.stderr, '[INFO] running config %s' % (str(config))
 729       if persist != PERSIST_NONE:
 730         info = MACHINE_CONFIG[node]['logfiles']
 731         tempprefix = MACHINE_CONFIG[node]['tempprefix']
 732         logfiles = \
 733             [x[0] for x in info] if persist == PERSIST_REAL \
 734               else [os.path.join(tempprefix, 'data%d.log' % (idx)) for idx in xrange(len(info))]
 735         weights = \
 736           normalize([x[1] for x in info]) if persist == PERSIST_REAL else \
 737           normalize([1.0 for _ in info])
 738         assignments = allocate(threads, weights)
 739       else:
 740         logfiles, assignments = [], []
 741       values = []
 742       for _ in range(NTRIALS):
 743         value = run_configuration(
 744             binary, disable_madv_willneed,
 745             basedir, db, bench, scale_factor, threads,
 746             bench_opts, par_load, retry, backoff, numa_memory,
 747             logfiles, assignments, log_fake_writes,
 748             log_nofsync, log_compress, disable_gc,
 749             disable_snapshots)
 750         values.append(value)
 751       results.append((config, values))
 752
 753     # write intermediate results
 754     with open(outfile + '.py', 'w') as fp:
 755       print >>fp, 'RESULTS = %s' % (repr(results))
 756
 757   # write results
 758   with open(outfile + '.py', 'w') as fp:
 759     print >>fp, 'RESULTS = %s' % (repr(results))