jsbench-2013.1/harness.py

   1 #!/usr/bin/env python
   2 # Copyright (C) 2011, 2012 Purdue University
   3 # Written by Gregor Richards
   4 # All rights reserved.
   5 #
   6 # Redistribution and use in source and binary forms, with or without
   7 # modification, are permitted provided that the following conditions are met:
   8 #
   9 # 1. Redistributions of source code must retain the above copyright notice,
  10 #    this list of conditions and the following disclaimer.
  11 # 2. Redistributions in binary form must reproduce the above copyright notice,
  12 #    this list of conditions and the following disclaimer in the documentation
  13 #    and/or other materials provided with the distribution.
  14 #
  15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  19 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  21 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  22 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  24 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25 # POSSIBILITY OF SUCH DAMAGE.
  26
  27 import math
  28 import os
  29 import re
  30 import sys
  31
  32 benchmarks = ["amazon/chrome", "amazon/firefox", "amazon/safari",
  33               "facebook/chrome", "facebook/firefox", "facebook/safari",
  34               "google/chrome", "google/firefox", "google/safari",
  35               "twitter/chrome", "twitter/firefox", "twitter/safari",
  36               "yahoo/chrome", "yahoo/firefox", "yahoo/safari"]
  37 modes = {
  38     "*": ["urem"],
  39     "amazon/firefox": ["urm"],
  40     "google/firefox": ["uem"]
  41 }
  42 runcount = 25
  43 keepruns = 20
  44
  45 keepfrom = runcount - keepruns
  46
  47 if len(sys.argv) != 2:
  48     print "Use: python harness.py <JS executable>"
  49     exit(1)
  50 js = sys.argv[1]
  51
# Standard t-distribution for normally distributed samples.
#
# The table is indexed by the number of samples: the statistics code calls
# tDist(len(result)).  Entries 0 and 1 are 0.
# NOTE(review): the values look like two-sided 95% critical values with
# index n holding the value for n - 1 degrees of freedom (12.71 at index 2,
# tailing off to 1.96) -- confirm against a reference table before relying
# on this.
tDistribution = [0, 0, 12.71, 4.30, 3.18, 2.78, 2.57, 2.45, 2.36, 2.31, 2.26,
2.23, 2.20, 2.18, 2.16, 2.14, 2.13, 2.12, 2.11, 2.10, 2.09, 2.09, 2.08, 2.07,
2.07, 2.06, 2.06, 2.06, 2.05, 2.05, 2.05, 2.04, 2.04, 2.04, 2.03, 2.03, 2.03,
2.03, 2.03, 2.02, 2.02, 2.02, 2.02, 2.02, 2.02, 2.02, 2.01, 2.01, 2.01, 2.01,
2.01, 2.01, 2.01, 2.01, 2.01, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00,
2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99,
1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99,
1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.98, 1.98, 1.98, 1.98, 1.98,
1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.96]
  90
  91 def tDist(n):
  92     if (n >= len(tDistribution)):
  93         return tDistribution[-1]
  94     return tDistribution[n]
  95
  96 results = {}
  97
  98 for benchmark in benchmarks:
  99     results[benchmark] = {}
 100
 101     bmodes = modes["*"]
 102     if benchmark in modes:
 103         bmodes = modes[benchmark]
 104
 105     for mode in bmodes:
 106         results[benchmark][mode] = []
 107
 108         for runno in range(runcount):
 109             # Now run it and get the results
 110             print(benchmark + " " + mode + " " + str(runno))
 111             res = os.popen(js + " " + benchmark + "/" + mode + ".js").read()
 112             time = float(re.match("Time: ([0-9]*)ms", res).group(1))
 113
 114             if runno >= keepfrom:
 115                 results[benchmark][mode].append(time)
 116
 117 # Collect the totals
 118 sresults = {}
 119 totals = {
 120     "mean": 1,
 121     "stddev": 1,
 122     "sem": 1,
 123     "ci": 1,
 124     "runs": 0
 125 }
 126
 127 for benchmark in benchmarks:
 128     sresults[benchmark] = {}
 129
 130
 131     print("middle");
 132
 133     bmodes = modes["*"]
 134     if benchmark in modes:
 135         bmodes = modes[benchmark]
 136
 137     for mode in bmodes:
 138         sresults[benchmark][mode] = sresult = {}
 139         result = results[benchmark][mode]
 140         totals["runs"] = totals["runs"] + 1
 141
 142         sresult["mode"] = mode
 143
 144         mean = sresult["mean"] = sum(result) / len(result)
 145         stddev = sresult["stddev"] = math.sqrt(
 146             sum(
 147                 map(lambda e: math.pow(e - mean, 2), result)
 148             ) / (len(result) - 1)
 149         )
 150
 151         sm = sresult["sm"] = stddev / mean
 152         sem = sresult["sem"] = stddev / math.sqrt(len(result))
 153         semm = sresult["semm"] = sem / mean
 154         ci = sresult["ci"] = tDist(len(result)) * sem
 155         cim = sresult["cim"] = ci / mean
 156
 157         totals["mean"] *= mean
 158         totals["stddev"] *= stddev
 159         totals["sem"] *= sem
 160         totals["ci"] *= ci
 161
 162 power = 1 / totals["runs"]
 163 totals["mean"] = math.pow(totals["mean"], power)
 164 totals["stddev"] = math.pow(totals["stddev"], power)
 165 totals["sm"] = totals["stddev"] / totals["mean"]
 166 totals["sem"] = math.pow(totals["sem"], power)
 167 totals["semm"] = totals["sem"] / totals["mean"]
 168 totals["ci"] = math.pow(totals["ci"], power)
 169 totals["cim"] = totals["ci"] / totals["mean"]
 170
 171 totals["sm"] *= 100
 172 totals["semm"] *= 100
 173 totals["cim"] *= 100
 174
 175 print "Final results:"
 176 print u"  %(mean)fms \u00b1 %(cim)f%% (lower is better)" % totals
 177 print "  Standard deviation = %(sm)f%% of mean" % totals
 178 print "  Standard error = %(semm)f%% of mean" % totals
 179 print "  %(runs)d runs" % {"runs": runcount}
 180 print ""
 181
 182 print "Result breakdown:"
 183 for benchmark in benchmarks:
 184     print "  %(benchmark)s:" % {"benchmark": benchmark}
 185
 186     bmodes = modes["*"]
 187     if benchmark in modes:
 188         bmodes = modes[benchmark]
 189
 190     for mode in bmodes:
 191         print u"  %(mode)s: %(mean)fms \u00b1 %(cim)f%% (stddev=%(sm)f%%, stderr=%(semm)f%%)" % sresults[benchmark][mode]
 192 print ""
 193
 194 print "Raw results:"
 195 for benchmark in benchmarks:
 196     print "  %(benchmark)s:" % {"benchmark": benchmark}
 197
 198     bmodes = modes["*"]
 199     if benchmark in modes:
 200         bmodes = modes[benchmark]
 201
 202     for mode in bmodes:
 203         print "    %(mode)s: %(results)s" % {
 204             "mode": mode,
 205             "results": results[benchmark][mode]
 206         }