jsbench-2013.1/harness.py

   1 #!/usr/bin/env python
   2 # Copyright (C) 2011, 2012 Purdue University
   3 # Written by Gregor Richards
   4 # All rights reserved.
   5 #
   6 # Redistribution and use in source and binary forms, with or without
   7 # modification, are permitted provided that the following conditions are met:
   8 #
   9 # 1. Redistributions of source code must retain the above copyright notice,
  10 #    this list of conditions and the following disclaimer.
  11 # 2. Redistributions in binary form must reproduce the above copyright notice,
  12 #    this list of conditions and the following disclaimer in the documentation
  13 #    and/or other materials provided with the distribution.
  14 #
  15 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  19 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  21 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  22 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  24 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25 # POSSIBILITY OF SUCH DAMAGE.
  26
  27 import math
  28 import os
  29 import re
  30 import sys
  31
  32 benchmarks = [
  33     "amazon/chrome", "amazon/chrome-win", "amazon/firefox",
  34     "amazon/firefox-win", "amazon/safari",
  35     "facebook/chrome", "facebook/chrome-win", "facebook/firefox",
  36     "facebook/firefox-win", "facebook/safari",
  37     "google/chrome", "google/chrome-win", "google/firefox",
  38     "google/firefox-win", "google/safari",
  39     "twitter/chrome", "twitter/chrome-win", "twitter/firefox",
  40     "twitter/firefox-win", "twitter/safari",
  41     "yahoo/chrome", "yahoo/chrome-win", "yahoo/firefox",
  42     "yahoo/firefox-win", "yahoo/safari",
  43 ]
  44
  45 modes = {
  46     "*": ["urem"],
  47     "amazon/firefox": ["urm"],
  48     "amazon/firefox-win": ["urm"],
  49     "google/firefox": ["uem"],
  50     "twitter/chrome-win": ["rem"]
  51 };
  52
  53 '''
  54 benchmarks = ["amazon/chrome", "amazon/firefox", "amazon/safari",
  55               "facebook/chrome", "facebook/firefox", "facebook/safari",
  56               "google/chrome", "google/firefox", "google/safari",
  57               "twitter/chrome", "twitter/firefox", "twitter/safari",
  58               "yahoo/chrome", "yahoo/firefox", "yahoo/safari"]
  59 modes = {
  60     "*": ["urem"],
  61     "amazon/firefox": ["urm"],
  62     "google/firefox": ["uem"]
  63 }'''
  64
  65 #runcount = 25
  66
  67 if len(sys.argv) != 3:
  68     print "Use: python harness.py <JS executable> <number of runs>"
  69     exit(1)
  70 js = sys.argv[1]
  71 runcount = int(sys.argv[2])
  72
  73 keepruns = 20
  74 keepfrom = runcount - keepruns
  75
  76 # standard t-distribution for normally distributed samples
  77 tDistribution = [0, 0, 12.71, 4.30, 3.18, 2.78, 2.57, 2.45, 2.36, 2.31, 2.26,
  78 2.23, 2.20, 2.18, 2.16, 2.14, 2.13, 2.12, 2.11, 2.10, 2.09, 2.09, 2.08, 2.07,
  79 2.07, 2.06, 2.06, 2.06, 2.05, 2.05, 2.05, 2.04, 2.04, 2.04, 2.03, 2.03, 2.03,
  80 2.03, 2.03, 2.02, 2.02, 2.02, 2.02, 2.02, 2.02, 2.02, 2.01, 2.01, 2.01, 2.01,
  81 2.01, 2.01, 2.01, 2.01, 2.01, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00,
  82 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 2.00, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99,
  83 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99,
  84 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.99, 1.98, 1.98, 1.98, 1.98, 1.98,
  85 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
  86 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
  87 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
  88 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.98,
  89 1.98, 1.98, 1.98, 1.98, 1.98, 1.98, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
  90 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
  91 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
  92 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
  93 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
  94 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
  95 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
  96 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
  97 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
  98 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
  99 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 100 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 101 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 102 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 103 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 104 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 105 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 106 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 107 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 108 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 109 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 110 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 111 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 112 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97,
 113 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.97, 1.96]
 114
 115 def tDist(n):
 116     if (n >= len(tDistribution)):
 117         return tDistribution[-1]
 118     return tDistribution[n]
 119
 120 results = {}
 121
 122 for benchmark in benchmarks:
 123     results[benchmark] = {}
 124
 125     bmodes = modes["*"]
 126     if benchmark in modes:
 127         bmodes = modes[benchmark]
 128
 129     for mode in bmodes:
 130         results[benchmark][mode] = []
 131
 132         for runno in range(runcount):
 133             # Now run it and get the results
 134             print(benchmark + " " + mode + " " + str(runno))
 135             res = os.popen(js + " " + benchmark + "/" + mode + ".js").read()
 136             time = float(re.findall("Time: ([0-9]*)ms", res)[0])
 137 #            time = float(re.match("Time: ([0-9]*)ms", res).group(1))
 138
 139             if runno >= keepfrom:
 140                 results[benchmark][mode].append(time)
 141
 142 # Collect the totals
 143 sresults = {}
 144 totals = {
 145     "mean": 1,
 146     "stddev": 1,
 147     "sem": 1,
 148     "ci": 1,
 149     "runs": 0
 150 }
 151
 152 power = 1.0 / len(benchmarks)
 153
 154 for benchmark in benchmarks:
 155     sresults[benchmark] = {}
 156
 157     bmodes = modes["*"]
 158     if benchmark in modes:
 159         bmodes = modes[benchmark]
 160
 161     for mode in bmodes:
 162         sresults[benchmark][mode] = sresult = {}
 163         result = results[benchmark][mode]
 164         totals["runs"] = totals["runs"] + 1
 165
 166         sresult["mode"] = mode
 167
 168         mean = sresult["mean"] = float(sum(result)) / len(result)
 169         stddev = sresult["stddev"] = math.sqrt(
 170             sum(
 171                 map(lambda e: math.pow(e - mean, 2), result)
 172             ) / float((len(result) - 1))
 173         )
 174
 175         sm = sresult["sm"] = stddev / float(mean)
 176         sem = sresult["sem"] = stddev / float(math.sqrt(len(result)))
 177         semm = sresult["semm"] = sem / float(mean)
 178         ci = sresult["ci"] = tDist(len(result)) * sem
 179         cim = sresult["cim"] = ci / float(mean)
 180
 181         totals["mean"] *= math.pow(mean, power)
 182
 183         if (stddev == 0):
 184             totals["stddev"] *= 1
 185             print("stddev is 0")
 186         else:
 187             totals["stddev"] *= math.pow(stddev, power)
 188         totals["sem"] *= math.pow(sem, power)
 189         totals["ci"] *= math.pow(ci, power)
 190
 191 #power = 1.0 / totals["runs"]
 192 #totals["mean"] = math.pow(totals["mean"], power)
 193 #totals["stddev"] = math.pow(totals["stddev"], power)
 194 #totals["sem"] = math.pow(totals["sem"], power)
 195 #totals["ci"] = math.pow(totals["ci"], power)
 196
 197 totals["sm"] = totals["stddev"] / float(totals["mean"])
 198 totals["semm"] = totals["sem"] / float(totals["mean"])
 199 totals["cim"] = totals["ci"] / float(totals["mean"])
 200
 201 totals["sm"] *= 100
 202 totals["semm"] *= 100
 203 totals["cim"] *= 100
 204
 205 print "Final results:"
 206 print "  %(mean)fms +- %(cim)f%% (lower is better)" % totals
 207 print "  Standard deviation = %(sm)f%% of mean" % totals
 208 print "  Standard error = %(semm)f%% of mean" % totals
 209 print "  %(runs)d runs" % {"runs": runcount}
 210 print ""
 211
 212 print "Result breakdown:"
 213 for benchmark in benchmarks:
 214     print "  %(benchmark)s:" % {"benchmark": benchmark}
 215
 216     bmodes = modes["*"]
 217     if benchmark in modes:
 218         bmodes = modes[benchmark]
 219
 220     for mode in bmodes:
 221         print "  %(mode)s: %(mean)fms +- %(cim)f%% (stddev=%(sm)f%%, stderr=%(semm)f%%)" % sresults[benchmark][mode]
 222
 223 print ""
 224
 225 print "Raw results:"
 226 for benchmark in benchmarks:
 227     print "  %(benchmark)s:" % {"benchmark": benchmark}
 228
 229     bmodes = modes["*"]
 230     if benchmark in modes:
 231         bmodes = modes[benchmark]
 232
 233     for mode in bmodes:
 234         print "    %(mode)s: %(results)s" % {
 235             "mode": mode,
 236             "results": results[benchmark][mode]
 237         }