removing duplicates
[satune.git] / src / analyzer / tunerloganalyzer.py
1 import re
2 import argparse
3 import sys
4 import os
5
6
class AutoTunerArgParser:
    """Command-line options of the tuner log analyzer.

    Two options are recognised, each taking exactly one value:

    * ``--folder`` / ``-f``: a string, returned by :meth:`getFolder`.
    * ``--number`` / ``-n``: an integer, returned by :meth:`getRunNumber`.
    """

    def __init__(self, args=None):
        """Build the parser and parse the command line.

        args -- optional list of argument strings; when ``None`` argparse
                falls back to ``sys.argv[1:]``.

        Exits through argparse (``SystemExit``) when an option is missing
        or malformed.
        """
        self.parser = argparse.ArgumentParser(description='Parsing the output log of the CSolver auto tuner ...')
        # Both accessors index element 0 of the parsed value. Without
        # required=True a missing option is stored as None and only fails
        # later with an opaque TypeError.
        self.parser.add_argument('--folder', '-f', metavar='bin', type=str, nargs=1, required=True,
                                 help='output log of running the autotuner ...')
        self.parser.add_argument('--number', '-n', metavar='122', type=int, nargs=1, required=True,
                                 help='Number of runs ...')
        self.args = self.parser.parse_args(args)

    def getFolder(self):
        """Return the value given to ``--folder``."""
        # nargs=1 makes argparse store a one-element list.
        return self.args.folder[0]

    def getRunNumber(self):
        """Return the integer given to ``--number``."""
        return self.args.number[0]
19
# Problem names, positioned by problem number; populated by loadProblemName().
PROBLEMS = []

# Column names of the generated CSV, in output order. loadTunerInfo() also
# uses positions in this list to select the column a tuner setting goes to.
TUNABLEHEADER = [
    "DECOMPOSEORDER",
    "MUSTREACHGLOBAL",
    "MUSTREACHLOCAL",
    "MUSTREACHPRUNE",
    "OPTIMIZEORDERSTRUCTURE",
    "ORDERINTEGERENCODING",
    "PREPROCESS",
    "NODEENCODING",
    "EDGEENCODING",
    "MUSTEDGEPRUNE",
    "ELEMENTOPT",
    "ENCODINGGRAPHOPT",
    "ELEMENTOPTSETS",
    "PROXYVARIABLE",
    "MUSTVALUE",
    "NAIVEENCODER",
    "VARIABLEORDER",
    # Per-run bookkeeping columns follow the tunable settings.
    "PROBLEM",
    "SATTIME",
    "EXECTIME",
    "TUNERNUMBER",
]
26
# Latest value extracted from a solver log for each key of REGEXES;
# "-" marks a value that has not been found. Written by loadSolverTime().
configs = dict.fromkeys(
    (
        "EXECTIME",
        "SATTIME",
        "TESTCASE",
        "PREPROCESS",
        "ELEMENTOPT",
        "ELEMENTOPTSETS",
        "PROXYVARIABLE",
        "#SubGraph",
        "NODEENCODING",
        "EDGEENCODING",
        "NAIVEENCODER",
        "ENCODINGGRAPHOPT",
    ),
    "-",
)
40
# Regular expression used to recognise each solver-log line of interest,
# keyed like `configs`. Raw strings keep the backslashes literal: "\[" in a
# normal string literal is an invalid escape sequence.
# NOTE(review): the trailing "..." in TESTCASE is unescaped and therefore
# matches any three characters -- confirm whether literal dots were intended.
REGEXES = {"EXECTIME": r"CSOLVER solve time: (.*)",
           "SATTIME": r"SAT Solving time: (.*)",
           "TESTCASE": r"deserializing (.+) ...",
           "PREPROCESS": r"Param PREPROCESS = (.*)range=\[0,1\]",
           "ELEMENTOPT": r"Param ELEMENTOPT = (.*)range=\[0,1\]",
           "ELEMENTOPTSETS": r"Param ELEMENTOPTSETS = (.*)range=\[0,1\]",
           "PROXYVARIABLE": r"Param PROXYVARIABLE = (.*)range=\[1,5\]",
           "#SubGraph": r"#SubGraph = (.*)",
           "NODEENCODING": r"Param NODEENCODING = (.*)range=\[0,3\](.*)",
           "EDGEENCODING": r"Param EDGEENCODING = (.*)range=\[0,2\](.*)",
           "NAIVEENCODER": r"Param NAIVEENCODER = (.*)range=\[1,3\](.*)",
           "ENCODINGGRAPHOPT": r"Param ENCODINGGRAPHOPT = (.*)range=\[0,1\]"
           }
54
def printHeader(file, headers=None):
    """Write the CSV header line to *file*.

    Every column name is followed by a comma, so the line ends with a
    trailing comma before the newline.

    file    -- writable text file object.
    headers -- column names to write; defaults to TUNABLEHEADER.
    """
    if headers is None:
        headers = TUNABLEHEADER
    # file.write behaves the same on Python 2 and 3, unlike `print >>file`.
    file.write("".join(str(name) + "," for name in headers) + "\n")
61
def dump(file, row, headers=None):
    """Write one CSV data line for *row* to *file* and echo it on stdout.

    The values are emitted in header order, each followed by a comma.

    file    -- writable text file object.
    row     -- mapping from column name to a string value; it must contain
               every name in *headers*.
    headers -- column names to emit; defaults to TUNABLEHEADER.
    """
    if headers is None:
        headers = TUNABLEHEADER
    line = "".join(row[name] + "," for name in headers)
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("mystr is:" + line)
    file.write(line + "\n")
69
def loadTunerInfo(row, filename, headers=None):
    """Append the tuner settings recorded in *filename* to *row*.

    For every non-blank line, all runs of digits are read as integers. The
    4th integer is used as a position in *headers* to pick the column, and
    the 8th integer is appended, as text, to that column's current value.

    row      -- mapping from column name to string; updated in place.
    filename -- path of the file to read.
    headers  -- column names; defaults to TUNABLEHEADER.

    Raises IndexError when a non-blank line holds fewer than eight integers
    or the 4th integer is not a valid position in *headers*.
    """
    if headers is None:
        headers = TUNABLEHEADER
    with open(filename) as f:
        for line in f:
            if not line.strip():
                # A blank line (e.g. at the end of the file) carries no setting.
                continue
            # Build a real list: map() returns a non-subscriptable iterator
            # on Python 3.
            numbers = [int(token) for token in re.findall(r'\d+', line)]
            column = headers[numbers[3]]
            row[column] = row[column] + str(numbers[7])
76
def loadSolverTime(row, filename):
    """Scan a solver log and copy its SAT and execution times into *row*.

    Every line is tested against each pattern in REGEXES. On a match the
    corresponding entry of the module-level `configs` is updated: for
    "TESTCASE" with the first capture group, for every other key with the
    first number (digits with an optional fractional part) on the line.
    Later matches overwrite earlier ones.

    row      -- mapping updated in place; receives "SATTIME" and "EXECTIME".
    filename -- path of the log file to read.
    """
    # Start from a clean slate so a log without a timing line reports "-"
    # instead of inheriting the value left behind by the previous log.
    for key in configs:
        configs[key] = "-"

    # Compile once per call rather than once per line and pattern.
    patterns = [(key, re.compile(expr)) for key, expr in REGEXES.items()]
    number = re.compile(r"\d+\.?\d*")

    with open(filename) as f:
        for line in f:
            for key, pattern in patterns:
                match = pattern.search(line)
                if match is None:
                    continue
                if key == "TESTCASE":
                    configs[key] = match.group(1)
                else:
                    first_number = number.search(line)
                    # A matching line without any number keeps the previous
                    # value instead of raising IndexError.
                    if first_number is not None:
                        configs[key] = first_number.group(0)
    row["SATTIME"] = configs["SATTIME"]
    row["EXECTIME"] = configs["EXECTIME"]
92
def loadProblemName(row, filename):
    """Read a problem name and number from *filename* and record them.

    The first line of the file is the problem name, the second line is
    parsed as the integer problem number. The name is stored in the
    module-level PROBLEMS list (inserted when the number is at or past the
    current end of the list, otherwise written at that position) and in
    ``row["PROBLEM"]``.
    """
    with open(filename) as handle:
        name = handle.readline().replace("\n", "")
        index = int(handle.readline())

    if index >= len(PROBLEMS):
        # list.insert with an index beyond the end places the item last.
        PROBLEMS.insert(index, name)
    else:
        PROBLEMS[index] = name
    row["PROBLEM"] = name
103
def loadTunerNumber(row, filename):
    """Store the first line of *filename*, without its newline, in
    ``row["TUNERNUMBER"]``."""
    with open(filename) as handle:
        first_line = handle.readline()
        row["TUNERNUMBER"] = first_line.replace("\n", "")
def analyzeLogs(file):
    """Parse the per-run files of every run and write them as CSV rows.

    The folder and the number of runs come from the command line (see
    AutoTunerArgParser). For run ``i`` the files ``tunernum<i>``,
    ``tuner<i>used``, ``log<i>`` and ``problem<i>`` inside the folder are
    read, in that order, into one row, which is then written to *file*.

    file -- writable text file object receiving the header and the rows.

    Returns the list of row dictionaries, one per run.
    """
    cli = AutoTunerArgParser()
    printHeader(file)
    collected = []
    for run in range(cli.getRunNumber()):
        # Every column starts out empty; TUNABLEHEADER lists all of them.
        row = dict.fromkeys(TUNABLEHEADER, "")
        folder = cli.getFolder()
        suffix = str(run)
        loadTunerNumber(row, folder + "/tunernum" + suffix)
        loadTunerInfo(row, folder + "/tuner" + suffix + "used")
        loadSolverTime(row, folder + "/log" + suffix)
        loadProblemName(row, folder + "/problem" + suffix)
        dump(file, row)
        collected.append(row)
    return collected
142
def tunerCountAnalysis(file, rows):
    """Print the tuner configurations that occur more than once in *rows*.

    A configuration is identified by concatenating the values of the first
    18 columns of TUNABLEHEADER, with "." standing in for an empty value.
    For each configuration seen more than once, the configuration, the
    "-"-joined tuner numbers of its rows and the number of occurrences are
    printed to stdout.

    file -- unused; kept for interface compatibility.
    rows -- iterable of row dictionaries.
    """
    # NOTE(review): 18 columns cover the 17 tunable settings plus "PROBLEM";
    # confirm that the problem name is meant to be part of the signature.
    signature_columns = TUNABLEHEADER[:18]
    tunercount = {}
    tunernumber = {}
    for row in rows:
        signature = "".join(row[name] or "." for name in signature_columns)
        ident = str(row["TUNERNUMBER"])
        if signature in tunercount:
            tunercount[signature] += 1
            tunernumber[signature] += "-" + ident
        else:
            tunercount[signature] = 1
            tunernumber[signature] = ident

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Number of repititive tuners")
    for key in tunercount:
        if tunercount[key] > 1:
            print(key + "(ids:" + tunernumber[key] + ") = #" + str(tunercount[key]))
167
def combineRowForEachTuner(rows, problems=None):
    """Merge consecutive per-problem rows into one row per tuner.

    A row whose "PROBLEM" equals the first entry of *problems* starts a new
    combined row. Every following row overwrites the combined row with its
    non-empty values. When a row whose "PROBLEM" equals the last entry of
    *problems* has been merged, the combined row is added to the result.

    rows     -- iterable of row dictionaries; they are not modified.
    problems -- ordered problem names; defaults to the module-level PROBLEMS.

    Returns the list of combined rows. Rows seen before the first starting
    row are ignored, and an empty *problems* yields an empty list.
    """
    if problems is None:
        problems = PROBLEMS
    if not problems:
        # Without known problems there is no start/end marker to group by.
        return []

    first, last = problems[0], problems[-1]
    newRows = []
    combined = None
    for row in rows:
        if row["PROBLEM"] == first:
            # Copy so that merging never alters the caller's row.
            combined = dict(row)
        elif combined is not None:
            for key, value in row.items():
                if value:
                    combined[key] = value
        # `combined` stays None until a starting row has been seen; such
        # leading rows cannot be attributed to a tuner and are skipped.
        if combined is not None and row["PROBLEM"] == last:
            newRows.append(combined)
    return newRows
181
def main():
    """Write the per-run CSV to ``tuner.csv`` and report repeated tuners."""
    # The with-block closes the file even when the analysis raises.
    with open("tuner.csv", "w") as out:
        rows = analyzeLogs(out)
        tunerCountAnalysis(out, combineRowForEachTuner(rows))
188
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()