edits
[cdsspec-compiler.git] / src / edu / uci / eecs / specExtraction / SpecExtractor.java
1 package edu.uci.eecs.specExtraction;
2
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.FileNotFoundException;
6 import java.io.FileReader;
7 import java.io.IOException;
8 import java.io.LineNumberReader;
9 import java.util.ArrayList;
10 import java.util.Collections;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15
16 import edu.uci.eecs.codeGenerator.CodeGeneratorUtils;
17 import edu.uci.eecs.codeGenerator.Environment;
18 import edu.uci.eecs.utilParser.ParseException;
19
20 /**
21  * <p>
22  * This class represents the specification extractor of the specification. The
23  * main function of this class is to read C/C++11 source files and extract the
24  * corresponding specifications, and record corresponding information such as
25  * location, e.g., the file name and the line number, to help the code
26  * generation process.
27  * </p>
28  * 
29  * @author Peizhao Ou
30  * 
31  */
32 public class SpecExtractor {
33         public final HashMap<File, ArrayList<InterfaceConstruct>> interfaceListMap;
34         public final HashMap<File, ArrayList<OPConstruct>> OPListMap;
35         public final HashSet<String> OPLabelSet;
36         // Note that we only allow one entry per file at most
37         public final HashMap<File, EntryConstruct> entryMap;
38
39         public final HashSet<String> headerFiles;
40
41         private GlobalConstruct globalConstruct;
42
43         public SpecExtractor() {
44                 interfaceListMap = new HashMap<File, ArrayList<InterfaceConstruct>>();
45                 OPListMap = new HashMap<File, ArrayList<OPConstruct>>();
46                 OPLabelSet = new HashSet<String>();
47                 entryMap = new HashMap<File, EntryConstruct>();
48                 headerFiles = new HashSet<String>();
49                 globalConstruct = null;
50         }
51
52         private void addInterfaceConstruct(InterfaceConstruct construct) {
53                 ArrayList<InterfaceConstruct> list = interfaceListMap.get(construct.file);
54                 if (list == null) {
55                         list = new ArrayList<InterfaceConstruct>();
56                         interfaceListMap.put(construct.file, list);
57                 }
58                 list.add(construct);
59         }
60
61         private void addOPConstruct(OPConstruct construct) {
62                 ArrayList<OPConstruct> list = OPListMap.get(construct.file);
63                 if (list == null) {
64                         list = new ArrayList<OPConstruct>();
65                         OPListMap.put(construct.file, list);
66                 }
67                 list.add(construct);
68         }
69
70         private void addEntryConstruct(File file, EntryConstruct construct) throws WrongAnnotationException {
71                 EntryConstruct old = entryMap.get(file);
72                 if (old == null)
73                         entryMap.put(file, construct);
74                 else { // Error processing
75                         String errMsg = "Multiple @Entry annotations in the same file.\n\t Other @Entry at Line " + old.beginLineNum
76                                         + ".";
77                         WrongAnnotationException.err(file, construct.beginLineNum, errMsg);
78                 }
79         }
80
81         public GlobalConstruct getGlobalConstruct() {
82                 return this.globalConstruct;
83         }
84
85         /**
86          * <p>
87          * A print out function for the purpose of debugging. Note that we better
88          * call this function after having called the checkSemantics() function to
89          * check annotation consistency.
90          * </p>
91          */
92         public void printAnnotations() {
93                 System.out.println("/**********    Print out of specification extraction    **********/");
94                 System.out.println("// Extracted header files");
95                 for (String header : headerFiles)
96                         System.out.println(header);
97
98                 System.out.println("// Global State Construct");
99                 if (globalConstruct != null)
100                         System.out.println(globalConstruct);
101
102                 for (File file : interfaceListMap.keySet()) {
103                         ArrayList<InterfaceConstruct> list = interfaceListMap.get(file);
104                         System.out.println("// Interface in file: " + file.getName());
105                         for (InterfaceConstruct construct : list) {
106                                 System.out.println(construct);
107                                 System.out.println("EndLineNumFunc: " + construct.getEndLineNumFunction());
108                         }
109                 }
110
111                 for (File file : OPListMap.keySet()) {
112                         System.out.println("// Ordering points in file: " + file.getName());
113                         ArrayList<OPConstruct> list = OPListMap.get(file);
114                         for (OPConstruct construct : list)
115                                 System.out.println(construct);
116                 }
117
118                 for (File file : entryMap.keySet()) {
119                         System.out.println("// Entry in file: " + file.getName());
120                         System.out.println(entryMap.get(file));
121                 }
122         }
123
124         /**
125          * <p>
126          * Perform basic semantics checking of the extracted specification.
127          * </p>
128          * 
129          * @return
130          * @throws WrongAnnotationException
131          */
132         public void checkSemantics() throws WrongAnnotationException {
133                 String errMsg = null;
134
135                 // Assert that we have defined and only defined one global state
136                 // annotation
137                 if (globalConstruct == null) {
138                         errMsg = "Spec error: There should be one global state annotation.\n";
139                         throw new WrongAnnotationException(errMsg);
140                 }
141
142                 // Assert that the interface constructs have unique label name
143                 HashMap<String, InterfaceConstruct> interfaceMap = new HashMap<String, InterfaceConstruct>();
144                 for (File f : interfaceListMap.keySet()) {
145                         ArrayList<InterfaceConstruct> list = interfaceListMap.get(f);
146                         if (list != null) {
147                                 for (InterfaceConstruct construct : list) {
148                                         InterfaceConstruct existingConstruct = interfaceMap.get(construct.getName());
149                                         if (existingConstruct != null) { // Error
150                                                 errMsg = "Interface labels duplication with: \"" + construct.getName() + "\" in File \""
151                                                                 + existingConstruct.file.getName() + "\", Line " + existingConstruct.beginLineNum + ".";
152                                                 WrongAnnotationException.err(construct.file, construct.beginLineNum, errMsg);
153                                         } else {
154                                                 interfaceMap.put(construct.getName(), construct);
155                                         }
156                                 }
157                         }
158                 }
159
160                 // Process ordering point labels
161                 for (File file : OPListMap.keySet()) {
162                         ArrayList<OPConstruct> list = OPListMap.get(file);
163                         for (OPConstruct construct : list) {
164                                 if (construct.type == OPType.OPCheck || construct.type == OPType.PotentialOP) {
165                                         String label = construct.label;
166                                         OPLabelSet.add(label);
167                                 }
168                         }
169                 }
170
171         }
172
173         /**
174          * <p>
175          * This function applies on a String (a plain line of text) to check whether
176          * the current line is a C/C++ header include statement. If it is, it
177          * extracts the header file name and store it, and returns true; otherwise,
178          * it returns false.
179          * </p>
180          * 
181          * @param line
182          *            The line of text to be processed
183          * @return Returns true if the current line is a C/C++ header include
184          *         statement
185          */
186         public boolean extractHeaders(String line) {
187                 // "^( |\t)*#include( |\t)+("|<)([a-zA-Z_0-9\-\.])+("|>)"
188                 Pattern regexp = Pattern.compile("^( |\\t)*(#include)( |\\t)+(\"|<)([a-zA-Z_0-9\\-\\.]+)(\"|>)");
189                 Matcher matcher = regexp.matcher(line);
190
191                 // process the line.
192                 if (matcher.find()) {
193                         String header = null;
194                         String braceSymbol = matcher.group(4);
195                         if (braceSymbol.equals("<"))
196                                 header = "<" + matcher.group(5) + ">";
197                         else
198                                 header = "\"" + matcher.group(5) + "\"";
199                         if (!SpecNaming.isPreIncludedHeader(header)) {
200                                 headerFiles.add(header);
201                         }
202                         return true;
203                 } else
204                         return false;
205         }
206
207         /**
208          * <p>
209          * A sub-routine to extract the construct from beginning till end. When
210          * called, we have already match the beginning of the construct. We will
211          * call this sub-routine when we extract the interface construct and the
212          * global state construct.
213          * </p>
214          * 
215          * <p>
216          * The side effect of this function is that the lineReader has just read the
217          * end of the construct, meaning that the caller can get the end line number
218          * by calling lineReader.getLineNumber().
219          * </p>
220          * 
221          * @param file
222          *            The file that we are processing
223          * @param lineReader
224          *            The LineNumberReader that we are using when processing the
225          *            current file.
226          * @param file
227          *            The file that we are processing
228          * @param curLine
229          *            The current line that we are processing. It should be the
230          *            beginning line of the annotation construct.
231          * @param beginLineNum
232          *            The beginning line number of the interface construct
233          *            annotation
234          * @return Returns the annotation string list of the current construct
235          * @throws WrongAnnotationException
236          */
237         private ArrayList<String> extractTillConstructEnd(File file, LineNumberReader lineReader, String curLine,
238                         int beginLineNum) throws WrongAnnotationException {
239                 ArrayList<String> annotations = new ArrayList<String>();
240                 annotations.add(curLine);
241                 // System.out.println(curLine);
242                 // Initial settings for matching lines
243                 // "\*/( |\t)*$"
244                 Pattern regexpEnd = Pattern.compile("\\*/( |\\t)*$");
245                 Matcher matcher = regexpEnd.matcher(curLine);
246                 if (matcher.find()) { // The beginning line is also the end line
247                         annotations.add(curLine);
248                         return annotations;
249                 } else {
250                         try {
251                                 String line;
252                                 while ((line = lineReader.readLine()) != null) {
253                                         // process the line.
254                                         // System.out.println(line);
255
256                                         matcher.reset(line); // reset the input
257                                         annotations.add(line);
258                                         if (matcher.find())
259                                                 return annotations;
260                                 }
261                                 WrongAnnotationException.err(file, beginLineNum,
262                                                 "The interface annotation should have the matching closing symbol closing \"*/\"");
263                         } catch (IOException e) {
264                                 e.printStackTrace();
265                         }
266                 }
267                 return null;
268         }
269
270         /**
271          * <p>
272          * A sub-routine to extract the global construct. When called, we have
273          * already match the beginning of the construct.
274          * </p>
275          * 
276          * @param file
277          *            The file that we are processing
278          * @param lineReader
279          *            The LineNumberReader that we are using when processing the
280          *            current file.
281          * @param curLine
282          *            The current line that we are processing. It should be the
283          *            beginning line of the annotation construct.
284          * @param beginLineNum
285          *            The beginning line number of the interface construct
286          *            annotation
287          * @throws WrongAnnotationException
288          */
289         private void extractGlobalConstruct(File file, LineNumberReader lineReader, String curLine, int beginLineNum)
290                         throws WrongAnnotationException {
291                 ArrayList<String> annotations = extractTillConstructEnd(file, lineReader, curLine, beginLineNum);
292                 GlobalConstruct construct = new GlobalConstruct(file, beginLineNum, annotations);
293                 if (globalConstruct != null) { // Check if we have seen a global state
294                                                                                 // construct earlier
295                         File otherDefinitionFile = globalConstruct.file;
296                         int otherDefinitionLine = globalConstruct.beginLineNum;
297                         String errMsg = "Multiple definition of global state.\n" + "\tAnother definition is in File \""
298                                         + otherDefinitionFile.getName() + "\" (Line " + otherDefinitionLine + ").";
299                         WrongAnnotationException.err(file, beginLineNum, errMsg);
300                 }
301                 globalConstruct = construct;
302         }
303
304         /**
305          * @param file
306          *            The current file we are processing
307          * @param lineReader
308          *            Call this function when the lineReader will read the beginning
309          *            of the definition right away
310          * @return The line number of the ending line of the interfae definition. If
311          *         returning -1, it means the curl symbols in the interface do not
312          *         match
313          * @throws WrongAnnotationException
314          */
315         private int findEndLineNumFunction(File file, LineNumberReader lineReader) throws WrongAnnotationException {
316                 String line;
317                 // FIXME: We assume that in the string of the code, there does not exist
318                 // the symbol '{' & '{'
319                 try {
320                         boolean foundFirstCurl = false;
321                         int unmatchedCnt = 0;
322                         while ((line = lineReader.readLine()) != null) {
323                                 // process the line.
324                                 // System.out.println(line);
325
326                                 // Extract the one-liner construct first
327                                 extractOneLineConstruct(file, lineReader.getLineNumber(), line);
328
329                                 for (int i = 0; i < line.length(); i++) {
330                                         char ch = line.charAt(i);
331                                         if (ch == '{') {
332                                                 foundFirstCurl = true;
333                                                 unmatchedCnt++;
334                                         } else if (ch == '}') {
335                                                 unmatchedCnt--;
336                                         }
337                                         // The current line is the end of the function
338                                         if (foundFirstCurl && unmatchedCnt == 0) {
339                                                 int endLineNumFunction = lineReader.getLineNumber();
340                                                 return endLineNumFunction;
341                                         }
342                                 }
343                         }
344                 } catch (IOException e) {
345                         e.printStackTrace();
346                 }
347                 // -1 means the curl symbols in the interface do not match
348                 return -1;
349         }
350
351         /**
352          * <p>
353          * A sub-routine to extract the interface construct. When called, we have
354          * already match the beginning of the construct, and we also need to find
355          * the ending line number of the closing brace of the corresponding
356          * function.
357          * </p>
358          * 
359          * @param file
360          *            The file that we are processing
361          * @param lineReader
362          *            The LineNumberReader that we are using when processing the
363          *            current file.
364          * @param curLine
365          *            The current line that we are processing. It should be the
366          *            beginning line of the annotation construct.
367          * @param beginLineNum
368          *            The beginning line number of the interface construct
369          *            annotation
370          * @throws WrongAnnotationException
371          * @throws IOException
372          * @throws ParseException
373          */
374         private void extractInterfaceConstruct(File file, LineNumberReader lineReader, String curLine, int beginLineNum)
375                         throws WrongAnnotationException, IOException, ParseException {
376                 ArrayList<String> annotations = extractTillConstructEnd(file, lineReader, curLine, beginLineNum);
377                 int endLineNum = lineReader.getLineNumber();
378                 InterfaceConstruct construct = new InterfaceConstruct(file, beginLineNum, endLineNum, annotations);
379                 addInterfaceConstruct(construct);
380
381                 // Process the corresponding interface function declaration header
382                 String line = null;
383                 int lineNum = -1;
384                 String errMsg;
385                 try {
386                         line = lineReader.readLine();
387                         lineNum = lineReader.getLineNumber();
388                         construct.processFunctionDeclaration(line);
389                 } catch (IOException e) {
390                         errMsg = "Spec error in file \"" + file.getName() + "\", Line " + lineNum
391                                         + " :\n\tThe function declaration should take only one line and have the correct syntax (follow the annotations immediately)\n";
392                         System.out.println(errMsg);
393                         throw e;
394                 } catch (ParseException e) {
395                         errMsg = "Spec error in file \"" + file.getName() + "\", Line " + lineNum
396                                         + " :\n\tThe function declaration should take only one line and have the correct syntax (follow the annotations immediately)\n";
397                         System.out.println(errMsg);
398                         throw e;
399                 }
400
401                 // Now we find the end of the interface definition
402                 int endLineNumFunction = findEndLineNumFunction(file, lineReader);
403                 construct.setEndLineNumFunction(endLineNumFunction);
404                 if (endLineNumFunction == -1) {
405                         WrongAnnotationException.err(file, beginLineNum,
406                                         "The interface definition does NOT have matching curls '}'");
407                 }
408         }
409
410         /**
411          * <p>
412          * A sub-routine to extract the ordering point construct. When called, we
413          * have already match the beginning of the construct.
414          * </p>
415          * 
416          * @param file
417          *            The file that we are processing
418          * @param beginLineNum
419          *            The beginning line number of the interface construct
420          *            annotation
421          * @param curLine
422          *            The current line that we are processing. It should be the
423          *            beginning line of the annotation construct.
424          * @param type
425          *            The type of ordering point construct we are processing
426          * @throws WrongAnnotationException
427          */
428         private void extractOPConstruct(File file, int beginLineNum, String curLine, OPType type)
429                         throws WrongAnnotationException {
430                 String condition = null;
431                 String label = null;
432
433                 // "(\(\s?(\w+)\s?\))?\s:\s?(.+)\*/\s?$"
434                 Pattern regexp = Pattern.compile("(\\(\\s*(\\w+)\\s*\\))?\\s*:\\s*(.+)\\*/\\s*$");
435                 Matcher matcher = regexp.matcher(curLine);
436                 if (matcher.find()) {
437                         label = matcher.group(2);
438                         condition = matcher.group(3);
439                 } else {
440                         WrongAnnotationException.err(file, beginLineNum,
441                                         "Wrong syntax for the ordering point construct. You might need a colon before the condition.");
442                 }
443                 OPConstruct op = new OPConstruct(file, beginLineNum, type, label, condition, curLine);
444                 addOPConstruct(op);
445         }
446
447         /**
448          * <p>
449          * A sub-routine to extract the entry construct. When called, we have
450          * already match the beginning of the construct.
451          * </p>
452          * 
453          * @param file
454          *            The file that we are processing
455          * @param beginLineNum
456          *            The beginning line number of the interface construct
457          *            annotation
458          * @param curLine
459          *            Current line being processed
460          * @throws WrongAnnotationException
461          */
462         public void extractEntryConstruct(File file, int beginLineNum, String curLine) throws WrongAnnotationException {
463                 addEntryConstruct(file, new EntryConstruct(file, beginLineNum, curLine));
464         }
465
466         /**
467          * <p>
468          * A sub-routine to extract those annotation constructs that take only one
469          * line --- Entry, OPDefine, PotentialOP, OPCheck, OPClear and OPClearDefin.
470          * </p>
471          * 
472          * @param file
473          *            The file that we are processing
474          * @param beginLineNum
475          *            The beginning line number of the interface construct
476          *            annotation
477          * @param curLine
478          *            The current line that we are processing. It should be the
479          *            beginning line of the annotation construct.
480          * @throws WrongAnnotationException
481          */
482         private void extractOneLineConstruct(File file, int beginLineNum, String curLine) throws WrongAnnotationException {
483                 // "/\*\*\s*@(Entry|OPDefine|PotentialOP|OPCheck|OPClear|OPClearDefine)"
484                 Pattern regexpBegin = Pattern.compile("/\\*\\*\\s*@(\\w+)");
485                 Matcher matcher = regexpBegin.matcher(curLine);
486                 matcher.reset(curLine);
487                 if (matcher.find()) {
488                         String name = matcher.group(1);
489                         if (name.equals("Entry"))
490                                 extractEntryConstruct(file, beginLineNum, curLine);
491                         else if (name.equals("OPDefine") || name.equals("PotentialOP") || name.equals("OPCheck")
492                                         || name.equals("OPClear") || name.equals("OPClearDefine"))
493                                 extractOPConstruct(file, beginLineNum, curLine, OPType.valueOf(name));
494                 }
495         }
496
497         /**
498          * <p>
499          * This function will process a given C/C++ file ( .h, .c or .cc). It will
500          * extract all the headers included in that file, and all the annotation
501          * constructs specified in that file. We then will store the information in
502          * the corresponding containers.
503          * </p>
504          * 
505          * <p>
506          * The basic idea is to read the file line by line, and then use regular
507          * expression to match the specific annotations or the header files.
508          * </p>
509          * 
510          * @param file
511          *            The file object of the corresponding file to be processed
512          * @throws WrongAnnotationException
513          * @throws ParseException
514          */
515         public void extractConstruct(File file) throws WrongAnnotationException, ParseException {
516                 BufferedReader br = null;
517                 LineNumberReader lineReader = null;
518                 try {
519                         // Initial settings for processing the lines
520                         br = new BufferedReader(new FileReader(file));
521                         lineReader = new LineNumberReader(br);
522                         // "/\*\*\s*@(DeclareState|Interface)"
523                         Pattern regexpBegin = Pattern.compile("/\\*\\*\\s*@(DeclareState|Interface)");
524                         Matcher matcher = regexpBegin.matcher("");
525
526                         String line;
527                         while ((line = lineReader.readLine()) != null) {
528                                 // Start to process the line
529
530                                 // First try to process the line to see if it's a header file
531                                 // include
532                                 boolean succ = extractHeaders(line);
533                                 if (succ) // It's a header line and we successfully extract it
534                                         continue;
535
536                                 int beginLineNum = lineReader.getLineNumber();
537                                 // Extract the one-liner construct first
538                                 extractOneLineConstruct(file, beginLineNum, line);
539
540                                 // Now we process the line to see if it's an annotation (State
541                                 // or Interface)
542                                 matcher.reset(line); // reset the input
543                                 if (matcher.find()) { // Found the beginning line
544                                         // The matching annotation name
545                                         String constructName = matcher.group(1);
546
547                                         // Process each annotation accordingly
548                                         if (constructName.equals(SpecNaming.DeclareState)) {
549                                                 extractGlobalConstruct(file, lineReader, line, beginLineNum);
550                                         } else if (constructName.equals(SpecNaming.Interface)) {
551                                                 extractInterfaceConstruct(file, lineReader, line, beginLineNum);
552                                         } else {
553                                                 WrongAnnotationException.err(file, beginLineNum,
554                                                                 constructName + " is not a supported annotation.");
555                                         }
556
557                                 }
558                         }
559                 } catch (FileNotFoundException e) {
560                         e.printStackTrace();
561                 } catch (IOException e) {
562                         e.printStackTrace();
563                 } finally {
564                         try {
565                                 lineReader.close();
566                         } catch (IOException e) {
567                                 e.printStackTrace();
568                         }
569                 }
570         }
571
572         /**
573          * <p>
574          * Given a list of files, it scans each file and add found SpecConstrcut to
575          * the _constructs list.
576          * </p>
577          * 
578          * @param files
579          *            The list of files that needs to be processed. In general, this
580          *            list only need to contain those that have specification
581          *            annotations
582          * @throws WrongAnnotationException
583          * @throws ParseException
584          */
585         public void extract(File[] files) throws WrongAnnotationException, ParseException {
586                 for (int i = 0; i < files.length; i++)
587                         extract(files[i]);
588
589                 // Check basic specification semantics
590                 checkSemantics();
591         }
592         
593         public void extract(ArrayList<File> files) throws WrongAnnotationException, ParseException {
594                 for (int i = 0; i < files.size(); i++)
595                         extract(files.get(i));
596
597                 // Check basic specification semantics
598                 checkSemantics();
599         }
600
601         /**
602          * <p>
603          * Extract the specification annotations and header files in the current
604          * file. This function should generally be called by extractFiles.
605          * </p>
606          * 
607          * @param files
608          *            The list of files that needs to be processed. In general, this
609          *            list only need to contain those that have specification
610          *            annotations
611          * @throws WrongAnnotationException
612          * @throws ParseException
613          */
614         public void extract(File file) throws WrongAnnotationException, ParseException {
615                 extractConstruct(file);
616         }
617 }