0e470e0e5361bc9f476b6e69f06d21709faffcd2
[cdsspec-compiler.git] / src / edu / uci / eecs / specExtraction / SpecExtractor.java
1 package edu.uci.eecs.specExtraction;
2
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.FileNotFoundException;
6 import java.io.FileReader;
7 import java.io.IOException;
8 import java.io.LineNumberReader;
9 import java.util.ArrayList;
10 import java.util.Collections;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15
16 import edu.uci.eecs.codeGenerator.CodeGeneratorUtils;
17 import edu.uci.eecs.codeGenerator.Environment;
18 import edu.uci.eecs.utilParser.ParseException;
19
20 /**
21  * <p>
22  * This class represents the specification extractor of the specification. The
23  * main function of this class is to read C/C++11 source files and extract the
24  * corresponding specifications, and record corresponding information such as
25  * location, e.g., the file name and the line number, to help the code
26  * generation process.
27  * </p>
28  * 
29  * @author Peizhao Ou
30  * 
31  */
32 public class SpecExtractor {
33         public final HashMap<File, ArrayList<InterfaceConstruct>> interfaceListMap;
34         public final HashMap<File, ArrayList<OPConstruct>> OPListMap;
35         public final HashSet<String> OPLabelSet;
36         // Note that we only allow one entry per file at most
37         public final HashMap<File, EntryConstruct> entryMap;
38
39         public final HashSet<String> headerFiles;
40
41         private GlobalConstruct globalConstruct;
42
43         public SpecExtractor() {
44                 interfaceListMap = new HashMap<File, ArrayList<InterfaceConstruct>>();
45                 OPListMap = new HashMap<File, ArrayList<OPConstruct>>();
46                 OPLabelSet = new HashSet<String>();
47                 entryMap = new HashMap<File, EntryConstruct>();
48                 headerFiles = new HashSet<String>();
49                 globalConstruct = null;
50         }
51
52         private void addInterfaceConstruct(InterfaceConstruct construct) {
53                 ArrayList<InterfaceConstruct> list = interfaceListMap
54                                 .get(construct.file);
55                 if (list == null) {
56                         list = new ArrayList<InterfaceConstruct>();
57                         interfaceListMap.put(construct.file, list);
58                 }
59                 list.add(construct);
60         }
61
62         private void addOPConstruct(OPConstruct construct) {
63                 ArrayList<OPConstruct> list = OPListMap.get(construct.file);
64                 if (list == null) {
65                         list = new ArrayList<OPConstruct>();
66                         OPListMap.put(construct.file, list);
67                 }
68                 list.add(construct);
69         }
70
71         private void addEntryConstruct(File file, EntryConstruct construct)
72                         throws WrongAnnotationException {
73                 EntryConstruct old = entryMap.get(file);
74                 if (old == null)
75                         entryMap.put(file, construct);
76                 else { // Error processing
77                         String errMsg = "Multiple @Entry annotations in the same file.\n\t Other @Entry at Line "
78                                         + old.beginLineNum + ".";
79                         WrongAnnotationException.err(file, construct.beginLineNum, errMsg);
80                 }
81         }
82
83         public GlobalConstruct getGlobalConstruct() {
84                 return this.globalConstruct;
85         }
86
87         /**
88          * <p>
89          * A print out function for the purpose of debugging. Note that we better
90          * call this function after having called the checkSemantics() function to
91          * check annotation consistency.
92          * </p>
93          */
94         public void printAnnotations() {
95                 System.out
96                                 .println("/**********    Print out of specification extraction    **********/");
97                 System.out.println("// Extracted header files");
98                 for (String header : headerFiles)
99                         System.out.println(header);
100
101                 System.out.println("// Global State Construct");
102                 if (globalConstruct != null)
103                         System.out.println(globalConstruct);
104
105                 for (File file : interfaceListMap.keySet()) {
106                         ArrayList<InterfaceConstruct> list = interfaceListMap.get(file);
107                         System.out.println("// Interface in file: " + file.getName());
108                         for (InterfaceConstruct construct : list) {
109                                 System.out.println(construct);
110                                 System.out.println("EndLineNumFunc: "
111                                                 + construct.getEndLineNumFunction());
112                         }
113                 }
114
115                 for (File file : OPListMap.keySet()) {
116                         System.out.println("// Ordering points in file: " + file.getName());
117                         ArrayList<OPConstruct> list = OPListMap.get(file);
118                         for (OPConstruct construct : list)
119                                 System.out.println(construct);
120                 }
121
122                 for (File file : entryMap.keySet()) {
123                         System.out.println("// Entry in file: " + file.getName());
124                         System.out.println(entryMap.get(file));
125                 }
126         }
127
128         /**
129          * <p>
130          * Perform basic semantics checking of the extracted specification.
131          * </p>
132          * 
133          * @return
134          * @throws WrongAnnotationException
135          */
136         public void checkSemantics() throws WrongAnnotationException {
137                 String errMsg = null;
138
139                 // Assert that we have defined and only defined one global state
140                 // annotation
141                 if (globalConstruct == null) {
142                         errMsg = "Spec error: There should be one global state annotation.\n";
143                         throw new WrongAnnotationException(errMsg);
144                 }
145
146                 // Assert that the interface constructs have unique label name
147                 HashMap<String, InterfaceConstruct> interfaceMap = new HashMap<String, InterfaceConstruct>();
148                 for (File f : interfaceListMap.keySet()) {
149                         ArrayList<InterfaceConstruct> list = interfaceListMap.get(f);
150                         if (list != null) {
151                                 for (InterfaceConstruct construct : list) {
152                                         InterfaceConstruct existingConstruct = interfaceMap
153                                                         .get(construct.getName());
154                                         if (existingConstruct != null) { // Error
155                                                 errMsg = "Interface labels duplication with: \""
156                                                                 + construct.getName() + "\" in File \""
157                                                                 + existingConstruct.file.getName()
158                                                                 + "\", Line " + existingConstruct.beginLineNum
159                                                                 + ".";
160                                                 WrongAnnotationException.err(construct.file,
161                                                                 construct.beginLineNum, errMsg);
162                                         } else {
163                                                 interfaceMap.put(construct.getName(), construct);
164                                         }
165                                 }
166                         }
167                 }
168
169                 // Process ordering point labels
170                 for (File file : OPListMap.keySet()) {
171                         ArrayList<OPConstruct> list = OPListMap.get(file);
172                         for (OPConstruct construct : list) {
173                                 if (construct.type == OPType.OPCheck
174                                                 || construct.type == OPType.PotentialOP) {
175                                         String label = construct.label;
176                                         OPLabelSet.add(label);
177                                 }
178                         }
179                 }
180
181         }
182
183         /**
184          * <p>
185          * This function applies on a String (a plain line of text) to check whether
186          * the current line is a C/C++ header include statement. If it is, it
187          * extracts the header file name and store it, and returns true; otherwise,
188          * it returns false.
189          * </p>
190          * 
191          * @param line
192          *            The line of text to be processed
193          * @return Returns true if the current line is a C/C++ header include
194          *         statement
195          */
196         public boolean extractHeaders(String line) {
197                 // "^( |\t)*#include( |\t)+("|<)([a-zA-Z_0-9\-\.])+("|>)"
198                 Pattern regexp = Pattern
199                                 .compile("^( |\\t)*(#include)( |\\t)+(\"|<)([a-zA-Z_0-9\\-\\.]+)(\"|>)");
200                 Matcher matcher = regexp.matcher(line);
201
202                 // process the line.
203                 if (matcher.find()) {
204                         String header = null;
205                         String braceSymbol = matcher.group(4);
206                         if (braceSymbol.equals("<"))
207                                 header = "<" + matcher.group(5) + ">";
208                         else
209                                 header = "\"" + matcher.group(5) + "\"";
210                         if (!SpecNaming.isPreIncludedHeader(header)) {
211                                 headerFiles.add(header);
212                         }
213                         return true;
214                 } else
215                         return false;
216         }
217
218         /**
219          * <p>
220          * A sub-routine to extract the construct from beginning till end. When
221          * called, we have already match the beginning of the construct. We will
222          * call this sub-routine when we extract the interface construct and the
223          * global state construct.
224          * </p>
225          * 
226          * <p>
227          * The side effect of this function is that the lineReader has just read the
228          * end of the construct, meaning that the caller can get the end line number
229          * by calling lineReader.getLineNumber().
230          * </p>
231          * 
232          * @param file
233          *            The file that we are processing
234          * @param lineReader
235          *            The LineNumberReader that we are using when processing the
236          *            current file.
237          * @param file
238          *            The file that we are processing
239          * @param curLine
240          *            The current line that we are processing. It should be the
241          *            beginning line of the annotation construct.
242          * @param beginLineNum
243          *            The beginning line number of the interface construct
244          *            annotation
245          * @return Returns the annotation string list of the current construct
246          * @throws WrongAnnotationException
247          */
248         private ArrayList<String> extractTillConstructEnd(File file,
249                         LineNumberReader lineReader, String curLine, int beginLineNum)
250                         throws WrongAnnotationException {
251                 ArrayList<String> annotations = new ArrayList<String>();
252                 annotations.add(curLine);
253                 // System.out.println(curLine);
254                 // Initial settings for matching lines
255                 // "\*/( |\t)*$"
256                 Pattern regexpEnd = Pattern.compile("\\*/( |\\t)*$");
257                 Matcher matcher = regexpEnd.matcher(curLine);
258                 if (matcher.find()) { // The beginning line is also the end line
259                         annotations.add(curLine);
260                         return annotations;
261                 } else {
262                         try {
263                                 String line;
264                                 while ((line = lineReader.readLine()) != null) {
265                                         // process the line.
266                                         // System.out.println(line);
267
268                                         matcher.reset(line); // reset the input
269                                         annotations.add(line);
270                                         if (matcher.find())
271                                                 return annotations;
272                                 }
273                                 WrongAnnotationException
274                                                 .err(file,
275                                                                 beginLineNum,
276                                                                 "The interface annotation should have the matching closing symbol closing \"*/\"");
277                         } catch (IOException e) {
278                                 e.printStackTrace();
279                         }
280                 }
281                 return null;
282         }
283
284         /**
285          * <p>
286          * A sub-routine to extract the global construct. When called, we have
287          * already match the beginning of the construct.
288          * </p>
289          * 
290          * @param file
291          *            The file that we are processing
292          * @param lineReader
293          *            The LineNumberReader that we are using when processing the
294          *            current file.
295          * @param curLine
296          *            The current line that we are processing. It should be the
297          *            beginning line of the annotation construct.
298          * @param beginLineNum
299          *            The beginning line number of the interface construct
300          *            annotation
301          * @throws WrongAnnotationException
302          */
303         private void extractGlobalConstruct(File file, LineNumberReader lineReader,
304                         String curLine, int beginLineNum) throws WrongAnnotationException {
305                 ArrayList<String> annotations = extractTillConstructEnd(file,
306                                 lineReader, curLine, beginLineNum);
307                 GlobalConstruct construct = new GlobalConstruct(file, beginLineNum,
308                                 annotations);
309                 if (globalConstruct != null) { // Check if we have seen a global state
310                                                                                 // construct earlier
311                         File otherDefinitionFile = globalConstruct.file;
312                         int otherDefinitionLine = globalConstruct.beginLineNum;
313                         String errMsg = "Multiple definition of global state.\n"
314                                         + "\tAnother definition is in File \""
315                                         + otherDefinitionFile.getName() + "\" (Line "
316                                         + otherDefinitionLine + ").";
317                         WrongAnnotationException.err(file, beginLineNum, errMsg);
318                 }
319                 globalConstruct = construct;
320         }
321
322         /**
323          * @param file
324          *            The current file we are processing
325          * @param lineReader
326          *            Call this function when the lineReader will read the beginning
327          *            of the definition right away
328          * @param startingLine
329          *            The line that we should start processing
330          * @return The line number of the ending line of the interfae definition. If
331          *         returning -1, it means the curl symbols in the interface do not
332          *         match
333          * @throws WrongAnnotationException
334          */
335         private int findEndLineNumFunction(File file, LineNumberReader lineReader,
336                         String startingLine) throws WrongAnnotationException {
337                 String line = startingLine;
338                 // FIXME: We assume that in the string of the code, there does not exist
339                 // the symbol '{' & '{'
340                 try {
341                         boolean foundFirstCurl = false;
342                         int unmatchedCnt = 0;
343                         do {
344                                 // process the line.
345                                 // System.out.println(line);
346
347                                 // Extract the one-liner construct first
348                                 extractOneLineConstruct(file, lineReader.getLineNumber(), line);
349
350                                 for (int i = 0; i < line.length(); i++) {
351                                         char ch = line.charAt(i);
352                                         if (ch == '{') {
353                                                 foundFirstCurl = true;
354                                                 unmatchedCnt++;
355                                         } else if (ch == '}') {
356                                                 unmatchedCnt--;
357                                         }
358                                         // The current line is the end of the function
359                                         if (foundFirstCurl && unmatchedCnt == 0) {
360                                                 int endLineNumFunction = lineReader.getLineNumber();
361                                                 return endLineNumFunction;
362                                         }
363                                 }
364                         } while ((line = lineReader.readLine()) != null);
365                 } catch (IOException e) {
366                         e.printStackTrace();
367                 }
368                 // -1 means the curl symbols in the interface do not match
369                 return -1;
370         }
371
372         /**
373          * <p>
374          * A sub-routine to extract the interface construct. When called, we have
375          * already match the beginning of the construct, and we also need to find
376          * the ending line number of the closing brace of the corresponding
377          * function.
378          * </p>
379          * 
380          * @param file
381          *            The file that we are processing
382          * @param lineReader
383          *            The LineNumberReader that we are using when processing the
384          *            current file.
385          * @param curLine
386          *            The current line that we are processing. It should be the
387          *            beginning line of the annotation construct.
388          * @param beginLineNum
389          *            The beginning line number of the interface construct
390          *            annotation
391          * @throws WrongAnnotationException
392          * @throws IOException
393          * @throws ParseException
394          */
395         private void extractInterfaceConstruct(File file,
396                         LineNumberReader lineReader, String curLine, int beginLineNum)
397                         throws WrongAnnotationException, IOException, ParseException {
398                 ArrayList<String> annotations = extractTillConstructEnd(file,
399                                 lineReader, curLine, beginLineNum);
400                 int endLineNum = lineReader.getLineNumber();
401                 InterfaceConstruct construct = new InterfaceConstruct(file,
402                                 beginLineNum, endLineNum, annotations);
403                 addInterfaceConstruct(construct);
404
405                 // Process the corresponding interface function declaration header
406                 String line = null;
407                 int lineNum = -1;
408                 String errMsg;
409                 try {
410                         line = lineReader.readLine();
411                         lineNum = lineReader.getLineNumber();
412                         construct.processFunctionDeclaration(line);
413                 } catch (IOException e) {
414                         errMsg = "Spec error in file \""
415                                         + file.getName()
416                                         + "\", Line "
417                                         + lineNum
418                                         + " :\n\tThe function declaration should take only one line and have the correct syntax (follow the annotations immediately)\n";
419                         System.out.println(errMsg);
420                         throw e;
421                 } catch (ParseException e) {
422                         errMsg = "Spec error in file \""
423                                         + file.getName()
424                                         + "\", Line "
425                                         + lineNum
426                                         + " :\n\tThe function declaration should take only one line and have the correct syntax (follow the annotations immediately)\n";
427                         System.out.println(errMsg);
428                         throw e;
429                 }
430
431                 // Now we find the end of the interface definition
432                 int endLineNumFunction = findEndLineNumFunction(file, lineReader, line);
433                 construct.setEndLineNumFunction(endLineNumFunction);
434                 if (endLineNumFunction == -1) {
435                         WrongAnnotationException
436                                         .err(file, beginLineNum,
437                                                         "The interface definition does NOT have matching curls '}'");
438                 }
439         }
440
441         /**
442          * <p>
443          * A sub-routine to extract the ordering point construct. When called, we
444          * have already match the beginning of the construct.
445          * </p>
446          * 
447          * @param file
448          *            The file that we are processing
449          * @param beginLineNum
450          *            The beginning line number of the interface construct
451          *            annotation
452          * @param curLine
453          *            The current line that we are processing. It should be the
454          *            beginning line of the annotation construct.
455          * @param type
456          *            The type of ordering point construct we are processing
457          * @throws WrongAnnotationException
458          */
459         private void extractOPConstruct(File file, int beginLineNum,
460                         String curLine, OPType type) throws WrongAnnotationException {
461                 String condition = null;
462                 String label = null;
463
464                 // "(\(\s?(\w+)\s?\))?\s:\s?(.+)\*/\s?$"
465                 Pattern regexp = Pattern
466                                 .compile("(\\(\\s*(\\w+)\\s*\\))?\\s*:\\s*(.+)\\*/\\s*$");
467                 Matcher matcher = regexp.matcher(curLine);
468                 if (matcher.find()) {
469                         label = matcher.group(2);
470                         condition = matcher.group(3);
471                 } else {
472                         WrongAnnotationException
473                                         .err(file,
474                                                         beginLineNum,
475                                                         "Wrong syntax for the ordering point construct. You might need a colon before the condition.");
476                 }
477                 OPConstruct op = new OPConstruct(file, beginLineNum, type, label,
478                                 condition, curLine);
479                 addOPConstruct(op);
480         }
481
482         /**
483          * <p>
484          * A sub-routine to extract the entry construct. When called, we have
485          * already match the beginning of the construct.
486          * </p>
487          * 
488          * @param file
489          *            The file that we are processing
490          * @param beginLineNum
491          *            The beginning line number of the interface construct
492          *            annotation
493          * @param curLine
494          *            Current line being processed
495          * @throws WrongAnnotationException
496          */
497         public void extractEntryConstruct(File file, int beginLineNum,
498                         String curLine) throws WrongAnnotationException {
499                 addEntryConstruct(file, new EntryConstruct(file, beginLineNum, curLine));
500         }
501
502         /**
503          * <p>
504          * A sub-routine to extract those annotation constructs that take only one
505          * line --- Entry, OPDefine, PotentialOP, OPCheck, OPClear and OPClearDefin.
506          * </p>
507          * 
508          * @param file
509          *            The file that we are processing
510          * @param beginLineNum
511          *            The beginning line number of the interface construct
512          *            annotation
513          * @param curLine
514          *            The current line that we are processing. It should be the
515          *            beginning line of the annotation construct.
516          * @throws WrongAnnotationException
517          */
518         private void extractOneLineConstruct(File file, int beginLineNum,
519                         String curLine) throws WrongAnnotationException {
520                 // "/\*\*\s*@(Entry|OPDefine|PotentialOP|OPCheck|OPClear|OPClearDefine)"
521                 Pattern regexpBegin = Pattern.compile("/\\*\\*\\s*@(\\w+)");
522                 Matcher matcher = regexpBegin.matcher(curLine);
523                 matcher.reset(curLine);
524                 if (matcher.find()) {
525                         String name = matcher.group(1);
526                         if (name.equals("Entry"))
527                                 extractEntryConstruct(file, beginLineNum, curLine);
528                         else if (name.equals("OPDefine") || name.equals("PotentialOP")
529                                         || name.equals("OPCheck") || name.equals("OPClear")
530                                         || name.equals("OPClearDefine"))
531                                 extractOPConstruct(file, beginLineNum, curLine,
532                                                 OPType.valueOf(name));
533                 }
534         }
535
536         /**
537          * <p>
538          * This function will process a given C/C++ file ( .h, .c or .cc). It will
539          * extract all the headers included in that file, and all the annotation
540          * constructs specified in that file. We then will store the information in
541          * the corresponding containers.
542          * </p>
543          * 
544          * <p>
545          * The basic idea is to read the file line by line, and then use regular
546          * expression to match the specific annotations or the header files.
547          * </p>
548          * 
549          * @param file
550          *            The file object of the corresponding file to be processed
551          * @throws WrongAnnotationException
552          * @throws ParseException
553          */
554         public void extractConstruct(File file) throws WrongAnnotationException,
555                         ParseException {
556                 BufferedReader br = null;
557                 LineNumberReader lineReader = null;
558                 try {
559                         // Initial settings for processing the lines
560                         br = new BufferedReader(new FileReader(file));
561                         lineReader = new LineNumberReader(br);
562                         // "/\*\*\s*@(DeclareState|Interface)"
563                         Pattern regexpBegin = Pattern
564                                         .compile("/\\*\\*\\s*@(DeclareState|Interface)");
565                         Matcher matcher = regexpBegin.matcher("");
566
567                         String line;
568                         while ((line = lineReader.readLine()) != null) {
569                                 // Start to process the line
570
571                                 // First try to process the line to see if it's a header file
572                                 // include
573                                 boolean succ = extractHeaders(line);
574                                 if (succ) // It's a header line and we successfully extract it
575                                         continue;
576
577                                 int beginLineNum = lineReader.getLineNumber();
578                                 // Extract the one-liner construct first
579                                 extractOneLineConstruct(file, beginLineNum, line);
580
581                                 // Now we process the line to see if it's an annotation (State
582                                 // or Interface)
583                                 matcher.reset(line); // reset the input
584                                 if (matcher.find()) { // Found the beginning line
585                                         // The matching annotation name
586                                         String constructName = matcher.group(1);
587
588                                         // Process each annotation accordingly
589                                         if (constructName.equals(SpecNaming.DeclareState)) {
590                                                 extractGlobalConstruct(file, lineReader, line,
591                                                                 beginLineNum);
592                                         } else if (constructName.equals(SpecNaming.Interface)) {
593                                                 extractInterfaceConstruct(file, lineReader, line,
594                                                                 beginLineNum);
595                                         } else {
596                                                 WrongAnnotationException.err(file, beginLineNum,
597                                                                 constructName
598                                                                                 + " is not a supported annotation.");
599                                         }
600
601                                 }
602                         }
603                 } catch (FileNotFoundException e) {
604                         e.printStackTrace();
605                 } catch (IOException e) {
606                         e.printStackTrace();
607                 } finally {
608                         try {
609                                 lineReader.close();
610                         } catch (IOException e) {
611                                 e.printStackTrace();
612                         }
613                 }
614         }
615
616         /**
617          * <p>
618          * Given a list of files, it scans each file and add found SpecConstrcut to
619          * the _constructs list.
620          * </p>
621          * 
622          * @param files
623          *            The list of files that needs to be processed. In general, this
624          *            list only need to contain those that have specification
625          *            annotations
626          * @throws WrongAnnotationException
627          * @throws ParseException
628          */
629         public void extract(File[] files) throws WrongAnnotationException,
630                         ParseException {
631                 for (int i = 0; i < files.length; i++)
632                         extract(files[i]);
633
634                 // Check basic specification semantics
635                 checkSemantics();
636         }
637
638         public void extract(ArrayList<File> files) throws WrongAnnotationException,
639                         ParseException {
640                 for (int i = 0; i < files.size(); i++)
641                         extract(files.get(i));
642
643                 // Check basic specification semantics
644                 checkSemantics();
645         }
646
647         /**
648          * <p>
649          * Extract the specification annotations and header files in the current
650          * file. This function should generally be called by extractFiles.
651          * </p>
652          * 
653          * @param files
654          *            The list of files that needs to be processed. In general, this
655          *            list only need to contain those that have specification
656          *            annotations
657          * @throws WrongAnnotationException
658          * @throws ParseException
659          */
660         public void extract(File file) throws WrongAnnotationException,
661                         ParseException {
662                 extractConstruct(file);
663         }
664 }