edits
[cdsspec-compiler.git] / src / edu / uci / eecs / specExtraction / SpecExtractor.java
1 package edu.uci.eecs.specExtraction;
2
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.FileNotFoundException;
6 import java.io.FileReader;
7 import java.io.IOException;
8 import java.io.LineNumberReader;
9 import java.util.ArrayList;
10 import java.util.Collections;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15
16 import edu.uci.eecs.codeGenerator.CodeGeneratorUtils;
17 import edu.uci.eecs.codeGenerator.Environment;
18 import edu.uci.eecs.utilParser.ParseException;
19
20 /**
21  * <p>
22  * This class represents the specification extractor of the specification. The
23  * main function of this class is to read C/C++11 source files and extract the
24  * corresponding specifications, and record corresponding information such as
25  * location, e.g., the file name and the line number, to help the code
26  * generation process.
27  * </p>
28  * 
29  * @author Peizhao Ou
30  * 
31  */
32 public class SpecExtractor {
33         public final HashMap<File, ArrayList<InterfaceConstruct>> interfaceListMap;
34         public final HashMap<File, ArrayList<OPConstruct>> OPListMap;
35         public final HashSet<String> OPLabelSet;
36         // Note that we only allow one entry per file at most
37         public final HashMap<File, EntryConstruct> entryMap;
38
39         public final HashSet<String> headerFiles;
40         
41         // In the generated header file, we need to forward the user-defined 
42         public final HashSet<String> forwardClass;
43
44         private GlobalConstruct globalConstruct;
45
46         public SpecExtractor() {
47                 interfaceListMap = new HashMap<File, ArrayList<InterfaceConstruct>>();
48                 OPListMap = new HashMap<File, ArrayList<OPConstruct>>();
49                 OPLabelSet = new HashSet<String>();
50                 entryMap = new HashMap<File, EntryConstruct>();
51                 headerFiles = new HashSet<String>();
52                 forwardClass = new HashSet<String>();
53                 globalConstruct = null;
54         }
55
56         private void addInterfaceConstruct(InterfaceConstruct construct) {
57                 ArrayList<InterfaceConstruct> list = interfaceListMap
58                                 .get(construct.file);
59                 if (list == null) {
60                         list = new ArrayList<InterfaceConstruct>();
61                         interfaceListMap.put(construct.file, list);
62                 }
63                 list.add(construct);
64         }
65
66         private void addOPConstruct(OPConstruct construct) {
67                 ArrayList<OPConstruct> list = OPListMap.get(construct.file);
68                 if (list == null) {
69                         list = new ArrayList<OPConstruct>();
70                         OPListMap.put(construct.file, list);
71                 }
72                 list.add(construct);
73         }
74
75         private void addEntryConstruct(File file, EntryConstruct construct)
76                         throws WrongAnnotationException {
77                 EntryConstruct old = entryMap.get(file);
78                 if (old == null)
79                         entryMap.put(file, construct);
80                 else { // Error processing
81                         String errMsg = "Multiple @Entry annotations in the same file.\n\t Other @Entry at Line "
82                                         + old.beginLineNum + ".";
83                         WrongAnnotationException.err(file, construct.beginLineNum, errMsg);
84                 }
85         }
86
87         public GlobalConstruct getGlobalConstruct() {
88                 return this.globalConstruct;
89         }
90
91         /**
92          * <p>
93          * A print out function for the purpose of debugging. Note that we better
94          * call this function after having called the checkSemantics() function to
95          * check annotation consistency.
96          * </p>
97          */
98         public void printAnnotations() {
99                 System.out
100                                 .println("/**********    Print out of specification extraction    **********/");
101                 System.out.println("// Extracted header files");
102                 for (String header : headerFiles)
103                         System.out.println(header);
104
105                 System.out.println("// Global State Construct");
106                 if (globalConstruct != null)
107                         System.out.println(globalConstruct);
108
109                 for (File file : interfaceListMap.keySet()) {
110                         ArrayList<InterfaceConstruct> list = interfaceListMap.get(file);
111                         System.out.println("// Interface in file: " + file.getName());
112                         for (InterfaceConstruct construct : list) {
113                                 System.out.println(construct);
114                                 System.out.println("EndLineNumFunc: "
115                                                 + construct.getEndLineNumFunction());
116                         }
117                 }
118
119                 for (File file : OPListMap.keySet()) {
120                         System.out.println("// Ordering points in file: " + file.getName());
121                         ArrayList<OPConstruct> list = OPListMap.get(file);
122                         for (OPConstruct construct : list)
123                                 System.out.println(construct);
124                 }
125
126                 for (File file : entryMap.keySet()) {
127                         System.out.println("// Entry in file: " + file.getName());
128                         System.out.println(entryMap.get(file));
129                 }
130         }
131
132         /**
133          * <p>
134          * Perform basic semantics checking of the extracted specification.
135          * </p>
136          * 
137          * @return
138          * @throws WrongAnnotationException
139          */
140         public void checkSemantics() throws WrongAnnotationException {
141                 String errMsg = null;
142
143                 // Assert that we have defined and only defined one global state
144                 // annotation
145                 if (globalConstruct == null) {
146                         errMsg = "Spec error: There should be one global state annotation.\n";
147                         throw new WrongAnnotationException(errMsg);
148                 }
149
150                 // Assert that the interface constructs have unique label name
151                 HashMap<String, InterfaceConstruct> interfaceMap = new HashMap<String, InterfaceConstruct>();
152                 for (File f : interfaceListMap.keySet()) {
153                         ArrayList<InterfaceConstruct> list = interfaceListMap.get(f);
154                         if (list != null) {
155                                 for (InterfaceConstruct construct : list) {
156                                         InterfaceConstruct existingConstruct = interfaceMap
157                                                         .get(construct.getName());
158                                         if (existingConstruct != null) { // Error
159                                                 errMsg = "Interface labels duplication with: \""
160                                                                 + construct.getName() + "\" in File \""
161                                                                 + existingConstruct.file.getName()
162                                                                 + "\", Line " + existingConstruct.beginLineNum
163                                                                 + ".";
164                                                 WrongAnnotationException.err(construct.file,
165                                                                 construct.beginLineNum, errMsg);
166                                         } else {
167                                                 interfaceMap.put(construct.getName(), construct);
168                                         }
169                                 }
170                         }
171                 }
172
173                 // Process ordering point labels
174                 for (File file : OPListMap.keySet()) {
175                         ArrayList<OPConstruct> list = OPListMap.get(file);
176                         for (OPConstruct construct : list) {
177                                 if (construct.type == OPType.OPCheck
178                                                 || construct.type == OPType.PotentialOP) {
179                                         String label = construct.label;
180                                         OPLabelSet.add(label);
181                                 }
182                         }
183                 }
184
185         }
186
187         /**
188          * <p>
189          * This function applies on a String (a plain line of text) to check whether
190          * the current line is a C/C++ header include statement. If it is, it
191          * extracts the header file name and store it, and returns true; otherwise,
192          * it returns false.
193          * </p>
194          * 
195          * @param line
196          *            The line of text to be processed
197          * @return Returns true if the current line is a C/C++ header include
198          *         statement
199          */
200         public boolean extractHeaders(String line) {
201                 // "^( |\t)*#include( |\t)+("|<)([a-zA-Z_0-9\-\.])+("|>)"
202                 Pattern regexp = Pattern
203                                 .compile("^( |\\t)*(#include)( |\\t)+(\"|<)([a-zA-Z_0-9\\-\\.]+)(\"|>)");
204                 Matcher matcher = regexp.matcher(line);
205
206                 // process the line.
207                 if (matcher.find()) {
208                         String header = null;
209                         String braceSymbol = matcher.group(4);
210                         if (braceSymbol.equals("<"))
211                                 header = "<" + matcher.group(5) + ">";
212                         else
213                                 header = "\"" + matcher.group(5) + "\"";
214                         if (!SpecNaming.isPreIncludedHeader(header)) {
215                                 headerFiles.add(header);
216                         }
217                         return true;
218                 } else
219                         return false;
220         }
221
222         /**
223          * <p>
224          * A sub-routine to extract the construct from beginning till end. When
225          * called, we have already match the beginning of the construct. We will
226          * call this sub-routine when we extract the interface construct and the
227          * global state construct.
228          * </p>
229          * 
230          * <p>
231          * The side effect of this function is that the lineReader has just read the
232          * end of the construct, meaning that the caller can get the end line number
233          * by calling lineReader.getLineNumber().
234          * </p>
235          * 
236          * @param file
237          *            The file that we are processing
238          * @param lineReader
239          *            The LineNumberReader that we are using when processing the
240          *            current file.
241          * @param file
242          *            The file that we are processing
243          * @param curLine
244          *            The current line that we are processing. It should be the
245          *            beginning line of the annotation construct.
246          * @param beginLineNum
247          *            The beginning line number of the interface construct
248          *            annotation
249          * @return Returns the annotation string list of the current construct
250          * @throws WrongAnnotationException
251          */
252         private ArrayList<String> extractTillConstructEnd(File file,
253                         LineNumberReader lineReader, String curLine, int beginLineNum)
254                         throws WrongAnnotationException {
255                 ArrayList<String> annotations = new ArrayList<String>();
256                 annotations.add(curLine);
257                 // System.out.println(curLine);
258                 // Initial settings for matching lines
259                 // "\*/( |\t)*$"
260                 Pattern regexpEnd = Pattern.compile("\\*/( |\\t)*$");
261                 Matcher matcher = regexpEnd.matcher(curLine);
262                 if (matcher.find()) { // The beginning line is also the end line
263                         annotations.add(curLine);
264                         return annotations;
265                 } else {
266                         try {
267                                 String line;
268                                 while ((line = lineReader.readLine()) != null) {
269                                         // process the line.
270                                         // System.out.println(line);
271
272                                         matcher.reset(line); // reset the input
273                                         annotations.add(line);
274                                         if (matcher.find())
275                                                 return annotations;
276                                 }
277                                 WrongAnnotationException
278                                                 .err(file,
279                                                                 beginLineNum,
280                                                                 "The interface annotation should have the matching closing symbol closing \"*/\"");
281                         } catch (IOException e) {
282                                 e.printStackTrace();
283                         }
284                 }
285                 return null;
286         }
287
288         /**
289          * <p>
290          * A sub-routine to extract the global construct. When called, we have
291          * already match the beginning of the construct.
292          * </p>
293          * 
294          * @param file
295          *            The file that we are processing
296          * @param lineReader
297          *            The LineNumberReader that we are using when processing the
298          *            current file.
299          * @param curLine
300          *            The current line that we are processing. It should be the
301          *            beginning line of the annotation construct.
302          * @param beginLineNum
303          *            The beginning line number of the interface construct
304          *            annotation
305          * @throws WrongAnnotationException
306          */
307         private void extractGlobalConstruct(File file, LineNumberReader lineReader,
308                         String curLine, int beginLineNum) throws WrongAnnotationException {
309                 ArrayList<String> annotations = extractTillConstructEnd(file,
310                                 lineReader, curLine, beginLineNum);
311                 GlobalConstruct construct = new GlobalConstruct(file, beginLineNum,
312                                 annotations);
313                 if (globalConstruct != null) { // Check if we have seen a global state
314                                                                                 // construct earlier
315                         File otherDefinitionFile = globalConstruct.file;
316                         int otherDefinitionLine = globalConstruct.beginLineNum;
317                         String errMsg = "Multiple definition of global state.\n"
318                                         + "\tAnother definition is in File \""
319                                         + otherDefinitionFile.getName() + "\" (Line "
320                                         + otherDefinitionLine + ").";
321                         WrongAnnotationException.err(file, beginLineNum, errMsg);
322                 }
323                 globalConstruct = construct;
324         }
325
326         /**
327          * @param file
328          *            The current file we are processing
329          * @param lineReader
330          *            Call this function when the lineReader will read the beginning
331          *            of the definition right away
332          * @param startingLine
333          *            The line that we should start processing
334          * @return The line number of the ending line of the interfae definition. If
335          *         returning -1, it means the curl symbols in the interface do not
336          *         match
337          * @throws WrongAnnotationException
338          */
339         private int findEndLineNumFunction(File file, LineNumberReader lineReader,
340                         String startingLine) throws WrongAnnotationException {
341                 String line = startingLine;
342                 // FIXME: We assume that in the string of the code, there does not exist
343                 // the symbol '{' & '{'
344                 try {
345                         boolean foundFirstCurl = false;
346                         int unmatchedCnt = 0;
347                         do {
348                                 // process the line.
349                                 // System.out.println(line);
350
351                                 // Extract the one-liner construct first
352                                 extractOneLineConstruct(file, lineReader.getLineNumber(), line);
353
354                                 for (int i = 0; i < line.length(); i++) {
355                                         char ch = line.charAt(i);
356                                         if (ch == '{') {
357                                                 foundFirstCurl = true;
358                                                 unmatchedCnt++;
359                                         } else if (ch == '}') {
360                                                 unmatchedCnt--;
361                                         }
362                                         // The current line is the end of the function
363                                         if (foundFirstCurl && unmatchedCnt == 0) {
364                                                 int endLineNumFunction = lineReader.getLineNumber();
365                                                 return endLineNumFunction;
366                                         }
367                                 }
368                         } while ((line = lineReader.readLine()) != null);
369                 } catch (IOException e) {
370                         e.printStackTrace();
371                 }
372                 // -1 means the curl symbols in the interface do not match
373                 return -1;
374         }
375
376         /**
377          * <p>
378          * A sub-routine to extract the interface construct. When called, we have
379          * already match the beginning of the construct, and we also need to find
380          * the ending line number of the closing brace of the corresponding
381          * function.
382          * </p>
383          * 
384          * @param file
385          *            The file that we are processing
386          * @param lineReader
387          *            The LineNumberReader that we are using when processing the
388          *            current file.
389          * @param curLine
390          *            The current line that we are processing. It should be the
391          *            beginning line of the annotation construct.
392          * @param beginLineNum
393          *            The beginning line number of the interface construct
394          *            annotation
395          * @throws WrongAnnotationException
396          * @throws IOException
397          * @throws ParseException
398          */
399         private void extractInterfaceConstruct(File file,
400                         LineNumberReader lineReader, String curLine, int beginLineNum)
401                         throws WrongAnnotationException, IOException, ParseException {
402                 ArrayList<String> annotations = extractTillConstructEnd(file,
403                                 lineReader, curLine, beginLineNum);
404                 int endLineNum = lineReader.getLineNumber();
405                 InterfaceConstruct construct = new InterfaceConstruct(file,
406                                 beginLineNum, endLineNum, annotations);
407                 addInterfaceConstruct(construct);
408
409                 // Process the corresponding interface function declaration header
410                 String line = null;
411                 int lineNum = -1;
412                 String errMsg;
413                 try {
414                         line = lineReader.readLine();
415                         lineNum = lineReader.getLineNumber();
416                         construct.processFunctionDeclaration(line);
417                         
418                         // Record those user-defined struct
419                         // RET
420                         String returnType = construct.getFunctionHeader().returnType;
421                         if (SpecUtils.isUserDefinedStruct(returnType))
422                                 forwardClass.add(SpecUtils.getPlainType(returnType));
423                         // Arguments
424                         for (VariableDeclaration decl : construct.getFunctionHeader().args) {
425                                 if (SpecUtils.isUserDefinedStruct(decl.type))
426                                         forwardClass.add(SpecUtils.getPlainType(decl.type));
427                         }
428                         
429                 } catch (IOException e) {
430                         errMsg = "Spec error in file \""
431                                         + file.getName()
432                                         + "\", Line "
433                                         + lineNum
434                                         + " :\n\tThe function declaration should take only one line and have the correct syntax (follow the annotations immediately)\n";
435                         System.out.println(errMsg);
436                         throw e;
437                 } catch (ParseException e) {
438                         errMsg = "Spec error in file \""
439                                         + file.getName()
440                                         + "\", Line "
441                                         + lineNum
442                                         + " :\n\tThe function declaration should take only one line and have the correct syntax (follow the annotations immediately)\n";
443                         System.out.println(errMsg);
444                         throw e;
445                 }
446
447                 // Now we find the end of the interface definition
448                 int endLineNumFunction = findEndLineNumFunction(file, lineReader, line);
449                 construct.setEndLineNumFunction(endLineNumFunction);
450                 if (endLineNumFunction == -1) {
451                         WrongAnnotationException
452                                         .err(file, beginLineNum,
453                                                         "The interface definition does NOT have matching curls '}'");
454                 }
455         }
456
457         /**
458          * <p>
459          * A sub-routine to extract the ordering point construct. When called, we
460          * have already match the beginning of the construct.
461          * </p>
462          * 
463          * @param file
464          *            The file that we are processing
465          * @param beginLineNum
466          *            The beginning line number of the interface construct
467          *            annotation
468          * @param curLine
469          *            The current line that we are processing. It should be the
470          *            beginning line of the annotation construct.
471          * @param type
472          *            The type of ordering point construct we are processing
473          * @throws WrongAnnotationException
474          */
475         private void extractOPConstruct(File file, int beginLineNum,
476                         String curLine, OPType type) throws WrongAnnotationException {
477                 String condition = null;
478                 String label = null;
479
480                 // "(\(\s?(\w+)\s?\))?\s:\s?(.+)\*/\s?$"
481                 Pattern regexp = Pattern
482                                 .compile("(\\(\\s*(\\w+)\\s*\\))?\\s*:\\s*(.+)\\*/\\s*$");
483                 Matcher matcher = regexp.matcher(curLine);
484                 if (matcher.find()) {
485                         label = matcher.group(2);
486                         condition = matcher.group(3);
487                 } else {
488                         WrongAnnotationException
489                                         .err(file,
490                                                         beginLineNum,
491                                                         "Wrong syntax for the ordering point construct. You might need a colon before the condition.");
492                 }
493                 OPConstruct op = new OPConstruct(file, beginLineNum, type, label,
494                                 condition, curLine);
495                 addOPConstruct(op);
496         }
497
498         /**
499          * <p>
500          * A sub-routine to extract the entry construct. When called, we have
501          * already match the beginning of the construct.
502          * </p>
503          * 
504          * @param file
505          *            The file that we are processing
506          * @param beginLineNum
507          *            The beginning line number of the interface construct
508          *            annotation
509          * @param curLine
510          *            Current line being processed
511          * @throws WrongAnnotationException
512          */
513         public void extractEntryConstruct(File file, int beginLineNum,
514                         String curLine) throws WrongAnnotationException {
515                 addEntryConstruct(file, new EntryConstruct(file, beginLineNum, curLine));
516         }
517
518         /**
519          * <p>
520          * A sub-routine to extract those annotation constructs that take only one
521          * line --- Entry, OPDefine, PotentialOP, OPCheck, OPClear and OPClearDefin.
522          * </p>
523          * 
524          * @param file
525          *            The file that we are processing
526          * @param beginLineNum
527          *            The beginning line number of the interface construct
528          *            annotation
529          * @param curLine
530          *            The current line that we are processing. It should be the
531          *            beginning line of the annotation construct.
532          * @throws WrongAnnotationException
533          */
534         private void extractOneLineConstruct(File file, int beginLineNum,
535                         String curLine) throws WrongAnnotationException {
536                 // "/\*\*\s*@(Entry|OPDefine|PotentialOP|OPCheck|OPClear|OPClearDefine)"
537                 Pattern regexpBegin = Pattern.compile("/\\*\\*\\s*@(\\w+)");
538                 Matcher matcher = regexpBegin.matcher(curLine);
539                 matcher.reset(curLine);
540                 if (matcher.find()) {
541                         String name = matcher.group(1);
542                         if (name.equals("Entry"))
543                                 extractEntryConstruct(file, beginLineNum, curLine);
544                         else if (name.equals("OPDefine") || name.equals("PotentialOP")
545                                         || name.equals("OPCheck") || name.equals("OPClear")
546                                         || name.equals("OPClearDefine"))
547                                 extractOPConstruct(file, beginLineNum, curLine,
548                                                 OPType.valueOf(name));
549                 }
550         }
551
552         /**
553          * <p>
554          * This function will process a given C/C++ file ( .h, .c or .cc). It will
555          * extract all the headers included in that file, and all the annotation
556          * constructs specified in that file. We then will store the information in
557          * the corresponding containers.
558          * </p>
559          * 
560          * <p>
561          * The basic idea is to read the file line by line, and then use regular
562          * expression to match the specific annotations or the header files.
563          * </p>
564          * 
565          * @param file
566          *            The file object of the corresponding file to be processed
567          * @throws WrongAnnotationException
568          * @throws ParseException
569          */
570         public void extractConstruct(File file) throws WrongAnnotationException,
571                         ParseException {
572                 BufferedReader br = null;
573                 LineNumberReader lineReader = null;
574                 try {
575                         // Initial settings for processing the lines
576                         br = new BufferedReader(new FileReader(file));
577                         lineReader = new LineNumberReader(br);
578                         // "/\*\*\s*@(DeclareState|Interface)"
579                         Pattern regexpBegin = Pattern
580                                         .compile("/\\*\\*\\s*@(DeclareState|Interface)");
581                         Matcher matcher = regexpBegin.matcher("");
582
583                         String line;
584                         while ((line = lineReader.readLine()) != null) {
585                                 // Start to process the line
586
587                                 // First try to process the line to see if it's a header file
588                                 // include
589                                 boolean succ = extractHeaders(line);
590                                 if (succ) // It's a header line and we successfully extract it
591                                         continue;
592
593                                 int beginLineNum = lineReader.getLineNumber();
594                                 // Extract the one-liner construct first
595                                 extractOneLineConstruct(file, beginLineNum, line);
596
597                                 // Now we process the line to see if it's an annotation (State
598                                 // or Interface)
599                                 matcher.reset(line); // reset the input
600                                 if (matcher.find()) { // Found the beginning line
601                                         // The matching annotation name
602                                         String constructName = matcher.group(1);
603
604                                         // Process each annotation accordingly
605                                         if (constructName.equals(SpecNaming.DeclareState)) {
606                                                 extractGlobalConstruct(file, lineReader, line,
607                                                                 beginLineNum);
608                                         } else if (constructName.equals(SpecNaming.Interface)) {
609                                                 extractInterfaceConstruct(file, lineReader, line,
610                                                                 beginLineNum);
611                                         } else {
612                                                 WrongAnnotationException.err(file, beginLineNum,
613                                                                 constructName
614                                                                                 + " is not a supported annotation.");
615                                         }
616
617                                 }
618                         }
619                 } catch (FileNotFoundException e) {
620                         e.printStackTrace();
621                 } catch (IOException e) {
622                         e.printStackTrace();
623                 } finally {
624                         try {
625                                 lineReader.close();
626                         } catch (IOException e) {
627                                 e.printStackTrace();
628                         }
629                 }
630         }
631
632         /**
633          * <p>
634          * Given a list of files, it scans each file and add found SpecConstrcut to
635          * the _constructs list.
636          * </p>
637          * 
638          * @param files
639          *            The list of files that needs to be processed. In general, this
640          *            list only need to contain those that have specification
641          *            annotations
642          * @throws WrongAnnotationException
643          * @throws ParseException
644          */
645         public void extract(File[] files) throws WrongAnnotationException,
646                         ParseException {
647                 for (int i = 0; i < files.length; i++)
648                         extract(files[i]);
649
650                 // Check basic specification semantics
651                 checkSemantics();
652         }
653
654         public void extract(ArrayList<File> files) throws WrongAnnotationException,
655                         ParseException {
656                 for (int i = 0; i < files.size(); i++)
657                         extract(files.get(i));
658
659                 // Check basic specification semantics
660                 checkSemantics();
661         }
662
663         /**
664          * <p>
665          * Extract the specification annotations and header files in the current
666          * file. This function should generally be called by extractFiles.
667          * </p>
668          * 
669          * @param files
670          *            The list of files that needs to be processed. In general, this
671          *            list only need to contain those that have specification
672          *            annotations
673          * @throws WrongAnnotationException
674          * @throws ParseException
675          */
676         public void extract(File file) throws WrongAnnotationException,
677                         ParseException {
678                 extractConstruct(file);
679         }
680 }