X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=Robust%2Fcup%2Fjava_cup%2Flexer.java;fp=Robust%2Fcup%2Fjava_cup%2Flexer.java;h=0000000000000000000000000000000000000000;hb=cdcf09c40af1419fa42932aae249cb79b69b5daf;hp=2230d123f10e9c7838440ac47a4199d61e31cfab;hpb=ac6191b514c0e54b468623bf868134e1ce809df5;p=IRC.git diff --git a/Robust/cup/java_cup/lexer.java b/Robust/cup/java_cup/lexer.java deleted file mode 100644 index 2230d123..00000000 --- a/Robust/cup/java_cup/lexer.java +++ /dev/null @@ -1,543 +0,0 @@ -package java_cup; - -import java_cup.runtime.Symbol; -import java.util.Hashtable; - -/** This class implements a small scanner (aka lexical analyzer or lexer) for - * the JavaCup specification. This scanner reads characters from standard - * input (System.in) and returns integers corresponding to the terminal - * number of the next Symbol. Once end of input is reached the EOF Symbol is - * returned on every subsequent call.

- * Symbols currently returned include:

- *    Symbol        Constant Returned     Symbol        Constant Returned
- *    ------        -----------------     ------        -----------------
- *    "package"     PACKAGE               "import"      IMPORT 
- *    "code"        CODE                  "action"      ACTION 
- *    "parser"      PARSER                "terminal"    TERMINAL
- *    "non"         NON                   "init"        INIT 
- *    "scan"        SCAN                  "with"        WITH
- *    "start"       START                 "precedence"  PRECEDENCE
- *    "left"        LEFT		  "right"       RIGHT
- *    "nonassoc"    NONASSOC		  "%prec        PRECENT_PREC  
- *      [           LBRACK                  ]           RBRACK
- *      ;           SEMI 
- *      ,           COMMA                   *           STAR 
- *      .           DOT                     :           COLON
- *      ::=         COLON_COLON_EQUALS      |           BAR
- *    identifier    ID                    {:...:}       CODE_STRING
- *    "nonterminal" NONTERMINAL
- *  
- * All symbol constants are defined in sym.java which is generated by - * JavaCup from parser.cup.

- * - * In addition to the scanner proper (called first via init() then with - * next_token() to get each Symbol) this class provides simple error and - * warning routines and keeps a count of errors and warnings that is - * publicly accessible.

- * - * This class is "static" (i.e., it has only static members and methods). - * - * @version last updated: 7/3/96 - * @author Frank Flannery - */ -public class lexer { - - /*-----------------------------------------------------------*/ - /*--- Constructor(s) ----------------------------------------*/ - /*-----------------------------------------------------------*/ - - /** The only constructor is private, so no instances can be created. */ - private lexer() { } - - /*-----------------------------------------------------------*/ - /*--- Static (Class) Variables ------------------------------*/ - /*-----------------------------------------------------------*/ - - /** First character of lookahead. */ - protected static int next_char; - - /** Second character of lookahead. */ - protected static int next_char2; - - /** Second character of lookahead. */ - protected static int next_char3; - - /** Second character of lookahead. */ - protected static int next_char4; - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** EOF constant. */ - protected static final int EOF_CHAR = -1; - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Table of keywords. Keywords are initially treated as identifiers. - * Just before they are returned we look them up in this table to see if - * they match one of the keywords. The string of the name is the key here, - * which indexes Integer objects holding the symbol number. - */ - protected static Hashtable keywords = new Hashtable(23); - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Table of single character symbols. For ease of implementation, we - * store all unambiguous single character Symbols in this table of Integer - * objects keyed by Integer objects with the numerical value of the - * appropriate char (currently Character objects have a bug which precludes - * their use in tables). - */ - protected static Hashtable char_symbols = new Hashtable(11); - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Current line number for use in error messages. */ - protected static int current_line = 1; - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Character position in current line. */ - protected static int current_position = 1; - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Character position in current line. */ - protected static int absolute_position = 1; - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Count of total errors detected so far. */ - public static int error_count = 0; - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Count of warnings issued so far */ - public static int warning_count = 0; - - /*-----------------------------------------------------------*/ - /*--- Static Methods ----------------------------------------*/ - /*-----------------------------------------------------------*/ - - /** Initialize the scanner. This sets up the keywords and char_symbols - * tables and reads the first two characters of lookahead. - */ - public static void init() throws java.io.IOException - { - /* set up the keyword table */ - keywords.put("package", new Integer(sym.PACKAGE)); - keywords.put("import", new Integer(sym.IMPORT)); - keywords.put("code", new Integer(sym.CODE)); - keywords.put("action", new Integer(sym.ACTION)); - keywords.put("parser", new Integer(sym.PARSER)); - keywords.put("terminal", new Integer(sym.TERMINAL)); - keywords.put("non", new Integer(sym.NON)); - keywords.put("nonterminal",new Integer(sym.NONTERMINAL));// [CSA] - keywords.put("init", new Integer(sym.INIT)); - keywords.put("scan", new Integer(sym.SCAN)); - keywords.put("with", new Integer(sym.WITH)); - keywords.put("start", new Integer(sym.START)); - keywords.put("precedence", new Integer(sym.PRECEDENCE)); - keywords.put("left", new Integer(sym.LEFT)); - keywords.put("right", new Integer(sym.RIGHT)); - keywords.put("nonassoc", new Integer(sym.NONASSOC)); - - /* set up the table of single character symbols */ - char_symbols.put(new Integer(';'), new Integer(sym.SEMI)); - char_symbols.put(new Integer(','), new Integer(sym.COMMA)); - char_symbols.put(new Integer('*'), new Integer(sym.STAR)); - char_symbols.put(new Integer('.'), new Integer(sym.DOT)); - char_symbols.put(new Integer('|'), new Integer(sym.BAR)); - char_symbols.put(new Integer('['), new Integer(sym.LBRACK)); - char_symbols.put(new Integer(']'), new Integer(sym.RBRACK)); - - /* read two characters of lookahead */ - next_char = System.in.read(); - if (next_char == EOF_CHAR) { - next_char2 = EOF_CHAR; - next_char3 = EOF_CHAR; - next_char4 = EOF_CHAR; - } else { - next_char2 = System.in.read(); - if (next_char2 == EOF_CHAR) { - next_char3 = EOF_CHAR; - next_char4 = EOF_CHAR; - } else { - next_char3 = System.in.read(); - if (next_char3 == EOF_CHAR) { - next_char4 = EOF_CHAR; - } else { - next_char4 = System.in.read(); - } - } - } - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Advance the scanner one character in the input stream. This moves - * next_char2 to next_char and then reads a new next_char2. - */ - protected static void advance() throws java.io.IOException - { - int old_char; - - old_char = next_char; - next_char = next_char2; - if (next_char == EOF_CHAR) { - next_char2 = EOF_CHAR; - next_char3 = EOF_CHAR; - next_char4 = EOF_CHAR; - } else { - next_char2 = next_char3; - if (next_char2 == EOF_CHAR) { - next_char3 = EOF_CHAR; - next_char4 = EOF_CHAR; - } else { - next_char3 = next_char4; - if (next_char3 == EOF_CHAR) { - next_char4 = EOF_CHAR; - } else { - next_char4 = System.in.read(); - } - } - } - - /* count this */ - absolute_position++; - current_position++; - if (old_char == '\n' || (old_char == '\r' && next_char!='\n')) - { - current_line++; - current_position = 1; - } - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Emit an error message. The message will be marked with both the - * current line number and the position in the line. Error messages - * are printed on standard error (System.err). - * @param message the message to print. - */ - public static void emit_error(String message) - { - System.err.println("Error at " + current_line + "(" + current_position + - "): " + message); - error_count++; - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Emit a warning message. The message will be marked with both the - * current line number and the position in the line. Messages are - * printed on standard error (System.err). - * @param message the message to print. - */ - public static void emit_warn(String message) - { - System.err.println("Warning at " + current_line + "(" + current_position + - "): " + message); - warning_count++; - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Determine if a character is ok to start an id. - * @param ch the character in question. - */ - protected static boolean id_start_char(int ch) - { - /* allow for % in identifiers. a hack to allow my - %prec in. Should eventually make lex spec for this - frankf */ - return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || - (ch == '_'); - - // later need to deal with non-8-bit chars here - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Determine if a character is ok for the middle of an id. - * @param ch the character in question. - */ - protected static boolean id_char(int ch) - { - return id_start_char(ch) || (ch >= '0' && ch <= '9'); - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Try to look up a single character symbol, returns -1 for not found. - * @param ch the character in question. - */ - protected static int find_single_char(int ch) - { - Integer result; - - result = (Integer)char_symbols.get(new Integer((char)ch)); - if (result == null) - return -1; - else - return result.intValue(); - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Handle swallowing up a comment. Both old style C and new style C++ - * comments are handled. - */ - protected static void swallow_comment() throws java.io.IOException - { - /* next_char == '/' at this point */ - - /* is it a traditional comment */ - if (next_char2 == '*') - { - /* swallow the opener */ - advance(); advance(); - - /* swallow the comment until end of comment or EOF */ - for (;;) - { - /* if its EOF we have an error */ - if (next_char == EOF_CHAR) - { - emit_error("Specification file ends inside a comment"); - return; - } - - /* if we can see the closer we are done */ - if (next_char == '*' && next_char2 == '/') - { - advance(); - advance(); - return; - } - - /* otherwise swallow char and move on */ - advance(); - } - } - - /* is its a new style comment */ - if (next_char2 == '/') - { - /* swallow the opener */ - advance(); advance(); - - /* swallow to '\n', '\r', '\f', or EOF */ - while (next_char != '\n' && next_char != '\r' && - next_char != '\f' && next_char!=EOF_CHAR) - advance(); - - return; - - } - - /* shouldn't get here, but... if we get here we have an error */ - emit_error("Malformed comment in specification -- ignored"); - advance(); - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Swallow up a code string. Code strings begin with "{:" and include - all characters up to the first occurrence of ":}" (there is no way to - include ":}" inside a code string). The routine returns a String - object suitable for return by the scanner. - */ - protected static Symbol do_code_string() throws java.io.IOException - { - StringBuffer result = new StringBuffer(); - - /* at this point we have lookahead of "{:" -- swallow that */ - advance(); advance(); - - /* save chars until we see ":}" */ - while (!(next_char == ':' && next_char2 == '}')) - { - /* if we have run off the end issue a message and break out of loop */ - if (next_char == EOF_CHAR) - { - emit_error("Specification file ends inside a code string"); - break; - } - - /* otherwise record the char and move on */ - result.append(new Character((char)next_char)); - advance(); - } - - /* advance past the closer and build a return Symbol */ - advance(); advance(); - return new Symbol(sym.CODE_STRING, result.toString()); - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Process an identifier. Identifiers begin with a letter, underscore, - * or dollar sign, which is followed by zero or more letters, numbers, - * underscores or dollar signs. This routine returns a String suitable - * for return by the scanner. - */ - protected static Symbol do_id() throws java.io.IOException - { - StringBuffer result = new StringBuffer(); - String result_str; - Integer keyword_num; - char buffer[] = new char[1]; - - /* next_char holds first character of id */ - buffer[0] = (char)next_char; - result.append(buffer,0,1); - advance(); - - /* collect up characters while they fit in id */ - while(id_char(next_char)) - { - buffer[0] = (char)next_char; - result.append(buffer,0,1); - advance(); - } - - /* extract a string and try to look it up as a keyword */ - result_str = result.toString(); - keyword_num = (Integer)keywords.get(result_str); - - /* if we found something, return that keyword */ - if (keyword_num != null) - return new Symbol(keyword_num.intValue()); - - /* otherwise build and return an id Symbol with an attached string */ - return new Symbol(sym.ID, result_str); - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Return one Symbol. This is the main external interface to the scanner. - * It consumes sufficient characters to determine the next input Symbol - * and returns it. To help with debugging, this routine actually calls - * real_next_token() which does the work. If you need to debug the - * parser, this can be changed to call debug_next_token() which prints - * a debugging message before returning the Symbol. - */ - public static Symbol next_token() throws java.io.IOException - { - return real_next_token(); - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** Debugging version of next_token(). This routine calls the real scanning - * routine, prints a message on System.out indicating what the Symbol is, - * then returns it. - */ - public static Symbol debug_next_token() throws java.io.IOException - { - Symbol result = real_next_token(); - System.out.println("# next_Symbol() => " + result.sym); - return result; - } - - /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/ - - /** The actual routine to return one Symbol. This is normally called from - * next_token(), but for debugging purposes can be called indirectly from - * debug_next_token(). - */ - protected static Symbol real_next_token() throws java.io.IOException - { - int sym_num; - - for (;;) - { - /* look for white space */ - if (next_char == ' ' || next_char == '\t' || next_char == '\n' || - next_char == '\f' || next_char == '\r') - { - /* advance past it and try the next character */ - advance(); - continue; - } - - /* look for a single character symbol */ - sym_num = find_single_char(next_char); - if (sym_num != -1) - { - /* found one -- advance past it and return a Symbol for it */ - advance(); - return new Symbol(sym_num); - } - - /* look for : or ::= */ - if (next_char == ':') - { - /* if we don't have a second ':' return COLON */ - if (next_char2 != ':') - { - advance(); - return new Symbol(sym.COLON); - } - - /* move forward and look for the '=' */ - advance(); - if (next_char2 == '=') - { - advance(); advance(); - return new Symbol(sym.COLON_COLON_EQUALS); - } - else - { - /* return just the colon (already consumed) */ - return new Symbol(sym.COLON); - } - } - - /* find a "%prec" string and return it. otherwise, a '%' was found, - which has no right being in the specification otherwise */ - if (next_char == '%') { - advance(); - if ((next_char == 'p') && (next_char2 == 'r') && (next_char3 == 'e') && - (next_char4 == 'c')) { - advance(); - advance(); - advance(); - advance(); - return new Symbol(sym.PERCENT_PREC); - } else { - emit_error("Found extraneous percent sign"); - } - } - - /* look for a comment */ - if (next_char == '/' && (next_char2 == '*' || next_char2 == '/')) - { - /* swallow then continue the scan */ - swallow_comment(); - continue; - } - - /* look for start of code string */ - if (next_char == '{' && next_char2 == ':') - return do_code_string(); - - /* look for an id or keyword */ - if (id_start_char(next_char)) return do_id(); - - /* look for EOF */ - if (next_char == EOF_CHAR) return new Symbol(sym.EOF); - - /* if we get here, we have an unrecognized character */ - emit_warn("Unrecognized character '" + - new Character((char)next_char) + "'(" + next_char + - ") -- ignored"); - - /* advance past it */ - advance(); - } - } - - /*-----------------------------------------------------------*/ -} -