+++ /dev/null
-package java_cup;
-
-import java_cup.runtime.Symbol;
-import java.util.Hashtable;
-
-/** This class implements a small scanner (aka lexical analyzer or lexer) for
- * the JavaCup specification. This scanner reads characters from standard
- * input (System.in) and returns integers corresponding to the terminal
- * number of the next Symbol. Once end of input is reached the EOF Symbol is
- * returned on every subsequent call.<p>
- * Symbols currently returned include: <pre>
- * Symbol Constant Returned Symbol Constant Returned
- * ------ ----------------- ------ -----------------
- * "package" PACKAGE "import" IMPORT
- * "code" CODE "action" ACTION
- * "parser" PARSER "terminal" TERMINAL
- * "non" NON "init" INIT
- * "scan" SCAN "with" WITH
- * "start" START "precedence" PRECEDENCE
- * "left" LEFT "right" RIGHT
- * "nonassoc" NONASSOC "%prec PRECENT_PREC
- * [ LBRACK ] RBRACK
- * ; SEMI
- * , COMMA * STAR
- * . DOT : COLON
- * ::= COLON_COLON_EQUALS | BAR
- * identifier ID {:...:} CODE_STRING
- * "nonterminal" NONTERMINAL
- * </pre>
- * All symbol constants are defined in sym.java which is generated by
- * JavaCup from parser.cup.<p>
- *
- * In addition to the scanner proper (called first via init() then with
- * next_token() to get each Symbol) this class provides simple error and
- * warning routines and keeps a count of errors and warnings that is
- * publicly accessible.<p>
- *
- * This class is "static" (i.e., it has only static members and methods).
- *
- * @version last updated: 7/3/96
- * @author Frank Flannery
- */
-public class lexer {
-
- /*-----------------------------------------------------------*/
- /*--- Constructor(s) ----------------------------------------*/
- /*-----------------------------------------------------------*/
-
- /** The only constructor is private, so no instances can be created. */
- private lexer() { }
-
- /*-----------------------------------------------------------*/
- /*--- Static (Class) Variables ------------------------------*/
- /*-----------------------------------------------------------*/
-
- /** First character of lookahead. */
- protected static int next_char;
-
- /** Second character of lookahead. */
- protected static int next_char2;
-
- /** Second character of lookahead. */
- protected static int next_char3;
-
- /** Second character of lookahead. */
- protected static int next_char4;
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** EOF constant. */
- protected static final int EOF_CHAR = -1;
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Table of keywords. Keywords are initially treated as identifiers.
- * Just before they are returned we look them up in this table to see if
- * they match one of the keywords. The string of the name is the key here,
- * which indexes Integer objects holding the symbol number.
- */
- protected static Hashtable keywords = new Hashtable(23);
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Table of single character symbols. For ease of implementation, we
- * store all unambiguous single character Symbols in this table of Integer
- * objects keyed by Integer objects with the numerical value of the
- * appropriate char (currently Character objects have a bug which precludes
- * their use in tables).
- */
- protected static Hashtable char_symbols = new Hashtable(11);
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Current line number for use in error messages. */
- protected static int current_line = 1;
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Character position in current line. */
- protected static int current_position = 1;
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Character position in current line. */
- protected static int absolute_position = 1;
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Count of total errors detected so far. */
- public static int error_count = 0;
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Count of warnings issued so far */
- public static int warning_count = 0;
-
- /*-----------------------------------------------------------*/
- /*--- Static Methods ----------------------------------------*/
- /*-----------------------------------------------------------*/
-
- /** Initialize the scanner. This sets up the keywords and char_symbols
- * tables and reads the first two characters of lookahead.
- */
- public static void init() throws java.io.IOException
- {
- /* set up the keyword table */
- keywords.put("package", new Integer(sym.PACKAGE));
- keywords.put("import", new Integer(sym.IMPORT));
- keywords.put("code", new Integer(sym.CODE));
- keywords.put("action", new Integer(sym.ACTION));
- keywords.put("parser", new Integer(sym.PARSER));
- keywords.put("terminal", new Integer(sym.TERMINAL));
- keywords.put("non", new Integer(sym.NON));
- keywords.put("nonterminal",new Integer(sym.NONTERMINAL));// [CSA]
- keywords.put("init", new Integer(sym.INIT));
- keywords.put("scan", new Integer(sym.SCAN));
- keywords.put("with", new Integer(sym.WITH));
- keywords.put("start", new Integer(sym.START));
- keywords.put("precedence", new Integer(sym.PRECEDENCE));
- keywords.put("left", new Integer(sym.LEFT));
- keywords.put("right", new Integer(sym.RIGHT));
- keywords.put("nonassoc", new Integer(sym.NONASSOC));
-
- /* set up the table of single character symbols */
- char_symbols.put(new Integer(';'), new Integer(sym.SEMI));
- char_symbols.put(new Integer(','), new Integer(sym.COMMA));
- char_symbols.put(new Integer('*'), new Integer(sym.STAR));
- char_symbols.put(new Integer('.'), new Integer(sym.DOT));
- char_symbols.put(new Integer('|'), new Integer(sym.BAR));
- char_symbols.put(new Integer('['), new Integer(sym.LBRACK));
- char_symbols.put(new Integer(']'), new Integer(sym.RBRACK));
-
- /* read two characters of lookahead */
- next_char = System.in.read();
- if (next_char == EOF_CHAR) {
- next_char2 = EOF_CHAR;
- next_char3 = EOF_CHAR;
- next_char4 = EOF_CHAR;
- } else {
- next_char2 = System.in.read();
- if (next_char2 == EOF_CHAR) {
- next_char3 = EOF_CHAR;
- next_char4 = EOF_CHAR;
- } else {
- next_char3 = System.in.read();
- if (next_char3 == EOF_CHAR) {
- next_char4 = EOF_CHAR;
- } else {
- next_char4 = System.in.read();
- }
- }
- }
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Advance the scanner one character in the input stream. This moves
- * next_char2 to next_char and then reads a new next_char2.
- */
- protected static void advance() throws java.io.IOException
- {
- int old_char;
-
- old_char = next_char;
- next_char = next_char2;
- if (next_char == EOF_CHAR) {
- next_char2 = EOF_CHAR;
- next_char3 = EOF_CHAR;
- next_char4 = EOF_CHAR;
- } else {
- next_char2 = next_char3;
- if (next_char2 == EOF_CHAR) {
- next_char3 = EOF_CHAR;
- next_char4 = EOF_CHAR;
- } else {
- next_char3 = next_char4;
- if (next_char3 == EOF_CHAR) {
- next_char4 = EOF_CHAR;
- } else {
- next_char4 = System.in.read();
- }
- }
- }
-
- /* count this */
- absolute_position++;
- current_position++;
- if (old_char == '\n' || (old_char == '\r' && next_char!='\n'))
- {
- current_line++;
- current_position = 1;
- }
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Emit an error message. The message will be marked with both the
- * current line number and the position in the line. Error messages
- * are printed on standard error (System.err).
- * @param message the message to print.
- */
- public static void emit_error(String message)
- {
- System.err.println("Error at " + current_line + "(" + current_position +
- "): " + message);
- error_count++;
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Emit a warning message. The message will be marked with both the
- * current line number and the position in the line. Messages are
- * printed on standard error (System.err).
- * @param message the message to print.
- */
- public static void emit_warn(String message)
- {
- System.err.println("Warning at " + current_line + "(" + current_position +
- "): " + message);
- warning_count++;
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Determine if a character is ok to start an id.
- * @param ch the character in question.
- */
- protected static boolean id_start_char(int ch)
- {
- /* allow for % in identifiers. a hack to allow my
- %prec in. Should eventually make lex spec for this
- frankf */
- return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
- (ch == '_');
-
- // later need to deal with non-8-bit chars here
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Determine if a character is ok for the middle of an id.
- * @param ch the character in question.
- */
- protected static boolean id_char(int ch)
- {
- return id_start_char(ch) || (ch >= '0' && ch <= '9');
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Try to look up a single character symbol, returns -1 for not found.
- * @param ch the character in question.
- */
- protected static int find_single_char(int ch)
- {
- Integer result;
-
- result = (Integer)char_symbols.get(new Integer((char)ch));
- if (result == null)
- return -1;
- else
- return result.intValue();
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Handle swallowing up a comment. Both old style C and new style C++
- * comments are handled.
- */
- protected static void swallow_comment() throws java.io.IOException
- {
- /* next_char == '/' at this point */
-
- /* is it a traditional comment */
- if (next_char2 == '*')
- {
- /* swallow the opener */
- advance(); advance();
-
- /* swallow the comment until end of comment or EOF */
- for (;;)
- {
- /* if its EOF we have an error */
- if (next_char == EOF_CHAR)
- {
- emit_error("Specification file ends inside a comment");
- return;
- }
-
- /* if we can see the closer we are done */
- if (next_char == '*' && next_char2 == '/')
- {
- advance();
- advance();
- return;
- }
-
- /* otherwise swallow char and move on */
- advance();
- }
- }
-
- /* is its a new style comment */
- if (next_char2 == '/')
- {
- /* swallow the opener */
- advance(); advance();
-
- /* swallow to '\n', '\r', '\f', or EOF */
- while (next_char != '\n' && next_char != '\r' &&
- next_char != '\f' && next_char!=EOF_CHAR)
- advance();
-
- return;
-
- }
-
- /* shouldn't get here, but... if we get here we have an error */
- emit_error("Malformed comment in specification -- ignored");
- advance();
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Swallow up a code string. Code strings begin with "{:" and include
- all characters up to the first occurrence of ":}" (there is no way to
- include ":}" inside a code string). The routine returns a String
- object suitable for return by the scanner.
- */
- protected static Symbol do_code_string() throws java.io.IOException
- {
- StringBuffer result = new StringBuffer();
-
- /* at this point we have lookahead of "{:" -- swallow that */
- advance(); advance();
-
- /* save chars until we see ":}" */
- while (!(next_char == ':' && next_char2 == '}'))
- {
- /* if we have run off the end issue a message and break out of loop */
- if (next_char == EOF_CHAR)
- {
- emit_error("Specification file ends inside a code string");
- break;
- }
-
- /* otherwise record the char and move on */
- result.append(new Character((char)next_char));
- advance();
- }
-
- /* advance past the closer and build a return Symbol */
- advance(); advance();
- return new Symbol(sym.CODE_STRING, result.toString());
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Process an identifier. Identifiers begin with a letter, underscore,
- * or dollar sign, which is followed by zero or more letters, numbers,
- * underscores or dollar signs. This routine returns a String suitable
- * for return by the scanner.
- */
- protected static Symbol do_id() throws java.io.IOException
- {
- StringBuffer result = new StringBuffer();
- String result_str;
- Integer keyword_num;
- char buffer[] = new char[1];
-
- /* next_char holds first character of id */
- buffer[0] = (char)next_char;
- result.append(buffer,0,1);
- advance();
-
- /* collect up characters while they fit in id */
- while(id_char(next_char))
- {
- buffer[0] = (char)next_char;
- result.append(buffer,0,1);
- advance();
- }
-
- /* extract a string and try to look it up as a keyword */
- result_str = result.toString();
- keyword_num = (Integer)keywords.get(result_str);
-
- /* if we found something, return that keyword */
- if (keyword_num != null)
- return new Symbol(keyword_num.intValue());
-
- /* otherwise build and return an id Symbol with an attached string */
- return new Symbol(sym.ID, result_str);
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Return one Symbol. This is the main external interface to the scanner.
- * It consumes sufficient characters to determine the next input Symbol
- * and returns it. To help with debugging, this routine actually calls
- * real_next_token() which does the work. If you need to debug the
- * parser, this can be changed to call debug_next_token() which prints
- * a debugging message before returning the Symbol.
- */
- public static Symbol next_token() throws java.io.IOException
- {
- return real_next_token();
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** Debugging version of next_token(). This routine calls the real scanning
- * routine, prints a message on System.out indicating what the Symbol is,
- * then returns it.
- */
- public static Symbol debug_next_token() throws java.io.IOException
- {
- Symbol result = real_next_token();
- System.out.println("# next_Symbol() => " + result.sym);
- return result;
- }
-
- /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
-
- /** The actual routine to return one Symbol. This is normally called from
- * next_token(), but for debugging purposes can be called indirectly from
- * debug_next_token().
- */
- protected static Symbol real_next_token() throws java.io.IOException
- {
- int sym_num;
-
- for (;;)
- {
- /* look for white space */
- if (next_char == ' ' || next_char == '\t' || next_char == '\n' ||
- next_char == '\f' || next_char == '\r')
- {
- /* advance past it and try the next character */
- advance();
- continue;
- }
-
- /* look for a single character symbol */
- sym_num = find_single_char(next_char);
- if (sym_num != -1)
- {
- /* found one -- advance past it and return a Symbol for it */
- advance();
- return new Symbol(sym_num);
- }
-
- /* look for : or ::= */
- if (next_char == ':')
- {
- /* if we don't have a second ':' return COLON */
- if (next_char2 != ':')
- {
- advance();
- return new Symbol(sym.COLON);
- }
-
- /* move forward and look for the '=' */
- advance();
- if (next_char2 == '=')
- {
- advance(); advance();
- return new Symbol(sym.COLON_COLON_EQUALS);
- }
- else
- {
- /* return just the colon (already consumed) */
- return new Symbol(sym.COLON);
- }
- }
-
- /* find a "%prec" string and return it. otherwise, a '%' was found,
- which has no right being in the specification otherwise */
- if (next_char == '%') {
- advance();
- if ((next_char == 'p') && (next_char2 == 'r') && (next_char3 == 'e') &&
- (next_char4 == 'c')) {
- advance();
- advance();
- advance();
- advance();
- return new Symbol(sym.PERCENT_PREC);
- } else {
- emit_error("Found extraneous percent sign");
- }
- }
-
- /* look for a comment */
- if (next_char == '/' && (next_char2 == '*' || next_char2 == '/'))
- {
- /* swallow then continue the scan */
- swallow_comment();
- continue;
- }
-
- /* look for start of code string */
- if (next_char == '{' && next_char2 == ':')
- return do_code_string();
-
- /* look for an id or keyword */
- if (id_start_char(next_char)) return do_id();
-
- /* look for EOF */
- if (next_char == EOF_CHAR) return new Symbol(sym.EOF);
-
- /* if we get here, we have an unrecognized character */
- emit_warn("Unrecognized character '" +
- new Character((char)next_char) + "'(" + next_char +
- ") -- ignored");
-
- /* advance past it */
- advance();
- }
- }
-
- /*-----------------------------------------------------------*/
-}
-