4 import java.io.LineNumberReader;
8 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
9 * This program is released under the terms of the GPL; see the file
10 * COPYING for more details. There is NO WARRANTY on this code.
// Line-numbering reader that supplies the source text; assigned in the constructor.
14 LineNumberReader reader;
// Newest LineList record. Each record stores a starting offset (head) and a link
// to the previous record (tail); this initial record stands in for line #0.
21 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
/**
 * Creates a lexer over the given character stream.
 * The stream is wrapped in an EscapedUnicodeReader and then in a
 * LineNumberReader, which is stored in the reader field.
 * NOTE(review): the remainder of the constructor body is not visible in this excerpt.
 *
 * @param reader source of the characters to be lexed
 */
23 public Lexer(Reader reader) {
24 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
/**
 * Produces the next symbol. While no lookahead queue exists the symbol is lexed
 * directly by _nextToken(); once the queue has been created, symbols are taken
 * from it through lookahead.get().
 * NOTE(review): the statements after this assignment are not visible in this excerpt.
 *
 * @throws java.io.IOException if reading the underlying input fails
 */
29 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
30 java_cup.runtime.Symbol sym =
31 lookahead==null ? _nextToken() : lookahead.get();
/**
 * Looks ahead in the symbol stream to decide whether the current LT should be
 * treated as a PLT. Upcoming symbols are inspected through lookahead.peek(i).
 * NOTE(review): the return statements, the declaration of i and the case labels
 * of the final switch are not visible in this excerpt; only the visible steps
 * are annotated.
 *
 * @throws java.io.IOException if lexing the lookahead symbols fails
 */
35 private boolean shouldBePLT() throws java.io.IOException {
36 // look ahead to see if this LT should be changed to a PLT
// Tests whether the previously recorded symbol is missing or is not an IDENTIFIER
// (the consequence of this test is not visible here).
37 if (last==null || last.sym!=Sym.IDENTIFIER)
// Create the lookahead queue on first use; its getter pulls fresh symbols from _nextToken().
39 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
40 java_cup.runtime.Symbol next() throws java.io.IOException
41 { return _nextToken(); }
44 // skip past IDENTIFIER (DOT IDENTIFIER)*
45 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
// Keep going for as long as the name is extended by a DOT.
47 while (lookahead.peek(i).sym == Sym.DOT) {
49 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
52 // skip past (LBRACK RBRACK)*
53 while (lookahead.peek(i).sym == Sym.LBRACK) {
// The symbol examined here is required to be the closing bracket.
55 if (lookahead.peek(i++).sym != Sym.RBRACK)
58 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
59 switch(lookahead.peek(i).sym) {
// Symbol examined by shouldBePLT() to see whether an IDENTIFIER came before; starts as null.
// NOTE(review): presumably the most recently returned symbol; the assignment is not visible here.
69 private java_cup.runtime.Symbol last = null;
// Queue of pre-lexed symbols; null until shouldBePLT() creates it.
70 private FIFO lookahead = null;
/**
 * Lexes the next token from the input. Input elements are fetched with
 * getInputElement() until one of them is a Token; when a DocumentationComment
 * is met along the way its text is stored in comment. The symbol obtained from
 * the token then has its left and right fields overwritten with offsets
 * computed from lineL.head and line_pos.
 * NOTE(review): the local declarations, the loop header and the return
 * statement are not visible in this excerpt.
 *
 * @throws java.io.IOException if reading the underlying input fails
 */
71 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
73 * Identifiers/Keywords/true/false/null (start with java letter)
74 * numeric literal (start with number)
75 * character literal (start with single quote)
76 * string (start with double quote)
77 * separator (parens, braces, brackets, semicolon, comma, period)
78 * operator (equals, plus, minus, etc)
80 * comment (start with slash)
// Offset of the element about to be read: start-of-line offset plus position in the line.
85 startpos = lineL.head + line_pos;
86 ie = getInputElement();
// Remember the text of documentation comments so lastComment() can report it.
87 if (ie instanceof DocumentationComment)
88 comment = ((Comment)ie).getComment();
89 } while (!(ie instanceof Token));
// line_pos now sits just past the token, so the last character is one position earlier.
90 endpos = lineL.head + line_pos - 1;
92 //System.out.println(ie.toString()); // uncomment to debug lexer.
93 java_cup.runtime.Symbol sym = ((Token)ie).token();
94 // fix up left/right positions.
95 sym.left = startpos; sym.right = endpos;
/**
 * Debugging aid: fetches one input element, prints it to standard output and
 * reports whether it was anything other than EOF.
 *
 * @return false when the element read is an EOF, true otherwise
 * @throws java.io.IOException propagated from getInputElement()
 */
99 public boolean debug_lex() throws java.io.IOException {
100 InputElement ie = getInputElement();
101 System.out.println(ie);
102 return !(ie instanceof EOF);
/** Returns the current value of comment, which _nextToken() sets from documentation comments. */
106 public String lastComment() { return comment; }
/** Resets comment to the empty string (not to null). */
107 public void clearComment() { comment=""; }
/**
 * Reads one input element starting at line_pos in the current line.
 * NOTE(review): several statements and case labels of this method are not
 * visible in this excerpt; only the visible steps are annotated.
 *
 * @throws java.io.IOException if reading the underlying input fails
 */
109 InputElement getInputElement() throws java.io.IOException {
// True once every character of the current line has been consumed.
114 if (line.length()<=line_pos) { // end of line.
// Dispatch on the character at the current position.
120 switch (line.charAt(line_pos)) {
// Each whitespace character is consumed on its own and wrapped in a WhiteSpace element.
123 case ' ': // ASCII SP
124 case '\t': // ASCII HT
125 case '\f': // ASCII FF
126 case '\n': // LineTerminator
127 return new WhiteSpace(consume());
// NOTE(review): the octal escape 020 is decimal 16, whereas ASCII SUB is decimal 26
// (octal 032); the literal and its label disagree. Confirm which one is intended.
130 case '\020': // ASCII SUB
/**
 * Called when the current character is a slash. Depending on the character that
 * follows, this yields an end-of-line comment, a traditional or documentation
 * comment, or falls through to ordinary token handling.
 * NOTE(review): some statements (including any local declarations and the
 * default branch body) are not visible in this excerpt.
 *
 * @throws java.io.IOException if reading further lines fails
 */
143 // May get Token instead of Comment.
144 InputElement getComment() throws java.io.IOException {
146 // line.charAt(line_pos+0) is '/'
147 switch (line.charAt(line_pos+1)) {
148 case '/': // EndOfLineComment
// The comment text is everything after the two slashes up to the end of the line;
// nextLine() appends a newline to each line, so that newline is part of the text.
// NOTE(review): whether comment here is a local or the field returned by
// lastComment() depends on a declaration that is not visible in this excerpt.
149 comment = line.substring(line_pos+2);
// Mark the whole line as consumed.
150 line_pos = line.length();
151 return new EndOfLineComment(comment);
152 case '*': // TraditionalComment or DocumentationComment
// NOTE(review): line_pos has presumably been advanced past the opener by a
// statement not visible here, so this tests the character after the opener.
154 if (line.charAt(line_pos)=='*') { // DocumentationComment
155 return snarfComment(new DocumentationComment());
156 } else { // TraditionalComment
157 return snarfComment(new TraditionalComment());
159 default: // it's a token, not a comment.
/**
 * Collects the body of a multi-line comment into c, one appendLine call per
 * source line, and returns c once the closing star-slash pair has been consumed.
 * NOTE(review): the branch conditions and the call that fetches the next line
 * are not visible in this excerpt.
 *
 * @param c comment object that receives the text
 * @return the same object c, filled in
 * @throws java.io.IOException if reading further lines fails
 * @throws Error if the input ends before the comment is closed
 */
164 Comment snarfComment(Comment c) throws java.io.IOException {
// Holds the text gathered so far for the current source line.
165 StringBuffer text=new StringBuffer();
166 while(true) { // Grab CommentTail
167 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
168 int star_pos = line.indexOf('*', line_pos);
// Rest of the line goes into the comment: flush it to c and start a fresh buffer.
170 text.append(line.substring(line_pos));
171 c.appendLine(text.toString()); text.setLength(0);
172 line_pos = line.length();
175 throw new Error("Unterminated comment at end of file.");
// Copy everything up to, but not including, the star that was found.
177 text.append(line.substring(line_pos, star_pos));
181 // At this point, line.charAt(line_pos)=='*'
182 // Grab CommentTailStar starting at line_pos+1.
183 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
// Closing pair found: flush the pending text, skip both characters and finish.
184 c.appendLine(text.toString()); line_pos+=2; return c;
186 text.append(line.charAt(line_pos++)); // add the '*'
// NOTE(review): the header of the method containing these lines is not visible in
// this excerpt, and neither are the case labels that select each return below.
// The visible body dispatches on the current character to the matching reader.
191 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
192 switch (line.charAt(line_pos)) {
193 // Separators: (period is a special case)
202 return new Separator(consume());
220 return getOperator();
222 return getCharLiteral();
224 return getStringLiteral();
226 // a period is a special case:
// A decimal digit right after the period means the period starts a numeric literal.
// Reading position line_pos+1 is in range because each line ends with a newline.
228 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
229 return getNumericLiteral();
// Three periods in a row are consumed together and reported as one separator.
231 line.charAt(line_pos+1)=='.' &&
232 line.charAt(line_pos+2)=='.') {
233 consume(); consume(); consume();
234 return new Separator('\u2026'); // unicode ellipsis character.
235 } else return new Separator(consume());
// Characters not handled by the switch: identifier start, then digit, else an error.
239 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
240 return getIdentifier();
241 if (Character.isDigit(line.charAt(line_pos)))
242 return getNumericLiteral();
243 throw new Error("Illegal character on line "+line_num);
// Reserved words recognised by getIdentifier(), which looks them up with a
// binary search and therefore needs this array in ascending String order.
// NOTE(review): the three trailing entries break that order (for example flag
// sorts before float), so the binary search as written cannot find them.
// Confirm whether they are matched elsewhere or whether the array needs re-sorting.
// NOTE(review): one line of this initializer is not visible in this excerpt.
246 static final String[] keywords = new String[] {
247 "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char",
248 "class", "const", "continue", "default", "do", "double", "else", "enum",
249 "extends", "final", "finally", "float", "for", "goto", "if",
250 "implements", "import", "instanceof", "int", "interface", "long",
251 "native", "new", "package", "private", "protected", "public",
252 "return", "short", "static", "strictfp", "super", "switch",
253 "synchronized", "this", "throw", "throws", "transient", "try", "void",
255 //keywords for failure aware computation
256 "flag", "tag", "task"};
/**
 * Reads an identifier-shaped word and classifies it as a null literal, a
 * boolean literal, a keyword or a plain identifier.
 * NOTE(review): a few lines of this method are not visible in this excerpt.
 *
 * @return a NullLiteral, BooleanLiteral, Keyword or Identifier token
 * @throws Error if the first character cannot start a Java identifier
 */
257 Token getIdentifier() {
// The first character is consumed unconditionally and validated afterwards.
259 StringBuffer sb = new StringBuffer().append(consume());
261 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
262 throw new Error("Invalid Java Identifier on line "+line_num);
// Extend the word while the following characters are identifier parts.
263 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
264 sb.append(consume());
265 String s = sb.toString();
266 // Now check against boolean literals and null literal.
267 if (s.equals("null")) return new NullLiteral();
268 if (s.equals("true")) return new BooleanLiteral(true);
269 if (s.equals("false")) return new BooleanLiteral(false);
270 // Check against keywords.
// Words that became keywords in later language versions stay plain identifiers
// when the corresponding version flag is off.
271 // pre-java 1.5 compatibility:
272 if (!isJava15 && s.equals("enum")) return new Identifier(s);
273 // pre-java 1.4 compatibility:
274 if (!isJava14 && s.equals("assert")) return new Identifier(s);
275 // pre-java 1.2 compatibility:
276 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
277 // use binary search.
// Half-open search window [l, r) over keywords; correct only if keywords is sorted.
278 for (int l=0, r=keywords.length; r > l; ) {
279 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
// The window is narrowed first; an exact match then returns immediately.
280 if (cmp < 0) r=x; else l=x+1;
281 if (cmp== 0) return new Keyword(s);
284 return new Identifier(s);
/**
 * Decides which kind of numeric literal starts at line_pos and delegates to the
 * matching reader: floating point, hexadecimal, octal or decimal.
 * NOTE(review): the declaration of i, the scan loop body and the case labels of
 * the switch are not visible in this excerpt.
 */
286 NumericLiteral getNumericLiteral() {
288 // leading decimal indicates float.
289 if (line.charAt(line_pos)=='.')
290 return getFloatingPointLiteral();
// A zero followed by x or X introduces a hexadecimal literal; the prefix is skipped.
292 if (line.charAt(line_pos)=='0' &&
293 (line.charAt(line_pos+1)=='x' ||
294 line.charAt(line_pos+1)=='X')) {
295 line_pos+=2; return getIntegerLiteral(/*base*/16);
297 // otherwise scan to first non-numeric
// Scans with a separate index i, so line_pos itself is not advanced by this loop header.
298 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
300 switch(line.charAt(i)) { // discriminate based on first non-numeric
308 return getFloatingPointLiteral();
// An integer literal that begins with zero is read in base 8, any other in base 10.
312 if (line.charAt(line_pos)=='0')
313 return getIntegerLiteral(/*base*/8);
314 return getIntegerLiteral(/*base*/10);
/**
 * Reads an integer literal in the given radix, starting at line_pos.
 * NOTE(review): the declaration of val and the line after the suffix test are
 * not visible in this excerpt.
 *
 * @param radix numeric base used to interpret the digits
 * @return a LongLiteral when an l or L suffix is present, otherwise an IntegerLiteral
 * @throws Error if a literal without suffix is outside the accepted int range
 */
317 NumericLiteral getIntegerLiteral(int radix) {
// Accumulate digits for as long as the current character is valid in this radix.
319 while (Character.digit(line.charAt(line_pos),radix)!=-1)
320 val = (val*radix) + Character.digit(consume(),radix);
321 if (line.charAt(line_pos) == 'l' ||
322 line.charAt(line_pos) == 'L') {
324 return new LongLiteral(val);
326 // we compare MAX_VALUE against val/2 to allow constants like
327 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
328 if ((val/2) > Integer.MAX_VALUE ||
329 val < Integer.MIN_VALUE)
330 throw new Error("Constant does not fit in integer on line "+line_num);
// The narrowing cast keeps the low 32 bits of val.
331 return new IntegerLiteral((int)val);
/**
 * Reads a floating-point literal by assembling its textual form in rep and then
 * parsing that text as a float or a double.
 * NOTE(review): the exponent handling, the try header and the case labels of
 * the suffix switch are not visible in this excerpt.
 *
 * @return a FloatLiteral or a DoubleLiteral
 * @throws Error if the collected text is not a valid floating-point number
 */
333 NumericLiteral getFloatingPointLiteral() {
// Integer part; this is empty when the literal starts with a period.
334 String rep = getDigits();
// Optional fraction: the period itself followed by any further digits.
335 if (line.charAt(line_pos)=='.')
336 rep+=consume() + getDigits();
// Optional exponent marker, which may be followed by a sign.
337 if (line.charAt(line_pos)=='e' ||
338 line.charAt(line_pos)=='E') {
340 if (line.charAt(line_pos)=='+' ||
341 line.charAt(line_pos)=='-')
// The character after the number selects between the float and double results below.
346 switch (line.charAt(line_pos)) {
350 return new FloatLiteral(Float.valueOf(rep).floatValue());
356 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
// A parse failure is reported as an Error carrying the line number and the cause text.
358 } catch (NumberFormatException e) {
359 throw new Error("Illegal floating-point on line "+line_num+": "+e);
// NOTE(review): the header of the method containing these lines is not visible in
// this excerpt; it is presumably the getDigits() helper called by
// getFloatingPointLiteral(). The body consumes consecutive decimal digits and
// returns them as a string, which is empty when the current character is not a digit.
363 StringBuffer sb = new StringBuffer();
364 while (Character.digit(line.charAt(line_pos),10)!=-1)
365 sb.append(consume());
366 return sb.toString();
/**
 * Reads an operator of one or more characters starting at line_pos.
 * NOTE(review): the switch and case labels that select between the returns
 * below, and the bodies of two if statements, are not visible in this excerpt.
 *
 * @return an Operator holding the operator text
 */
369 Operator getOperator() {
// first is consumed; second is only peeked and is consumed later if it belongs to the operator.
370 char first = consume();
371 char second= line.charAt(line_pos);
374 // single-character operators.
378 return new Operator(new String(new char[] {first}));
// Two-character operator: the peeked character is consumed as part of it.
385 return new Operator(new String(new char[] {first, consume()}));
389 // Check for trailing '='
391 return new Operator(new String(new char[] {first, consume()}));
393 // Special-case '<<', '>>' and '>>>'
394 if ((first=='<' && second=='<') || // <<
395 (first=='>' && second=='>')) { // >>
396 String op = new String(new char[] {first, consume()});
397 if (first=='>' && line.charAt(line_pos)=='>') // >>>
399 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
401 return new Operator(op);
404 // Otherwise return single operator.
405 return new Operator(new String(new char[] {first}));
/**
 * Reads a character literal, including both of its quote characters.
 * NOTE(review): the declaration of val, the case labels and the default branch
 * of the switch are not visible in this excerpt.
 *
 * @return a CharacterLiteral holding the decoded character
 * @throws Error if the literal is malformed
 */
408 CharacterLiteral getCharLiteral() {
// The opening quote is consumed here and validated together with the closing one.
409 char firstquote = consume();
// Dispatch on the first character inside the quotes.
411 switch (line.charAt(line_pos)) {
413 val = getEscapeSequence();
416 throw new Error("Invalid character literal on line "+line_num);
418 throw new Error("Invalid character literal on line "+line_num);
423 char secondquote = consume();
// Both delimiters have to be single quotes.
424 if (firstquote != '\'' || secondquote != '\'')
425 throw new Error("Invalid character literal on line "+line_num);
426 return new CharacterLiteral(val);
/**
 * Reads a string literal, including both of its quote characters, and decodes
 * the escape sequences inside it.
 * NOTE(review): the case labels of the inner switch are not visible in this excerpt.
 *
 * @return a StringLiteral holding the interned decoded text
 * @throws Error if the literal is malformed
 */
428 StringLiteral getStringLiteral() {
// The opening quote is consumed here and validated together with the closing one.
429 char openquote = consume();
430 StringBuffer val = new StringBuffer();
// Collect characters until the closing double quote is reached.
431 while (line.charAt(line_pos)!='\"') {
432 switch(line.charAt(line_pos)) {
434 val.append(getEscapeSequence());
437 throw new Error("Invalid string literal on line " + line_num);
439 val.append(consume());
443 char closequote = consume();
444 if (openquote != '\"' || closequote != '\"')
445 throw new Error("Invalid string literal on line " + line_num);
// The decoded text is interned before being wrapped in the token.
447 return new StringLiteral(val.toString().intern());
/**
 * Decodes one escape sequence starting at the backslash at line_pos.
 * NOTE(review): the case labels are not visible in this excerpt, so which input
 * character selects each return below is not shown.
 *
 * @return the character denoted by the escape sequence
 * @throws Error if the sequence does not start with a backslash or is not recognised
 */
450 char getEscapeSequence() {
// The leading backslash is consumed as part of this check.
451 if (consume() != '\\')
452 throw new Error("Invalid escape sequence on line " + line_num);
// Dispatch on the character that follows the backslash.
453 switch(line.charAt(line_pos)) {
// Single-character escapes: the selector is consumed and its value returned.
455 consume(); return '\b';
457 consume(); return '\t';
459 consume(); return '\n';
461 consume(); return '\f';
463 consume(); return '\r';
465 consume(); return '\"';
467 consume(); return '\'';
469 consume(); return '\\';
// Octal escapes: up to three digits in one branch and up to two in the other.
474 return (char) getOctal(3);
479 return (char) getOctal(2);
481 throw new Error("Invalid escape sequence on line " + line_num);
/**
 * Reads octal digits starting at line_pos and returns their value.
 * NOTE(review): the declarations of i and val, the non-digit branch of the loop
 * and the return statement are not visible in this excerpt.
 *
 * @param maxlength upper bound on the number of loop iterations, and so on the digits read
 * @throws Error if no digit was read or the value exceeds 0xFF
 */
484 int getOctal(int maxlength) {
486 for (i=0; i<maxlength; i++)
// Each octal digit found is consumed and shifts the running value by one octal place.
487 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
488 val = (8*val) + Character.digit(consume(), 8);
490 if ((i==0) || (val>0xFF)) // impossible.
491 throw new Error("Invalid octal escape sequence in line " + line_num);
/** Returns the character at line_pos in the current line and advances line_pos by one. */
495 char consume() { return line.charAt(line_pos++); }
/**
 * Loads the next line of input into line and records where it starts.
 * NOTE(review): the remaining statements of this method are not visible in this excerpt.
 *
 * @throws java.io.IOException if reading from the underlying reader fails
 */
496 void nextLine() throws java.io.IOException {
// readLine() returns null at end of input; line then stays null.
497 line=reader.readLine();
// readLine() strips the line terminator, so a newline is appended to every line read.
498 if (line!=null) line=line+'\n';
// Prepend a record whose head is the previous head plus the current line_pos.
499 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
/**
 * Reports an error message together with the line number on which the given
 * symbol starts.
 * NOTE(review): the statements after the println are not visible in this
 * excerpt, so any use of the column value c is not shown.
 *
 * @param msg text of the error message
 * @param info symbol whose left offset locates the error
 */
504 // Deal with error messages.
505 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
// Start from the current line: n is its number, c the offset of the symbol within it.
506 int n=line_num, c=info.left-lineL.head;
// Walk back through earlier line records, lowering n by one per step, until
// reaching the record whose head is at or before the start of the symbol.
507 for (LineList p = lineL; p!=null; p=p.tail, n--)
508 if (p.head<=info.left) { c=info.left-p.head; break; }
509 System.err.println(msg+" at line "+n);
// Error counter; starts at zero. NOTE(review): the code that increments it is not visible here.
512 private int num_errors = 0;
/** Returns the current value of num_errors. */
513 public int numErrors() { return num_errors; }
/**
 * Creates a list record.
 *
 * @param head value stored in the head field (the lexer passes a starting offset)
 * @param tail record stored in the tail field, or null for the last record in the chain
 */
518 LineList(int head, LineList tail) { this.head = head; this.tail = tail; }