4 import java.io.LineNumberReader;
8 * Copyright (C) 2002 C. Scott Ananian <cananian@alumni.princeton.edu>
9 * This program is released under the terms of the GPL; see the file
10 * COPYING for more details. There is NO WARRANTY on this code.
// Line-oriented input; the constructor sets this to a LineNumberReader layered over an EscapedUnicodeReader.
14 LineNumberReader reader;
// Linked list of line-start offsets: nextLine() pushes a new head for every line read,
// and errorMsg() walks it to map a symbol position back to a line.
21 LineList lineL = new LineList(-line_pos, null); // sentinel for line #0
/**
 * Builds a lexer over the given character stream.
 * The argument is wrapped in an EscapedUnicodeReader, and that in turn in a
 * LineNumberReader, which is stored in the {@code reader} field.
 *
 * @param reader character stream to scan
 */
23 public Lexer(Reader reader) {
24 this.reader = new LineNumberReader(new EscapedUnicodeReader(reader));
/**
 * Produces the next symbol.
 * While no lookahead FIFO exists the symbol is scanned directly with
 * _nextToken(); once one has been created, symbols are taken from it instead.
 *
 * @throws java.io.IOException declared because scanning reads from the input
 */
29 public java_cup.runtime.Symbol nextToken() throws java.io.IOException {
30 java_cup.runtime.Symbol sym =
31 lookahead==null ? _nextToken() : lookahead.get();
/**
 * Looks ahead in the symbol stream to decide whether the current LT should be
 * changed to a PLT.
 * The check starts from {@code last} (must be a Sym.IDENTIFIER), then peeks at
 * upcoming symbols for the shape
 * IDENTIFIER (DOT IDENTIFIER)* (LBRACK RBRACK)* followed by one of
 * LT GT COMMA EXTENDS IMPLEMENTS.
 * The lookahead FIFO is created on first use and is fed by _nextToken().
 * NOTE(review): peek(i) is presumably non-consuming; confirm against FIFO.
 *
 * @throws java.io.IOException declared because peeking may scan more input
 */
35 private boolean shouldBePLT() throws java.io.IOException {
36 // look ahead to see if this LT should be changed to a PLT
37 if (last==null || last.sym!=Sym.IDENTIFIER)
// Lazily create the lookahead buffer; its getter pulls fresh symbols from the scanner.
39 if (lookahead==null) lookahead = new FIFO(new FIFO.Getter() {
40 java_cup.runtime.Symbol next() throws java.io.IOException
41 { return _nextToken(); }
44 // skip past IDENTIFIER (DOT IDENTIFIER)*
45 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
47 while (lookahead.peek(i).sym == Sym.DOT) {
49 if (lookahead.peek(i++).sym != Sym.IDENTIFIER)
52 // skip past (LBRACK RBRACK)*
53 while (lookahead.peek(i).sym == Sym.LBRACK) {
55 if (lookahead.peek(i++).sym != Sym.RBRACK)
58 // now the next sym has to be one of LT GT COMMA EXTENDS IMPLEMENTS
59 switch(lookahead.peek(i).sym) {
// Symbol that shouldBePLT() tests for being a Sym.IDENTIFIER; null initially.
69 private java_cup.runtime.Symbol last = null;
// Buffer of peeked symbols; created lazily by shouldBePLT() and, once present, read by nextToken().
70 private FIFO lookahead = null;
/**
 * Scans input elements until one of them is a Token and returns that token's symbol.
 * Whenever a DocumentationComment is encountered on the way, its text is
 * stored in {@code comment}. The returned symbol's left/right fields are
 * overwritten with positions computed as lineL.head + line_pos, taken just
 * before and just after the element was read.
 *
 * @throws java.io.IOException declared because scanning reads from the input
 */
71 public java_cup.runtime.Symbol _nextToken() throws java.io.IOException {
73 * Identifiers/Keywords/true/false/null (start with java letter)
74 * numeric literal (start with number)
75 * character literal (start with single quote)
76 * string (start with double quote)
77 * separator (parens, braces, brackets, semicolon, comma, period)
78 * operator (equals, plus, minus, etc)
80 * comment (start with slash)
// Position of the first character of the element about to be read.
85 startpos = lineL.head + line_pos;
86 ie = getInputElement();
87 if (ie instanceof DocumentationComment)
88 comment = ((Comment)ie).getComment();
89 } while (!(ie instanceof Token));
// line_pos now points one past the token, hence the -1.
90 endpos = lineL.head + line_pos - 1;
92 // System.out.println(ie.toString()); // uncomment to debug lexer.
93 java_cup.runtime.Symbol sym = ((Token)ie).token();
94 // fix up left/right positions.
95 sym.left = startpos; sym.right = endpos;
/**
 * Debugging aid: reads a single input element and prints it to System.out.
 *
 * @return false when the element read is an EOF, true otherwise
 * @throws java.io.IOException declared because scanning reads from the input
 */
99 public boolean debug_lex() throws java.io.IOException {
100 InputElement ie = getInputElement();
101 System.out.println(ie);
102 return !(ie instanceof EOF);
106 public String lastComment() { return comment; }
107 public void clearComment() { comment=""; }
/**
 * Classifies the input at the current position and returns it as an InputElement.
 * In the cases visible here, a space, tab, form feed or newline is consumed
 * and returned as a one-character WhiteSpace.
 *
 * @throws java.io.IOException declared because scanning reads from the input
 */
109 InputElement getInputElement() throws java.io.IOException {
114 if (line.length()<=line_pos) { // end of line.
120 switch (line.charAt(line_pos)) {
123 case ' ': // ASCII SP
124 case '\t': // ASCII HT
125 case '\f': // ASCII FF
126 case '\n': // LineTerminator
127 return new WhiteSpace(consume());
// NOTE(review): '\020' is an octal escape for code 16 (0x10, DLE). ASCII SUB is
// code 26 (0x1A), which would be written '\032' -- confirm which one is intended.
130 case '\020': // ASCII SUB
143 // May get Token instead of Comment.
/**
 * Handles input that starts with '/', dispatching on the character after it.
 * A second '/' yields an EndOfLineComment holding the rest of the line and moves
 * line_pos to the end of the line. A '*' starts a block comment that is
 * collected by snarfComment(), as a DocumentationComment when the next
 * character examined is another '*' and as a TraditionalComment otherwise.
 * Any other character means the slash begins a token rather than a comment.
 *
 * @throws java.io.IOException declared because block comments may span lines
 */
144 InputElement getComment() throws java.io.IOException {
146 // line.charAt(line_pos+0) is '/'
147 switch (line.charAt(line_pos+1)) {
148 case '/': // EndOfLineComment
// Everything after the two slashes, up to the end of the stored line.
149 comment = line.substring(line_pos+2);
150 line_pos = line.length();
151 return new EndOfLineComment(comment);
152 case '*': // TraditionalComment or DocumentationComment
154 if (line.charAt(line_pos)=='*') { // DocumentationComment
155 return snarfComment(new DocumentationComment());
156 } else { // TraditionalComment
157 return snarfComment(new TraditionalComment());
159 default: // it's a token, not a comment.
/**
 * Collects the text of a block comment into {@code c} until the closing
 * star-slash pair is found, then returns {@code c}.
 * Text is buffered per line: when a line holds no further '*', the remainder
 * is appended to {@code c} via appendLine() and the buffer is reset. A '*' that
 * is not followed by '/' is kept as ordinary comment text.
 *
 * @param c comment object that receives the collected lines
 * @return the same {@code c}, after the terminator has been consumed
 * @throws java.io.IOException declared because the comment may span lines
 * @throws Error "Unterminated comment at end of file."
 */
164 Comment snarfComment(Comment c) throws java.io.IOException {
165 StringBuffer text=new StringBuffer();
166 while(true) { // Grab CommentTail
167 while (line.charAt(line_pos)!='*') { // Add NotStar to comment.
168 int star_pos = line.indexOf('*', line_pos);
// Rest of the line belongs to the comment: flush it as one comment line.
170 text.append(line.substring(line_pos));
171 c.appendLine(text.toString()); text.setLength(0);
172 line_pos = line.length();
175 throw new Error("Unterminated comment at end of file.");
// Copy the text that precedes the next '*' on this line.
177 text.append(line.substring(line_pos, star_pos));
181 // At this point, line.charAt(line_pos)=='*'
182 // Grab CommentTailStar starting at line_pos+1.
183 if (line.charAt(line_pos+1)=='/') { // safe because line ends with '\n'
184 c.appendLine(text.toString()); line_pos+=2; return c;
186 text.append(line.charAt(line_pos++)); // add the '*'
191 // Tokens are: Identifiers, Keywords, Literals, Separators, Operators.
192 switch (line.charAt(line_pos)) {
193 // Separators: (period is a special case)
202 return new Separator(consume());
220 return getOperator();
222 return getCharLiteral();
224 return getStringLiteral();
226 // a period is a special case:
228 if (Character.digit(line.charAt(line_pos+1),10)!=-1)
229 return getNumericLiteral();
231 line.charAt(line_pos+1)=='.' &&
232 line.charAt(line_pos+2)=='.') {
233 consume(); consume(); consume();
234 return new Separator('\u2026'); // unicode ellipsis character.
235 } else return new Separator(consume());
239 if (Character.isJavaIdentifierStart(line.charAt(line_pos)))
240 return getIdentifier();
241 if (Character.isDigit(line.charAt(line_pos)))
242 return getNumericLiteral();
243 throw new Error("Illegal character on line "+line_num);
246 static final String[] keywords = new String[] {
247 "abstract", "assert", "atomic", "boolean", "break", "byte", "case", "catch", "char",
248 "class", "const", "continue", "default", "do", "double", "else", "enum",
249 "extends", "external", "final", "finally",
250 "flag", //keyword for failure aware computation
251 "float", "for", "global", "goto", "if",
252 "implements", "import", "instanceof", "int", "interface", "long",
253 "native", "new", "optional", "package", "private", "protected", "public",
254 "return", "short", "static", "strictfp", "super", "switch", "synchronized",
255 "tag", "task", "taskexit", //keywords for failure aware computation
256 "this", "throw", "throws", "transient", "try", "void",
257 "volatile", "while"};
/**
 * Scans an identifier-shaped word starting at the current position and
 * classifies it.
 * The word "null" becomes a NullLiteral and "true"/"false" a BooleanLiteral.
 * "enum", "assert" and "strictfp" are returned as plain Identifiers when the
 * matching isJava15 / isJava14 / isJava12 flag is off. Any other word found in
 * the keywords array becomes a Keyword, and everything else an Identifier.
 *
 * @return the token for the scanned word
 * @throws Error if the first character is not a valid Java identifier start
 */
258 Token getIdentifier() {
260 StringBuffer sb = new StringBuffer().append(consume());
262 if (!Character.isJavaIdentifierStart(sb.charAt(0)))
263 throw new Error("Invalid Java Identifier on line "+line_num);
264 while (Character.isJavaIdentifierPart(line.charAt(line_pos)))
265 sb.append(consume());
266 String s = sb.toString();
267 // Now check against boolean literals and null literal.
268 if (s.equals("null")) return new NullLiteral();
269 if (s.equals("true")) return new BooleanLiteral(true);
270 if (s.equals("false")) return new BooleanLiteral(false);
271 // Check against keywords.
272 // pre-java 1.5 compatibility:
273 if (!isJava15 && s.equals("enum")) return new Identifier(s);
274 // pre-java 1.4 compatibility:
275 if (!isJava14 && s.equals("assert")) return new Identifier(s);
276 // pre-java 1.2 compatibility:
277 if (!isJava12 && s.equals("strictfp")) return new Identifier(s);
278 // use binary search.
// Half-open window [l, r) over keywords; relies on the array being sorted.
279 for (int l=0, r=keywords.length; r > l; ) {
280 int x = (l+r)/2, cmp = s.compareTo(keywords[x]);
281 if (cmp < 0) r=x; else l=x+1;
282 if (cmp== 0) return new Keyword(s);
285 return new Identifier(s);
/**
 * Decides which kind of numeric literal starts at the current position and
 * delegates to the matching scanner.
 * A leading '.' goes to getFloatingPointLiteral(). A "0x"/"0X" prefix is
 * skipped and the rest read as base 16. Otherwise the decimal digits are
 * scanned over without consuming, and the first non-digit character decides:
 * in the visible branches the result is either a floating-point literal or an
 * integer literal, read as base 8 when the text starts with '0' and as
 * base 10 otherwise.
 */
287 NumericLiteral getNumericLiteral() {
289 // leading decimal indicates float.
290 if (line.charAt(line_pos)=='.')
291 return getFloatingPointLiteral();
293 if (line.charAt(line_pos)=='0' &&
294 (line.charAt(line_pos+1)=='x' ||
295 line.charAt(line_pos+1)=='X')) {
// Skip the two-character hex prefix before reading the digits.
296 line_pos+=2; return getIntegerLiteral(/*base*/16);
298 // otherwise scan to first non-numeric
299 for (i=line_pos; Character.digit(line.charAt(i),10)!=-1; )
301 switch(line.charAt(i)) { // discriminate based on first non-numeric
309 return getFloatingPointLiteral();
313 if (line.charAt(line_pos)=='0')
314 return getIntegerLiteral(/*base*/8);
315 return getIntegerLiteral(/*base*/10);
/**
 * Reads digits in the given radix and returns the resulting integer literal.
 * An 'l' or 'L' following the digits produces a LongLiteral. Without that
 * suffix the value is range-checked and returned as an IntegerLiteral.
 * NOTE(review): no overflow check on the digit accumulation is visible here;
 * confirm how over-long literals are meant to behave.
 *
 * @param radix numeric base used to interpret the digits (callers pass 8, 10 or 16)
 * @throws Error "Constant does not fit in integer on line N" when the range check fails
 */
318 NumericLiteral getIntegerLiteral(int radix) {
320 while (Character.digit(line.charAt(line_pos),radix)!=-1)
321 val = (val*radix) + Character.digit(consume(),radix);
322 if (line.charAt(line_pos) == 'l' ||
323 line.charAt(line_pos) == 'L') {
325 return new LongLiteral(val);
327 // we compare MAX_VALUE against val/2 to allow constants like
328 // 0xFFFF0000 to get past the test. (unsigned long->signed int)
329 if ((val/2) > Integer.MAX_VALUE ||
330 val < Integer.MIN_VALUE)
331 throw new Error("Constant does not fit in integer on line "+line_num);
332 return new IntegerLiteral((int)val);
/**
 * Scans a floating-point literal and converts its text to a value.
 * The text is assembled from leading digits, an optional '.' with further
 * digits, and an optional exponent introduced by 'e' or 'E' that may carry a
 * '+' or '-' sign. The character after that selects the result type; the
 * visible branches return a FloatLiteral parsed with Float.valueOf or a
 * DoubleLiteral parsed with Double.valueOf.
 *
 * @throws Error "Illegal floating-point on line N: ..." when the text cannot be parsed
 */
334 NumericLiteral getFloatingPointLiteral() {
335 String rep = getDigits();
336 if (line.charAt(line_pos)=='.')
// char + String concatenates, so this appends the '.' followed by the fraction digits.
337 rep+=consume() + getDigits();
338 if (line.charAt(line_pos)=='e' ||
339 line.charAt(line_pos)=='E') {
341 if (line.charAt(line_pos)=='+' ||
342 line.charAt(line_pos)=='-')
347 switch (line.charAt(line_pos)) {
351 return new FloatLiteral(Float.valueOf(rep).floatValue());
357 return new DoubleLiteral(Double.valueOf(rep).doubleValue());
359 } catch (NumberFormatException e) {
360 throw new Error("Illegal floating-point on line "+line_num+": "+e);
364 StringBuffer sb = new StringBuffer();
365 while (Character.digit(line.charAt(line_pos),10)!=-1)
366 sb.append(consume());
367 return sb.toString();
/**
 * Scans an operator starting at the current position.
 * The first character is consumed up front and the one after it is inspected
 * without consuming. Depending on the pair, the result is a one-character
 * operator, a two-character operator (including forms with a trailing '='),
 * or one of the shift operators with an optional '=' suffix. When nothing
 * longer matches, the first character alone is returned.
 */
370 Operator getOperator() {
371 char first = consume();
372 char second= line.charAt(line_pos);
375 // single-character operators.
379 return new Operator(new String(new char[] {first}));
386 return new Operator(new String(new char[] {first, consume()}));
390 // Check for trailing '='
392 return new Operator(new String(new char[] {first, consume()}));
394 // Special-case '<<', '>>' and '>>>'
395 if ((first=='<' && second=='<') || // <<
396 (first=='>' && second=='>')) { // >>
397 String op = new String(new char[] {first, consume()});
398 if (first=='>' && line.charAt(line_pos)=='>') // >>>
400 if (line.charAt(line_pos)=='=') // <<=, >>=, >>>=
402 return new Operator(op);
405 // Otherwise return single operator.
406 return new Operator(new String(new char[] {first}));
/**
 * Scans a character literal: an opening quote, one character or escape
 * sequence, and a closing quote.
 * Escape sequences are decoded by getEscapeSequence(). Both consumed quote
 * characters are checked to be single quotes.
 *
 * @throws Error "Invalid character literal on line N" on malformed input
 */
409 CharacterLiteral getCharLiteral() {
410 char firstquote = consume();
412 switch (line.charAt(line_pos)) {
414 val = getEscapeSequence();
417 throw new Error("Invalid character literal on line "+line_num);
419 throw new Error("Invalid character literal on line "+line_num);
424 char secondquote = consume();
425 if (firstquote != '\'' || secondquote != '\'')
426 throw new Error("Invalid character literal on line "+line_num);
427 return new CharacterLiteral(val);
/**
 * Scans a string literal delimited by double quotes.
 * Characters are accumulated until the closing quote; escape sequences are
 * decoded by getEscapeSequence(). Both consumed quote characters are checked
 * to be double quotes, and the resulting text is interned before being
 * wrapped in a StringLiteral.
 *
 * @throws Error "Invalid string literal on line N" on malformed input
 */
429 StringLiteral getStringLiteral() {
430 char openquote = consume();
431 StringBuffer val = new StringBuffer();
432 while (line.charAt(line_pos)!='\"') {
433 switch(line.charAt(line_pos)) {
435 val.append(getEscapeSequence());
438 throw new Error("Invalid string literal on line " + line_num);
440 val.append(consume());
444 char closequote = consume();
445 if (openquote != '\"' || closequote != '\"')
446 throw new Error("Invalid string literal on line " + line_num);
448 return new StringLiteral(val.toString().intern());
/**
 * Decodes one backslash escape at the current position and returns the
 * character it denotes.
 * The leading backslash is consumed first. The visible branches produce
 * backspace, tab, newline, form feed, carriage return, double quote, single
 * quote and backslash, plus octal escapes read by getOctal() with a limit of
 * three or two digits.
 *
 * @throws Error "Invalid escape sequence on line N" when the input does not
 *         start with a backslash or the escape is not recognised
 */
451 char getEscapeSequence() {
452 if (consume() != '\\')
453 throw new Error("Invalid escape sequence on line " + line_num);
454 switch(line.charAt(line_pos)) {
456 consume(); return '\b';
458 consume(); return '\t';
460 consume(); return '\n';
462 consume(); return '\f';
464 consume(); return '\r';
466 consume(); return '\"';
468 consume(); return '\'';
470 consume(); return '\\';
475 return (char) getOctal(3);
480 return (char) getOctal(2);
482 throw new Error("Invalid escape sequence on line " + line_num);
/**
 * Reads up to {@code maxlength} octal digits at the current position and
 * accumulates their value.
 *
 * @param maxlength upper bound on the number of digits examined
 * @throws Error "Invalid octal escape sequence in line N" when no digit was
 *         read or the value exceeds 0xFF
 */
485 int getOctal(int maxlength) {
487 for (i=0; i<maxlength; i++)
488 if (Character.digit(line.charAt(line_pos), 8)!=-1) {
489 val = (8*val) + Character.digit(consume(), 8);
491 if ((i==0) || (val>0xFF)) // impossible.
492 throw new Error("Invalid octal escape sequence in line " + line_num);
496 char consume() { return line.charAt(line_pos++); }
/**
 * Reads the next line from {@code reader} into {@code line}.
 * A non-null line gets '\n' appended, which is what lets the scanners look one
 * character past the current position safely. A new LineList node is pushed
 * whose head is the previous head plus the current line_pos.
 *
 * @throws java.io.IOException if reading from the underlying reader fails
 */
497 void nextLine() throws java.io.IOException {
498 line=reader.readLine();
// readLine() strips the terminator; put one back so every stored line ends with '\n'.
499 if (line!=null) line=line+'\n';
500 lineL = new LineList(lineL.head+line_pos, lineL); // for error reporting
505 // Deal with error messages.
/**
 * Reports an error message together with the line on which {@code info} starts.
 * The line is found by walking the lineL list from the newest entry backwards,
 * decrementing the line counter at each step, until an entry whose head is
 * not greater than info.left is reached. {@code c} is then the offset of the
 * symbol from that entry's head.
 *
 * @param msg  text of the message
 * @param info symbol whose {@code left} position locates the error
 */
506 public void errorMsg(String msg, java_cup.runtime.Symbol info) {
507 int n=line_num, c=info.left-lineL.head;
508 for (LineList p = lineL; p!=null; p=p.tail, n--)
509 if (p.head<=info.left) { c=info.left-p.head; break; }
510 System.err.println(msg+" at line "+n);
// Error counter exposed through numErrors(); starts at zero.
513 private int num_errors = 0;
514 public int numErrors() { return num_errors; }
/**
 * Creates a list node.
 *
 * @param head value stored in this node
 * @param tail the rest of the list, or null for the last node
 */
LineList(int head, LineList tail) {
    this.tail = tail;
    this.head = head;
}