//===----------------------------------------------------------------------===//
#include "TGLexer.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Config/config.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h" // for strtoull()/strtoll() define
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/TableGen/Error.h"
#include <cctype>
+#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
-#include <cerrno>
+
using namespace llvm;
TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
- CurBuffer = 0;
- CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
- CurPtr = CurBuf->getBufferStart();
- TokStart = 0;
+ CurBuffer = SrcMgr.getMainFileID(); // Begin with the source manager's main file.
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); // View of that buffer's contents.
+ CurPtr = CurBuf.begin(); // Lexing position starts at the first character.
+ TokStart = nullptr; // No token yet; LexToken() assigns this when a token begins.
}
SMLoc TGLexer::getLoc() const {
case 0: {
// A nul character in the stream is either the end of the current buffer or
// a random nul in the file. Disambiguate that here.
- if (CurPtr-1 != CurBuf->getBufferEnd())
+ if (CurPtr-1 != CurBuf.end())
return 0; // Just whitespace.
// If this is the end of an included file, pop the parent file off the
SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
if (ParentIncludeLoc != SMLoc()) {
CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
- CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
CurPtr = ParentIncludeLoc.getPointer();
return getNextChar();
}
}
}
+/// peekNextChar - Return the character \p Index positions past CurPtr without
+/// consuming any input.
+///
+/// The byte is widened through unsigned char so the result is never negative:
+/// callers pass it to isdigit()/isalpha(), whose behavior is undefined for
+/// negative arguments other than EOF (possible when plain char is signed and
+/// the input contains a byte >= 0x80).
+int TGLexer::peekNextChar(int Index) {
+  return static_cast<unsigned char>(CurPtr[Index]);
+}
+
tgtok::TokKind TGLexer::LexToken() {
TokStart = CurPtr;
// This always consumes at least one character.
switch (CurChar) {
default:
- // Handle letters: [a-zA-Z_#]
- if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(CurChar) || CurChar == '_')
return LexIdentifier();
-
+
// Unknown character, emit an error.
return ReturnError(TokStart, "Unexpected character");
case EOF: return tgtok::Eof;
case ')': return tgtok::r_paren;
case '=': return tgtok::equal;
case '?': return tgtok::question;
+ case '#': return tgtok::paste;
case 0:
case ' ':
return LexToken();
case '-': case '+':
case '0': case '1': case '2': case '3': case '4': case '5': case '6':
- case '7': case '8': case '9':
+ case '7': case '8': case '9': {
+ int NextChar = 0;
+ if (isdigit(CurChar)) {
+ // Allow identifiers to start with a number if it is followed by
+ // an identifier. This can happen with paste operations like
+ // foo#8i.
+ int i = 0;
+ do {
+ NextChar = peekNextChar(i++);
+ } while (isdigit(NextChar));
+
+ if (NextChar == 'x' || NextChar == 'b') {
+ // If this is [0-9]b[01] or [0-9]x[0-9A-Fa-f] this is most
+ // likely a number.
+ int NextNextChar = peekNextChar(i);
+ switch (NextNextChar) {
+ default:
+ break;
+ case '0': case '1':
+ if (NextChar == 'b')
+ return LexNumber();
+ // Fallthrough
+ case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ if (NextChar == 'x')
+ return LexNumber();
+ break;
+ }
+ }
+ }
+
+ if (isalpha(NextChar) || NextChar == '_')
+ return LexIdentifier();
+
return LexNumber();
+ }
case '"': return LexString();
case '$': return LexVarName();
case '[': return LexBracket();
while (*CurPtr != '"') {
// If we hit the end of the buffer, report an error.
- if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd())
+ if (*CurPtr == 0 && CurPtr == CurBuf.end())
return ReturnError(StrStart, "End of file in string literal");
if (*CurPtr == '\n' || *CurPtr == '\r')
// If we hit the end of the buffer, report an error.
case '\0':
- if (CurPtr == CurBuf->getBufferEnd())
+ if (CurPtr == CurBuf.end())
return ReturnError(StrStart, "End of file in string literal");
// FALL THROUGH
default:
const char *IdentStart = TokStart;
// Match the rest of the identifier regex: [0-9a-zA-Z_#]*
- while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' ||
- *CurPtr == '#')
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
++CurPtr;
// Check to see if this identifier is a keyword.
StringRef Str(IdentStart, CurPtr-IdentStart);
- if (Str == "int") return tgtok::Int;
- if (Str == "bit") return tgtok::Bit;
- if (Str == "bits") return tgtok::Bits;
- if (Str == "string") return tgtok::String;
- if (Str == "list") return tgtok::List;
- if (Str == "code") return tgtok::Code;
- if (Str == "dag") return tgtok::Dag;
-
- if (Str == "class") return tgtok::Class;
- if (Str == "def") return tgtok::Def;
- if (Str == "multidef") return tgtok::MultiDef;
- if (Str == "defm") return tgtok::Defm;
- if (Str == "multiclass") return tgtok::MultiClass;
- if (Str == "field") return tgtok::Field;
- if (Str == "let") return tgtok::Let;
- if (Str == "in") return tgtok::In;
-
if (Str == "include") {
if (LexInclude()) return tgtok::Error;
return Lex();
}
- CurStrVal.assign(Str.begin(), Str.end());
- return tgtok::Id;
+ tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
+ .Case("int", tgtok::Int)
+ .Case("bit", tgtok::Bit)
+ .Case("bits", tgtok::Bits)
+ .Case("string", tgtok::String)
+ .Case("list", tgtok::List)
+ .Case("code", tgtok::Code)
+ .Case("dag", tgtok::Dag)
+ .Case("class", tgtok::Class)
+ .Case("def", tgtok::Def)
+ .Case("foreach", tgtok::Foreach)
+ .Case("defm", tgtok::Defm)
+ .Case("multiclass", tgtok::MultiClass)
+ .Case("field", tgtok::Field)
+ .Case("let", tgtok::Let)
+ .Case("in", tgtok::In)
+ .Default(tgtok::Id);
+
+ if (Kind == tgtok::Id)
+ CurStrVal.assign(Str.begin(), Str.end());
+ return Kind;
}
/// LexInclude - We just read the "include" token. Get the string token that
CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
IncludedFile);
- if (CurBuffer == -1) {
+ if (!CurBuffer) {
PrintError(getLoc(), "Could not find include file '" + Filename + "'");
return true;
}
- Dependencies.push_back(IncludedFile);
+ DependenciesMapTy::const_iterator Found = Dependencies.find(IncludedFile);
+ if (Found != Dependencies.end()) {
+ PrintError(getLoc(),
+ "File '" + IncludedFile + "' has already been included.");
+ SrcMgr.PrintMessage(Found->second, SourceMgr::DK_Note,
+ "previously included here");
+ return true;
+ }
+ Dependencies.insert(std::make_pair(IncludedFile, getLoc()));
// Save the line number and lex buffer of the includer.
- CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
- CurPtr = CurBuf->getBufferStart();
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
+ CurPtr = CurBuf.begin();
return false;
}
return; // Newline is end of comment.
case 0:
// If this is the end of the buffer, end the comment.
- if (CurPtr == CurBuf->getBufferEnd())
+ if (CurPtr == CurBuf.end())
return;
break;
}
return ReturnError(TokStart, "Invalid hexadecimal number");
errno = 0;
- CurIntVal = strtoll(NumStart, 0, 16);
+ CurIntVal = strtoll(NumStart, nullptr, 16);
if (errno == EINVAL)
return ReturnError(TokStart, "Invalid hexadecimal number");
if (errno == ERANGE) {
errno = 0;
- CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+ CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
if (errno == EINVAL)
return ReturnError(TokStart, "Invalid hexadecimal number");
if (errno == ERANGE)
// Requires at least one binary digit.
if (CurPtr == NumStart)
return ReturnError(CurPtr-2, "Invalid binary number");
- CurIntVal = strtoll(NumStart, 0, 2);
- return tgtok::IntVal;
+ CurIntVal = strtoll(NumStart, nullptr, 2);
+ return tgtok::BinaryIntVal;
}
}
while (isdigit(CurPtr[0]))
++CurPtr;
- CurIntVal = strtoll(TokStart, 0, 10);
+ CurIntVal = strtoll(TokStart, nullptr, 10);
return tgtok::IntVal;
}
.Case("head", tgtok::XHead)
.Case("tail", tgtok::XTail)
.Case("con", tgtok::XConcat)
+ .Case("add", tgtok::XADD)
+ .Case("and", tgtok::XAND)
.Case("shl", tgtok::XSHL)
.Case("sra", tgtok::XSRA)
.Case("srl", tgtok::XSRL)
.Case("empty", tgtok::XEmpty)
.Case("subst", tgtok::XSubst)
.Case("foreach", tgtok::XForEach)
+ .Case("listconcat", tgtok::XListConcat)
.Case("strconcat", tgtok::XStrConcat)
.Default(tgtok::Error);