#include "AsmLexer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Config/config.h" // for strtoull.
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
using namespace llvm;
-AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
+AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
CurBuffer = 0;
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = CurBuf->getBufferStart();
TokStart = 0;
}
-void AsmLexer::PrintError(const char *Loc, const std::string &Msg) const {
- SrcMgr.PrintError(SMLoc::getFromPointer(Loc), Msg);
+AsmLexer::~AsmLexer() {
}
-void AsmLexer::PrintError(SMLoc Loc, const std::string &Msg) const {
- SrcMgr.PrintError(Loc, Msg);
+SMLoc AsmLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
}
+void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg,
+ const char *Type) const {
+ SrcMgr.PrintMessage(Loc, Msg, Type);
+}
+
+/// ReturnError - Set the error to the specified string at the specified
+/// location. This is defined to always return AsmToken::Error.
+AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+ SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
+ return AsmToken(AsmToken::Error, StringRef(Loc, 0));
+}
+
+/// EnterIncludeFile - Enter the specified file. This prints an error and
+/// returns true on failure.
+bool AsmLexer::EnterIncludeFile(const std::string &Filename) {
+ int NewBuf = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr));
+ if (NewBuf == -1)
+ return true;
+
+ // Save the line number and lex buffer of the includer.
+ CurBuffer = NewBuf;
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurPtr = CurBuf->getBufferStart();
+ return false;
+}
+
+
int AsmLexer::getNextChar() {
char CurChar = *CurPtr++;
switch (CurChar) {
CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = ParentIncludeLoc.getPointer();
+
+ // Reset the token start pointer to the start of the new file.
+ TokStart = CurPtr;
+
return getNextChar();
}
}
}
-asmtok::TokKind AsmLexer::LexToken() {
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+AsmToken AsmLexer::LexIdentifier() {
+ while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
+ *CurPtr == '.' || *CurPtr == '@')
+ ++CurPtr;
+ return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexPercent: Register: %[a-zA-Z0-9]+
+AsmToken AsmLexer::LexPercent() {
+ if (!isalnum(*CurPtr))
+ return AsmToken(AsmToken::Percent, StringRef(CurPtr, 1)); // Single %.
+
+ while (isalnum(*CurPtr))
+ ++CurPtr;
+
+ return AsmToken(AsmToken::Register, StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexSlash: Slash: /
+/// C-Style Comment: /* ... */
+AsmToken AsmLexer::LexSlash() {
+ switch (*CurPtr) {
+ case '*': break; // C style comment.
+ case '/': return ++CurPtr, LexLineComment();
+ default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1));
+ }
+
+ // C Style comment.
+ ++CurPtr; // skip the star.
+ while (1) {
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ case EOF:
+ return ReturnError(TokStart, "unterminated comment");
+ case '*':
+ // End of the comment?
+ if (CurPtr[0] != '/') break;
+
+ ++CurPtr; // End the */.
+ return LexToken();
+ }
+ }
+}
+
+/// LexLineComment: Comment: #[^\n]*
+/// : //[^\n]*
+AsmToken AsmLexer::LexLineComment() {
+ // FIXME: This is broken if we happen to a comment at the end of a file, which
+ // was .included, and which doesn't end with a newline.
+ int CurChar = getNextChar();
+ while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
+ CurChar = getNextChar();
+
+ if (CurChar == EOF)
+ return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
+ return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
+}
+
+
+/// LexDigit: First character is [0-9].
+/// Local Label: [0-9][:]
+/// Forward/Backward Label: [0-9][fb]
+/// Binary integer: 0b[01]+
+/// Octal integer: 0[0-7]+
+/// Hex integer: 0x[0-9a-fA-F]+
+/// Decimal integer: [1-9][0-9]*
+/// TODO: FP literal.
+AsmToken AsmLexer::LexDigit() {
+ if (*CurPtr == ':')
+ return ReturnError(TokStart, "FIXME: local label not implemented");
+ if (*CurPtr == 'f' || *CurPtr == 'b')
+ return ReturnError(TokStart, "FIXME: directional label not implemented");
+
+ // Decimal integer: [1-9][0-9]*
+ if (CurPtr[-1] != '0') {
+ while (isdigit(*CurPtr))
+ ++CurPtr;
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ strtoll(TokStart, 0, 10));
+ }
+
+ if (*CurPtr == 'b') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (CurPtr[0] == '0' || CurPtr[0] == '1')
+ ++CurPtr;
+
+ // Requires at least one binary digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "Invalid binary number");
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ strtoll(NumStart, 0, 2));
+ }
+
+ if (*CurPtr == 'x') {
+ ++CurPtr;
+ const char *NumStart = CurPtr;
+ while (isxdigit(CurPtr[0]))
+ ++CurPtr;
+
+ // Requires at least one hex digit.
+ if (CurPtr == NumStart)
+ return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+
+ errno = 0;
+ if (errno == EINVAL)
+ return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+ if (errno == ERANGE) {
+ errno = 0;
+ if (errno == EINVAL)
+ return ReturnError(CurPtr-2, "Invalid hexadecimal number");
+ if (errno == ERANGE)
+ return ReturnError(CurPtr-2, "Hexadecimal number out of range");
+ }
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ (int64_t) strtoull(NumStart, 0, 16));
+ }
+
+ // Must be an octal number, it starts with 0.
+ while (*CurPtr >= '0' && *CurPtr <= '7')
+ ++CurPtr;
+ return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
+ strtoll(TokStart, 0, 8));
+}
+
+/// LexQuote: String: "..."
+AsmToken AsmLexer::LexQuote() {
+ int CurChar = getNextChar();
+ // TODO: does gas allow multiline string constants?
+ while (CurChar != '"') {
+ if (CurChar == '\\') {
+ // Allow \", etc.
+ CurChar = getNextChar();
+ }
+
+ if (CurChar == EOF)
+ return ReturnError(TokStart, "unterminated string constant");
+
+ CurChar = getNextChar();
+ }
+
+ return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
+}
+
+StringRef AsmLexer::LexUntilEndOfStatement() {
+ TokStart = CurPtr;
+
+ while (*CurPtr != '#' && // Start of line comment.
+ *CurPtr != ';' && // End of statement marker.
+ *CurPtr != '\n' &&
+ *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd()))
+ ++CurPtr;
+ return StringRef(TokStart, CurPtr-TokStart);
+}
+
+AsmToken AsmLexer::LexToken() {
TokStart = CurPtr;
// This always consumes at least one character.
int CurChar = getNextChar();
switch (CurChar) {
default:
- // Handle letters: [a-zA-Z_]
-// if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
-// return LexIdentifier();
+ // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
+ return LexIdentifier();
// Unknown character, emit an error.
- return asmtok::Error;
- case EOF: return asmtok::Eof;
+ return ReturnError(TokStart, "invalid character in input");
+ case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
case 0:
case ' ':
case '\t':
- case '\n':
- case '\r':
// Ignore whitespace.
return LexToken();
- case ':': return asmtok::Colon;
- case '+': return asmtok::Plus;
- case '-': return asmtok::Minus;
+ case '\n': // FALL THROUGH.
+ case '\r': // FALL THROUGH.
+ case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
+ case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
+ case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
+ case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
+ case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
+ case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
+ case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
+ case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
+ case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
+ case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
+ case '=':
+ if (*CurPtr == '=')
+ return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
+ case '|':
+ if (*CurPtr == '|')
+ return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
+ case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
+ case '&':
+ if (*CurPtr == '&')
+ return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
+ case '!':
+ if (*CurPtr == '=')
+ return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
+ return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
+ case '%': return LexPercent();
+ case '/': return LexSlash();
+ case '#': return LexLineComment();
+ case '"': return LexQuote();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ return LexDigit();
+ case '<':
+ switch (*CurPtr) {
+ case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
+ StringRef(TokStart, 2));
+ case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
+ StringRef(TokStart, 2));
+ case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
+ StringRef(TokStart, 2));
+ default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
+ }
+ case '>':
+ switch (*CurPtr) {
+ case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
+ StringRef(TokStart, 2));
+ case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
+ StringRef(TokStart, 2));
+ default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
+ }
+
+ // TODO: Quoted identifiers (objc methods etc)
+ // local labels: [0-9][:]
+ // Forward/backward labels: [0-9][fb]
+ // Integers, fp constants, character constants.
}
-}
\ No newline at end of file
+}