From cbbe2484418536264b1a26c517c16d505a61d5c8 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Tue, 28 Jul 2009 17:58:44 +0000 Subject: [PATCH] Expose Tokens to target specific assembly parsers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77337 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/MC/MCAsmLexer.h | 95 +++++++++++++++++++++++++++++++++++- lib/MC/MCAsmLexer.cpp | 2 +- tools/llvm-mc/AsmLexer.cpp | 1 - tools/llvm-mc/AsmLexer.h | 86 +++----------------------------- 4 files changed, 101 insertions(+), 83 deletions(-) diff --git a/include/llvm/MC/MCAsmLexer.h b/include/llvm/MC/MCAsmLexer.h index 80628c3a3ad..be5083becc4 100644 --- a/include/llvm/MC/MCAsmLexer.h +++ b/include/llvm/MC/MCAsmLexer.h @@ -10,21 +10,114 @@ #ifndef LLVM_MC_MCASMLEXER_H #define LLVM_MC_MCASMLEXER_H +#include "llvm/ADT/StringRef.h" + namespace llvm { class MCAsmLexer; class MCInst; +class SMLoc; class Target; +/// AsmToken - Target independent representation for an assembler token. +struct AsmToken { + enum TokenKind { + // Markers + Eof, Error, + + // String values. + Identifier, + Register, + String, + + // Integer values. + Integer, + + // No-value. + EndOfStatement, + Colon, + Plus, Minus, Tilde, + Slash, // '/' + LParen, RParen, + Star, Comma, Dollar, Equal, EqualEqual, + + Pipe, PipePipe, Caret, + Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, + Less, LessEqual, LessLess, LessGreater, + Greater, GreaterEqual, GreaterGreater + }; + + TokenKind Kind; + + /// A reference to the entire token contents; this is always a pointer into + /// a memory buffer owned by the source manager. 
+ StringRef Str; + + int64_t IntVal; + +public: + AsmToken() {} + AsmToken(TokenKind _Kind, const StringRef &_Str, int64_t _IntVal = 0) + : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} + + TokenKind getKind() const { return Kind; } + bool is(TokenKind K) const { return Kind == K; } + bool isNot(TokenKind K) const { return Kind != K; } + + SMLoc getLoc() const; + + /// getString - Get the string for the current token, this includes all + /// characters (for example, the quotes on strings) in the token. + /// + /// The returned StringRef points into the source manager's memory buffer, and + /// is safe to store across calls to Lex(). + StringRef getString() const { return Str; } + + // FIXME: Don't compute this in advance, it makes every token larger, and is + // also not generally what we want (it is nicer for recovery etc. to lex 123br + // as a single token, then diagnose as an invalid number). + int64_t getIntVal() const { + assert(Kind == Integer && "This token isn't an integer"); + return IntVal; + } +}; + /// MCAsmLexer - Generic assembler lexer interface, for use by target specific /// assembly lexers. class MCAsmLexer { + /// The current token, stored in the base class for faster access. + AsmToken CurTok; + MCAsmLexer(const MCAsmLexer &); // DO NOT IMPLEMENT void operator=(const MCAsmLexer &); // DO NOT IMPLEMENT protected: // Can only create subclasses. MCAsmLexer(); - + + virtual AsmToken LexToken() = 0; + public: virtual ~MCAsmLexer(); + + /// Lex - Consume the next token from the input stream and return it. + /// + /// The lexer will continuously return the end-of-file token once the end of + /// the main input file has been reached. + const AsmToken &Lex() { + return CurTok = LexToken(); + } + + /// getTok - Get the current (last) lexed token. + const AsmToken &getTok() { + return CurTok; + } + + /// getKind - Get the kind of current token. 
+ AsmToken::TokenKind getKind() const { return CurTok.getKind(); } + + /// is - Check if the current token has kind \arg K. + bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } + + /// isNot - Check if the current token does not have kind \arg K. + bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } }; } // End llvm namespace diff --git a/lib/MC/MCAsmLexer.cpp b/lib/MC/MCAsmLexer.cpp index 5cbcbfd6de9..c5365b7a956 100644 --- a/lib/MC/MCAsmLexer.cpp +++ b/lib/MC/MCAsmLexer.cpp @@ -11,7 +11,7 @@ using namespace llvm; -MCAsmLexer::MCAsmLexer() { +MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()) { } MCAsmLexer::~MCAsmLexer() { diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp index 09fb34d6d9b..28b0c1e3c93 100644 --- a/tools/llvm-mc/AsmLexer.cpp +++ b/tools/llvm-mc/AsmLexer.cpp @@ -24,7 +24,6 @@ AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) { CurBuffer = 0; CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); CurPtr = CurBuf->getBufferStart(); - CurTok = AsmToken(AsmToken::Error, StringRef(CurPtr, 0)); TokStart = 0; } diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h index 2918e43b6e4..4200137f640 100644 --- a/tools/llvm-mc/AsmLexer.h +++ b/tools/llvm-mc/AsmLexer.h @@ -25,69 +25,6 @@ class MemoryBuffer; class SourceMgr; class SMLoc; -/// AsmToken - Target independent representation for an assembler token. -struct AsmToken { - enum TokenKind { - // Markers - Eof, Error, - - // String values. - Identifier, - Register, - String, - - // Integer values. - Integer, - - // No-value. - EndOfStatement, - Colon, - Plus, Minus, Tilde, - Slash, // '/' - LParen, RParen, - Star, Comma, Dollar, Equal, EqualEqual, - - Pipe, PipePipe, Caret, - Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, - Less, LessEqual, LessLess, LessGreater, - Greater, GreaterEqual, GreaterGreater - }; - - TokenKind Kind; - - /// A reference to the entire token contents; this is always a pointer into - /// a memory buffer owned by the source manager. 
- StringRef Str; - - int64_t IntVal; - -public: - AsmToken() {} - AsmToken(TokenKind _Kind, const StringRef &_Str, int64_t _IntVal = 0) - : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} - - TokenKind getKind() const { return Kind; } - bool is(TokenKind K) const { return Kind == K; } - bool isNot(TokenKind K) const { return Kind != K; } - - SMLoc getLoc() const; - - /// getString - Get the string for the current token, this includes all - /// characters (for example, the quotes on strings) in the token. - /// - /// The returned StringRef points into the source manager's memory buffer, and - /// is safe to store across calls to Lex(). - StringRef getString() const { return Str; } - - // FIXME: Don't compute this in advance, it makes every token larger, and is - // also not generally what we want (it is nicer for recovery etc. to lex 123br - // as a single token, then diagnose as an invalid number). - int64_t getIntVal() const { - assert(Kind == Integer && "This token isn't an integer"); - return IntVal; - } -}; - /// AsmLexer - Lexer class for assembly files. class AsmLexer : public MCAsmLexer { SourceMgr &SrcMgr; @@ -97,32 +34,23 @@ class AsmLexer : public MCAsmLexer { const char *TokStart; - /// The current token. - AsmToken CurTok; - /// This is the current buffer index we're lexing from as managed by the /// SourceMgr object. int CurBuffer; void operator=(const AsmLexer&); // DO NOT IMPLEMENT AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT + +protected: + /// LexToken - Read the next token and return its code. + virtual AsmToken LexToken(); + public: AsmLexer(SourceMgr &SrcMgr); ~AsmLexer(); - const AsmToken &Lex() { - return CurTok = LexToken(); - } - - AsmToken::TokenKind getKind() const { return CurTok.getKind(); } - bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } - bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } - SMLoc getLoc() const; - - /// getTok - Return a reference to the current (last) lexed token. 
- const AsmToken &getTok() const { return CurTok; } - + /// EnterIncludeFile - Enter the specified file. This returns true on failure. bool EnterIncludeFile(const std::string &Filename); @@ -132,8 +60,6 @@ private: int getNextChar(); AsmToken ReturnError(const char *Loc, const std::string &Msg); - /// LexToken - Read the next token and return its code. - AsmToken LexToken(); AsmToken LexIdentifier(); AsmToken LexPercent(); AsmToken LexSlash(); -- 2.34.1