X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FAsmParser%2FLLLexer.cpp;h=34595e7a4eefdfc905b4070ef545d7ca2908499e;hp=5b1f0740dce25750d1930dc75d912c02bc50d795;hb=bb811a244567aa8a1522203f15588f4d001b7353;hpb=ed4a2f168873527e1737deaa7a0c6c045a2cff7d diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 5b1f0740dce..34595e7a4ee 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -2,8 +2,8 @@ // // The LLVM Compiler Infrastructure // -// This file was developed by Chris Lattner and is distributed under -// the University of Illinois Open Source License. See LICENSE.TXT for details. +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // @@ -12,13 +12,25 @@ //===----------------------------------------------------------------------===// #include "LLLexer.h" -#include "ParserInternals.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instruction.h" +#include "llvm/LLVMContext.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" - -#include -#include "llvmAsmParser.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Assembly/Parser.h" +#include +#include +#include using namespace llvm; +bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const { + ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error"); + return true; +} + //===----------------------------------------------------------------------===// // Helper functions. //===----------------------------------------------------------------------===// @@ -27,21 +39,21 @@ using namespace llvm; // long representation... this does not have to do input error checking, // because we know that the input will be matched by a suitable regex... // -static uint64_t atoull(const char *Buffer, const char *End) { +uint64_t LLLexer::atoull(const char *Buffer, const char *End) { uint64_t Result = 0; for (; Buffer != End; Buffer++) { uint64_t OldRes = Result; Result *= 10; Result += *Buffer-'0'; if (Result < OldRes) { // Uh, oh, overflow detected!!! - GenerateError("constant bigger than 64 bits detected!"); + Error("constant bigger than 64 bits detected!"); return 0; } } return Result; } -static uint64_t HexIntToVal(const char *Buffer, const char *End) { +uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { uint64_t Result = 0; for (; Buffer != End; ++Buffer) { uint64_t OldRes = Result; @@ -53,23 +65,17 @@ static uint64_t HexIntToVal(const char *Buffer, const char *End) { Result += C-'A'+10; else if (C >= 'a' && C <= 'f') Result += C-'a'+10; - + if (Result < OldRes) { // Uh, oh, overflow detected!!! - GenerateError("constant bigger than 64 bits detected!"); + Error("constant bigger than 64 bits detected!"); return 0; } } return Result; } -// HexToFP - Convert the ascii string in hexadecimal format to the floating -// point representation of it. -// -static double HexToFP(const char *Buffer, const char *End) { - return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double -} - -static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ +void LLLexer::HexToIntPair(const char *Buffer, const char *End, + uint64_t Pair[2]) { Pair[0] = 0; for (int i=0; i<16; i++, Buffer++) { assert(Buffer != End); @@ -94,14 +100,45 @@ static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ Pair[1] += C-'a'+10; } if (Buffer != End) - GenerateError("constant bigger than 128 bits detected!"); + Error("constant bigger than 128 bits detected!"); +} + +/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into +/// { low64, high16 } as usual for an APInt. +void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, + uint64_t Pair[2]) { + Pair[1] = 0; + for (int i=0; i<4 && Buffer != End; i++, Buffer++) { + assert(Buffer != End); + Pair[1] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[1] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[1] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[1] += C-'a'+10; + } + Pair[0] = 0; + for (int i=0; i<16; i++, Buffer++) { + Pair[0] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[0] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[0] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[0] += C-'a'+10; + } + if (Buffer != End) + Error("constant bigger than 128 bits detected!"); } // UnEscapeLexed - Run through the specified buffer and change \xx codes to the // appropriate character. static void UnEscapeLexed(std::string &Str) { if (Str.empty()) return; - + char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); char *BOut = Buffer; for (char *BIn = Buffer; BIn != EndBuffer; ) { @@ -146,11 +183,9 @@ static const char *isLabelTail(const char *CurPtr) { // Lexer definition. //===----------------------------------------------------------------------===// -// FIXME: REMOVE THIS. -#define YYEOF 0 -#define YYERROR -2 - -LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) { +LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err, + LLVMContext &C) + : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) { CurPtr = CurBuf->getBufferStart(); } @@ -167,38 +202,26 @@ int LLLexer::getNextChar() { // a random nul in the file. Disambiguate that here. if (CurPtr-1 != CurBuf->getBufferEnd()) return 0; // Just whitespace. - + // Otherwise, return end of file. - --CurPtr; // Another call to lex will return EOF again. + --CurPtr; // Another call to lex will return EOF again. return EOF; - case '\n': - case '\r': - // Handle the newline character by ignoring it and incrementing the line - // count. However, be careful about 'dos style' files with \n\r in them. - // Only treat a \n\r or \r\n as a single line. - if ((*CurPtr == '\n' || (*CurPtr == '\r')) && - *CurPtr != CurChar) - ++CurPtr; // Eat the two char newline sequence. - - ++CurLineNo; - return '\n'; - } + } } -int LLLexer::LexToken() { +lltok::Kind LLLexer::LexToken() { TokStart = CurPtr; - + int CurChar = getNextChar(); - switch (CurChar) { default: // Handle letters: [a-zA-Z_] if (isalpha(CurChar) || CurChar == '_') return LexIdentifier(); - - return CurChar; - case EOF: return YYEOF; + + return lltok::Error; + case EOF: return lltok::Eof; case 0: case ' ': case '\t': @@ -213,28 +236,41 @@ int LLLexer::LexToken() { case '.': if (const char *Ptr = isLabelTail(CurPtr)) { CurPtr = Ptr; - llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); - return LABELSTR; + StrVal.assign(TokStart, CurPtr-1); + return lltok::LabelStr; } if (CurPtr[0] == '.' && CurPtr[1] == '.') { CurPtr += 2; - return DOTDOTDOT; + return lltok::dotdotdot; } - return '.'; + return lltok::Error; case '$': if (const char *Ptr = isLabelTail(CurPtr)) { CurPtr = Ptr; - llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); - return LABELSTR; + StrVal.assign(TokStart, CurPtr-1); + return lltok::LabelStr; } - return '$'; + return lltok::Error; case ';': SkipLineComment(); return LexToken(); + case '!': return LexExclaim(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': - case '-': + case '-': return LexDigitOrNegative(); + case '=': return lltok::equal; + case '[': return lltok::lsquare; + case ']': return lltok::rsquare; + case '{': return lltok::lbrace; + case '}': return lltok::rbrace; + case '<': return lltok::less; + case '>': return lltok::greater; + case '(': return lltok::lparen; + case ')': return lltok::rparen; + case ',': return lltok::comma; + case '*': return lltok::star; + case '\\': return lltok::backslash; } } @@ -246,130 +282,132 @@ void LLLexer::SkipLineComment() { } /// LexAt - Lex all tokens that start with an @ character: -/// AtStringConstant @\"[^\"]*\" -/// GlobalVarName @[-a-zA-Z$._][-a-zA-Z$._0-9]* -/// GlobalVarID @[0-9]+ -int LLLexer::LexAt() { +/// GlobalVar @\"[^\"]*\" +/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// GlobalVarID @[0-9]+ +lltok::Kind LLLexer::LexAt() { // Handle AtStringConstant: @\"[^\"]*\" if (CurPtr[0] == '"') { ++CurPtr; - + while (1) { int CurChar = getNextChar(); - - if (CurChar == EOF) { - GenerateError("End of file in global variable name"); - return YYERROR; + + if (CurChar == EOF) { + Error("end of file in global variable name"); + return lltok::Error; } if (CurChar == '"') { - llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); - UnEscapeLexed(*llvmAsmlval.StrVal); - return ATSTRINGCONSTANT; + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::GlobalVar; } } } - + // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* - if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') { ++CurPtr; - while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') ++CurPtr; - llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip @ - return GLOBALVAR; + StrVal.assign(TokStart+1, CurPtr); // Skip @ + return lltok::GlobalVar; } - + // Handle GlobalVarID: @[0-9]+ if (isdigit(CurPtr[0])) { - for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr); - + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + uint64_t Val = atoull(TokStart+1, CurPtr); if ((unsigned)Val != Val) - GenerateError("Invalid value number (too large)!"); - llvmAsmlval.UIntVal = unsigned(Val); - return GLOBALVAL_ID; + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::GlobalID; } - - return '@'; + + return lltok::Error; } /// LexPercent - Lex all tokens that start with a % character: -/// PctStringConstant %\"[^\"]*\" -/// LocalVarName %[-a-zA-Z$._][-a-zA-Z$._0-9]* -/// LocalVarID %[0-9]+ -int LLLexer::LexPercent() { - // Handle PctStringConstant: %\"[^\"]*\" +/// LocalVar ::= %\"[^\"]*\" +/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// LocalVarID ::= %[0-9]+ +lltok::Kind LLLexer::LexPercent() { + // Handle LocalVarName: %\"[^\"]*\" if (CurPtr[0] == '"') { ++CurPtr; - + while (1) { int CurChar = getNextChar(); - - if (CurChar == EOF) { - GenerateError("End of file in local variable name"); - return YYERROR; + + if (CurChar == EOF) { + Error("end of file in string constant"); + return lltok::Error; } if (CurChar == '"') { - llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); - UnEscapeLexed(*llvmAsmlval.StrVal); - return PCTSTRINGCONSTANT; + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::LocalVar; } } } - + // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* - if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') { ++CurPtr; - while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') ++CurPtr; - - llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip % - return LOCALVAR; + + StrVal.assign(TokStart+1, CurPtr); // Skip % + return lltok::LocalVar; } - + // Handle LocalVarID: %[0-9]+ if (isdigit(CurPtr[0])) { - for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr); - + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + uint64_t Val = atoull(TokStart+1, CurPtr); if ((unsigned)Val != Val) - GenerateError("Invalid value number (too large)!"); - llvmAsmlval.UIntVal = unsigned(Val); - return LOCALVAL_ID; + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::LocalVarID; } - - return '%'; + + return lltok::Error; } /// LexQuote - Lex all tokens that start with a " character: /// QuoteLabel "[^"]+": /// StringConstant "[^"]*" -int LLLexer::LexQuote() { +lltok::Kind LLLexer::LexQuote() { while (1) { int CurChar = getNextChar(); - - if (CurChar == EOF) { - GenerateError("End of file in quoted string"); - return YYERROR; + + if (CurChar == EOF) { + Error("end of file in quoted string"); + return lltok::Error; } - + if (CurChar != '"') continue; if (CurPtr[0] != ':') { - llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1); - UnEscapeLexed(*llvmAsmlval.StrVal); - return STRINGCONSTANT; + StrVal.assign(TokStart+1, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::StringConstant; } - + ++CurPtr; - llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2); - UnEscapeLexed(*llvmAsmlval.StrVal); - return LABELSTR; + StrVal.assign(TokStart+1, CurPtr-2); + UnEscapeLexed(StrVal); + return lltok::LabelStr; } } @@ -384,145 +422,180 @@ static bool JustWhitespaceNewLine(const char *&Ptr) { return false; } +/// LexExclaim: +/// !foo +/// ! +lltok::Kind LLLexer::LexExclaim() { + // Lex a metadata name as a MetadataVar. + if (isalpha(CurPtr[0])) { + ++CurPtr; + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') + ++CurPtr; + StrVal.assign(TokStart+1, CurPtr); // Skip ! + return lltok::MetadataVar; + } + return lltok::exclaim; +} + /// LexIdentifier: Handle several related productions: /// Label [-a-zA-Z$._0-9]+: /// IntegerType i[0-9]+ /// Keyword sdiv, float, ... /// HexIntConstant [us]0x[0-9A-Fa-f]+ -int LLLexer::LexIdentifier() { +lltok::Kind LLLexer::LexIdentifier() { const char *StartChar = CurPtr; const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; const char *KeywordEnd = 0; - + for (; isLabelChar(*CurPtr); ++CurPtr) { // If we decide this is an integer, remember the end of the sequence. if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; } - + // If we stopped due to a colon, this really is a label. if (*CurPtr == ':') { - llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++); - return LABELSTR; + StrVal.assign(StartChar-1, CurPtr++); + return lltok::LabelStr; } - + // Otherwise, this wasn't a label. If this was valid as an integer type, // return it. if (IntEnd == 0) IntEnd = CurPtr; if (IntEnd != StartChar) { CurPtr = IntEnd; uint64_t NumBits = atoull(StartChar, CurPtr); - if (NumBits < IntegerType::MIN_INT_BITS || + if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) { - GenerateError("Bitwidth for integer type out of range!"); - return YYERROR; + Error("bitwidth for integer type out of range!"); + return lltok::Error; } - const Type* Ty = IntegerType::get(NumBits); - llvmAsmlval.PrimType = Ty; - return INTTYPE; + TyVal = IntegerType::get(Context, NumBits); + return lltok::Type; } - + // Otherwise, this was a letter sequence. See which keyword this is. if (KeywordEnd == 0) KeywordEnd = CurPtr; CurPtr = KeywordEnd; --StartChar; unsigned Len = CurPtr-StartChar; -#define KEYWORD(STR, TOK) \ - if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK; - - KEYWORD("begin", BEGINTOK); - KEYWORD("end", ENDTOK); - KEYWORD("true", TRUETOK); - KEYWORD("false", FALSETOK); - KEYWORD("declare", DECLARE); - KEYWORD("define", DEFINE); - KEYWORD("global", GLOBAL); - KEYWORD("constant", CONSTANT); - - KEYWORD("internal", INTERNAL); - KEYWORD("linkonce", LINKONCE); - KEYWORD("weak", WEAK); - KEYWORD("appending", APPENDING); - KEYWORD("dllimport", DLLIMPORT); - KEYWORD("dllexport", DLLEXPORT); - KEYWORD("hidden", HIDDEN); - KEYWORD("protected", PROTECTED); - KEYWORD("extern_weak", EXTERN_WEAK); - KEYWORD("external", EXTERNAL); - KEYWORD("thread_local", THREAD_LOCAL); - KEYWORD("zeroinitializer", ZEROINITIALIZER); - KEYWORD("undef", UNDEF); - KEYWORD("null", NULL_TOK); - KEYWORD("to", TO); - KEYWORD("tail", TAIL); - KEYWORD("target", TARGET); - KEYWORD("triple", TRIPLE); - KEYWORD("deplibs", DEPLIBS); - KEYWORD("datalayout", DATALAYOUT); - KEYWORD("volatile", VOLATILE); - KEYWORD("align", ALIGN); - KEYWORD("section", SECTION); - KEYWORD("alias", ALIAS); - KEYWORD("module", MODULE); - KEYWORD("asm", ASM_TOK); - KEYWORD("sideeffect", SIDEEFFECT); - - KEYWORD("cc", CC_TOK); - KEYWORD("ccc", CCC_TOK); - KEYWORD("fastcc", FASTCC_TOK); - KEYWORD("coldcc", COLDCC_TOK); - KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK); - KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK); - - KEYWORD("signext", SIGNEXT); - KEYWORD("zeroext", ZEROEXT); - KEYWORD("inreg", INREG); - KEYWORD("sret", SRET); - KEYWORD("nounwind", NOUNWIND); - KEYWORD("noreturn", NORETURN); - KEYWORD("noalias", NOALIAS); - KEYWORD("byval", BYVAL); - KEYWORD("nest", NEST); - KEYWORD("readnone", READNONE); - KEYWORD("readonly", READONLY); - - KEYWORD("type", TYPE); - KEYWORD("opaque", OPAQUE); - - KEYWORD("eq" , EQ); - KEYWORD("ne" , NE); - KEYWORD("slt", SLT); - KEYWORD("sgt", SGT); - KEYWORD("sle", SLE); - KEYWORD("sge", SGE); - KEYWORD("ult", ULT); - KEYWORD("ugt", UGT); - KEYWORD("ule", ULE); - KEYWORD("uge", UGE); - KEYWORD("oeq", OEQ); - KEYWORD("one", ONE); - KEYWORD("olt", OLT); - KEYWORD("ogt", OGT); - KEYWORD("ole", OLE); - KEYWORD("oge", OGE); - KEYWORD("ord", ORD); - KEYWORD("uno", UNO); - KEYWORD("ueq", UEQ); - KEYWORD("une", UNE); +#define KEYWORD(STR) \ + if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ + return lltok::kw_##STR; + + KEYWORD(begin); KEYWORD(end); + KEYWORD(true); KEYWORD(false); + KEYWORD(declare); KEYWORD(define); + KEYWORD(global); KEYWORD(constant); + + KEYWORD(private); + KEYWORD(linker_private); + KEYWORD(linker_private_weak); + KEYWORD(linker_private_weak_def_auto); + KEYWORD(internal); + KEYWORD(available_externally); + KEYWORD(linkonce); + KEYWORD(linkonce_odr); + KEYWORD(weak); + KEYWORD(weak_odr); + KEYWORD(appending); + KEYWORD(dllimport); + KEYWORD(dllexport); + KEYWORD(common); + KEYWORD(default); + KEYWORD(hidden); + KEYWORD(protected); + KEYWORD(extern_weak); + KEYWORD(external); + KEYWORD(thread_local); + KEYWORD(zeroinitializer); + KEYWORD(undef); + KEYWORD(null); + KEYWORD(to); + KEYWORD(tail); + KEYWORD(target); + KEYWORD(triple); + KEYWORD(deplibs); + KEYWORD(datalayout); + KEYWORD(volatile); + KEYWORD(nuw); + KEYWORD(nsw); + KEYWORD(exact); + KEYWORD(inbounds); + KEYWORD(align); + KEYWORD(addrspace); + KEYWORD(section); + KEYWORD(alias); + KEYWORD(module); + KEYWORD(asm); + KEYWORD(sideeffect); + KEYWORD(alignstack); + KEYWORD(gc); + + KEYWORD(ccc); + KEYWORD(fastcc); + KEYWORD(coldcc); + KEYWORD(x86_stdcallcc); + KEYWORD(x86_fastcallcc); + KEYWORD(x86_thiscallcc); + KEYWORD(arm_apcscc); + KEYWORD(arm_aapcscc); + KEYWORD(arm_aapcs_vfpcc); + KEYWORD(msp430_intrcc); + + KEYWORD(cc); + KEYWORD(c); + + KEYWORD(signext); + KEYWORD(zeroext); + KEYWORD(inreg); + KEYWORD(sret); + KEYWORD(nounwind); + KEYWORD(noreturn); + KEYWORD(noalias); + KEYWORD(nocapture); + KEYWORD(byval); + KEYWORD(nest); + KEYWORD(readnone); + KEYWORD(readonly); + + KEYWORD(inlinehint); + KEYWORD(noinline); + KEYWORD(alwaysinline); + KEYWORD(optsize); + KEYWORD(ssp); + KEYWORD(sspreq); + KEYWORD(noredzone); + KEYWORD(noimplicitfloat); + KEYWORD(naked); + + KEYWORD(type); + KEYWORD(opaque); + + KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); + KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); + KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); + KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); + + KEYWORD(x); + KEYWORD(blockaddress); #undef KEYWORD // Keywords for types. -#define TYPEKEYWORD(STR, LLVMTY, TOK) \ +#define TYPEKEYWORD(STR, LLVMTY) \ if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ - llvmAsmlval.PrimType = LLVMTY; return TOK; } - TYPEKEYWORD("void", Type::VoidTy, VOID); - TYPEKEYWORD("float", Type::FloatTy, FLOAT); - TYPEKEYWORD("double", Type::DoubleTy, DOUBLE); - TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty, X86_FP80); - TYPEKEYWORD("fp128", Type::FP128Ty, FP128); - TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128); - TYPEKEYWORD("label", Type::LabelTy, LABEL); + TyVal = LLVMTY; return lltok::Type; } + TYPEKEYWORD("void", Type::getVoidTy(Context)); + TYPEKEYWORD("float", Type::getFloatTy(Context)); + TYPEKEYWORD("double", Type::getDoubleTy(Context)); + TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context)); + TYPEKEYWORD("fp128", Type::getFP128Ty(Context)); + TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context)); + TYPEKEYWORD("label", Type::getLabelTy(Context)); + TYPEKEYWORD("metadata", Type::getMetadataTy(Context)); + TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context)); #undef TYPEKEYWORD // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is @@ -530,110 +603,103 @@ int LLLexer::LexIdentifier() { if (Len == 4 && !memcmp(StartChar, "sext", 4)) { // Scan CurPtr ahead, seeing if there is just whitespace before the newline. if (JustWhitespaceNewLine(CurPtr)) - return SIGNEXT; + return lltok::kw_signext; } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { // Scan CurPtr ahead, seeing if there is just whitespace before the newline. if (JustWhitespaceNewLine(CurPtr)) - return ZEROEXT; + return lltok::kw_zeroext; + } else if (Len == 6 && !memcmp(StartChar, "malloc", 6)) { + // FIXME: Remove in LLVM 3.0. + // Autoupgrade malloc instruction. + return lltok::kw_malloc; + } else if (Len == 4 && !memcmp(StartChar, "free", 4)) { + // FIXME: Remove in LLVM 3.0. + // Autoupgrade malloc instruction. + return lltok::kw_free; } - + // Keywords for instructions. -#define INSTKEYWORD(STR, type, Enum, TOK) \ - if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ - llvmAsmlval.type = Instruction::Enum; return TOK; } - - INSTKEYWORD("add", BinaryOpVal, Add, ADD); - INSTKEYWORD("sub", BinaryOpVal, Sub, SUB); - INSTKEYWORD("mul", BinaryOpVal, Mul, MUL); - INSTKEYWORD("udiv", BinaryOpVal, UDiv, UDIV); - INSTKEYWORD("sdiv", BinaryOpVal, SDiv, SDIV); - INSTKEYWORD("fdiv", BinaryOpVal, FDiv, FDIV); - INSTKEYWORD("urem", BinaryOpVal, URem, UREM); - INSTKEYWORD("srem", BinaryOpVal, SRem, SREM); - INSTKEYWORD("frem", BinaryOpVal, FRem, FREM); - INSTKEYWORD("shl", BinaryOpVal, Shl, SHL); - INSTKEYWORD("lshr", BinaryOpVal, LShr, LSHR); - INSTKEYWORD("ashr", BinaryOpVal, AShr, ASHR); - INSTKEYWORD("and", BinaryOpVal, And, AND); - INSTKEYWORD("or", BinaryOpVal, Or , OR ); - INSTKEYWORD("xor", BinaryOpVal, Xor, XOR); - INSTKEYWORD("icmp", OtherOpVal, ICmp, ICMP); - INSTKEYWORD("fcmp", OtherOpVal, FCmp, FCMP); - - INSTKEYWORD("phi", OtherOpVal, PHI, PHI_TOK); - INSTKEYWORD("call", OtherOpVal, Call, CALL); - INSTKEYWORD("trunc", CastOpVal, Trunc, TRUNC); - INSTKEYWORD("zext", CastOpVal, ZExt, ZEXT); - INSTKEYWORD("sext", CastOpVal, SExt, SEXT); - INSTKEYWORD("fptrunc", CastOpVal, FPTrunc, FPTRUNC); - INSTKEYWORD("fpext", CastOpVal, FPExt, FPEXT); - INSTKEYWORD("uitofp", CastOpVal, UIToFP, UITOFP); - INSTKEYWORD("sitofp", CastOpVal, SIToFP, SITOFP); - INSTKEYWORD("fptoui", CastOpVal, FPToUI, FPTOUI); - INSTKEYWORD("fptosi", CastOpVal, FPToSI, FPTOSI); - INSTKEYWORD("inttoptr", CastOpVal, IntToPtr, INTTOPTR); - INSTKEYWORD("ptrtoint", CastOpVal, PtrToInt, PTRTOINT); - INSTKEYWORD("bitcast", CastOpVal, BitCast, BITCAST); - INSTKEYWORD("select", OtherOpVal, Select, SELECT); - INSTKEYWORD("va_arg", OtherOpVal, VAArg , VAARG); - INSTKEYWORD("ret", TermOpVal, Ret, RET); - INSTKEYWORD("br", TermOpVal, Br, BR); - INSTKEYWORD("switch", TermOpVal, Switch, SWITCH); - INSTKEYWORD("invoke", TermOpVal, Invoke, INVOKE); - INSTKEYWORD("unwind", TermOpVal, Unwind, UNWIND); - INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE); - - INSTKEYWORD("malloc", MemOpVal, Malloc, MALLOC); - INSTKEYWORD("alloca", MemOpVal, Alloca, ALLOCA); - INSTKEYWORD("free", MemOpVal, Free, FREE); - INSTKEYWORD("load", MemOpVal, Load, LOAD); - INSTKEYWORD("store", MemOpVal, Store, STORE); - INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR); - - INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT); - INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT); - INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR); -#undef INSTKEYWORD - +#define INSTKEYWORD(STR, Enum) \ + if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \ + UIntVal = Instruction::Enum; return lltok::kw_##STR; } + + INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); + INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); + INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul); + INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv); + INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem); + INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr); + INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor); + INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp); + + INSTKEYWORD(phi, PHI); + INSTKEYWORD(call, Call); + INSTKEYWORD(trunc, Trunc); + INSTKEYWORD(zext, ZExt); + INSTKEYWORD(sext, SExt); + INSTKEYWORD(fptrunc, FPTrunc); + INSTKEYWORD(fpext, FPExt); + INSTKEYWORD(uitofp, UIToFP); + INSTKEYWORD(sitofp, SIToFP); + INSTKEYWORD(fptoui, FPToUI); + INSTKEYWORD(fptosi, FPToSI); + INSTKEYWORD(inttoptr, IntToPtr); + INSTKEYWORD(ptrtoint, PtrToInt); + INSTKEYWORD(bitcast, BitCast); + INSTKEYWORD(select, Select); + INSTKEYWORD(va_arg, VAArg); + INSTKEYWORD(ret, Ret); + INSTKEYWORD(br, Br); + INSTKEYWORD(switch, Switch); + INSTKEYWORD(indirectbr, IndirectBr); + INSTKEYWORD(invoke, Invoke); + INSTKEYWORD(unwind, Unwind); + INSTKEYWORD(unreachable, Unreachable); + + INSTKEYWORD(alloca, Alloca); + INSTKEYWORD(load, Load); + INSTKEYWORD(store, Store); + INSTKEYWORD(getelementptr, GetElementPtr); + + INSTKEYWORD(extractelement, ExtractElement); + INSTKEYWORD(insertelement, InsertElement); + INSTKEYWORD(shufflevector, ShuffleVector); + INSTKEYWORD(getresult, ExtractValue); + INSTKEYWORD(extractvalue, ExtractValue); + INSTKEYWORD(insertvalue, InsertValue); +#undef INSTKEYWORD + // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by // the CFE to avoid forcing it to deal with 64-bit numbers. if ((TokStart[0] == 'u' || TokStart[0] == 's') && TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { int len = CurPtr-TokStart-3; uint32_t bits = len * 4; - APInt Tmp(bits, TokStart+3, len, 16); + APInt Tmp(bits, StringRef(TokStart+3, len), 16); uint32_t activeBits = Tmp.getActiveBits(); if (activeBits > 0 && activeBits < bits) Tmp.trunc(activeBits); - if (Tmp.getBitWidth() > 64) { - llvmAsmlval.APIntVal = new APInt(Tmp); - return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL; - } else if (TokStart[0] == 's') { - llvmAsmlval.SInt64Val = Tmp.getSExtValue(); - return ESINT64VAL; - } else { - llvmAsmlval.UInt64Val = Tmp.getZExtValue(); - return EUINT64VAL; - } + APSIntVal = APSInt(Tmp, TokStart[0] == 'u'); + return lltok::APSInt; } - + // If this is "cc1234", return this as just "cc". if (TokStart[0] == 'c' && TokStart[1] == 'c') { CurPtr = TokStart+2; - return CC_TOK; + return lltok::kw_cc; } - + // If this starts with "call", return it as CALL. This is to support old // broken .ll files. FIXME: remove this with LLVM 3.0. if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) { CurPtr = TokStart+4; - llvmAsmlval.OtherOpVal = Instruction::Call; - return CALL; + UIntVal = Instruction::Call; + return lltok::kw_call; } - - // Finally, if this isn't known, return just a single character. + + // Finally, if this isn't known, return an error. CurPtr = TokStart+1; - return TokStart[0]; + return lltok::Error; } @@ -643,9 +709,9 @@ int LLLexer::LexIdentifier() { /// HexFP80Constant 0xK[0-9A-Fa-f]+ /// HexFP128Constant 0xL[0-9A-Fa-f]+ /// HexPPC128Constant 0xM[0-9A-Fa-f]+ -int LLLexer::Lex0x() { +lltok::Kind LLLexer::Lex0x() { CurPtr = TokStart + 2; - + char Kind; if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { Kind = *CurPtr++; @@ -654,38 +720,40 @@ int LLLexer::Lex0x() { } if (!isxdigit(CurPtr[0])) { - // Bad token, return it as just zero. + // Bad token, return it as an error. CurPtr = TokStart+1; - return '0'; + return lltok::Error; } - + while (isxdigit(CurPtr[0])) ++CurPtr; - + if (Kind == 'J') { // HexFPConstant - Floating point constant represented in IEEE format as a // hexadecimal number for when exponential notation is not precise enough. // Float and double only. - llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr)); - return FPVAL; + APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr))); + return lltok::APFloat; } - + uint64_t Pair[2]; - HexToIntPair(TokStart+3, CurPtr, Pair); switch (Kind) { - default: assert(0 && "Unknown kind!"); + default: llvm_unreachable("Unknown kind!"); case 'K': // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) - llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair)); - return FPVAL; + FP80HexToIntPair(TokStart+3, CurPtr, Pair); + APFloatVal = APFloat(APInt(80, 2, Pair)); + return lltok::APFloat; case 'L': // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) - llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true); - return FPVAL; + HexToIntPair(TokStart+3, CurPtr, Pair); + APFloatVal = APFloat(APInt(128, 2, Pair), true); + return lltok::APFloat; case 'M': // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) - llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair)); - return FPVAL; + HexToIntPair(TokStart+3, CurPtr, Pair); + APFloatVal = APFloat(APInt(128, 2, Pair)); + return lltok::APFloat; } } @@ -698,33 +766,34 @@ int LLLexer::Lex0x() { /// HexFP80Constant 0xK[0-9A-Fa-f]+ /// HexFP128Constant 0xL[0-9A-Fa-f]+ /// HexPPC128Constant 0xM[0-9A-Fa-f]+ -int LLLexer::LexDigitOrNegative() { +lltok::Kind LLLexer::LexDigitOrNegative() { // If the letter after the negative is a number, this is probably a label. if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { // Okay, this is not a number after the -, it's probably a label. if (const char *End = isLabelTail(CurPtr)) { - llvmAsmlval.StrVal = new std::string(TokStart, End-1); + StrVal.assign(TokStart, End-1); CurPtr = End; - return LABELSTR; + return lltok::LabelStr; } - - return CurPtr[-1]; + + return lltok::Error; } - + // At this point, it is either a label, int or fp constant. - + // Skip digits, we have at least one. - for (; isdigit(CurPtr[0]); ++CurPtr); - + for (; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + // Check to see if this really is a label afterall, e.g. "-1:". if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { if (const char *End = isLabelTail(CurPtr)) { - llvmAsmlval.StrVal = new std::string(TokStart, End-1); + StrVal.assign(TokStart, End-1); CurPtr = End; - return LABELSTR; + return lltok::LabelStr; } } - + // If the next character is a '.', then it is a fp value, otherwise its // integer. if (CurPtr[0] != '.') { @@ -732,103 +801,68 @@ int LLLexer::LexDigitOrNegative() { return Lex0x(); unsigned Len = CurPtr-TokStart; uint32_t numBits = ((Len * 64) / 19) + 2; - APInt Tmp(numBits, TokStart, Len, 10); + APInt Tmp(numBits, StringRef(TokStart, Len), 10); if (TokStart[0] == '-') { uint32_t minBits = Tmp.getMinSignedBits(); if (minBits > 0 && minBits < numBits) Tmp.trunc(minBits); - if (Tmp.getBitWidth() > 64) { - llvmAsmlval.APIntVal = new APInt(Tmp); - return ESAPINTVAL; - } else { - llvmAsmlval.SInt64Val = Tmp.getSExtValue(); - return ESINT64VAL; - } + APSIntVal = APSInt(Tmp, false); } else { uint32_t activeBits = Tmp.getActiveBits(); if (activeBits > 0 && activeBits < numBits) Tmp.trunc(activeBits); - if (Tmp.getBitWidth() > 64) { - llvmAsmlval.APIntVal = new APInt(Tmp); - return EUAPINTVAL; - } else { - llvmAsmlval.UInt64Val = Tmp.getZExtValue(); - return EUINT64VAL; - } + APSIntVal = APSInt(Tmp, true); } + return lltok::APSInt; } ++CurPtr; - + // Skip over [0-9]*([eE][-+]?[0-9]+)? while (isdigit(CurPtr[0])) ++CurPtr; - + if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { - if (isdigit(CurPtr[1]) || + if (isdigit(CurPtr[1]) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { CurPtr += 2; while (isdigit(CurPtr[0])) ++CurPtr; } } - - llvmAsmlval.FPVal = new APFloat(atof(TokStart)); - return FPVAL; + + APFloatVal = APFloat(atof(TokStart)); + return lltok::APFloat; } /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? -int LLLexer::LexPositive() { +lltok::Kind LLLexer::LexPositive() { // If the letter after the negative is a number, this is probably not a // label. if (!isdigit(CurPtr[0])) - return CurPtr[-1]; - + return lltok::Error; + // Skip digits. - for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr); + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; // At this point, we need a '.'. if (CurPtr[0] != '.') { CurPtr = TokStart+1; - return TokStart[0]; + return lltok::Error; } - + ++CurPtr; - + // Skip over [0-9]*([eE][-+]?[0-9]+)? while (isdigit(CurPtr[0])) ++CurPtr; - + if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { - if (isdigit(CurPtr[1]) || + if (isdigit(CurPtr[1]) || ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { CurPtr += 2; while (isdigit(CurPtr[0])) ++CurPtr; } } - - llvmAsmlval.FPVal = new APFloat(atof(TokStart)); - return FPVAL; -} - -//===----------------------------------------------------------------------===// -// Define the interface to this file. -//===----------------------------------------------------------------------===// - -static LLLexer *TheLexer; - -void InitLLLexer(llvm::MemoryBuffer *MB) { - assert(TheLexer == 0 && "LL Lexer isn't reentrant yet"); - TheLexer = new LLLexer(MB); -} - -int llvmAsmlex() { - return TheLexer->LexToken(); -} -const char *LLLgetTokenStart() { return TheLexer->getTokStart(); } -unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); } -std::string LLLgetFilename() { return TheLexer->getFilename(); } -unsigned LLLgetLineNo() { return TheLexer->getLineNo(); } - -void FreeLexer() { - delete TheLexer; - TheLexer = 0; + APFloatVal = APFloat(atof(TokStart)); + return lltok::APFloat; }