X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FAsmParser%2FLLLexer.cpp;h=34595e7a4eefdfc905b4070ef545d7ca2908499e;hp=b8f497a5e90d22198bf6c5423758e2147a5752e9;hb=bb811a244567aa8a1522203f15588f4d001b7353;hpb=73ddd4f00dd2a4b7b68a1500bc7e3322cab51270 diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index b8f497a5e90..34595e7a4ee 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -12,16 +12,25 @@ //===----------------------------------------------------------------------===// #include "LLLexer.h" -#include "ParserInternals.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instruction.h" +#include "llvm/LLVMContext.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MathExtras.h" - -#include -#include "llvmAsmParser.h" - +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Assembly/Parser.h" +#include +#include #include using namespace llvm; +bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const { + ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error"); + return true; +} + //===----------------------------------------------------------------------===// // Helper functions. //===----------------------------------------------------------------------===// @@ -30,21 +39,21 @@ using namespace llvm; // long representation... this does not have to do input error checking, // because we know that the input will be matched by a suitable regex... // -static uint64_t atoull(const char *Buffer, const char *End) { +uint64_t LLLexer::atoull(const char *Buffer, const char *End) { uint64_t Result = 0; for (; Buffer != End; Buffer++) { uint64_t OldRes = Result; Result *= 10; Result += *Buffer-'0'; if (Result < OldRes) { // Uh, oh, overflow detected!!! - GenerateError("constant bigger than 64 bits detected!"); + Error("constant bigger than 64 bits detected!"); return 0; } } return Result; } -static uint64_t HexIntToVal(const char *Buffer, const char *End) { +uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { uint64_t Result = 0; for (; Buffer != End; ++Buffer) { uint64_t OldRes = Result; @@ -58,21 +67,15 @@ static uint64_t HexIntToVal(const char *Buffer, const char *End) { Result += C-'a'+10; if (Result < OldRes) { // Uh, oh, overflow detected!!! - GenerateError("constant bigger than 64 bits detected!"); + Error("constant bigger than 64 bits detected!"); return 0; } } return Result; } -// HexToFP - Convert the ascii string in hexadecimal format to the floating -// point representation of it. -// -static double HexToFP(const char *Buffer, const char *End) { - return BitsToDouble(HexIntToVal(Buffer, End)); // Cast Hex constant to double -} - -static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ +void LLLexer::HexToIntPair(const char *Buffer, const char *End, + uint64_t Pair[2]) { Pair[0] = 0; for (int i=0; i<16; i++, Buffer++) { assert(Buffer != End); @@ -97,7 +100,38 @@ static void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]){ Pair[1] += C-'a'+10; } if (Buffer != End) - GenerateError("constant bigger than 128 bits detected!"); + Error("constant bigger than 128 bits detected!"); +} + +/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into +/// { low64, high16 } as usual for an APInt. +void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, + uint64_t Pair[2]) { + Pair[1] = 0; + for (int i=0; i<4 && Buffer != End; i++, Buffer++) { + assert(Buffer != End); + Pair[1] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[1] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[1] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[1] += C-'a'+10; + } + Pair[0] = 0; + for (int i=0; i<16; i++, Buffer++) { + Pair[0] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[0] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[0] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[0] += C-'a'+10; + } + if (Buffer != End) + Error("constant bigger than 128 bits detected!"); } // UnEscapeLexed - Run through the specified buffer and change \xx codes to the @@ -149,11 +183,9 @@ static const char *isLabelTail(const char *CurPtr) { // Lexer definition. //===----------------------------------------------------------------------===// -// FIXME: REMOVE THIS. -#define YYEOF 0 -#define YYERROR -2 - -LLLexer::LLLexer(MemoryBuffer *StartBuf) : CurLineNo(1), CurBuf(StartBuf) { +LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err, + LLVMContext &C) + : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) { CurPtr = CurBuf->getBufferStart(); } @@ -174,34 +206,22 @@ int LLLexer::getNextChar() { // Otherwise, return end of file. --CurPtr; // Another call to lex will return EOF again. return EOF; - case '\n': - case '\r': - // Handle the newline character by ignoring it and incrementing the line - // count. However, be careful about 'dos style' files with \n\r in them. - // Only treat a \n\r or \r\n as a single line. - if ((*CurPtr == '\n' || (*CurPtr == '\r')) && - *CurPtr != CurChar) - ++CurPtr; // Eat the two char newline sequence. - - ++CurLineNo; - return '\n'; } } -int LLLexer::LexToken() { +lltok::Kind LLLexer::LexToken() { TokStart = CurPtr; int CurChar = getNextChar(); - switch (CurChar) { default: // Handle letters: [a-zA-Z_] if (isalpha(CurChar) || CurChar == '_') return LexIdentifier(); - return CurChar; - case EOF: return YYEOF; + return lltok::Error; + case EOF: return lltok::Eof; case 0: case ' ': case '\t': @@ -216,28 +236,41 @@ int LLLexer::LexToken() { case '.': if (const char *Ptr = isLabelTail(CurPtr)) { CurPtr = Ptr; - llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); - return LABELSTR; + StrVal.assign(TokStart, CurPtr-1); + return lltok::LabelStr; } if (CurPtr[0] == '.' && CurPtr[1] == '.') { CurPtr += 2; - return DOTDOTDOT; + return lltok::dotdotdot; } - return '.'; + return lltok::Error; case '$': if (const char *Ptr = isLabelTail(CurPtr)) { CurPtr = Ptr; - llvmAsmlval.StrVal = new std::string(TokStart, CurPtr-1); - return LABELSTR; + StrVal.assign(TokStart, CurPtr-1); + return lltok::LabelStr; } - return '$'; + return lltok::Error; case ';': SkipLineComment(); return LexToken(); + case '!': return LexExclaim(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': return LexDigitOrNegative(); + case '=': return lltok::equal; + case '[': return lltok::lsquare; + case ']': return lltok::rsquare; + case '{': return lltok::lbrace; + case '}': return lltok::rbrace; + case '<': return lltok::less; + case '>': return lltok::greater; + case '(': return lltok::lparen; + case ')': return lltok::rparen; + case ',': return lltok::comma; + case '*': return lltok::star; + case '\\': return lltok::backslash; } } @@ -249,10 +282,10 @@ void LLLexer::SkipLineComment() { } /// LexAt - Lex all tokens that start with an @ character: -/// AtStringConstant @\"[^\"]*\" -/// GlobalVarName @[-a-zA-Z$._][-a-zA-Z$._0-9]* -/// GlobalVarID @[0-9]+ -int LLLexer::LexAt() { +/// GlobalVar @\"[^\"]*\" +/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// GlobalVarID @[0-9]+ +lltok::Kind LLLexer::LexAt() { // Handle AtStringConstant: @\"[^\"]*\" if (CurPtr[0] == '"') { ++CurPtr; @@ -261,13 +294,13 @@ int LLLexer::LexAt() { int CurChar = getNextChar(); if (CurChar == EOF) { - GenerateError("End of file in global variable name"); - return YYERROR; + Error("end of file in global variable name"); + return lltok::Error; } if (CurChar == '"') { - llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); - UnEscapeLexed(*llvmAsmlval.StrVal); - return ATSTRINGCONSTANT; + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::GlobalVar; } } } @@ -280,8 +313,8 @@ int LLLexer::LexAt() { CurPtr[0] == '.' || CurPtr[0] == '_') ++CurPtr; - llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip @ - return GLOBALVAR; + StrVal.assign(TokStart+1, CurPtr); // Skip @ + return lltok::GlobalVar; } // Handle GlobalVarID: @[0-9]+ @@ -291,21 +324,21 @@ int LLLexer::LexAt() { uint64_t Val = atoull(TokStart+1, CurPtr); if ((unsigned)Val != Val) - GenerateError("Invalid value number (too large)!"); - llvmAsmlval.UIntVal = unsigned(Val); - return GLOBALVAL_ID; + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::GlobalID; } - return '@'; + return lltok::Error; } /// LexPercent - Lex all tokens that start with a % character: -/// PctStringConstant %\"[^\"]*\" -/// LocalVarName %[-a-zA-Z$._][-a-zA-Z$._0-9]* -/// LocalVarID %[0-9]+ -int LLLexer::LexPercent() { - // Handle PctStringConstant: %\"[^\"]*\" +/// LocalVar ::= %\"[^\"]*\" +/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// LocalVarID ::= %[0-9]+ +lltok::Kind LLLexer::LexPercent() { + // Handle LocalVarName: %\"[^\"]*\" if (CurPtr[0] == '"') { ++CurPtr; @@ -313,13 +346,13 @@ int LLLexer::LexPercent() { int CurChar = getNextChar(); if (CurChar == EOF) { - GenerateError("End of file in local variable name"); - return YYERROR; + Error("end of file in string constant"); + return lltok::Error; } if (CurChar == '"') { - llvmAsmlval.StrVal = new std::string(TokStart+2, CurPtr-1); - UnEscapeLexed(*llvmAsmlval.StrVal); - return PCTSTRINGCONSTANT; + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::LocalVar; } } } @@ -332,8 +365,8 @@ int LLLexer::LexPercent() { CurPtr[0] == '.' || CurPtr[0] == '_') ++CurPtr; - llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr); // Skip % - return LOCALVAR; + StrVal.assign(TokStart+1, CurPtr); // Skip % + return lltok::LocalVar; } // Handle LocalVarID: %[0-9]+ @@ -343,38 +376,38 @@ int LLLexer::LexPercent() { uint64_t Val = atoull(TokStart+1, CurPtr); if ((unsigned)Val != Val) - GenerateError("Invalid value number (too large)!"); - llvmAsmlval.UIntVal = unsigned(Val); - return LOCALVAL_ID; + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::LocalVarID; } - return '%'; + return lltok::Error; } /// LexQuote - Lex all tokens that start with a " character: /// QuoteLabel "[^"]+": /// StringConstant "[^"]*" -int LLLexer::LexQuote() { +lltok::Kind LLLexer::LexQuote() { while (1) { int CurChar = getNextChar(); if (CurChar == EOF) { - GenerateError("End of file in quoted string"); - return YYERROR; + Error("end of file in quoted string"); + return lltok::Error; } if (CurChar != '"') continue; if (CurPtr[0] != ':') { - llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-1); - UnEscapeLexed(*llvmAsmlval.StrVal); - return STRINGCONSTANT; + StrVal.assign(TokStart+1, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::StringConstant; } ++CurPtr; - llvmAsmlval.StrVal = new std::string(TokStart+1, CurPtr-2); - UnEscapeLexed(*llvmAsmlval.StrVal); - return LABELSTR; + StrVal.assign(TokStart+1, CurPtr-2); + UnEscapeLexed(StrVal); + return lltok::LabelStr; } } @@ -389,13 +422,29 @@ static bool JustWhitespaceNewLine(const char *&Ptr) { return false; } +/// LexExclaim: +/// !foo +/// ! +lltok::Kind LLLexer::LexExclaim() { + // Lex a metadata name as a MetadataVar. + if (isalpha(CurPtr[0])) { + ++CurPtr; + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') + ++CurPtr; + StrVal.assign(TokStart+1, CurPtr); // Skip ! + return lltok::MetadataVar; + } + return lltok::exclaim; +} + /// LexIdentifier: Handle several related productions: /// Label [-a-zA-Z$._0-9]+: /// IntegerType i[0-9]+ /// Keyword sdiv, float, ... /// HexIntConstant [us]0x[0-9A-Fa-f]+ -int LLLexer::LexIdentifier() { +lltok::Kind LLLexer::LexIdentifier() { const char *StartChar = CurPtr; const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; const char *KeywordEnd = 0; @@ -408,8 +457,8 @@ int LLLexer::LexIdentifier() { // If we stopped due to a colon, this really is a label. if (*CurPtr == ':') { - llvmAsmlval.StrVal = new std::string(StartChar-1, CurPtr++); - return LABELSTR; + StrVal.assign(StartChar-1, CurPtr++); + return lltok::LabelStr; } // Otherwise, this wasn't a label. If this was valid as an integer type, @@ -420,12 +469,11 @@ int LLLexer::LexIdentifier() { uint64_t NumBits = atoull(StartChar, CurPtr); if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) { - GenerateError("Bitwidth for integer type out of range!"); - return YYERROR; + Error("bitwidth for integer type out of range!"); + return lltok::Error; } - const Type* Ty = IntegerType::get(NumBits); - llvmAsmlval.PrimType = Ty; - return INTTYPE; + TyVal = IntegerType::get(Context, NumBits); + return lltok::Type; } // Otherwise, this was a letter sequence. See which keyword this is. @@ -433,112 +481,121 @@ int LLLexer::LexIdentifier() { CurPtr = KeywordEnd; --StartChar; unsigned Len = CurPtr-StartChar; -#define KEYWORD(STR, TOK) \ - if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) return TOK; - - KEYWORD("begin", BEGINTOK); - KEYWORD("end", ENDTOK); - KEYWORD("true", TRUETOK); - KEYWORD("false", FALSETOK); - KEYWORD("declare", DECLARE); - KEYWORD("define", DEFINE); - KEYWORD("global", GLOBAL); - KEYWORD("constant", CONSTANT); - - KEYWORD("internal", INTERNAL); - KEYWORD("linkonce", LINKONCE); - KEYWORD("weak", WEAK); - KEYWORD("appending", APPENDING); - KEYWORD("dllimport", DLLIMPORT); - KEYWORD("dllexport", DLLEXPORT); - KEYWORD("common", COMMON); - KEYWORD("default", DEFAULT); - KEYWORD("hidden", HIDDEN); - KEYWORD("protected", PROTECTED); - KEYWORD("extern_weak", EXTERN_WEAK); - KEYWORD("external", EXTERNAL); - KEYWORD("thread_local", THREAD_LOCAL); - KEYWORD("zeroinitializer", ZEROINITIALIZER); - KEYWORD("undef", UNDEF); - KEYWORD("null", NULL_TOK); - KEYWORD("to", TO); - KEYWORD("tail", TAIL); - KEYWORD("target", TARGET); - KEYWORD("triple", TRIPLE); - KEYWORD("deplibs", DEPLIBS); - KEYWORD("datalayout", DATALAYOUT); - KEYWORD("volatile", VOLATILE); - KEYWORD("align", ALIGN); - KEYWORD("addrspace", ADDRSPACE); - KEYWORD("section", SECTION); - KEYWORD("alias", ALIAS); - KEYWORD("module", MODULE); - KEYWORD("asm", ASM_TOK); - KEYWORD("sideeffect", SIDEEFFECT); - KEYWORD("gc", GC); - - KEYWORD("cc", CC_TOK); - KEYWORD("ccc", CCC_TOK); - KEYWORD("fastcc", FASTCC_TOK); - KEYWORD("coldcc", COLDCC_TOK); - KEYWORD("x86_stdcallcc", X86_STDCALLCC_TOK); - KEYWORD("x86_fastcallcc", X86_FASTCALLCC_TOK); - - KEYWORD("signext", SIGNEXT); - KEYWORD("zeroext", ZEROEXT); - KEYWORD("inreg", INREG); - KEYWORD("sret", SRET); - KEYWORD("nounwind", NOUNWIND); - KEYWORD("noreturn", NORETURN); - KEYWORD("noalias", NOALIAS); - KEYWORD("nocapture", NOCAPTURE); - KEYWORD("byval", BYVAL); - KEYWORD("nest", NEST); - KEYWORD("readnone", READNONE); - KEYWORD("readonly", READONLY); - - KEYWORD("noinline", NOINLINE); - KEYWORD("alwaysinline", ALWAYSINLINE); - KEYWORD("optsize", OPTSIZE); - KEYWORD("ssp", SSP); - KEYWORD("sspreq", SSPREQ); - - KEYWORD("type", TYPE); - KEYWORD("opaque", OPAQUE); - - KEYWORD("eq" , EQ); - KEYWORD("ne" , NE); - KEYWORD("slt", SLT); - KEYWORD("sgt", SGT); - KEYWORD("sle", SLE); - KEYWORD("sge", SGE); - KEYWORD("ult", ULT); - KEYWORD("ugt", UGT); - KEYWORD("ule", ULE); - KEYWORD("uge", UGE); - KEYWORD("oeq", OEQ); - KEYWORD("one", ONE); - KEYWORD("olt", OLT); - KEYWORD("ogt", OGT); - KEYWORD("ole", OLE); - KEYWORD("oge", OGE); - KEYWORD("ord", ORD); - KEYWORD("uno", UNO); - KEYWORD("ueq", UEQ); - KEYWORD("une", UNE); +#define KEYWORD(STR) \ + if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ + return lltok::kw_##STR; + + KEYWORD(begin); KEYWORD(end); + KEYWORD(true); KEYWORD(false); + KEYWORD(declare); KEYWORD(define); + KEYWORD(global); KEYWORD(constant); + + KEYWORD(private); + KEYWORD(linker_private); + KEYWORD(linker_private_weak); + KEYWORD(linker_private_weak_def_auto); + KEYWORD(internal); + KEYWORD(available_externally); + KEYWORD(linkonce); + KEYWORD(linkonce_odr); + KEYWORD(weak); + KEYWORD(weak_odr); + KEYWORD(appending); + KEYWORD(dllimport); + KEYWORD(dllexport); + KEYWORD(common); + KEYWORD(default); + KEYWORD(hidden); + KEYWORD(protected); + KEYWORD(extern_weak); + KEYWORD(external); + KEYWORD(thread_local); + KEYWORD(zeroinitializer); + KEYWORD(undef); + KEYWORD(null); + KEYWORD(to); + KEYWORD(tail); + KEYWORD(target); + KEYWORD(triple); + KEYWORD(deplibs); + KEYWORD(datalayout); + KEYWORD(volatile); + KEYWORD(nuw); + KEYWORD(nsw); + KEYWORD(exact); + KEYWORD(inbounds); + KEYWORD(align); + KEYWORD(addrspace); + KEYWORD(section); + KEYWORD(alias); + KEYWORD(module); + KEYWORD(asm); + KEYWORD(sideeffect); + KEYWORD(alignstack); + KEYWORD(gc); + + KEYWORD(ccc); + KEYWORD(fastcc); + KEYWORD(coldcc); + KEYWORD(x86_stdcallcc); + KEYWORD(x86_fastcallcc); + KEYWORD(x86_thiscallcc); + KEYWORD(arm_apcscc); + KEYWORD(arm_aapcscc); + KEYWORD(arm_aapcs_vfpcc); + KEYWORD(msp430_intrcc); + + KEYWORD(cc); + KEYWORD(c); + + KEYWORD(signext); + KEYWORD(zeroext); + KEYWORD(inreg); + KEYWORD(sret); + KEYWORD(nounwind); + KEYWORD(noreturn); + KEYWORD(noalias); + KEYWORD(nocapture); + KEYWORD(byval); + KEYWORD(nest); + KEYWORD(readnone); + KEYWORD(readonly); + + KEYWORD(inlinehint); + KEYWORD(noinline); + KEYWORD(alwaysinline); + KEYWORD(optsize); + KEYWORD(ssp); + KEYWORD(sspreq); + KEYWORD(noredzone); + KEYWORD(noimplicitfloat); + KEYWORD(naked); + + KEYWORD(type); + KEYWORD(opaque); + + KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); + KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); + KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); + KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); + + KEYWORD(x); + KEYWORD(blockaddress); #undef KEYWORD // Keywords for types. -#define TYPEKEYWORD(STR, LLVMTY, TOK) \ +#define TYPEKEYWORD(STR, LLVMTY) \ if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ - llvmAsmlval.PrimType = LLVMTY; return TOK; } - TYPEKEYWORD("void", Type::VoidTy, VOID); - TYPEKEYWORD("float", Type::FloatTy, FLOAT); - TYPEKEYWORD("double", Type::DoubleTy, DOUBLE); - TYPEKEYWORD("x86_fp80", Type::X86_FP80Ty, X86_FP80); - TYPEKEYWORD("fp128", Type::FP128Ty, FP128); - TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty, PPC_FP128); - TYPEKEYWORD("label", Type::LabelTy, LABEL); + TyVal = LLVMTY; return lltok::Type; } + TYPEKEYWORD("void", Type::getVoidTy(Context)); + TYPEKEYWORD("float", Type::getFloatTy(Context)); + TYPEKEYWORD("double", Type::getDoubleTy(Context)); + TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context)); + TYPEKEYWORD("fp128", Type::getFP128Ty(Context)); + TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context)); + TYPEKEYWORD("label", Type::getLabelTy(Context)); + TYPEKEYWORD("metadata", Type::getMetadataTy(Context)); + TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context)); #undef TYPEKEYWORD // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is @@ -546,74 +603,70 @@ int LLLexer::LexIdentifier() { if (Len == 4 && !memcmp(StartChar, "sext", 4)) { // Scan CurPtr ahead, seeing if there is just whitespace before the newline. if (JustWhitespaceNewLine(CurPtr)) - return SIGNEXT; + return lltok::kw_signext; } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { // Scan CurPtr ahead, seeing if there is just whitespace before the newline. if (JustWhitespaceNewLine(CurPtr)) - return ZEROEXT; + return lltok::kw_zeroext; + } else if (Len == 6 && !memcmp(StartChar, "malloc", 6)) { + // FIXME: Remove in LLVM 3.0. + // Autoupgrade malloc instruction. + return lltok::kw_malloc; + } else if (Len == 4 && !memcmp(StartChar, "free", 4)) { + // FIXME: Remove in LLVM 3.0. + // Autoupgrade malloc instruction. + return lltok::kw_free; } // Keywords for instructions. -#define INSTKEYWORD(STR, type, Enum, TOK) \ - if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ - llvmAsmlval.type = Instruction::Enum; return TOK; } - - INSTKEYWORD("add", BinaryOpVal, Add, ADD); - INSTKEYWORD("sub", BinaryOpVal, Sub, SUB); - INSTKEYWORD("mul", BinaryOpVal, Mul, MUL); - INSTKEYWORD("udiv", BinaryOpVal, UDiv, UDIV); - INSTKEYWORD("sdiv", BinaryOpVal, SDiv, SDIV); - INSTKEYWORD("fdiv", BinaryOpVal, FDiv, FDIV); - INSTKEYWORD("urem", BinaryOpVal, URem, UREM); - INSTKEYWORD("srem", BinaryOpVal, SRem, SREM); - INSTKEYWORD("frem", BinaryOpVal, FRem, FREM); - INSTKEYWORD("shl", BinaryOpVal, Shl, SHL); - INSTKEYWORD("lshr", BinaryOpVal, LShr, LSHR); - INSTKEYWORD("ashr", BinaryOpVal, AShr, ASHR); - INSTKEYWORD("and", BinaryOpVal, And, AND); - INSTKEYWORD("or", BinaryOpVal, Or , OR ); - INSTKEYWORD("xor", BinaryOpVal, Xor, XOR); - INSTKEYWORD("icmp", OtherOpVal, ICmp, ICMP); - INSTKEYWORD("fcmp", OtherOpVal, FCmp, FCMP); - INSTKEYWORD("vicmp", OtherOpVal, VICmp, VICMP); - INSTKEYWORD("vfcmp", OtherOpVal, VFCmp, VFCMP); - - INSTKEYWORD("phi", OtherOpVal, PHI, PHI_TOK); - INSTKEYWORD("call", OtherOpVal, Call, CALL); - INSTKEYWORD("trunc", CastOpVal, Trunc, TRUNC); - INSTKEYWORD("zext", CastOpVal, ZExt, ZEXT); - INSTKEYWORD("sext", CastOpVal, SExt, SEXT); - INSTKEYWORD("fptrunc", CastOpVal, FPTrunc, FPTRUNC); - INSTKEYWORD("fpext", CastOpVal, FPExt, FPEXT); - INSTKEYWORD("uitofp", CastOpVal, UIToFP, UITOFP); - INSTKEYWORD("sitofp", CastOpVal, SIToFP, SITOFP); - INSTKEYWORD("fptoui", CastOpVal, FPToUI, FPTOUI); - INSTKEYWORD("fptosi", CastOpVal, FPToSI, FPTOSI); - INSTKEYWORD("inttoptr", CastOpVal, IntToPtr, INTTOPTR); - INSTKEYWORD("ptrtoint", CastOpVal, PtrToInt, PTRTOINT); - INSTKEYWORD("bitcast", CastOpVal, BitCast, BITCAST); - INSTKEYWORD("select", OtherOpVal, Select, SELECT); - INSTKEYWORD("va_arg", OtherOpVal, VAArg , VAARG); - INSTKEYWORD("ret", TermOpVal, Ret, RET); - INSTKEYWORD("br", TermOpVal, Br, BR); - INSTKEYWORD("switch", TermOpVal, Switch, SWITCH); - INSTKEYWORD("invoke", TermOpVal, Invoke, INVOKE); - INSTKEYWORD("unwind", TermOpVal, Unwind, UNWIND); - INSTKEYWORD("unreachable", TermOpVal, Unreachable, UNREACHABLE); - - INSTKEYWORD("malloc", MemOpVal, Malloc, MALLOC); - INSTKEYWORD("alloca", MemOpVal, Alloca, ALLOCA); - INSTKEYWORD("free", MemOpVal, Free, FREE); - INSTKEYWORD("load", MemOpVal, Load, LOAD); - INSTKEYWORD("store", MemOpVal, Store, STORE); - INSTKEYWORD("getelementptr", MemOpVal, GetElementPtr, GETELEMENTPTR); - - INSTKEYWORD("extractelement", OtherOpVal, ExtractElement, EXTRACTELEMENT); - INSTKEYWORD("insertelement", OtherOpVal, InsertElement, INSERTELEMENT); - INSTKEYWORD("shufflevector", OtherOpVal, ShuffleVector, SHUFFLEVECTOR); - INSTKEYWORD("getresult", OtherOpVal, ExtractValue, GETRESULT); - INSTKEYWORD("extractvalue", OtherOpVal, ExtractValue, EXTRACTVALUE); - INSTKEYWORD("insertvalue", OtherOpVal, InsertValue, INSERTVALUE); +#define INSTKEYWORD(STR, Enum) \ + if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \ + UIntVal = Instruction::Enum; return lltok::kw_##STR; } + + INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); + INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); + INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul); + INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv); + INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem); + INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr); + INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor); + INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp); + + INSTKEYWORD(phi, PHI); + INSTKEYWORD(call, Call); + INSTKEYWORD(trunc, Trunc); + INSTKEYWORD(zext, ZExt); + INSTKEYWORD(sext, SExt); + INSTKEYWORD(fptrunc, FPTrunc); + INSTKEYWORD(fpext, FPExt); + INSTKEYWORD(uitofp, UIToFP); + INSTKEYWORD(sitofp, SIToFP); + INSTKEYWORD(fptoui, FPToUI); + INSTKEYWORD(fptosi, FPToSI); + INSTKEYWORD(inttoptr, IntToPtr); + INSTKEYWORD(ptrtoint, PtrToInt); + INSTKEYWORD(bitcast, BitCast); + INSTKEYWORD(select, Select); + INSTKEYWORD(va_arg, VAArg); + INSTKEYWORD(ret, Ret); + INSTKEYWORD(br, Br); + INSTKEYWORD(switch, Switch); + INSTKEYWORD(indirectbr, IndirectBr); + INSTKEYWORD(invoke, Invoke); + INSTKEYWORD(unwind, Unwind); + INSTKEYWORD(unreachable, Unreachable); + + INSTKEYWORD(alloca, Alloca); + INSTKEYWORD(load, Load); + INSTKEYWORD(store, Store); + INSTKEYWORD(getelementptr, GetElementPtr); + + INSTKEYWORD(extractelement, ExtractElement); + INSTKEYWORD(insertelement, InsertElement); + INSTKEYWORD(shufflevector, ShuffleVector); + INSTKEYWORD(getresult, ExtractValue); + INSTKEYWORD(extractvalue, ExtractValue); + INSTKEYWORD(insertvalue, InsertValue); #undef INSTKEYWORD // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by @@ -622,39 +675,31 @@ int LLLexer::LexIdentifier() { TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { int len = CurPtr-TokStart-3; uint32_t bits = len * 4; - APInt Tmp(bits, TokStart+3, len, 16); + APInt Tmp(bits, StringRef(TokStart+3, len), 16); uint32_t activeBits = Tmp.getActiveBits(); if (activeBits > 0 && activeBits < bits) Tmp.trunc(activeBits); - if (Tmp.getBitWidth() > 64) { - llvmAsmlval.APIntVal = new APInt(Tmp); - return TokStart[0] == 's' ? ESAPINTVAL : EUAPINTVAL; - } else if (TokStart[0] == 's') { - llvmAsmlval.SInt64Val = Tmp.getSExtValue(); - return ESINT64VAL; - } else { - llvmAsmlval.UInt64Val = Tmp.getZExtValue(); - return EUINT64VAL; - } + APSIntVal = APSInt(Tmp, TokStart[0] == 'u'); + return lltok::APSInt; } // If this is "cc1234", return this as just "cc". if (TokStart[0] == 'c' && TokStart[1] == 'c') { CurPtr = TokStart+2; - return CC_TOK; + return lltok::kw_cc; } // If this starts with "call", return it as CALL. This is to support old // broken .ll files. FIXME: remove this with LLVM 3.0. if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) { CurPtr = TokStart+4; - llvmAsmlval.OtherOpVal = Instruction::Call; - return CALL; + UIntVal = Instruction::Call; + return lltok::kw_call; } - // Finally, if this isn't known, return just a single character. + // Finally, if this isn't known, return an error. CurPtr = TokStart+1; - return TokStart[0]; + return lltok::Error; } @@ -664,7 +709,7 @@ int LLLexer::LexIdentifier() { /// HexFP80Constant 0xK[0-9A-Fa-f]+ /// HexFP128Constant 0xL[0-9A-Fa-f]+ /// HexPPC128Constant 0xM[0-9A-Fa-f]+ -int LLLexer::Lex0x() { +lltok::Kind LLLexer::Lex0x() { CurPtr = TokStart + 2; char Kind; @@ -675,9 +720,9 @@ int LLLexer::Lex0x() { } if (!isxdigit(CurPtr[0])) { - // Bad token, return it as just zero. + // Bad token, return it as an error. CurPtr = TokStart+1; - return '0'; + return lltok::Error; } while (isxdigit(CurPtr[0])) @@ -687,26 +732,28 @@ int LLLexer::Lex0x() { // HexFPConstant - Floating point constant represented in IEEE format as a // hexadecimal number for when exponential notation is not precise enough. // Float and double only. - llvmAsmlval.FPVal = new APFloat(HexToFP(TokStart+2, CurPtr)); - return FPVAL; + APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr))); + return lltok::APFloat; } uint64_t Pair[2]; - HexToIntPair(TokStart+3, CurPtr, Pair); switch (Kind) { - default: assert(0 && "Unknown kind!"); + default: llvm_unreachable("Unknown kind!"); case 'K': // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) - llvmAsmlval.FPVal = new APFloat(APInt(80, 2, Pair)); - return FPVAL; + FP80HexToIntPair(TokStart+3, CurPtr, Pair); + APFloatVal = APFloat(APInt(80, 2, Pair)); + return lltok::APFloat; case 'L': // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) - llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair), true); - return FPVAL; + HexToIntPair(TokStart+3, CurPtr, Pair); + APFloatVal = APFloat(APInt(128, 2, Pair), true); + return lltok::APFloat; case 'M': // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) - llvmAsmlval.FPVal = new APFloat(APInt(128, 2, Pair)); - return FPVAL; + HexToIntPair(TokStart+3, CurPtr, Pair); + APFloatVal = APFloat(APInt(128, 2, Pair)); + return lltok::APFloat; } } @@ -719,17 +766,17 @@ int LLLexer::Lex0x() { /// HexFP80Constant 0xK[0-9A-Fa-f]+ /// HexFP128Constant 0xL[0-9A-Fa-f]+ /// HexPPC128Constant 0xM[0-9A-Fa-f]+ -int LLLexer::LexDigitOrNegative() { +lltok::Kind LLLexer::LexDigitOrNegative() { // If the letter after the negative is a number, this is probably a label. if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { // Okay, this is not a number after the -, it's probably a label. if (const char *End = isLabelTail(CurPtr)) { - llvmAsmlval.StrVal = new std::string(TokStart, End-1); + StrVal.assign(TokStart, End-1); CurPtr = End; - return LABELSTR; + return lltok::LabelStr; } - return CurPtr[-1]; + return lltok::Error; } // At this point, it is either a label, int or fp constant. @@ -741,9 +788,9 @@ int LLLexer::LexDigitOrNegative() { // Check to see if this really is a label afterall, e.g. "-1:". if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { if (const char *End = isLabelTail(CurPtr)) { - llvmAsmlval.StrVal = new std::string(TokStart, End-1); + StrVal.assign(TokStart, End-1); CurPtr = End; - return LABELSTR; + return lltok::LabelStr; } } @@ -754,30 +801,19 @@ int LLLexer::LexDigitOrNegative() { return Lex0x(); unsigned Len = CurPtr-TokStart; uint32_t numBits = ((Len * 64) / 19) + 2; - APInt Tmp(numBits, TokStart, Len, 10); + APInt Tmp(numBits, StringRef(TokStart, Len), 10); if (TokStart[0] == '-') { uint32_t minBits = Tmp.getMinSignedBits(); if (minBits > 0 && minBits < numBits) Tmp.trunc(minBits); - if (Tmp.getBitWidth() > 64) { - llvmAsmlval.APIntVal = new APInt(Tmp); - return ESAPINTVAL; - } else { - llvmAsmlval.SInt64Val = Tmp.getSExtValue(); - return ESINT64VAL; - } + APSIntVal = APSInt(Tmp, false); } else { uint32_t activeBits = Tmp.getActiveBits(); if (activeBits > 0 && activeBits < numBits) Tmp.trunc(activeBits); - if (Tmp.getBitWidth() > 64) { - llvmAsmlval.APIntVal = new APInt(Tmp); - return EUAPINTVAL; - } else { - llvmAsmlval.UInt64Val = Tmp.getZExtValue(); - return EUINT64VAL; - } + APSIntVal = APSInt(Tmp, true); } + return lltok::APSInt; } ++CurPtr; @@ -793,16 +829,16 @@ int LLLexer::LexDigitOrNegative() { } } - llvmAsmlval.FPVal = new APFloat(atof(TokStart)); - return FPVAL; + APFloatVal = APFloat(atof(TokStart)); + return lltok::APFloat; } /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? -int LLLexer::LexPositive() { +lltok::Kind LLLexer::LexPositive() { // If the letter after the negative is a number, this is probably not a // label. if (!isdigit(CurPtr[0])) - return CurPtr[-1]; + return lltok::Error; // Skip digits. for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) @@ -811,7 +847,7 @@ int LLLexer::LexPositive() { // At this point, we need a '.'. if (CurPtr[0] != '.') { CurPtr = TokStart+1; - return TokStart[0]; + return lltok::Error; } ++CurPtr; @@ -827,31 +863,6 @@ int LLLexer::LexPositive() { } } - llvmAsmlval.FPVal = new APFloat(atof(TokStart)); - return FPVAL; -} - - -//===----------------------------------------------------------------------===// -// Define the interface to this file. -//===----------------------------------------------------------------------===// - -static LLLexer *TheLexer; - -void InitLLLexer(llvm::MemoryBuffer *MB) { - assert(TheLexer == 0 && "LL Lexer isn't reentrant yet"); - TheLexer = new LLLexer(MB); -} - -int llvmAsmlex() { - return TheLexer->LexToken(); -} -const char *LLLgetTokenStart() { return TheLexer->getTokStart(); } -unsigned LLLgetTokenLength() { return TheLexer->getTokLength(); } -std::string LLLgetFilename() { return TheLexer->getFilename(); } -unsigned LLLgetLineNo() { return TheLexer->getLineNo(); } - -void FreeLexer() { - delete TheLexer; - TheLexer = 0; + APFloatVal = APFloat(atof(TokStart)); + return lltok::APFloat; }