X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FAsmParser%2FLLLexer.cpp;h=91ce1fd7669dfebba2dc27b9c0048f70b1002c8f;hb=4afa7f1d5714a58ab1e79f749652872d666177a8;hp=ca3b7902b03bb95c88764ffc94d6b1ee7b2cc607;hpb=bc65a8d518ebe0408deb15d9314fd2ff5cbd0686;p=oota-llvm.git diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index ca3b7902b03..91ce1fd7669 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -34,6 +34,10 @@ bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const { return true; } +void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const { + SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); +} + //===----------------------------------------------------------------------===// // Helper functions. //===----------------------------------------------------------------------===// @@ -74,13 +78,15 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { void LLLexer::HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]) { Pair[0] = 0; - for (int i=0; i<16; i++, Buffer++) { - assert(Buffer != End); - Pair[0] *= 16; - Pair[0] += hexDigitValue(*Buffer); + if (End - Buffer >= 16) { + for (int i = 0; i < 16; i++, Buffer++) { + assert(Buffer != End); + Pair[0] *= 16; + Pair[0] += hexDigitValue(*Buffer); + } } Pair[1] = 0; - for (int i=0; i<16 && Buffer != End; i++, Buffer++) { + for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) { Pair[1] *= 16; Pair[1] += hexDigitValue(*Buffer); } @@ -99,7 +105,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, Pair[1] += hexDigitValue(*Buffer); } Pair[0] = 0; - for (int i=0; i<16; i++, Buffer++) { + for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) { Pair[0] *= 16; Pair[0] += hexDigitValue(*Buffer); } @@ -146,7 +152,7 @@ static bool isLabelChar(char C) { static const char *isLabelTail(const char *CurPtr) { while (1) { if (CurPtr[0] == ':') return CurPtr+1; - if (!isLabelChar(CurPtr[0])) return 0; + if (!isLabelChar(CurPtr[0])) return nullptr; ++CurPtr; } } @@ -157,14 +163,10 @@ static const char *isLabelTail(const char *CurPtr) { // Lexer definition. //===----------------------------------------------------------------------===// -LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err, +LLLexer::LLLexer(StringRef StartBuf, SourceMgr &sm, SMDiagnostic &Err, LLVMContext &C) : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) { - CurPtr = CurBuf->getBufferStart(); -} - -std::string LLLexer::getFilename() const { - return CurBuf->getBufferIdentifier(); + CurPtr = CurBuf.begin(); } int LLLexer::getNextChar() { @@ -174,7 +176,7 @@ int LLLexer::getNextChar() { case 0: // A nul character in the stream is either the end of the current buffer or // a random nul in the file. Disambiguate that here. - if (CurPtr-1 != CurBuf->getBufferEnd()) + if (CurPtr-1 != CurBuf.end()) return 0; // Just whitespace. // Otherwise, return end of file. @@ -205,6 +207,7 @@ lltok::Kind LLLexer::LexToken() { return LexToken(); case '+': return LexPositive(); case '@': return LexAt(); + case '$': return LexDollar(); case '%': return LexPercent(); case '"': return LexQuote(); case '.': @@ -218,13 +221,6 @@ lltok::Kind LLLexer::LexToken() { return lltok::dotdotdot; } return lltok::Error; - case '$': - if (const char *Ptr = isLabelTail(CurPtr)) { - CurPtr = Ptr; - StrVal.assign(TokStart, CurPtr-1); - return lltok::LabelStr; - } - return lltok::Error; case ';': SkipLineComment(); return LexToken(); @@ -245,7 +241,7 @@ lltok::Kind LLLexer::LexToken() { case ')': return lltok::rparen; case ',': return lltok::comma; case '*': return lltok::star; - case '\\': return lltok::backslash; + case '|': return lltok::bar; } } @@ -256,12 +252,22 @@ void LLLexer::SkipLineComment() { } } -/// LexAt - Lex all tokens that start with an @ character: +/// Lex all tokens that start with an @ character. /// GlobalVar @\"[^\"]*\" /// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* /// GlobalVarID @[0-9]+ lltok::Kind LLLexer::LexAt() { - // Handle AtStringConstant: @\"[^\"]*\" + return LexVar(lltok::GlobalVar, lltok::GlobalID); +} + +lltok::Kind LLLexer::LexDollar() { + if (const char *Ptr = isLabelTail(TokStart)) { + CurPtr = Ptr; + StrVal.assign(TokStart, CurPtr - 1); + return lltok::LabelStr; + } + + // Handle DollarStringConstant: $\"[^\"]*\" if (CurPtr[0] == '"') { ++CurPtr; @@ -269,36 +275,24 @@ lltok::Kind LLLexer::LexAt() { int CurChar = getNextChar(); if (CurChar == EOF) { - Error("end of file in global variable name"); + Error("end of file in COMDAT variable name"); return lltok::Error; } if (CurChar == '"') { - StrVal.assign(TokStart+2, CurPtr-1); + StrVal.assign(TokStart + 2, CurPtr - 1); UnEscapeLexed(StrVal); if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { Error("Null bytes are not allowed in names"); return lltok::Error; } - return lltok::GlobalVar; + return lltok::ComdatVar; } } } - // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* + // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]* if (ReadVarName()) - return lltok::GlobalVar; - - // Handle GlobalVarID: @[0-9]+ - if (isdigit(static_cast(CurPtr[0]))) { - for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) - /*empty*/; - - uint64_t Val = atoull(TokStart+1, CurPtr); - if ((unsigned)Val != Val) - Error("invalid value number (too large)!"); - UIntVal = unsigned(Val); - return lltok::GlobalID; - } + return lltok::ComdatVar; return lltok::Error; } @@ -339,22 +333,35 @@ bool LLLexer::ReadVarName() { return false; } -/// LexPercent - Lex all tokens that start with a % character: -/// LocalVar ::= %\"[^\"]*\" -/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* -/// LocalVarID ::= %[0-9]+ -lltok::Kind LLLexer::LexPercent() { - // Handle LocalVarName: %\"[^\"]*\" +lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { + // Handle StringConstant: \"[^\"]*\" if (CurPtr[0] == '"') { ++CurPtr; - return ReadString(lltok::LocalVar); + + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + Error("end of file in global variable name"); + return lltok::Error; + } + if (CurChar == '"') { + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { + Error("Null bytes are not allowed in names"); + return lltok::Error; + } + return Var; + } + } } - // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* + // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]* if (ReadVarName()) - return lltok::LocalVar; + return Var; - // Handle LocalVarID: %[0-9]+ + // Handle VarID: [0-9]+ if (isdigit(static_cast(CurPtr[0]))) { for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; @@ -363,13 +370,20 @@ lltok::Kind LLLexer::LexPercent() { if ((unsigned)Val != Val) Error("invalid value number (too large)!"); UIntVal = unsigned(Val); - return lltok::LocalVarID; + return VarID; } - return lltok::Error; } -/// LexQuote - Lex all tokens that start with a " character: +/// Lex all tokens that start with a % character. +/// LocalVar ::= %\"[^\"]*\" +/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// LocalVarID ::= %[0-9]+ +lltok::Kind LLLexer::LexPercent() { + return LexVar(lltok::LocalVar, lltok::LocalVarID); +} + +/// Lex all tokens that start with a " character. /// QuoteLabel "[^"]+": /// StringConstant "[^"]*" lltok::Kind LLLexer::LexQuote() { @@ -379,13 +393,18 @@ lltok::Kind LLLexer::LexQuote() { if (CurPtr[0] == ':') { ++CurPtr; - kind = lltok::LabelStr; + if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { + Error("Null bytes are not allowed in names"); + kind = lltok::Error; + } else { + kind = lltok::LabelStr; + } } return kind; } -/// LexExclaim: +/// Lex all tokens that start with a ! character. /// !foo /// ! lltok::Kind LLLexer::LexExclaim() { @@ -406,7 +425,7 @@ lltok::Kind LLLexer::LexExclaim() { return lltok::exclaim; } -/// LexHash - Lex all tokens that start with a # character: +/// Lex all tokens that start with a # character. /// AttrGrpID ::= #[0-9]+ lltok::Kind LLLexer::LexHash() { // Handle AttrGrpID: #[0-9]+ @@ -424,15 +443,15 @@ lltok::Kind LLLexer::LexHash() { return lltok::Error; } -/// LexIdentifier: Handle several related productions: +/// Lex a label, integer type, keyword, or hexadecimal integer constant. /// Label [-a-zA-Z$._0-9]+: /// IntegerType i[0-9]+ /// Keyword sdiv, float, ... /// HexIntConstant [us]0x[0-9A-Fa-f]+ lltok::Kind LLLexer::LexIdentifier() { const char *StartChar = CurPtr; - const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; - const char *KeywordEnd = 0; + const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar; + const char *KeywordEnd = nullptr; for (; isLabelChar(*CurPtr); ++CurPtr) { // If we decide this is an integer, remember the end of the sequence. @@ -451,7 +470,7 @@ lltok::Kind LLLexer::LexIdentifier() { // Otherwise, this wasn't a label. If this was valid as an integer type, // return it. - if (IntEnd == 0) IntEnd = CurPtr; + if (!IntEnd) IntEnd = CurPtr; if (IntEnd != StartChar) { CurPtr = IntEnd; uint64_t NumBits = atoull(StartChar, CurPtr); @@ -465,14 +484,14 @@ lltok::Kind LLLexer::LexIdentifier() { } // Otherwise, this was a letter sequence. See which keyword this is. - if (KeywordEnd == 0) KeywordEnd = CurPtr; + if (!KeywordEnd) KeywordEnd = CurPtr; CurPtr = KeywordEnd; --StartChar; - unsigned Len = CurPtr-StartChar; -#define KEYWORD(STR) \ - do { \ - if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ - return lltok::kw_##STR; \ + StringRef Keyword(StartChar, CurPtr - StartChar); +#define KEYWORD(STR) \ + do { \ + if (Keyword == #STR) \ + return lltok::kw_##STR; \ } while (0) KEYWORD(true); KEYWORD(false); @@ -480,13 +499,11 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(global); KEYWORD(constant); KEYWORD(private); - KEYWORD(linker_private); - KEYWORD(linker_private_weak); KEYWORD(internal); KEYWORD(available_externally); KEYWORD(linkonce); KEYWORD(linkonce_odr); - KEYWORD(weak); + KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg". KEYWORD(weak_odr); KEYWORD(appending); KEYWORD(dllimport); @@ -507,7 +524,10 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(undef); KEYWORD(null); KEYWORD(to); + KEYWORD(caller); KEYWORD(tail); + KEYWORD(musttail); + KEYWORD(notail); KEYWORD(target); KEYWORD(triple); KEYWORD(unwind); @@ -543,6 +563,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(inteldialect); KEYWORD(gc); KEYWORD(prefix); + KEYWORD(prologue); KEYWORD(ccc); KEYWORD(fastcc); @@ -550,6 +571,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_stdcallcc); KEYWORD(x86_fastcallcc); KEYWORD(x86_thiscallcc); + KEYWORD(x86_vectorcallcc); KEYWORD(arm_apcscc); KEYWORD(arm_aapcscc); KEYWORD(arm_aapcs_vfpcc); @@ -563,6 +585,11 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_64_win64cc); KEYWORD(webkit_jscc); KEYWORD(anyregcc); + KEYWORD(preserve_mostcc); + KEYWORD(preserve_allcc); + KEYWORD(ghccc); + KEYWORD(hhvmcc); + KEYWORD(hhvm_ccc); KEYWORD(cc); KEYWORD(c); @@ -570,12 +597,17 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(attributes); KEYWORD(alwaysinline); + KEYWORD(argmemonly); KEYWORD(builtin); KEYWORD(byval); KEYWORD(inalloca); KEYWORD(cold); + KEYWORD(convergent); + KEYWORD(dereferenceable); + KEYWORD(dereferenceable_or_null); KEYWORD(inlinehint); KEYWORD(inreg); + KEYWORD(jumptable); KEYWORD(minsize); KEYWORD(naked); KEYWORD(nest); @@ -585,7 +617,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(noduplicate); KEYWORD(noimplicitfloat); KEYWORD(noinline); + KEYWORD(norecurse); KEYWORD(nonlazybind); + KEYWORD(nonnull); KEYWORD(noredzone); KEYWORD(noreturn); KEYWORD(nounwind); @@ -600,6 +634,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(ssp); KEYWORD(sspreq); KEYWORD(sspstrong); + KEYWORD(safestack); KEYWORD(sanitize_address); KEYWORD(sanitize_thread); KEYWORD(sanitize_memory); @@ -609,6 +644,15 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(type); KEYWORD(opaque); + KEYWORD(comdat); + + // Comdat types + KEYWORD(any); + KEYWORD(exactmatch); + KEYWORD(largest); + KEYWORD(noduplicates); + KEYWORD(samesize); + KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); @@ -620,6 +664,13 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x); KEYWORD(blockaddress); + // Metadata types. + KEYWORD(distinct); + + // Use-list order directives. + KEYWORD(uselistorder); + KEYWORD(uselistorder_bb); + KEYWORD(personality); KEYWORD(cleanup); KEYWORD(catch); @@ -627,9 +678,13 @@ lltok::Kind LLLexer::LexIdentifier() { #undef KEYWORD // Keywords for types. -#define TYPEKEYWORD(STR, LLVMTY) \ - if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ - TyVal = LLVMTY; return lltok::Type; } +#define TYPEKEYWORD(STR, LLVMTY) \ + do { \ + if (Keyword == STR) { \ + TyVal = LLVMTY; \ + return lltok::Type; \ + } \ + } while (false) TYPEKEYWORD("void", Type::getVoidTy(Context)); TYPEKEYWORD("half", Type::getHalfTy(Context)); TYPEKEYWORD("float", Type::getFloatTy(Context)); @@ -640,12 +695,17 @@ lltok::Kind LLLexer::LexIdentifier() { TYPEKEYWORD("label", Type::getLabelTy(Context)); TYPEKEYWORD("metadata", Type::getMetadataTy(Context)); TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context)); + TYPEKEYWORD("token", Type::getTokenTy(Context)); #undef TYPEKEYWORD // Keywords for instructions. -#define INSTKEYWORD(STR, Enum) \ - if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \ - UIntVal = Instruction::Enum; return lltok::kw_##STR; } +#define INSTKEYWORD(STR, Enum) \ + do { \ + if (Keyword == #STR) { \ + UIntVal = Instruction::Enum; \ + return lltok::kw_##STR; \ + } \ + } while (false) INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); @@ -695,8 +755,34 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(extractvalue, ExtractValue); INSTKEYWORD(insertvalue, InsertValue); INSTKEYWORD(landingpad, LandingPad); + INSTKEYWORD(cleanupret, CleanupRet); + INSTKEYWORD(catchret, CatchRet); + INSTKEYWORD(catchpad, CatchPad); + INSTKEYWORD(terminatepad, TerminatePad); + INSTKEYWORD(cleanuppad, CleanupPad); + INSTKEYWORD(catchendpad, CatchEndPad); + INSTKEYWORD(cleanupendpad, CleanupEndPad); #undef INSTKEYWORD +#define DWKEYWORD(TYPE, TOKEN) \ + do { \ + if (Keyword.startswith("DW_" #TYPE "_")) { \ + StrVal.assign(Keyword.begin(), Keyword.end()); \ + return lltok::TOKEN; \ + } \ + } while (false) + DWKEYWORD(TAG, DwarfTag); + DWKEYWORD(ATE, DwarfAttEncoding); + DWKEYWORD(VIRTUALITY, DwarfVirtuality); + DWKEYWORD(LANG, DwarfLang); + DWKEYWORD(OP, DwarfOp); +#undef DWKEYWORD + + if (Keyword.startswith("DIFlag")) { + StrVal.assign(Keyword.begin(), Keyword.end()); + return lltok::DIFlag; + } + // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by // the CFE to avoid forcing it to deal with 64-bit numbers. if ((TokStart[0] == 'u' || TokStart[0] == 's') && @@ -704,7 +790,13 @@ lltok::Kind LLLexer::LexIdentifier() { isxdigit(static_cast(TokStart[3]))) { int len = CurPtr-TokStart-3; uint32_t bits = len * 4; - APInt Tmp(bits, StringRef(TokStart+3, len), 16); + StringRef HexStr(TokStart + 3, len); + if (!std::all_of(HexStr.begin(), HexStr.end(), isxdigit)) { + // Bad token, return it as an error. + CurPtr = TokStart+3; + return lltok::Error; + } + APInt Tmp(bits, HexStr, 16); uint32_t activeBits = Tmp.getActiveBits(); if (activeBits > 0 && activeBits < bits) Tmp = Tmp.trunc(activeBits); @@ -723,9 +815,8 @@ lltok::Kind LLLexer::LexIdentifier() { return lltok::Error; } - -/// Lex0x: Handle productions that start with 0x, knowing that it matches and -/// that this is not a label: +/// Lex all tokens that start with a 0x prefix, knowing they match and are not +/// labels. /// HexFPConstant 0x[0-9A-Fa-f]+ /// HexFP80Constant 0xK[0-9A-Fa-f]+ /// HexFP128Constant 0xL[0-9A-Fa-f]+ @@ -783,7 +874,7 @@ lltok::Kind LLLexer::Lex0x() { } } -/// LexIdentifier: Handle several related productions: +/// Lex tokens for a label or a numeric constant, possibly starting with -. /// Label [-a-zA-Z$._0-9]+: /// NInteger -[0-9]+ /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? @@ -826,20 +917,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { if (CurPtr[0] != '.') { if (TokStart[0] == '0' && TokStart[1] == 'x') return Lex0x(); - unsigned Len = CurPtr-TokStart; - uint32_t numBits = ((Len * 64) / 19) + 2; - APInt Tmp(numBits, StringRef(TokStart, Len), 10); - if (TokStart[0] == '-') { - uint32_t minBits = Tmp.getMinSignedBits(); - if (minBits > 0 && minBits < numBits) - Tmp = Tmp.trunc(minBits); - APSIntVal = APSInt(Tmp, false); - } else { - uint32_t activeBits = Tmp.getActiveBits(); - if (activeBits > 0 && activeBits < numBits) - Tmp = Tmp.trunc(activeBits); - APSIntVal = APSInt(Tmp, true); - } + APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart)); return lltok::APSInt; } @@ -861,6 +939,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { return lltok::APFloat; } +/// Lex a floating point constant starting with +. /// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? lltok::Kind LLLexer::LexPositive() { // If the letter after the negative is a number, this is probably not a