X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FAsmParser%2FLLLexer.cpp;h=a72f713c49000934e26e09658c5fd2ad0ff566fa;hp=4c9439a3ecff6b01f8757630e3ebb3220f5273cb;hb=5ff590799654e01ceea6383932fc25e8e622c46a;hpb=623d2e618f4e672c47edff9ec63ed6d733ac81d3 diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 4c9439a3ecf..a72f713c490 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -14,7 +14,7 @@ #include "LLLexer.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" -#include "llvm/Assembly/Parser.h" +#include "llvm/AsmParser/Parser.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" @@ -34,6 +34,10 @@ bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const { return true; } +void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const { + SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); +} + //===----------------------------------------------------------------------===// // Helper functions. //===----------------------------------------------------------------------===// @@ -74,13 +78,15 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { void LLLexer::HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]) { Pair[0] = 0; - for (int i=0; i<16; i++, Buffer++) { - assert(Buffer != End); - Pair[0] *= 16; - Pair[0] += hexDigitValue(*Buffer); + if (End - Buffer >= 16) { + for (int i = 0; i < 16; i++, Buffer++) { + assert(Buffer != End); + Pair[0] *= 16; + Pair[0] += hexDigitValue(*Buffer); + } } Pair[1] = 0; - for (int i=0; i<16 && Buffer != End; i++, Buffer++) { + for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) { Pair[1] *= 16; Pair[1] += hexDigitValue(*Buffer); } @@ -146,7 +152,7 @@ static bool isLabelChar(char C) { static const char *isLabelTail(const char *CurPtr) { while (1) { if (CurPtr[0] == ':') return CurPtr+1; - if (!isLabelChar(CurPtr[0])) return 0; + if (!isLabelChar(CurPtr[0])) return nullptr; ++CurPtr; } } @@ -157,14 +163,10 @@ static const char *isLabelTail(const char *CurPtr) { // Lexer definition. //===----------------------------------------------------------------------===// -LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err, +LLLexer::LLLexer(StringRef StartBuf, SourceMgr &sm, SMDiagnostic &Err, LLVMContext &C) : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) { - CurPtr = CurBuf->getBufferStart(); -} - -std::string LLLexer::getFilename() const { - return CurBuf->getBufferIdentifier(); + CurPtr = CurBuf.begin(); } int LLLexer::getNextChar() { @@ -174,7 +176,7 @@ int LLLexer::getNextChar() { case 0: // A nul character in the stream is either the end of the current buffer or // a random nul in the file. Disambiguate that here. - if (CurPtr-1 != CurBuf->getBufferEnd()) + if (CurPtr-1 != CurBuf.end()) return 0; // Just whitespace. // Otherwise, return end of file. @@ -205,6 +207,7 @@ lltok::Kind LLLexer::LexToken() { return LexToken(); case '+': return LexPositive(); case '@': return LexAt(); + case '$': return LexDollar(); case '%': return LexPercent(); case '"': return LexQuote(); case '.': @@ -218,13 +221,6 @@ lltok::Kind LLLexer::LexToken() { return lltok::dotdotdot; } return lltok::Error; - case '$': - if (const char *Ptr = isLabelTail(CurPtr)) { - CurPtr = Ptr; - StrVal.assign(TokStart, CurPtr-1); - return lltok::LabelStr; - } - return lltok::Error; case ';': SkipLineComment(); return LexToken(); @@ -245,7 +241,7 @@ lltok::Kind LLLexer::LexToken() { case ')': return lltok::rparen; case ',': return lltok::comma; case '*': return lltok::star; - case '\\': return lltok::backslash; + case '|': return lltok::bar; } } @@ -261,7 +257,17 @@ void LLLexer::SkipLineComment() { /// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* /// GlobalVarID @[0-9]+ lltok::Kind LLLexer::LexAt() { - // Handle AtStringConstant: @\"[^\"]*\" + return LexVar(lltok::GlobalVar, lltok::GlobalID); +} + +lltok::Kind LLLexer::LexDollar() { + if (const char *Ptr = isLabelTail(TokStart)) { + CurPtr = Ptr; + StrVal.assign(TokStart, CurPtr - 1); + return lltok::LabelStr; + } + + // Handle DollarStringConstant: $\"[^\"]*\" if (CurPtr[0] == '"') { ++CurPtr; @@ -269,32 +275,24 @@ lltok::Kind LLLexer::LexAt() { int CurChar = getNextChar(); if (CurChar == EOF) { - Error("end of file in global variable name"); + Error("end of file in COMDAT variable name"); return lltok::Error; } if (CurChar == '"') { - StrVal.assign(TokStart+2, CurPtr-1); + StrVal.assign(TokStart + 2, CurPtr - 1); UnEscapeLexed(StrVal); - return lltok::GlobalVar; + if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { + Error("Null bytes are not allowed in names"); + return lltok::Error; + } + return lltok::ComdatVar; } } } - // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* + // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]* if (ReadVarName()) - return lltok::GlobalVar; - - // Handle GlobalVarID: @[0-9]+ - if (isdigit(static_cast(CurPtr[0]))) { - for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) - /*empty*/; - - uint64_t Val = atoull(TokStart+1, CurPtr); - if ((unsigned)Val != Val) - Error("invalid value number (too large)!"); - UIntVal = unsigned(Val); - return lltok::GlobalID; - } + return lltok::ComdatVar; return lltok::Error; } @@ -335,22 +333,35 @@ bool LLLexer::ReadVarName() { return false; } -/// LexPercent - Lex all tokens that start with a % character: -/// LocalVar ::= %\"[^\"]*\" -/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* -/// LocalVarID ::= %[0-9]+ -lltok::Kind LLLexer::LexPercent() { - // Handle LocalVarName: %\"[^\"]*\" +lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { + // Handle StringConstant: \"[^\"]*\" if (CurPtr[0] == '"') { ++CurPtr; - return ReadString(lltok::LocalVar); + + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + Error("end of file in global variable name"); + return lltok::Error; + } + if (CurChar == '"') { + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { + Error("Null bytes are not allowed in names"); + return lltok::Error; + } + return Var; + } + } } - // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* + // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]* if (ReadVarName()) - return lltok::LocalVar; + return Var; - // Handle LocalVarID: %[0-9]+ + // Handle VarID: [0-9]+ if (isdigit(static_cast(CurPtr[0]))) { for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; @@ -359,12 +370,19 @@ lltok::Kind LLLexer::LexPercent() { if ((unsigned)Val != Val) Error("invalid value number (too large)!"); UIntVal = unsigned(Val); - return lltok::LocalVarID; + return VarID; } - return lltok::Error; } +/// LexPercent - Lex all tokens that start with a % character: +/// LocalVar ::= %\"[^\"]*\" +/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// LocalVarID ::= %[0-9]+ +lltok::Kind LLLexer::LexPercent() { + return LexVar(lltok::LocalVar, lltok::LocalVarID); +} + /// LexQuote - Lex all tokens that start with a " character: /// QuoteLabel "[^"]+": /// StringConstant "[^"]*" @@ -375,7 +393,12 @@ lltok::Kind LLLexer::LexQuote() { if (CurPtr[0] == ':') { ++CurPtr; - kind = lltok::LabelStr; + if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { + Error("Null bytes are not allowed in names"); + kind = lltok::Error; + } else { + kind = lltok::LabelStr; + } } return kind; @@ -427,8 +450,8 @@ lltok::Kind LLLexer::LexHash() { /// HexIntConstant [us]0x[0-9A-Fa-f]+ lltok::Kind LLLexer::LexIdentifier() { const char *StartChar = CurPtr; - const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; - const char *KeywordEnd = 0; + const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar; + const char *KeywordEnd = nullptr; for (; isLabelChar(*CurPtr); ++CurPtr) { // If we decide this is an integer, remember the end of the sequence. @@ -447,7 +470,7 @@ lltok::Kind LLLexer::LexIdentifier() { // Otherwise, this wasn't a label. If this was valid as an integer type, // return it. - if (IntEnd == 0) IntEnd = CurPtr; + if (!IntEnd) IntEnd = CurPtr; if (IntEnd != StartChar) { CurPtr = IntEnd; uint64_t NumBits = atoull(StartChar, CurPtr); @@ -461,14 +484,14 @@ lltok::Kind LLLexer::LexIdentifier() { } // Otherwise, this was a letter sequence. See which keyword this is. - if (KeywordEnd == 0) KeywordEnd = CurPtr; + if (!KeywordEnd) KeywordEnd = CurPtr; CurPtr = KeywordEnd; --StartChar; - unsigned Len = CurPtr-StartChar; -#define KEYWORD(STR) \ - do { \ - if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ - return lltok::kw_##STR; \ + StringRef Keyword(StartChar, CurPtr - StartChar); +#define KEYWORD(STR) \ + do { \ + if (Keyword == #STR) \ + return lltok::kw_##STR; \ } while (0) KEYWORD(true); KEYWORD(false); @@ -476,13 +499,11 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(global); KEYWORD(constant); KEYWORD(private); - KEYWORD(linker_private); - KEYWORD(linker_private_weak); KEYWORD(internal); KEYWORD(available_externally); KEYWORD(linkonce); KEYWORD(linkonce_odr); - KEYWORD(weak); + KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg". KEYWORD(weak_odr); KEYWORD(appending); KEYWORD(dllimport); @@ -504,6 +525,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(null); KEYWORD(to); KEYWORD(tail); + KEYWORD(musttail); KEYWORD(target); KEYWORD(triple); KEYWORD(unwind); @@ -539,6 +561,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(inteldialect); KEYWORD(gc); KEYWORD(prefix); + KEYWORD(prologue); KEYWORD(ccc); KEYWORD(fastcc); @@ -546,6 +569,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_stdcallcc); KEYWORD(x86_fastcallcc); KEYWORD(x86_thiscallcc); + KEYWORD(x86_vectorcallcc); KEYWORD(arm_apcscc); KEYWORD(arm_aapcscc); KEYWORD(arm_aapcs_vfpcc); @@ -559,6 +583,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_64_win64cc); KEYWORD(webkit_jscc); KEYWORD(anyregcc); + KEYWORD(preserve_mostcc); + KEYWORD(preserve_allcc); + KEYWORD(ghccc); KEYWORD(cc); KEYWORD(c); @@ -568,9 +595,13 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(alwaysinline); KEYWORD(builtin); KEYWORD(byval); + KEYWORD(inalloca); KEYWORD(cold); + KEYWORD(dereferenceable); + KEYWORD(dereferenceable_or_null); KEYWORD(inlinehint); KEYWORD(inreg); + KEYWORD(jumptable); KEYWORD(minsize); KEYWORD(naked); KEYWORD(nest); @@ -581,6 +612,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(noimplicitfloat); KEYWORD(noinline); KEYWORD(nonlazybind); + KEYWORD(nonnull); KEYWORD(noredzone); KEYWORD(noreturn); KEYWORD(nounwind); @@ -604,6 +636,15 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(type); KEYWORD(opaque); + KEYWORD(comdat); + + // Comdat types + KEYWORD(any); + KEYWORD(exactmatch); + KEYWORD(largest); + KEYWORD(noduplicates); + KEYWORD(samesize); + KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); @@ -615,6 +656,13 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x); KEYWORD(blockaddress); + // Metadata types. + KEYWORD(distinct); + + // Use-list order directives. + KEYWORD(uselistorder); + KEYWORD(uselistorder_bb); + KEYWORD(personality); KEYWORD(cleanup); KEYWORD(catch); @@ -622,9 +670,13 @@ lltok::Kind LLLexer::LexIdentifier() { #undef KEYWORD // Keywords for types. -#define TYPEKEYWORD(STR, LLVMTY) \ - if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ - TyVal = LLVMTY; return lltok::Type; } +#define TYPEKEYWORD(STR, LLVMTY) \ + do { \ + if (Keyword == STR) { \ + TyVal = LLVMTY; \ + return lltok::Type; \ + } \ + } while (false) TYPEKEYWORD("void", Type::getVoidTy(Context)); TYPEKEYWORD("half", Type::getHalfTy(Context)); TYPEKEYWORD("float", Type::getFloatTy(Context)); @@ -638,9 +690,13 @@ lltok::Kind LLLexer::LexIdentifier() { #undef TYPEKEYWORD // Keywords for instructions. -#define INSTKEYWORD(STR, Enum) \ - if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \ - UIntVal = Instruction::Enum; return lltok::kw_##STR; } +#define INSTKEYWORD(STR, Enum) \ + do { \ + if (Keyword == #STR) { \ + UIntVal = Instruction::Enum; \ + return lltok::kw_##STR; \ + } \ + } while (false) INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); @@ -665,6 +721,7 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(inttoptr, IntToPtr); INSTKEYWORD(ptrtoint, PtrToInt); INSTKEYWORD(bitcast, BitCast); + INSTKEYWORD(addrspacecast, AddrSpaceCast); INSTKEYWORD(select, Select); INSTKEYWORD(va_arg, VAArg); INSTKEYWORD(ret, Ret); @@ -691,6 +748,25 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(landingpad, LandingPad); #undef INSTKEYWORD +#define DWKEYWORD(TYPE, TOKEN) \ + do { \ + if (Keyword.startswith("DW_" #TYPE "_")) { \ + StrVal.assign(Keyword.begin(), Keyword.end()); \ + return lltok::TOKEN; \ + } \ + } while (false) + DWKEYWORD(TAG, DwarfTag); + DWKEYWORD(ATE, DwarfAttEncoding); + DWKEYWORD(VIRTUALITY, DwarfVirtuality); + DWKEYWORD(LANG, DwarfLang); + DWKEYWORD(OP, DwarfOp); +#undef DWKEYWORD + + if (Keyword.startswith("DIFlag")) { + StrVal.assign(Keyword.begin(), Keyword.end()); + return lltok::DIFlag; + } + // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by // the CFE to avoid forcing it to deal with 64-bit numbers. if ((TokStart[0] == 'u' || TokStart[0] == 's') && @@ -698,7 +774,13 @@ lltok::Kind LLLexer::LexIdentifier() { isxdigit(static_cast(TokStart[3]))) { int len = CurPtr-TokStart-3; uint32_t bits = len * 4; - APInt Tmp(bits, StringRef(TokStart+3, len), 16); + StringRef HexStr(TokStart + 3, len); + if (!std::all_of(HexStr.begin(), HexStr.end(), isxdigit)) { + // Bad token, return it as an error. + CurPtr = TokStart+3; + return lltok::Error; + } + APInt Tmp(bits, HexStr, 16); uint32_t activeBits = Tmp.getActiveBits(); if (activeBits > 0 && activeBits < bits) Tmp = Tmp.trunc(activeBits);