void LLLexer::HexToIntPair(const char *Buffer, const char *End,
uint64_t Pair[2]) {
Pair[0] = 0;
- for (int i=0; i<16; i++, Buffer++) {
- assert(Buffer != End);
- Pair[0] *= 16;
- Pair[0] += hexDigitValue(*Buffer);
+ if (End - Buffer >= 16) {
+ for (int i = 0; i < 16; i++, Buffer++) {
+ assert(Buffer != End);
+ Pair[0] *= 16;
+ Pair[0] += hexDigitValue(*Buffer);
+ }
}
Pair[1] = 0;
- for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
+ for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
Pair[1] *= 16;
Pair[1] += hexDigitValue(*Buffer);
}
Pair[1] += hexDigitValue(*Buffer);
}
Pair[0] = 0;
- for (int i=0; i<16; i++, Buffer++) {
+ for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
Pair[0] *= 16;
Pair[0] += hexDigitValue(*Buffer);
}
// Lexer definition.
//===----------------------------------------------------------------------===//
-LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
+LLLexer::LLLexer(StringRef StartBuf, SourceMgr &sm, SMDiagnostic &Err,
LLVMContext &C)
: CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
- CurPtr = CurBuf->getBufferStart();
-}
-
-std::string LLLexer::getFilename() const {
- return CurBuf->getBufferIdentifier();
+ CurPtr = CurBuf.begin();
}
int LLLexer::getNextChar() {
case 0:
// A nul character in the stream is either the end of the current buffer or
// a random nul in the file. Disambiguate that here.
- if (CurPtr-1 != CurBuf->getBufferEnd())
+ if (CurPtr-1 != CurBuf.end())
return 0; // Just whitespace.
// Otherwise, return end of file.
return LexToken();
case '+': return LexPositive();
case '@': return LexAt();
+ case '$': return LexDollar();
case '%': return LexPercent();
case '"': return LexQuote();
case '.':
return lltok::dotdotdot;
}
return lltok::Error;
- case '$':
- if (const char *Ptr = isLabelTail(CurPtr)) {
- CurPtr = Ptr;
- StrVal.assign(TokStart, CurPtr-1);
- return lltok::LabelStr;
- }
- return lltok::Error;
case ';':
SkipLineComment();
return LexToken();
case ')': return lltok::rparen;
case ',': return lltok::comma;
case '*': return lltok::star;
- case '\\': return lltok::backslash;
+ case '|': return lltok::bar;
}
}
}
}
-/// LexAt - Lex all tokens that start with an @ character:
+/// Lex all tokens that start with an @ character.
/// GlobalVar @\"[^\"]*\"
/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]*
/// GlobalVarID @[0-9]+
lltok::Kind LLLexer::LexAt() {
- // Handle AtStringConstant: @\"[^\"]*\"
+ return LexVar(lltok::GlobalVar, lltok::GlobalID);
+}
+
+lltok::Kind LLLexer::LexDollar() {
+ if (const char *Ptr = isLabelTail(TokStart)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr - 1);
+ return lltok::LabelStr;
+ }
+
+ // Handle DollarStringConstant: $\"[^\"]*\"
if (CurPtr[0] == '"') {
++CurPtr;
int CurChar = getNextChar();
if (CurChar == EOF) {
- Error("end of file in global variable name");
+ Error("end of file in COMDAT variable name");
return lltok::Error;
}
if (CurChar == '"') {
- StrVal.assign(TokStart+2, CurPtr-1);
+ StrVal.assign(TokStart + 2, CurPtr - 1);
UnEscapeLexed(StrVal);
if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
Error("Null bytes are not allowed in names");
return lltok::Error;
}
- return lltok::GlobalVar;
+ return lltok::ComdatVar;
}
}
}
- // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
if (ReadVarName())
- return lltok::GlobalVar;
-
- // Handle GlobalVarID: @[0-9]+
- if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
- for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
- /*empty*/;
-
- uint64_t Val = atoull(TokStart+1, CurPtr);
- if ((unsigned)Val != Val)
- Error("invalid value number (too large)!");
- UIntVal = unsigned(Val);
- return lltok::GlobalID;
- }
+ return lltok::ComdatVar;
return lltok::Error;
}
return false;
}
-/// LexPercent - Lex all tokens that start with a % character:
-/// LocalVar ::= %\"[^\"]*\"
-/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
-/// LocalVarID ::= %[0-9]+
-lltok::Kind LLLexer::LexPercent() {
- // Handle LocalVarName: %\"[^\"]*\"
+lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
+ // Handle StringConstant: \"[^\"]*\"
if (CurPtr[0] == '"') {
++CurPtr;
- return ReadString(lltok::LocalVar);
+
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in global variable name");
+ return lltok::Error;
+ }
+ if (CurChar == '"') {
+ StrVal.assign(TokStart+2, CurPtr-1);
+ UnEscapeLexed(StrVal);
+ if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
+ Error("Null bytes are not allowed in names");
+ return lltok::Error;
+ }
+ return Var;
+ }
+ }
}
- // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
if (ReadVarName())
- return lltok::LocalVar;
+ return Var;
- // Handle LocalVarID: %[0-9]+
+ // Handle VarID: [0-9]+
if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
/*empty*/;
if ((unsigned)Val != Val)
Error("invalid value number (too large)!");
UIntVal = unsigned(Val);
- return lltok::LocalVarID;
+ return VarID;
}
-
return lltok::Error;
}
-/// LexQuote - Lex all tokens that start with a " character:
+/// Lex all tokens that start with a % character.
+/// LocalVar ::= %\"[^\"]*\"
+/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
+/// LocalVarID ::= %[0-9]+
+lltok::Kind LLLexer::LexPercent() {
+ return LexVar(lltok::LocalVar, lltok::LocalVarID);
+}
+
+/// Lex all tokens that start with a " character.
/// QuoteLabel "[^"]+":
/// StringConstant "[^"]*"
lltok::Kind LLLexer::LexQuote() {
if (CurPtr[0] == ':') {
++CurPtr;
- kind = lltok::LabelStr;
+ if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
+ Error("Null bytes are not allowed in names");
+ kind = lltok::Error;
+ } else {
+ kind = lltok::LabelStr;
+ }
}
return kind;
}
-/// LexExclaim:
+/// Lex all tokens that start with a ! character.
/// !foo
/// !
lltok::Kind LLLexer::LexExclaim() {
return lltok::exclaim;
}
-/// LexHash - Lex all tokens that start with a # character:
+/// Lex all tokens that start with a # character.
/// AttrGrpID ::= #[0-9]+
lltok::Kind LLLexer::LexHash() {
// Handle AttrGrpID: #[0-9]+
return lltok::Error;
}
-/// LexIdentifier: Handle several related productions:
+/// Lex a label, integer type, keyword, or hexadecimal integer constant.
/// Label [-a-zA-Z$._0-9]+:
/// IntegerType i[0-9]+
/// Keyword sdiv, float, ...
if (!KeywordEnd) KeywordEnd = CurPtr;
CurPtr = KeywordEnd;
--StartChar;
- unsigned Len = CurPtr-StartChar;
-#define KEYWORD(STR) \
- do { \
- if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
- return lltok::kw_##STR; \
+ StringRef Keyword(StartChar, CurPtr - StartChar);
+#define KEYWORD(STR) \
+ do { \
+ if (Keyword == #STR) \
+ return lltok::kw_##STR; \
} while (0)
KEYWORD(true); KEYWORD(false);
KEYWORD(private);
KEYWORD(internal);
- KEYWORD(linker_private); // NOTE: deprecated, for parser compatibility
- KEYWORD(linker_private_weak); // NOTE: deprecated, for parser compatibility
KEYWORD(available_externally);
KEYWORD(linkonce);
KEYWORD(linkonce_odr);
- KEYWORD(weak);
+ KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
KEYWORD(weak_odr);
KEYWORD(appending);
KEYWORD(dllimport);
KEYWORD(zeroinitializer);
KEYWORD(undef);
KEYWORD(null);
+ KEYWORD(none);
KEYWORD(to);
+ KEYWORD(caller);
+ KEYWORD(within);
+ KEYWORD(from);
KEYWORD(tail);
KEYWORD(musttail);
+ KEYWORD(notail);
KEYWORD(target);
KEYWORD(triple);
KEYWORD(unwind);
KEYWORD(inteldialect);
KEYWORD(gc);
KEYWORD(prefix);
+ KEYWORD(prologue);
KEYWORD(ccc);
KEYWORD(fastcc);
KEYWORD(x86_stdcallcc);
KEYWORD(x86_fastcallcc);
KEYWORD(x86_thiscallcc);
+ KEYWORD(x86_vectorcallcc);
KEYWORD(arm_apcscc);
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
KEYWORD(anyregcc);
KEYWORD(preserve_mostcc);
KEYWORD(preserve_allcc);
+ KEYWORD(ghccc);
+ KEYWORD(x86_intrcc);
+ KEYWORD(hhvmcc);
+ KEYWORD(hhvm_ccc);
+ KEYWORD(cxx_fast_tlscc);
KEYWORD(cc);
KEYWORD(c);
KEYWORD(attributes);
KEYWORD(alwaysinline);
+ KEYWORD(argmemonly);
KEYWORD(builtin);
KEYWORD(byval);
KEYWORD(inalloca);
KEYWORD(cold);
+ KEYWORD(convergent);
+ KEYWORD(dereferenceable);
+ KEYWORD(dereferenceable_or_null);
+ KEYWORD(inaccessiblememonly);
+ KEYWORD(inaccessiblemem_or_argmemonly);
KEYWORD(inlinehint);
KEYWORD(inreg);
KEYWORD(jumptable);
KEYWORD(noduplicate);
KEYWORD(noimplicitfloat);
KEYWORD(noinline);
+ KEYWORD(norecurse);
KEYWORD(nonlazybind);
KEYWORD(nonnull);
KEYWORD(noredzone);
KEYWORD(ssp);
KEYWORD(sspreq);
KEYWORD(sspstrong);
+ KEYWORD(safestack);
KEYWORD(sanitize_address);
KEYWORD(sanitize_thread);
KEYWORD(sanitize_memory);
KEYWORD(type);
KEYWORD(opaque);
+ KEYWORD(comdat);
+
+ // Comdat types
+ KEYWORD(any);
+ KEYWORD(exactmatch);
+ KEYWORD(largest);
+ KEYWORD(noduplicates);
+ KEYWORD(samesize);
+
KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
KEYWORD(x);
KEYWORD(blockaddress);
+ // Metadata types.
+ KEYWORD(distinct);
+
+ // Use-list order directives.
+ KEYWORD(uselistorder);
+ KEYWORD(uselistorder_bb);
+
KEYWORD(personality);
KEYWORD(cleanup);
KEYWORD(catch);
#undef KEYWORD
// Keywords for types.
-#define TYPEKEYWORD(STR, LLVMTY) \
- if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
- TyVal = LLVMTY; return lltok::Type; }
+#define TYPEKEYWORD(STR, LLVMTY) \
+ do { \
+ if (Keyword == STR) { \
+ TyVal = LLVMTY; \
+ return lltok::Type; \
+ } \
+ } while (false)
TYPEKEYWORD("void", Type::getVoidTy(Context));
TYPEKEYWORD("half", Type::getHalfTy(Context));
TYPEKEYWORD("float", Type::getFloatTy(Context));
TYPEKEYWORD("label", Type::getLabelTy(Context));
TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
+ TYPEKEYWORD("token", Type::getTokenTy(Context));
#undef TYPEKEYWORD
// Keywords for instructions.
-#define INSTKEYWORD(STR, Enum) \
- if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
- UIntVal = Instruction::Enum; return lltok::kw_##STR; }
+#define INSTKEYWORD(STR, Enum) \
+ do { \
+ if (Keyword == #STR) { \
+ UIntVal = Instruction::Enum; \
+ return lltok::kw_##STR; \
+ } \
+ } while (false)
INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd);
INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub);
INSTKEYWORD(extractvalue, ExtractValue);
INSTKEYWORD(insertvalue, InsertValue);
INSTKEYWORD(landingpad, LandingPad);
+ INSTKEYWORD(cleanupret, CleanupRet);
+ INSTKEYWORD(catchret, CatchRet);
+ INSTKEYWORD(catchswitch, CatchSwitch);
+ INSTKEYWORD(catchpad, CatchPad);
+ INSTKEYWORD(cleanuppad, CleanupPad);
#undef INSTKEYWORD
+#define DWKEYWORD(TYPE, TOKEN) \
+ do { \
+ if (Keyword.startswith("DW_" #TYPE "_")) { \
+ StrVal.assign(Keyword.begin(), Keyword.end()); \
+ return lltok::TOKEN; \
+ } \
+ } while (false)
+ DWKEYWORD(TAG, DwarfTag);
+ DWKEYWORD(ATE, DwarfAttEncoding);
+ DWKEYWORD(VIRTUALITY, DwarfVirtuality);
+ DWKEYWORD(LANG, DwarfLang);
+ DWKEYWORD(OP, DwarfOp);
+ DWKEYWORD(MACINFO, DwarfMacinfo);
+#undef DWKEYWORD
+
+ if (Keyword.startswith("DIFlag")) {
+ StrVal.assign(Keyword.begin(), Keyword.end());
+ return lltok::DIFlag;
+ }
+
// Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
// the CFE to avoid forcing it to deal with 64-bit numbers.
if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
isxdigit(static_cast<unsigned char>(TokStart[3]))) {
int len = CurPtr-TokStart-3;
uint32_t bits = len * 4;
- APInt Tmp(bits, StringRef(TokStart+3, len), 16);
+ StringRef HexStr(TokStart + 3, len);
+ if (!std::all_of(HexStr.begin(), HexStr.end(), isxdigit)) {
+ // Bad token, return it as an error.
+ CurPtr = TokStart+3;
+ return lltok::Error;
+ }
+ APInt Tmp(bits, HexStr, 16);
uint32_t activeBits = Tmp.getActiveBits();
if (activeBits > 0 && activeBits < bits)
Tmp = Tmp.trunc(activeBits);
return lltok::Error;
}
-
-/// Lex0x: Handle productions that start with 0x, knowing that it matches and
-/// that this is not a label:
+/// Lex all tokens that start with a 0x prefix, knowing they match and are not
+/// labels.
/// HexFPConstant 0x[0-9A-Fa-f]+
/// HexFP80Constant 0xK[0-9A-Fa-f]+
/// HexFP128Constant 0xL[0-9A-Fa-f]+
}
}
-/// LexIdentifier: Handle several related productions:
+/// Lex tokens for a label or a numeric constant, possibly starting with -.
/// Label [-a-zA-Z$._0-9]+:
/// NInteger -[0-9]+
/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
if (CurPtr[0] != '.') {
if (TokStart[0] == '0' && TokStart[1] == 'x')
return Lex0x();
- unsigned Len = CurPtr-TokStart;
- uint32_t numBits = ((Len * 64) / 19) + 2;
- APInt Tmp(numBits, StringRef(TokStart, Len), 10);
- if (TokStart[0] == '-') {
- uint32_t minBits = Tmp.getMinSignedBits();
- if (minBits > 0 && minBits < numBits)
- Tmp = Tmp.trunc(minBits);
- APSIntVal = APSInt(Tmp, false);
- } else {
- uint32_t activeBits = Tmp.getActiveBits();
- if (activeBits > 0 && activeBits < numBits)
- Tmp = Tmp.trunc(activeBits);
- APSIntVal = APSInt(Tmp, true);
- }
+ APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));
return lltok::APSInt;
}
return lltok::APFloat;
}
+/// Lex a floating point constant starting with +.
/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
lltok::Kind LLLexer::LexPositive() {
// If the letter after the negative is a number, this is probably not a