From: Colin LeMahieu Date: Mon, 9 Nov 2015 00:31:07 +0000 (+0000) Subject: [AsmParser] Backends can parameterize ASM tokenization. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=1539acf27c907e253b92d1343f670b5a034dc9b6 [AsmParser] Backends can parameterize ASM tokenization. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252439 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 01c6e343208..859e21b1957 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -965,6 +965,15 @@ class AsmParserVariant { // register tokens as constrained registers, instead of tokens, for the // purposes of matching. string RegisterPrefix = ""; + + // TokenizingCharacters - Characters that are standalone tokens + string TokenizingCharacters = "[]*!"; + + // SeparatorCharacters - Characters that are not tokens + string SeparatorCharacters = " \t,"; + + // BreakCharacters - Characters that start new identifiers + string BreakCharacters = ""; } def DefaultAsmParserVariant : AsmParserVariant; diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index aa3489a0573..245ba44a249 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -1334,6 +1334,15 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // Treat '.' as a valid identifier in this context. Lex(); IDVal = "."; + } else if (Lexer.is(AsmToken::LCurly)) { + // Treat '{' as a valid identifier in this context. + Lex(); + IDVal = "{"; + + } else if (Lexer.is(AsmToken::RCurly)) { + // Treat '}' as a valid identifier in this context. + Lex(); + IDVal = "}"; } else if (parseIdentifier(IDVal)) { if (!TheCondState.Ignore) return TokError("unexpected token at start of statement"); diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index ad6a79ed718..af30ff96f18 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -125,11 +125,13 @@ def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; def GenericAsmParserVariant : AsmParserVariant { int Variant = 0; string Name = "generic"; + string BreakCharacters = "."; } def AppleAsmParserVariant : AsmParserVariant { int Variant = 1; string Name = "apple-neon"; + string BreakCharacters = "."; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index e7423c3dcf8..855b41c03c9 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -511,8 +511,15 @@ def ARMAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def ARMAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "ARM"; + string BreakCharacters = "."; +} + def ARM : Target { // Pull in Instruction Info: let InstructionSet = ARMInstrInfo; let AssemblyWriters = [ARMAsmWriter]; + let AssemblyParserVariants = [ARMAsmParserVariant]; } diff --git a/lib/Target/BPF/BPF.td b/lib/Target/BPF/BPF.td index a4ce90af043..8493b0fd1e4 100644 --- a/lib/Target/BPF/BPF.td +++ b/lib/Target/BPF/BPF.td @@ -25,7 +25,14 @@ def BPFInstPrinter : AsmWriter { bit isMCAsmWriter = 1; } +def BPFAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "BPF"; + string BreakCharacters = "."; +} + def BPF : Target { let InstructionSet = BPFInstrInfo; let AssemblyWriters = [BPFInstPrinter]; + let AssemblyParserVariants = [BPFAsmParserVariant]; } diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 641b2377de4..f50100e7a8a 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -403,6 +403,7 @@ def PPCAsmParserVariant : AsmParserVariant { // InstAlias definitions use immediate literals. Set RegisterPrefix // so that those are not misinterpreted as registers. string RegisterPrefix = "%"; + string BreakCharacters = "."; } def PPC : Target { diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index 3964330a4a8..2d6b399d60f 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -294,6 +294,13 @@ public: } }; +class AsmVariantInfo { +public: + std::string TokenizingCharacters; + std::string SeparatorCharacters; + std::string BreakCharacters; +}; + /// MatchableInfo - Helper class for storing the necessary information for an /// instruction or alias which is capable of being matched. struct MatchableInfo { @@ -484,7 +491,8 @@ struct MatchableInfo { void initialize(const AsmMatcherInfo &Info, SmallPtrSetImpl &SingletonRegisters, - int AsmVariantNo, StringRef RegisterPrefix); + int AsmVariantNo, StringRef RegisterPrefix, + AsmVariantInfo const &Variant); /// validate - Return true if this matchable is a valid thing to match against /// and perform a bunch of validity checking. @@ -584,8 +592,10 @@ struct MatchableInfo { void dump() const; private: - void tokenizeAsmString(const AsmMatcherInfo &Info); - void addAsmOperand(size_t Start, size_t End); + void tokenizeAsmString(AsmMatcherInfo const &Info, + AsmVariantInfo const &Variant); + void addAsmOperand(size_t Start, size_t End, + std::string const &SeparatorCharacters); }; /// SubtargetFeatureInfo - Helper class for storing information on a subtarget @@ -828,12 +838,13 @@ extractSingletonRegisterForAsmOperand(MatchableInfo::AsmOperand &Op, void MatchableInfo::initialize(const AsmMatcherInfo &Info, SmallPtrSetImpl &SingletonRegisters, - int AsmVariantNo, StringRef RegisterPrefix) { + int AsmVariantNo, StringRef RegisterPrefix, + AsmVariantInfo const &Variant) { AsmVariantID = AsmVariantNo; AsmString = CodeGenInstruction::FlattenAsmStringVariants(AsmString, AsmVariantNo); - tokenizeAsmString(Info); + tokenizeAsmString(Info, Variant); // Compute the require features. for (Record *Predicate : TheDef->getValueAsListOfDefs("Predicates")) @@ -857,9 +868,9 @@ void MatchableInfo::initialize(const AsmMatcherInfo &Info, } /// Append an AsmOperand for the given substring of AsmString. -void MatchableInfo::addAsmOperand(size_t Start, size_t End) { +void MatchableInfo::addAsmOperand(size_t Start, size_t End, + std::string const &Separators) { StringRef String = AsmString; - StringRef Separators = "[]*! \t,"; // Look for separators before and after to figure out is this token is // isolated. Accept '$$' as that's how we escape '$'. bool IsIsolatedToken = @@ -870,42 +881,54 @@ void MatchableInfo::addAsmOperand(size_t Start, size_t End) { } /// tokenizeAsmString - Tokenize a simplified assembly string. -void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) { +void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info, + AsmVariantInfo const &Variant) { StringRef String = AsmString; - size_t Prev = 0; - bool InTok = true; - for (size_t i = 0, e = String.size(); i != e; ++i) { - switch (String[i]) { - case '[': - case ']': - case '*': - case '!': - case ' ': - case '\t': - case ',': - if (InTok) { - addAsmOperand(Prev, i); + unsigned Prev = 0; + bool InTok = false; + std::string Separators = Variant.TokenizingCharacters + + Variant.SeparatorCharacters; + for (unsigned i = 0, e = String.size(); i != e; ++i) { + if(Variant.BreakCharacters.find(String[i]) != std::string::npos) { + if(InTok) { + addAsmOperand(Prev, i, Separators); + Prev = i; + } + InTok = true; + continue; + } + if(Variant.TokenizingCharacters.find(String[i]) != std::string::npos) { + if(InTok) { + addAsmOperand(Prev, i, Separators); InTok = false; } - if (!isspace(String[i]) && String[i] != ',') - addAsmOperand(i, i + 1); + addAsmOperand(i, i + 1, Separators); Prev = i + 1; - break; - + continue; + } + if(Variant.SeparatorCharacters.find(String[i]) != std::string::npos) { + if(InTok) { + addAsmOperand(Prev, i, Separators); + InTok = false; + } + Prev = i + 1; + continue; + } + switch (String[i]) { case '\\': if (InTok) { - addAsmOperand(Prev, i); + addAsmOperand(Prev, i, Separators); InTok = false; } ++i; assert(i != String.size() && "Invalid quoted character"); - addAsmOperand(i, i + 1); + addAsmOperand(i, i + 1, Separators); Prev = i + 1; break; case '$': { - if (InTok) { - addAsmOperand(Prev, i); + if (InTok && Prev != i) { + addAsmOperand(Prev, i, Separators); InTok = false; } @@ -915,31 +938,20 @@ void MatchableInfo::tokenizeAsmString(const AsmMatcherInfo &Info) { break; } - // If this is "${" find the next "}" and make an identifier like "${xxx}" - size_t EndPos = String.find('}', i); - assert(EndPos != StringRef::npos && - "Missing brace in operand reference!"); - addAsmOperand(i, EndPos+1); + StringRef::iterator End = std::find(String.begin() + i, String.end(),'}'); + assert(End != String.end() && "Missing brace in operand reference!"); + size_t EndPos = End - String.begin(); + addAsmOperand(i, EndPos+1, Separators); Prev = EndPos + 1; i = EndPos; break; } - - case '.': - if (!Info.AsmParser->getValueAsBit("MnemonicContainsDot")) { - if (InTok) - addAsmOperand(Prev, i); - Prev = i; - } - InTok = true; - break; - default: InTok = true; } } if (InTok && Prev != String.size()) - addAsmOperand(Prev, StringRef::npos); + addAsmOperand(Prev, StringRef::npos, Separators); // The first token of the instruction is the mnemonic, which must be a // simple string, not a $foo variable or a singleton register. @@ -1373,6 +1385,13 @@ void AsmMatcherInfo::buildInfo() { std::string CommentDelimiter = AsmVariant->getValueAsString("CommentDelimiter"); std::string RegisterPrefix = AsmVariant->getValueAsString("RegisterPrefix"); + AsmVariantInfo Variant; + Variant.TokenizingCharacters = + AsmVariant->getValueAsString("TokenizingCharacters"); + Variant.SeparatorCharacters = + AsmVariant->getValueAsString("SeparatorCharacters"); + Variant.BreakCharacters = + AsmVariant->getValueAsString("BreakCharacters"); int AsmVariantNo = AsmVariant->getValueAsInt("Variant"); for (const CodeGenInstruction *CGI : Target.instructions()) { @@ -1388,7 +1407,8 @@ void AsmMatcherInfo::buildInfo() { auto II = llvm::make_unique(*CGI); - II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix); + II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix, + Variant); // Ignore instructions which shouldn't be matched and diagnose invalid // instruction definitions with an error. @@ -1415,7 +1435,8 @@ void AsmMatcherInfo::buildInfo() { auto II = llvm::make_unique(std::move(Alias)); - II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix); + II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix, + Variant); // Validate the alias definitions. II->validate(CommentDelimiter, false);