From 70add884e4967f511e2cbb35c61534186b9b418a Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sat, 8 Aug 2009 20:02:57 +0000 Subject: [PATCH] add a little function to do arbitrary string pattern matching in a much more efficient way than a sequence of if's. Switch MatchRegisterName to use it. It would be nice if someone could factor this out to a shared place in tblgen :) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78492 91177308-0d34-0410-b5e6-96231b3b80d8 --- utils/TableGen/AsmMatcherEmitter.cpp | 141 ++++++++++++++++++++++++++- 1 file changed, 138 insertions(+), 3 deletions(-) diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index cd95920702e..015a910ee20 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -709,6 +709,8 @@ static void EmitMatchTokenString(CodeGenTarget &Target, OS << "}\n\n"; } + + /// EmitClassifyOperand - Emit the function to classify an operand. static void EmitClassifyOperand(CodeGenTarget &Target, std::vector &Infos, @@ -730,6 +732,135 @@ static void EmitClassifyOperand(CodeGenTarget &Target, OS << "}\n\n"; } +typedef std::pair StringPair; + +/// FindFirstNonCommonLetter - Find the first character in the keys of the +/// string pairs that is not shared across the whole set of strings. All +/// strings are assumed to have the same length. +static unsigned +FindFirstNonCommonLetter(const std::vector &Matches) { + assert(!Matches.empty()); + for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { + // Check to see if letter i is the same across the set. + char Letter = Matches[0]->first[i]; + + for (unsigned str = 0, e = Matches.size(); str != e; ++str) + if (Matches[str]->first[i] != Letter) + return i; + } + + return Matches[0]->first.size(); +} + +/// EmitStringMatcherForChar - Given a set of strings that are known to be the +/// same length and whose characters leading up to CharNo are the same, emit +/// code to verify that CharNo and later are the same. +static void EmitStringMatcherForChar(const std::string &StrVariableName, + const std::vector &Matches, + unsigned CharNo, unsigned IndentCount, + raw_ostream &OS) { + assert(!Matches.empty() && "Must have at least one string to match!"); + std::string Indent(IndentCount*2+4, ' '); + + // If we have verified that the entire string matches, we're done: output the + // matching code. + if (CharNo == Matches[0]->first.size()) { + assert(Matches.size() == 1 && "Had duplicate keys to match on"); + + // FIXME: If Matches[0].first has embeded \n, this will be bad. + OS << Indent << Matches[0]->second << "\t // \"" << Matches[0]->first + << "\"\n"; + return; + } + + // Bucket the matches by the character we are comparing. + std::map > MatchesByLetter; + + for (unsigned i = 0, e = Matches.size(); i != e; ++i) + MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); + + + // If we have exactly one bucket to match, see how many characters are common + // across the whole set and match all of them at once. + // length, just verify the rest of it with one if. + if (MatchesByLetter.size() == 1) { + unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); + unsigned NumChars = FirstNonCommonLetter-CharNo; + + if (NumChars == 1) { + // Do the comparison with if (Str[1] == 'f') + // FIXME: Need to escape general characters. + OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] == '" + << Matches[0]->first[CharNo] << "') {\n"; + } else { + // Do the comparison with if (Str.substr(1,3) == "foo"). + OS << Indent << "if (" << StrVariableName << ".substr(" << CharNo << "," + << NumChars << ") == \""; + + // FIXME: Need to escape general strings. + OS << Matches[0]->first.substr(CharNo, NumChars) << "\") {\n"; + } + + EmitStringMatcherForChar(StrVariableName, Matches, FirstNonCommonLetter, + IndentCount+1, OS); + OS << Indent << "}\n"; + return; + } + + // Otherwise, we have multiple possible things, emit a switch on the + // character. + OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; + OS << Indent << "default: break;\n"; + + for (std::map >::iterator LI = + MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { + // TODO: escape hard stuff (like \n) if we ever care about it. + OS << Indent << "case '" << LI->first << "':\t // " + << LI->second.size() << " strings to match.\n"; + EmitStringMatcherForChar(StrVariableName, LI->second, CharNo+1, + IndentCount+1, OS); + OS << Indent << " break;\n"; + } + + OS << Indent << "}\n"; + +} + + +/// EmitStringMatcher - Given a list of strings and code to execute when they +/// match, output a simple switch tree to classify the input string. If a +/// match is found, the code in Vals[i].second is executed. This code should do +/// a return to avoid falling through. If nothing matches, execution falls +/// through. StrVariableName is the name of teh variable to test. +static void EmitStringMatcher(const std::string &StrVariableName, + const std::vector &Matches, + raw_ostream &OS) { + // First level categorization: group strings by length. + std::map > MatchesByLength; + + for (unsigned i = 0, e = Matches.size(); i != e; ++i) + MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); + + // Output a switch statement on length and categorize the elements within each + // bin. + OS << " switch (" << StrVariableName << ".size()) {\n"; + OS << " default: break;\n"; + + + for (std::map >::iterator LI = + MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { + OS << " case " << LI->first << ":\t // " << LI->second.size() + << " strings to match.\n"; + EmitStringMatcherForChar(StrVariableName, LI->second, 0, 0, OS); + OS << " break;\n"; + } + + + OS << " }\n"; +} + + + /// EmitMatchRegisterName - Emit the function to match a string to the target /// specific register enum. static void EmitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser, @@ -740,16 +871,20 @@ static void EmitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser, << AsmParser->getValueAsString("AsmParserClassName") << "::MatchRegisterName(const StringRef &Name, unsigned &RegNo) {\n"; + std::vector Matches; + // FIXME: TableGen should have a fast string matcher generator. for (unsigned i = 0, e = Registers.size(); i != e; ++i) { const CodeGenRegister &Reg = Registers[i]; if (Reg.TheDef->getValueAsString("AsmName").empty()) continue; - OS << " if (Name == \"" - << Reg.TheDef->getValueAsString("AsmName") << "\")\n" - << " return RegNo=" << i + 1 << ", false;\n"; + Matches.push_back(StringPair(Reg.TheDef->getValueAsString("AsmName"), + "RegNo=" + utostr(i + 1) + "; return false;")); } + + EmitStringMatcher("Name", Matches, OS); + OS << " return true;\n"; OS << "}\n\n"; } -- 2.34.1