From: Chris Lattner Date: Thu, 24 Sep 2009 21:47:32 +0000 (+0000) Subject: add and document regex support for FileCheck. You can now do stuff like: X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=528700863adefca8de461ce28a7d903729fb96b4;p=oota-llvm.git add and document regex support for FileCheck. You can now do stuff like: ; CHECK: movl {{%e[a-z][xi]}}, %eax or whatever. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82717 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html index 28a5e8a983d..bc19ab42626 100644 --- a/docs/TestingGuide.html +++ b/docs/TestingGuide.html @@ -624,6 +624,40 @@ define i8 @coerce_offset0(i32 %V, i32* %P) { + +
FileCheck Pattern Matching Syntax
+ +
+ +

The CHECK: and CHECK-NOT: directives both take a pattern to match. For most +uses of FileCheck, fixed string matching is perfectly sufficient. For some +things, a more flexible form of matching is desired. To support this, FileCheck +allows you to specify regular expressions in matching strings, surrounded by +double braces: {{yourregex}}. Because we want to use fixed string +matching for a majority of what we do, FileCheck has been designed to support +mixing and matching fixed string matching with regular expressions. This allows +you to write things like this:

+ +
+
+; CHECK: movhpd	{{[0-9]+}}(%esp), {{%xmm[0-7]}}
+
+
+ +

In this case, any offset from the ESP register will be allowed, and any xmm +register will be allowed.

+ +

Because regular expressions are enclosed with double braces, they are +visually distinct, and you don't need to use escape characters within the double +braces like you would in C. In the rare case that you want to match double +braces explicitly from the input, you can use something ugly like +{{[{][{]}} as your pattern.

+ +
+ + +
Variables and substitutions
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h index 4c4229eb57b..0bf253f4c19 100644 --- a/include/llvm/Support/Regex.h +++ b/include/llvm/Support/Regex.h @@ -11,11 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" +#include struct llvm_regex; + namespace llvm { + class StringRef; + template class SmallVectorImpl; + class Regex { public: enum { @@ -54,6 +57,8 @@ namespace llvm { /// Matches. /// For this feature to be enabled you must construct the regex using /// Regex("...", Regex::Sub) constructor. + /// + /// This returns true on a successful match. bool match(const StringRef &String, SmallVectorImpl *Matches=0); private: struct llvm_regex *preg; diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index 9f5fbb0278c..285e01f02b1 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -14,13 +14,14 @@ #include "llvm/Support/Regex.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" #include "regex_impl.h" #include using namespace llvm; Regex::Regex(const StringRef ®ex, unsigned Flags) { unsigned flags = 0; - preg = new struct llvm_regex; + preg = new llvm_regex(); preg->re_endp = regex.end(); if (Flags & IgnoreCase) flags |= REG_ICASE; @@ -60,7 +61,7 @@ bool Regex::match(const StringRef &String, SmallVectorImpl *Matches){ } // pmatch needs to have at least one element. - SmallVector pm; + SmallVector pm; pm.resize(nmatch > 0 ? 
nmatch : 1); pm[0].rm_so = 0; pm[0].rm_eo = String.size(); diff --git a/unittests/Support/RegexTest.cpp b/unittests/Support/RegexTest.cpp index b323e284bcb..28a85a1586f 100644 --- a/unittests/Support/RegexTest.cpp +++ b/unittests/Support/RegexTest.cpp @@ -9,6 +9,7 @@ #include "gtest/gtest.h" #include "llvm/Support/Regex.h" +#include "llvm/ADT/SmallVector.h" #include using namespace llvm; diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index cd62870dfd7..8e8c1cde927 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Signals.h" @@ -44,8 +45,9 @@ NoCanonicalizeWhiteSpace("strict-whitespace", //===----------------------------------------------------------------------===// class Pattern { - /// Str - The string to match. - StringRef Str; + /// Chunks - The pattern chunks to match. If the bool is false, it is a fixed + /// string match, if it is true, it is a regex match. + SmallVector, 4> Chunks; public: Pattern() { } @@ -55,10 +57,7 @@ public: /// Match - Match the pattern string against the input buffer Buffer. This /// returns the position that is matched or npos if there is no match. If /// there is a match, the size of the matched string is returned in MatchLen. - size_t Match(StringRef Buffer, size_t &MatchLen) const { - MatchLen = Str.size(); - return Buffer.find(Str); - } + size_t Match(StringRef Buffer, size_t &MatchLen) const; }; bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { @@ -74,11 +73,117 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { "error"); return true; } + + // Scan the pattern to break it into regex and non-regex pieces. + while (!PatternStr.empty()) { + // Handle fixed string matches. 
+ if (PatternStr.size() < 2 || + PatternStr[0] != '{' || PatternStr[1] != '{') { + // Find the end, which is the start of the next regex. + size_t FixedMatchEnd = PatternStr.find("{{"); + + Chunks.push_back(std::make_pair(PatternStr.substr(0, FixedMatchEnd), + false)); + PatternStr = PatternStr.substr(FixedMatchEnd); + continue; + } + + // Otherwise, this is the start of a regex match. Scan for the }}. + size_t End = PatternStr.find("}}"); + if (End == StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), + "found start of regex string with no end '}}'", "error"); + return true; + } + + Regex R(PatternStr.substr(2, End-2)); + std::string Error; + if (!R.isValid(Error)) { + SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2), + "invalid regex: " + Error, "error"); + return true; + } + + Chunks.push_back(std::make_pair(PatternStr.substr(2, End-2), true)); + PatternStr = PatternStr.substr(End+2); + } + + return false; +} +/// Match - Match the pattern string against the input buffer Buffer. This +/// returns the position that is matched or npos if there is no match. If +/// there is a match, the size of the matched string is returned in MatchLen. +size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const { + size_t FirstMatch = StringRef::npos; + MatchLen = 0; + SmallVector MatchInfo; - Str = PatternStr; - return false; + while (!Buffer.empty()) { + StringRef MatchAttempt = Buffer; + + unsigned ChunkNo = 0, e = Chunks.size(); + for (; ChunkNo != e; ++ChunkNo) { + StringRef PatternStr = Chunks[ChunkNo].first; + + size_t ThisMatch = StringRef::npos; + size_t ThisLength = StringRef::npos; + if (!Chunks[ChunkNo].second) { + // Fixed string match. + ThisMatch = MatchAttempt.find(Chunks[ChunkNo].first); + ThisLength = Chunks[ChunkNo].first.size(); + } else if (Regex(Chunks[ChunkNo].first, Regex::Sub).match(MatchAttempt, &MatchInfo)) { + // Successful regex match. 
+ assert(!MatchInfo.empty() && "Didn't get any match"); + StringRef FullMatch = MatchInfo[0]; + MatchInfo.clear(); + + ThisMatch = FullMatch.data()-MatchAttempt.data(); + ThisLength = FullMatch.size(); + } + + // Otherwise, what we do depends on if this is the first match or not. If + // this is the first match, it doesn't match to match at the start of + // MatchAttempt. + if (ChunkNo == 0) { + // If the first match fails then this pattern will never match in + // Buffer. + if (ThisMatch == StringRef::npos) + return ThisMatch; + + FirstMatch = ThisMatch; + MatchAttempt = MatchAttempt.substr(FirstMatch); + ThisMatch = 0; + } + + // If this chunk didn't match, then the entire pattern didn't match from + // FirstMatch, try later in the buffer. + if (ThisMatch == StringRef::npos) + break; + + // Ok, if the match didn't match at the beginning of MatchAttempt, then we + // have something like "ABC{{DEF}} and something was in-between. Reject + // the match. + if (ThisMatch != 0) + break; + + // Otherwise, match the string and move to the next chunk. + MatchLen += ThisLength; + MatchAttempt = MatchAttempt.substr(ThisLength); + } + + // If the whole thing matched, we win. + if (ChunkNo == e) + return FirstMatch; + + // Otherwise, try matching again after FirstMatch to see if this pattern + // matches later in the buffer. + Buffer = Buffer.substr(FirstMatch+1); + } + + // If we ran out of stuff to scan, then we didn't match. + return StringRef::npos; } @@ -367,14 +472,14 @@ int main(int argc, char **argv) { // If this match had "not strings", verify that they don't exist in the // skipped region. 
- for (unsigned i = 0, e = CheckStr.NotStrings.size(); i != e; ++i) { + for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); ChunkNo != e; ++ChunkNo) { size_t MatchLen = 0; - size_t Pos = CheckStr.NotStrings[i].second.Match(SkippedRegion, MatchLen); + size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, MatchLen); if (Pos == StringRef::npos) continue; SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), CheckPrefix+"-NOT: string occurred!", "error"); - SM.PrintMessage(CheckStr.NotStrings[i].first, + SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first, CheckPrefix+"-NOT: pattern specified here", "note"); return 1; }