1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content. This is useful for regression tests etc.
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
17 //===----------------------------------------------------------------------===//
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
34 #include <system_error>
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51 cl::desc("Do not treat all horizontal whitespace as equivalent"));
53 static cl::list<std::string> ImplicitCheckNot(
55 cl::desc("Add an implicit negative check with this pattern to every\n"
56 "positive check. This can be used to ensure that no instances of\n"
57 "this pattern occur which are not matched by a positive pattern"),
58 cl::value_desc("pattern"));
60 static cl::opt<bool> AllowEmptyInput(
61 "allow-empty", cl::init(false),
62 cl::desc("Allow the input file to be empty. This is useful when making\n"
63 "checks that some error message does not occur, for example."));
65 typedef cl::list<std::string>::const_iterator prefix_iterator;
67 //===----------------------------------------------------------------------===//
68 // Pattern Handling Code.
69 //===----------------------------------------------------------------------===//
81 /// MatchEOF - When set, this pattern only matches the end of file. This is
82 /// used for trailing CHECK-NOTs.
90 Check::CheckType CheckTy;
92 /// FixedStr - If non-empty, this pattern is a fixed string match with the
93 /// specified fixed string.
96 /// RegEx - If non-empty, this is a regex pattern.
99 /// \brief Contains the number of the line this pattern is on.
102 /// VariableUses - Entries in this vector map to uses of a variable in the
103 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
104 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
105 /// value of bar at offset 3.
106 std::vector<std::pair<StringRef, unsigned> > VariableUses;
108 /// VariableDefs - Maps definitions of variables to their parenthesized
110 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
111 std::map<StringRef, unsigned> VariableDefs;
115 Pattern(Check::CheckType Ty)
118 /// getLoc - Return the location in source code.
119 SMLoc getLoc() const { return PatternLoc; }
121 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
122 /// which prefix is being matched, SM provides the SourceMgr used for error
123 /// reports, and LineNumber is the line number in the input file from which
124 /// the pattern string was read. Returns true in case of an error, false
126 bool ParsePattern(StringRef PatternStr,
129 unsigned LineNumber);
131 /// Match - Match the pattern string against the input buffer Buffer. This
132 /// returns the position that is matched or npos if there is no match. If
133 /// there is a match, the size of the matched string is returned in MatchLen.
135 /// The VariableTable StringMap provides the current values of filecheck
136 /// variables and is updated if this match defines new values.
137 size_t Match(StringRef Buffer, size_t &MatchLen,
138 StringMap<StringRef> &VariableTable) const;
140 /// PrintFailureInfo - Print additional information about a failure to match
141 /// involving this pattern.
142 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
143 const StringMap<StringRef> &VariableTable) const;
145 bool hasVariable() const { return !(VariableUses.empty() &&
146 VariableDefs.empty()); }
148 Check::CheckType getCheckTy() const { return CheckTy; }
151 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
152 void AddBackrefToRegEx(unsigned BackrefNum);
154 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
155 /// matching this pattern at the start of \arg Buffer; a distance of zero
156 /// should correspond to a perfect match.
157 unsigned ComputeMatchDistance(StringRef Buffer,
158 const StringMap<StringRef> &VariableTable) const;
160 /// \brief Evaluates expression and stores the result to \p Value.
161 /// \return true on success. false when the expression has invalid syntax.
162 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
164 /// \brief Finds the closing sequence of a regex variable usage or
165 /// definition. Str has to point in the beginning of the definition
166 /// (right after the opening sequence).
167 /// \return offset of the closing sequence within Str, or npos if it was not
169 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
173 bool Pattern::ParsePattern(StringRef PatternStr,
176 unsigned LineNumber) {
177 this->LineNumber = LineNumber;
178 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
180 // Ignore trailing whitespace.
181 while (!PatternStr.empty() &&
182 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
183 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
185 // Check that there is something on the line.
186 if (PatternStr.empty()) {
187 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
188 "found empty check string with prefix '" +
193 // Check to see if this is a fixed string, or if it has regex pieces.
194 if (PatternStr.size() < 2 ||
195 (PatternStr.find("{{") == StringRef::npos &&
196 PatternStr.find("[[") == StringRef::npos)) {
197 FixedStr = PatternStr;
201 // Paren value #0 is for the fully matched string. Any new parenthesized
202 // values add from there.
203 unsigned CurParen = 1;
205 // Otherwise, there is at least one regex piece. Build up the regex pattern
206 // by escaping scary characters in fixed strings, building up one big regex.
207 while (!PatternStr.empty()) {
209 if (PatternStr.startswith("{{")) {
210 // This is the start of a regex match. Scan for the }}.
211 size_t End = PatternStr.find("}}");
212 if (End == StringRef::npos) {
213 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
215 "found start of regex string with no end '}}'");
219 // Enclose {{}} patterns in parens just like [[]] even though we're not
220 // capturing the result for any purpose. This is required in case the
221 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
222 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
226 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
230 PatternStr = PatternStr.substr(End+2);
234 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
235 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
236 // second form is [[foo]] which is a reference to foo. The variable name
237 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
238 // it. This is to catch some common errors.
239 if (PatternStr.startswith("[[")) {
240 // Find the closing bracket pair ending the match. End is going to be an
241 // offset relative to the beginning of the match string.
242 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
244 if (End == StringRef::npos) {
245 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
247 "invalid named regex reference, no ]] found");
251 StringRef MatchStr = PatternStr.substr(2, End);
252 PatternStr = PatternStr.substr(End+4);
254 // Get the regex name (e.g. "foo").
255 size_t NameEnd = MatchStr.find(':');
256 StringRef Name = MatchStr.substr(0, NameEnd);
259 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
260 "invalid name in named regex: empty name");
264 // Verify that the name/expression is well formed. FileCheck currently
265 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
266 // is relaxed, more strict check is performed in \c EvaluateExpression.
267 bool IsExpression = false;
268 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
269 if (i == 0 && Name[i] == '@') {
270 if (NameEnd != StringRef::npos) {
271 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
273 "invalid name in named regex definition");
279 if (Name[i] != '_' && !isalnum(Name[i]) &&
280 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
281 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
282 SourceMgr::DK_Error, "invalid name in named regex");
287 // Name can't start with a digit.
288 if (isdigit(static_cast<unsigned char>(Name[0]))) {
289 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
290 "invalid name in named regex");
295 if (NameEnd == StringRef::npos) {
296 // Handle variables that were defined earlier on the same line by
297 // emitting a backreference.
298 if (VariableDefs.find(Name) != VariableDefs.end()) {
299 unsigned VarParenNum = VariableDefs[Name];
300 if (VarParenNum < 1 || VarParenNum > 9) {
301 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
303 "Can't back-reference more than 9 variables");
306 AddBackrefToRegEx(VarParenNum);
308 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
313 // Handle [[foo:.*]].
314 VariableDefs[Name] = CurParen;
318 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
324 // Handle fixed string matches.
325 // Find the end, which is the start of the next regex.
326 size_t FixedMatchEnd = PatternStr.find("{{");
327 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
328 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
329 PatternStr = PatternStr.substr(FixedMatchEnd);
335 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
339 if (!R.isValid(Error)) {
340 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
341 "invalid regex: " + Error);
345 RegExStr += RS.str();
346 CurParen += R.getNumMatches();
350 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
351 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
352 std::string Backref = std::string("\\") +
353 std::string(1, '0' + BackrefNum);
357 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
358 // The only supported expression is @LINE([\+-]\d+)?
359 if (!Expr.startswith("@LINE"))
361 Expr = Expr.substr(StringRef("@LINE").size());
365 Expr = Expr.substr(1);
366 else if (Expr[0] != '-')
368 if (Expr.getAsInteger(10, Offset))
371 Value = llvm::itostr(LineNumber + Offset);
375 /// Match - Match the pattern string against the input buffer Buffer. This
376 /// returns the position that is matched or npos if there is no match. If
377 /// there is a match, the size of the matched string is returned in MatchLen.
378 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
379 StringMap<StringRef> &VariableTable) const {
380 // If this is the EOF pattern, match it immediately.
381 if (CheckTy == Check::CheckEOF) {
383 return Buffer.size();
386 // If this is a fixed string pattern, just match it now.
387 if (!FixedStr.empty()) {
388 MatchLen = FixedStr.size();
389 return Buffer.find(FixedStr);
394 // If there are variable uses, we need to create a temporary string with the
396 StringRef RegExToMatch = RegExStr;
398 if (!VariableUses.empty()) {
401 unsigned InsertOffset = 0;
402 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
405 if (VariableUses[i].first[0] == '@') {
406 if (!EvaluateExpression(VariableUses[i].first, Value))
407 return StringRef::npos;
409 StringMap<StringRef>::iterator it =
410 VariableTable.find(VariableUses[i].first);
411 // If the variable is undefined, return an error.
412 if (it == VariableTable.end())
413 return StringRef::npos;
415 // Look up the value and escape it so that we can put it into the regex.
416 Value += Regex::escape(it->second);
419 // Plop it into the regex at the adjusted offset.
420 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
421 Value.begin(), Value.end());
422 InsertOffset += Value.size();
425 // Match the newly constructed regex.
426 RegExToMatch = TmpStr;
430 SmallVector<StringRef, 4> MatchInfo;
431 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
432 return StringRef::npos;
434 // Successful regex match.
435 assert(!MatchInfo.empty() && "Didn't get any match");
436 StringRef FullMatch = MatchInfo[0];
438 // If this defines any variables, remember their values.
439 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
440 E = VariableDefs.end();
442 assert(I->second < MatchInfo.size() && "Internal paren error");
443 VariableTable[I->first] = MatchInfo[I->second];
446 MatchLen = FullMatch.size();
447 return FullMatch.data()-Buffer.data();
450 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
451 const StringMap<StringRef> &VariableTable) const {
452 // Just compute the number of matching characters. For regular expressions, we
453 // just compare against the regex itself and hope for the best.
455 // FIXME: One easy improvement here is have the regex lib generate a single
456 // example regular expression which matches, and use that as the example
458 StringRef ExampleString(FixedStr);
459 if (ExampleString.empty())
460 ExampleString = RegExStr;
462 // Only compare up to the first line in the buffer, or the string size.
463 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
464 BufferPrefix = BufferPrefix.split('\n').first;
465 return BufferPrefix.edit_distance(ExampleString);
468 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
469 const StringMap<StringRef> &VariableTable) const{
470 // If this was a regular expression using variables, print the current
472 if (!VariableUses.empty()) {
473 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
474 SmallString<256> Msg;
475 raw_svector_ostream OS(Msg);
476 StringRef Var = VariableUses[i].first;
479 if (EvaluateExpression(Var, Value)) {
480 OS << "with expression \"";
481 OS.write_escaped(Var) << "\" equal to \"";
482 OS.write_escaped(Value) << "\"";
484 OS << "uses incorrect expression \"";
485 OS.write_escaped(Var) << "\"";
488 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
490 // Check for undefined variable references.
491 if (it == VariableTable.end()) {
492 OS << "uses undefined variable \"";
493 OS.write_escaped(Var) << "\"";
495 OS << "with variable \"";
496 OS.write_escaped(Var) << "\" equal to \"";
497 OS.write_escaped(it->second) << "\"";
501 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
506 // Attempt to find the closest/best fuzzy match. Usually an error happens
507 // because some string in the output didn't exactly match. In these cases, we
508 // would like to show the user a best guess at what "should have" matched, to
509 // save them having to actually check the input manually.
510 size_t NumLinesForward = 0;
511 size_t Best = StringRef::npos;
512 double BestQuality = 0;
514 // Use an arbitrary 4k limit on how far we will search.
515 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
516 if (Buffer[i] == '\n')
519 // Patterns have leading whitespace stripped, so skip whitespace when
520 // looking for something which looks like a pattern.
521 if (Buffer[i] == ' ' || Buffer[i] == '\t')
524 // Compute the "quality" of this match as an arbitrary combination of the
525 // match distance and the number of lines skipped to get to this match.
526 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
527 double Quality = Distance + (NumLinesForward / 100.);
529 if (Quality < BestQuality || Best == StringRef::npos) {
531 BestQuality = Quality;
535 // Print the "possible intended match here" line if we found something
536 // reasonable and not equal to what we showed in the "scanning from here"
538 if (Best && Best != StringRef::npos && BestQuality < 50) {
539 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
540 SourceMgr::DK_Note, "possible intended match here");
542 // FIXME: If we wanted to be really friendly we would show why the match
543 // failed, as it can be hard to spot simple one character differences.
547 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
548 // Offset keeps track of the current offset within the input Str
550 // [...] Nesting depth
551 size_t BracketDepth = 0;
553 while (!Str.empty()) {
554 if (Str.startswith("]]") && BracketDepth == 0)
556 if (Str[0] == '\\') {
557 // Backslash escapes the next char within regexes, so skip them both.
568 if (BracketDepth == 0) {
569 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
571 "missing closing \"]\" for regex variable");
582 return StringRef::npos;
586 //===----------------------------------------------------------------------===//
588 //===----------------------------------------------------------------------===//
590 /// CheckString - This is a check that we found in the input file.
592 /// Pat - The pattern to match.
595 /// Prefix - Which prefix name this check matched.
598 /// Loc - The location in the match file that the check string was specified.
601 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
602 /// as opposed to a CHECK: directive.
603 Check::CheckType CheckTy;
605 /// DagNotStrings - These are all of the strings that are disallowed from
606 /// occurring between this match string and the previous one (or start of
608 std::vector<Pattern> DagNotStrings;
611 CheckString(const Pattern &P,
615 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
617 /// Check - Match check string and its "not strings" and/or "dag strings".
618 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
619 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
621 /// CheckNext - Verify there is a single line in the given buffer.
622 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
624 /// CheckSame - Verify there is no newline in the given buffer.
625 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
627 /// CheckNot - Verify there's no "not strings" in the given buffer.
628 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
629 const std::vector<const Pattern *> &NotStrings,
630 StringMap<StringRef> &VariableTable) const;
632 /// CheckDag - Match "dag strings" and their mixed "not strings".
633 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
634 std::vector<const Pattern *> &NotStrings,
635 StringMap<StringRef> &VariableTable) const;
638 /// Canonicalize whitespaces in the input file. Line endings are replaced
639 /// with UNIX-style '\n'.
641 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
642 /// characters to a single space.
643 static std::unique_ptr<MemoryBuffer>
644 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
645 bool PreserveHorizontal) {
646 SmallString<128> NewFile;
647 NewFile.reserve(MB->getBufferSize());
649 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
651 // Eliminate trailing dosish \r.
652 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
656 // If current char is not a horizontal whitespace or if horizontal
657 // whitespace canonicalization is disabled, dump it to output as is.
658 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
659 NewFile.push_back(*Ptr);
663 // Otherwise, add one space and advance over neighboring space.
664 NewFile.push_back(' ');
665 while (Ptr+1 != End &&
666 (Ptr[1] == ' ' || Ptr[1] == '\t'))
670 return std::unique_ptr<MemoryBuffer>(
671 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
/// IsPartOfWord - Return true if \p c is an alphanumeric character, '-' or
/// '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers are only defined for EOF and values representable
  // as unsigned char, so a (possibly negative) plain char is converted first.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
678 // Get the size of the prefix extension.
679 static size_t CheckTypeSize(Check::CheckType Ty) {
681 case Check::CheckNone:
684 case Check::CheckPlain:
685 return sizeof(":") - 1;
687 case Check::CheckNext:
688 return sizeof("-NEXT:") - 1;
690 case Check::CheckSame:
691 return sizeof("-SAME:") - 1;
693 case Check::CheckNot:
694 return sizeof("-NOT:") - 1;
696 case Check::CheckDAG:
697 return sizeof("-DAG:") - 1;
699 case Check::CheckLabel:
700 return sizeof("-LABEL:") - 1;
702 case Check::CheckEOF:
703 llvm_unreachable("Should not be using EOF size");
706 llvm_unreachable("Bad check type");
709 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
710 char NextChar = Buffer[Prefix.size()];
712 // Verify that the : is present after the prefix.
714 return Check::CheckPlain;
717 return Check::CheckNone;
719 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
720 if (Rest.startswith("NEXT:"))
721 return Check::CheckNext;
723 if (Rest.startswith("SAME:"))
724 return Check::CheckSame;
726 if (Rest.startswith("NOT:"))
727 return Check::CheckNot;
729 if (Rest.startswith("DAG:"))
730 return Check::CheckDAG;
732 if (Rest.startswith("LABEL:"))
733 return Check::CheckLabel;
735 return Check::CheckNone;
738 // From the given position, find the next character after the word.
739 static size_t SkipWord(StringRef Str, size_t Loc) {
740 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
745 // Try to find the first match in buffer for any prefix. If a valid match is
746 // found, return that prefix and set its type and location. If there are almost
747 // matches (e.g. the actual prefix string is found, but is not an actual check
748 // string), but no valid match, return an empty string and set the position to
749 // resume searching from. If no partial matches are found, return an empty
750 // string and the location will be StringRef::npos. If one prefix is a substring
751 // of another, the maximal match should be found. e.g. if "A" and "AA" are
752 // prefixes then AA-CHECK: should match the second one.
753 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
754 Check::CheckType &CheckTy,
756 StringRef FirstPrefix;
757 size_t FirstLoc = StringRef::npos;
758 size_t SearchLoc = StringRef::npos;
759 Check::CheckType FirstTy = Check::CheckNone;
761 CheckTy = Check::CheckNone;
762 CheckLoc = StringRef::npos;
764 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
766 StringRef Prefix(*I);
767 size_t PrefixLoc = Buffer.find(Prefix);
769 if (PrefixLoc == StringRef::npos)
772 // Track where we are searching for invalid prefixes that look almost right.
773 // We need to only advance to the first partial match on the next attempt
774 // since a partial match could be a substring of a later, valid prefix.
775 // Need to skip to the end of the word, otherwise we could end up
776 // matching a prefix in a substring later.
777 if (PrefixLoc < SearchLoc)
778 SearchLoc = SkipWord(Buffer, PrefixLoc);
780 // We only want to find the first match to avoid skipping some.
781 if (PrefixLoc > FirstLoc)
783 // If one matching check-prefix is a prefix of another, choose the
785 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
788 StringRef Rest = Buffer.drop_front(PrefixLoc);
789 // Make sure we have actually found the prefix, and not a word containing
790 // it. This should also prevent matching the wrong prefix when one is a
791 // substring of another.
792 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
793 FirstTy = Check::CheckNone;
795 FirstTy = FindCheckType(Rest, Prefix);
797 FirstLoc = PrefixLoc;
798 FirstPrefix = Prefix;
801 // If the first prefix is invalid, we should continue the search after it.
802 if (FirstTy == Check::CheckNone) {
803 CheckLoc = SearchLoc;
812 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
813 unsigned &LineNumber,
814 Check::CheckType &CheckTy,
816 while (!Buffer.empty()) {
817 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
818 // If we found a real match, we are done.
819 if (!Prefix.empty()) {
820 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
824 // We didn't find any almost matches either, we are also done.
825 if (CheckLoc == StringRef::npos)
828 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
830 // Advance to the last possible match we found and try again.
831 Buffer = Buffer.drop_front(CheckLoc + 1);
837 /// ReadCheckFile - Read the check file, which specifies the sequence of
838 /// expected strings. The strings are added to the CheckStrings vector.
839 /// Returns true in case of an error, false otherwise.
840 static bool ReadCheckFile(SourceMgr &SM,
841 std::vector<CheckString> &CheckStrings) {
842 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
843 MemoryBuffer::getFileOrSTDIN(CheckFilename);
844 if (std::error_code EC = FileOrErr.getError()) {
845 errs() << "Could not open check file '" << CheckFilename
846 << "': " << EC.message() << '\n';
850 // If we want to canonicalize whitespace, strip excess whitespace from the
851 // buffer containing the CHECK lines. Remove DOS style line endings.
852 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
853 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
855 // Find all instances of CheckPrefix followed by : in the file.
856 StringRef Buffer = F->getBuffer();
858 SM.AddNewSourceBuffer(std::move(F), SMLoc());
860 std::vector<Pattern> ImplicitNegativeChecks;
861 for (const auto &PatternString : ImplicitCheckNot) {
862 // Create a buffer with fake command line content in order to display the
863 // command line option responsible for the specific implicit CHECK-NOT.
864 std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
865 std::string Suffix = "'";
866 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
867 Prefix + PatternString + Suffix, "command line");
869 StringRef PatternInBuffer =
870 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
871 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
873 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
874 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
875 "IMPLICIT-CHECK", SM, 0);
879 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
881 // LineNumber keeps track of the line on which CheckPrefix instances are
883 unsigned LineNumber = 1;
886 Check::CheckType CheckTy;
889 // See if a prefix occurs in the memory buffer.
890 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
894 if (UsedPrefix.empty())
897 Buffer = Buffer.drop_front(PrefixLoc);
899 // Location to use for error messages.
900 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
902 // PrefixLoc is to the start of the prefix. Skip to the end.
903 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
905 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
906 // leading and trailing whitespace.
907 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
909 // Scan ahead to the end of line.
910 size_t EOL = Buffer.find_first_of("\n\r");
912 // Remember the location of the start of the pattern, for diagnostics.
913 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
915 // Parse the pattern.
917 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
920 // Verify that CHECK-LABEL lines do not define or use variables
921 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
922 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
924 "found '" + UsedPrefix + "-LABEL:'"
925 " with variable definition or use");
929 Buffer = Buffer.substr(EOL);
931 // Verify that CHECK-NEXT/-SAME lines have at least one CHECK line before them.
932 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
933 CheckStrings.empty()) {
934 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
935 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
937 "found '" + UsedPrefix + "-" + Type + "' without previous '"
938 + UsedPrefix + ": line");
942 // Handle CHECK-DAG/-NOT.
943 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
944 DagNotMatches.push_back(P);
948 // Okay, add the string we captured to the output vector and move on.
949 CheckStrings.push_back(CheckString(P,
953 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
954 DagNotMatches = ImplicitNegativeChecks;
957 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
958 // prefix as a filler for the error message.
959 if (!DagNotMatches.empty()) {
960 CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
962 SMLoc::getFromPointer(Buffer.data()),
964 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
967 if (CheckStrings.empty()) {
968 errs() << "error: no check strings found with prefix"
969 << (CheckPrefixes.size() > 1 ? "es " : " ");
970 for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
971 StringRef Prefix(CheckPrefixes[I]);
972 errs() << '\'' << Prefix << ":'";
984 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
985 const Pattern &Pat, StringRef Buffer,
986 StringMap<StringRef> &VariableTable) {
987 // Otherwise, we have an error, emit an error message.
988 SM.PrintMessage(Loc, SourceMgr::DK_Error,
989 "expected string not found in input");
991 // Print the "scanning from here" line. If the current position is at the
992 // end of a line, advance to the start of the next line.
993 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
995 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
996 "scanning from here");
998 // Allow the pattern to print additional information if desired.
999 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1002 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1004 StringMap<StringRef> &VariableTable) {
1005 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1008 /// CountNumNewlinesBetween - Count the number of newlines in the specified
1010 static unsigned CountNumNewlinesBetween(StringRef Range,
1011 const char *&FirstNewLine) {
1012 unsigned NumNewLines = 0;
1014 // Scan for newline.
1015 Range = Range.substr(Range.find_first_of("\n\r"));
1016 if (Range.empty()) return NumNewLines;
1020 // Handle \n\r and \r\n as a single newline.
1021 if (Range.size() > 1 &&
1022 (Range[1] == '\n' || Range[1] == '\r') &&
1023 (Range[0] != Range[1]))
1024 Range = Range.substr(1);
1025 Range = Range.substr(1);
1027 if (NumNewLines == 1)
1028 FirstNewLine = Range.begin();
1032 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1033 bool IsLabelScanMode, size_t &MatchLen,
1034 StringMap<StringRef> &VariableTable) const {
1036 std::vector<const Pattern *> NotStrings;
1038 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1039 // bounds; we have not processed variable definitions within the bounded block
1040 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1041 // over the block again (including the last CHECK-LABEL) in normal mode.
1042 if (!IsLabelScanMode) {
1043 // Match "dag strings" (with mixed "not strings" if any).
1044 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1045 if (LastPos == StringRef::npos)
1046 return StringRef::npos;
1049 // Match itself from the last position after matching CHECK-DAG.
1050 StringRef MatchBuffer = Buffer.substr(LastPos);
1051 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1052 if (MatchPos == StringRef::npos) {
1053 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1054 return StringRef::npos;
1056 MatchPos += LastPos;
1058 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1060 if (!IsLabelScanMode) {
1061 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1063 // If this check is a "CHECK-NEXT", verify that the previous match was on
1064 // the previous line (i.e. that there is one newline between them).
1065 if (CheckNext(SM, SkippedRegion))
1066 return StringRef::npos;
1068 // If this check is a "CHECK-SAME", verify that the previous match was on
1069 // the same line (i.e. that there is no newline between them).
1070 if (CheckSame(SM, SkippedRegion))
1071 return StringRef::npos;
1073 // If this match had "not strings", verify that they don't exist in the
1075 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1076 return StringRef::npos;
1082 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1083 if (CheckTy != Check::CheckNext)
1086 // Count the number of newlines between the previous match and this one.
1087 assert(Buffer.data() !=
1089 SM.FindBufferContainingLoc(
1090 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1091 "CHECK-NEXT can't be the first check in a file");
1093 const char *FirstNewLine = nullptr;
1094 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1096 if (NumNewLines == 0) {
1097 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1098 "-NEXT: is on the same line as previous match");
1099 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1100 SourceMgr::DK_Note, "'next' match was here");
1101 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1102 "previous match ended here");
1106 if (NumNewLines != 1) {
1107 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1108 "-NEXT: is not on the line after the previous match");
1109 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1110 SourceMgr::DK_Note, "'next' match was here");
1111 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1112 "previous match ended here");
1113 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1114 "non-matching line after previous match is here");
1121 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1122 if (CheckTy != Check::CheckSame)
1125 // Count the number of newlines between the previous match and this one.
1126 assert(Buffer.data() !=
1127 SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1128 SMLoc::getFromPointer(Buffer.data())))
1129 ->getBufferStart() &&
1130 "CHECK-SAME can't be the first check in a file");
1132 const char *FirstNewLine = nullptr;
1133 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1135 if (NumNewLines != 0) {
1136 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1138 "-SAME: is not on the same line as the previous match");
1139 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1140 "'next' match was here");
1141 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1142 "previous match ended here");
1149 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1150 const std::vector<const Pattern *> &NotStrings,
1151 StringMap<StringRef> &VariableTable) const {
1152 for (unsigned ChunkNo = 0, e = NotStrings.size();
1153 ChunkNo != e; ++ChunkNo) {
1154 const Pattern *Pat = NotStrings[ChunkNo];
1155 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1157 size_t MatchLen = 0;
1158 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1160 if (Pos == StringRef::npos) continue;
1162 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1163 SourceMgr::DK_Error,
1164 Prefix + "-NOT: string occurred!");
1165 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1166 Prefix + "-NOT: pattern specified here");
1173 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1174 std::vector<const Pattern *> &NotStrings,
1175 StringMap<StringRef> &VariableTable) const {
1176 if (DagNotStrings.empty())
1180 size_t StartPos = LastPos;
1182 for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1183 ChunkNo != e; ++ChunkNo) {
1184 const Pattern &Pat = DagNotStrings[ChunkNo];
1186 assert((Pat.getCheckTy() == Check::CheckDAG ||
1187 Pat.getCheckTy() == Check::CheckNot) &&
1188 "Invalid CHECK-DAG or CHECK-NOT!");
1190 if (Pat.getCheckTy() == Check::CheckNot) {
1191 NotStrings.push_back(&Pat);
1195 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1197 size_t MatchLen = 0, MatchPos;
1199 // CHECK-DAG always matches from the start.
1200 StringRef MatchBuffer = Buffer.substr(StartPos);
1201 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1202 // With a group of CHECK-DAGs, a single mismatching means the match on
1203 // that group of CHECK-DAGs fails immediately.
1204 if (MatchPos == StringRef::npos) {
1205 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1206 return StringRef::npos;
1208 // Re-calc it as the offset relative to the start of the original string.
1209 MatchPos += StartPos;
1211 if (!NotStrings.empty()) {
1212 if (MatchPos < LastPos) {
1214 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1215 SourceMgr::DK_Error,
1216 Prefix + "-DAG: found a match of CHECK-DAG"
1217 " reordering across a CHECK-NOT");
1218 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1220 Prefix + "-DAG: the farthest match of CHECK-DAG"
1222 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1223 Prefix + "-NOT: the crossed pattern specified"
1225 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1226 Prefix + "-DAG: the reordered pattern specified"
1228 return StringRef::npos;
1230 // All subsequent CHECK-DAGs should be matched from the farthest
1231 // position of all precedent CHECK-DAGs (including this one.)
1233 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1234 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1236 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1237 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1238 return StringRef::npos;
1239 // Clear "not strings".
1243 // Update the last position with CHECK-DAG matches.
1244 LastPos = std::max(MatchPos + MatchLen, LastPos);
1250 // A check prefix must contain only alphanumeric, hyphens and underscores.
1251 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1252 Regex Validator("^[a-zA-Z0-9_-]*$");
1253 return Validator.match(CheckPrefix);
1256 static bool ValidateCheckPrefixes() {
1257 StringSet<> PrefixSet;
1259 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1261 StringRef Prefix(*I);
1263 // Reject empty prefixes.
1267 if (!PrefixSet.insert(Prefix).second)
1270 if (!ValidateCheckPrefix(Prefix))
1277 // I don't think there's a way to specify an initial value for cl::list,
1278 // so if nothing was specified, add the default
1279 static void AddCheckPrefixIfNeeded() {
1280 if (CheckPrefixes.empty())
1281 CheckPrefixes.push_back("CHECK");
// main - Program entry point. From the lines visible here it: parses the
// command line, validates the check prefixes, reads the check file, loads and
// canonicalizes the input file, and then runs every CheckString against the
// input, cutting the input into regions at CHECK-LABEL matches. The final
// statement returns 1 if any check failed and 0 otherwise.
//
// NOTE(review): this listing carries stray leading line numbers, and the gaps
// in them show that lines are missing: closing braces, the statements that
// leave main after each error message, the declaration of SM, the outer loop
// header and the branches that advance j. The comments below describe only
// what is visible; the missing parts must be restored from the original file.
1284 int main(int argc, char **argv) {
1285 sys::PrintStackTraceOnErrorSignal();
1286 PrettyStackTraceProgram X(argc, argv);
1287 cl::ParseCommandLineOptions(argc, argv);
// Reject an invalid -check-prefix list before doing any work.
// NOTE(review): the message says prefixes must "start with a letter", but the
// pattern in ValidateCheckPrefix ("^[a-zA-Z0-9_-]*$") also accepts a leading
// digit, hyphen or underscore - confirm which of the two is intended.
// NOTE(review): the exit statement for this branch is not visible; the file
// header documents exit status 2 for errors.
1289 if (!ValidateCheckPrefixes()) {
1290 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1291 "start with a letter and contain only alphanumeric characters, "
1292 "hyphens and underscores\n";
// Falls back to the "CHECK" prefix when none was supplied.
1296 AddCheckPrefixIfNeeded();
// NOTE(review): SM is used below but its declaration is not visible here.
1300 // Read the expected strings from the check file.
1301 std::vector<CheckString> CheckStrings;
1302 if (ReadCheckFile(SM, CheckStrings))
1305 // Open the file to check and add it to SourceMgr.
1306 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1307 MemoryBuffer::getFileOrSTDIN(InputFilename);
1308 if (std::error_code EC = FileOrErr.getError()) {
1309 errs() << "Could not open input file '" << InputFilename
1310 << "': " << EC.message() << '\n';
// Bind a reference to the buffer held by FileOrErr.
1313 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
// An empty input is reported as an error unless AllowEmptyInput is set.
1315 if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1316 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1320 // Remove duplicate spaces in the input file if requested.
1321 // Remove DOS style line endings.
1322 std::unique_ptr<MemoryBuffer> F =
1323 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1325 // Check that we have all of the expected strings, in order, in the input
// Take the view of the text before F is moved into SM on the next statement.
1327 StringRef Buffer = F->getBuffer();
1329 SM.AddNewSourceBuffer(std::move(F), SMLoc());
1331 /// VariableTable - This holds all the current filecheck variables.
1332 StringMap<StringRef> VariableTable;
1334 bool hasError = false;
// i indexes the next check string to verify (see the for loop below), j
// indexes the check string examined as a possible CHECK-LABEL, and e is the
// number of check strings.
// NOTE(review): the statements that advance j are not visible in this listing.
1336 unsigned i = 0, j = 0, e = CheckStrings.size();
1339 StringRef CheckRegion;
// NOTE(review): the condition guarding this assignment is not visible.
1341 CheckRegion = Buffer;
1343 const CheckString &CheckLabelStr = CheckStrings[j];
1344 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1349 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1350 size_t MatchLabelLen = 0;
// The third argument (true) is Check's IsLabelScanMode parameter.
1351 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1352 MatchLabelLen, VariableTable);
1353 if (MatchLabelPos == StringRef::npos) {
// The region to verify runs up to and including the label match; the
// remaining input starts right after it.
1358 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1359 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
// Verify the check strings in [i, j) against the current region.
1363 for ( ; i != j; ++i) {
1364 const CheckString &CheckStr = CheckStrings[i];
1366 // Check each string within the scanned region, including a second check
1367 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1368 size_t MatchLen = 0;
// NOTE(review): the remaining argument(s) and closing parenthesis of this
// call are on a line missing from the listing.
1369 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1372 if (MatchPos == StringRef::npos) {
// Later check strings are matched only against the text after this match.
1378 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
// Exit status: 1 when a check failed, 0 when everything matched.
1385 return hasError ? 1 : 0;