1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content. This is useful for regression tests etc.
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
17 //===----------------------------------------------------------------------===//
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
34 #include <system_error>
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51 cl::desc("Do not treat all horizontal whitespace as equivalent"));
53 static cl::list<std::string> ImplicitCheckNot(
55 cl::desc("Add an implicit negative check with this pattern to every\n"
56 "positive check. This can be used to ensure that no instances of\n"
57 "this pattern occur which are not matched by a positive pattern"),
58 cl::value_desc("pattern"));
60 static cl::opt<bool> AllowEmptyInput(
61 "allow-empty", cl::init(false),
62 cl::desc("Allow the input file to be empty. This is useful when making\n"
63 "checks that some error message does not occur, for example."));
65 typedef cl::list<std::string>::const_iterator prefix_iterator;
67 //===----------------------------------------------------------------------===//
68 // Pattern Handling Code.
69 //===----------------------------------------------------------------------===//
80 /// MatchEOF - When set, this pattern only matches the end of file. This is
81 /// used for trailing CHECK-NOTs.
89 Check::CheckType CheckTy;
91 /// FixedStr - If non-empty, this pattern is a fixed string match with the
92 /// specified fixed string.
95 /// RegEx - If non-empty, this is a regex pattern.
98 /// \brief Contains the number of the line this pattern is on.
101 /// VariableUses - Entries in this vector map to uses of a variable in the
102 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
103 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
104 /// value of bar at offset 3.
105 std::vector<std::pair<StringRef, unsigned> > VariableUses;
107 /// VariableDefs - Maps definitions of variables to their parenthesized
109 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
110 std::map<StringRef, unsigned> VariableDefs;
114 Pattern(Check::CheckType Ty)
117 /// getLoc - Return the location in source code.
118 SMLoc getLoc() const { return PatternLoc; }
120 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
121 /// which prefix is being matched, SM provides the SourceMgr used for error
122 /// reports, and LineNumber is the line number in the input file from which
123 /// the pattern string was read. Returns true in case of an error, false
125 bool ParsePattern(StringRef PatternStr,
128 unsigned LineNumber);
130 /// Match - Match the pattern string against the input buffer Buffer. This
131 /// returns the position that is matched or npos if there is no match. If
132 /// there is a match, the size of the matched string is returned in MatchLen.
134 /// The VariableTable StringMap provides the current values of filecheck
135 /// variables and is updated if this match defines new values.
136 size_t Match(StringRef Buffer, size_t &MatchLen,
137 StringMap<StringRef> &VariableTable) const;
139 /// PrintFailureInfo - Print additional information about a failure to match
140 /// involving this pattern.
141 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
142 const StringMap<StringRef> &VariableTable) const;
144 bool hasVariable() const { return !(VariableUses.empty() &&
145 VariableDefs.empty()); }
147 Check::CheckType getCheckTy() const { return CheckTy; }
150 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
151 void AddBackrefToRegEx(unsigned BackrefNum);
153 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
154 /// matching this pattern at the start of \arg Buffer; a distance of zero
155 /// should correspond to a perfect match.
156 unsigned ComputeMatchDistance(StringRef Buffer,
157 const StringMap<StringRef> &VariableTable) const;
159 /// \brief Evaluates expression and stores the result to \p Value.
160 /// \return true on success. false when the expression has invalid syntax.
161 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
163 /// \brief Finds the closing sequence of a regex variable usage or
164 /// definition. Str has to point to the beginning of the definition
165 /// (right after the opening sequence).
166 /// \return offset of the closing sequence within Str, or npos if it was not
168 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
172 bool Pattern::ParsePattern(StringRef PatternStr,
175 unsigned LineNumber) {
176 this->LineNumber = LineNumber;
177 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
179 // Ignore trailing whitespace.
180 while (!PatternStr.empty() &&
181 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
182 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
184 // Check that there is something on the line.
185 if (PatternStr.empty()) {
186 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
187 "found empty check string with prefix '" +
192 // Check to see if this is a fixed string, or if it has regex pieces.
193 if (PatternStr.size() < 2 ||
194 (PatternStr.find("{{") == StringRef::npos &&
195 PatternStr.find("[[") == StringRef::npos)) {
196 FixedStr = PatternStr;
200 // Paren value #0 is for the fully matched string. Any new parenthesized
201 // values add from there.
202 unsigned CurParen = 1;
204 // Otherwise, there is at least one regex piece. Build up the regex pattern
205 // by escaping scary characters in fixed strings, building up one big regex.
206 while (!PatternStr.empty()) {
208 if (PatternStr.startswith("{{")) {
209 // This is the start of a regex match. Scan for the }}.
210 size_t End = PatternStr.find("}}");
211 if (End == StringRef::npos) {
212 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
214 "found start of regex string with no end '}}'");
218 // Enclose {{}} patterns in parens just like [[]] even though we're not
219 // capturing the result for any purpose. This is required in case the
220 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
221 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
225 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
229 PatternStr = PatternStr.substr(End+2);
233 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
234 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
235 // second form is [[foo]] which is a reference to foo. The variable name
236 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
237 // it. This is to catch some common errors.
238 if (PatternStr.startswith("[[")) {
239 // Find the closing bracket pair ending the match. End is going to be an
240 // offset relative to the beginning of the match string.
241 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
243 if (End == StringRef::npos) {
244 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
246 "invalid named regex reference, no ]] found");
250 StringRef MatchStr = PatternStr.substr(2, End);
251 PatternStr = PatternStr.substr(End+4);
253 // Get the regex name (e.g. "foo").
254 size_t NameEnd = MatchStr.find(':');
255 StringRef Name = MatchStr.substr(0, NameEnd);
258 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
259 "invalid name in named regex: empty name");
263 // Verify that the name/expression is well formed. FileCheck currently
264 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
265 // is relaxed; a stricter check is performed in \c EvaluateExpression.
266 bool IsExpression = false;
267 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
268 if (i == 0 && Name[i] == '@') {
269 if (NameEnd != StringRef::npos) {
270 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
272 "invalid name in named regex definition");
278 if (Name[i] != '_' && !isalnum(Name[i]) &&
279 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
280 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
281 SourceMgr::DK_Error, "invalid name in named regex");
286 // Name can't start with a digit.
287 if (isdigit(static_cast<unsigned char>(Name[0]))) {
288 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
289 "invalid name in named regex");
294 if (NameEnd == StringRef::npos) {
295 // Handle variables that were defined earlier on the same line by
296 // emitting a backreference.
297 if (VariableDefs.find(Name) != VariableDefs.end()) {
298 unsigned VarParenNum = VariableDefs[Name];
299 if (VarParenNum < 1 || VarParenNum > 9) {
300 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
302 "Can't back-reference more than 9 variables");
305 AddBackrefToRegEx(VarParenNum);
307 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
312 // Handle [[foo:.*]].
313 VariableDefs[Name] = CurParen;
317 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
323 // Handle fixed string matches.
324 // Find the end, which is the start of the next regex.
325 size_t FixedMatchEnd = PatternStr.find("{{");
326 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
327 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
328 PatternStr = PatternStr.substr(FixedMatchEnd);
334 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
338 if (!R.isValid(Error)) {
339 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
340 "invalid regex: " + Error);
344 RegExStr += RS.str();
345 CurParen += R.getNumMatches();
349 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
350 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
351 std::string Backref = std::string("\\") +
352 std::string(1, '0' + BackrefNum);
356 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
357 // The only supported expression is @LINE([\+-]\d+)?
358 if (!Expr.startswith("@LINE"))
360 Expr = Expr.substr(StringRef("@LINE").size());
364 Expr = Expr.substr(1);
365 else if (Expr[0] != '-')
367 if (Expr.getAsInteger(10, Offset))
370 Value = llvm::itostr(LineNumber + Offset);
374 /// Match - Match the pattern string against the input buffer Buffer. This
375 /// returns the position that is matched or npos if there is no match. If
376 /// there is a match, the size of the matched string is returned in MatchLen.
377 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
378 StringMap<StringRef> &VariableTable) const {
379 // If this is the EOF pattern, match it immediately.
380 if (CheckTy == Check::CheckEOF) {
382 return Buffer.size();
385 // If this is a fixed string pattern, just match it now.
386 if (!FixedStr.empty()) {
387 MatchLen = FixedStr.size();
388 return Buffer.find(FixedStr);
393 // If there are variable uses, we need to create a temporary string with the
395 StringRef RegExToMatch = RegExStr;
397 if (!VariableUses.empty()) {
400 unsigned InsertOffset = 0;
401 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
404 if (VariableUses[i].first[0] == '@') {
405 if (!EvaluateExpression(VariableUses[i].first, Value))
406 return StringRef::npos;
408 StringMap<StringRef>::iterator it =
409 VariableTable.find(VariableUses[i].first);
410 // If the variable is undefined, return an error.
411 if (it == VariableTable.end())
412 return StringRef::npos;
414 // Look up the value and escape it so that we can put it into the regex.
415 Value += Regex::escape(it->second);
418 // Plop it into the regex at the adjusted offset.
419 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
420 Value.begin(), Value.end());
421 InsertOffset += Value.size();
424 // Match the newly constructed regex.
425 RegExToMatch = TmpStr;
429 SmallVector<StringRef, 4> MatchInfo;
430 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
431 return StringRef::npos;
433 // Successful regex match.
434 assert(!MatchInfo.empty() && "Didn't get any match");
435 StringRef FullMatch = MatchInfo[0];
437 // If this defines any variables, remember their values.
438 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
439 E = VariableDefs.end();
441 assert(I->second < MatchInfo.size() && "Internal paren error");
442 VariableTable[I->first] = MatchInfo[I->second];
445 MatchLen = FullMatch.size();
446 return FullMatch.data()-Buffer.data();
449 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
450 const StringMap<StringRef> &VariableTable) const {
451 // Just compute the number of matching characters. For regular expressions, we
452 // just compare against the regex itself and hope for the best.
454 // FIXME: One easy improvement here is to have the regex lib generate a single
455 // example regular expression which matches, and use that as the example
457 StringRef ExampleString(FixedStr);
458 if (ExampleString.empty())
459 ExampleString = RegExStr;
461 // Only compare up to the first line in the buffer, or the string size.
462 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
463 BufferPrefix = BufferPrefix.split('\n').first;
464 return BufferPrefix.edit_distance(ExampleString);
467 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
468 const StringMap<StringRef> &VariableTable) const{
469 // If this was a regular expression using variables, print the current
471 if (!VariableUses.empty()) {
472 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
473 SmallString<256> Msg;
474 raw_svector_ostream OS(Msg);
475 StringRef Var = VariableUses[i].first;
478 if (EvaluateExpression(Var, Value)) {
479 OS << "with expression \"";
480 OS.write_escaped(Var) << "\" equal to \"";
481 OS.write_escaped(Value) << "\"";
483 OS << "uses incorrect expression \"";
484 OS.write_escaped(Var) << "\"";
487 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
489 // Check for undefined variable references.
490 if (it == VariableTable.end()) {
491 OS << "uses undefined variable \"";
492 OS.write_escaped(Var) << "\"";
494 OS << "with variable \"";
495 OS.write_escaped(Var) << "\" equal to \"";
496 OS.write_escaped(it->second) << "\"";
500 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
505 // Attempt to find the closest/best fuzzy match. Usually an error happens
506 // because some string in the output didn't exactly match. In these cases, we
507 // would like to show the user a best guess at what "should have" matched, to
508 // save them having to actually check the input manually.
509 size_t NumLinesForward = 0;
510 size_t Best = StringRef::npos;
511 double BestQuality = 0;
513 // Use an arbitrary 4k limit on how far we will search.
514 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
515 if (Buffer[i] == '\n')
518 // Patterns have leading whitespace stripped, so skip whitespace when
519 // looking for something which looks like a pattern.
520 if (Buffer[i] == ' ' || Buffer[i] == '\t')
523 // Compute the "quality" of this match as an arbitrary combination of the
524 // match distance and the number of lines skipped to get to this match.
525 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
526 double Quality = Distance + (NumLinesForward / 100.);
528 if (Quality < BestQuality || Best == StringRef::npos) {
530 BestQuality = Quality;
534 // Print the "possible intended match here" line if we found something
535 // reasonable and not equal to what we showed in the "scanning from here"
537 if (Best && Best != StringRef::npos && BestQuality < 50) {
538 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
539 SourceMgr::DK_Note, "possible intended match here");
541 // FIXME: If we wanted to be really friendly we would show why the match
542 // failed, as it can be hard to spot simple one character differences.
546 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
547 // Offset keeps track of the current offset within the input Str
549 // [...] Nesting depth
550 size_t BracketDepth = 0;
552 while (!Str.empty()) {
553 if (Str.startswith("]]") && BracketDepth == 0)
555 if (Str[0] == '\\') {
556 // Backslash escapes the next char within regexes, so skip them both.
567 if (BracketDepth == 0) {
568 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
570 "missing closing \"]\" for regex variable");
581 return StringRef::npos;
585 //===----------------------------------------------------------------------===//
587 //===----------------------------------------------------------------------===//
589 /// CheckString - This is a check that we found in the input file.
591 /// Pat - The pattern to match.
594 /// Prefix - Which prefix name this check matched.
597 /// Loc - The location in the match file that the check string was specified.
600 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
601 /// as opposed to a CHECK: directive.
602 Check::CheckType CheckTy;
604 /// DagNotStrings - These are all of the strings that are disallowed from
605 /// occurring between this match string and the previous one (or start of
607 std::vector<Pattern> DagNotStrings;
610 CheckString(const Pattern &P,
614 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
616 /// Check - Match check string and its "not strings" and/or "dag strings".
617 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
618 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
620 /// CheckNext - Verify there is a single line in the given buffer.
621 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
623 /// CheckNot - Verify there are no "not strings" in the given buffer.
624 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
625 const std::vector<const Pattern *> &NotStrings,
626 StringMap<StringRef> &VariableTable) const;
628 /// CheckDag - Match "dag strings" and their mixed "not strings".
629 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
630 std::vector<const Pattern *> &NotStrings,
631 StringMap<StringRef> &VariableTable) const;
634 /// Canonicalize whitespace in the input file. Line endings are replaced
635 /// with UNIX-style '\n'.
637 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
638 /// characters to a single space.
639 static MemoryBuffer *CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
640 bool PreserveHorizontal) {
641 SmallString<128> NewFile;
642 NewFile.reserve(MB->getBufferSize());
644 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
646 // Eliminate trailing dosish \r.
647 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
651 // If current char is not a horizontal whitespace or if horizontal
652 // whitespace canonicalization is disabled, dump it to output as is.
653 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
654 NewFile.push_back(*Ptr);
658 // Otherwise, add one space and advance over neighboring space.
659 NewFile.push_back(' ');
660 while (Ptr+1 != End &&
661 (Ptr[1] == ' ' || Ptr[1] == '\t'))
665 return MemoryBuffer::getMemBufferCopy(NewFile.str(),
666 MB->getBufferIdentifier());
669 static bool IsPartOfWord(char c) {
670 return (isalnum(c) || c == '-' || c == '_');
673 // Get the size of the prefix extension.
674 static size_t CheckTypeSize(Check::CheckType Ty) {
676 case Check::CheckNone:
679 case Check::CheckPlain:
680 return sizeof(":") - 1;
682 case Check::CheckNext:
683 return sizeof("-NEXT:") - 1;
685 case Check::CheckNot:
686 return sizeof("-NOT:") - 1;
688 case Check::CheckDAG:
689 return sizeof("-DAG:") - 1;
691 case Check::CheckLabel:
692 return sizeof("-LABEL:") - 1;
694 case Check::CheckEOF:
695 llvm_unreachable("Should not be using EOF size");
698 llvm_unreachable("Bad check type");
701 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
702 char NextChar = Buffer[Prefix.size()];
704 // Verify that the : is present after the prefix.
706 return Check::CheckPlain;
709 return Check::CheckNone;
711 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
712 if (Rest.startswith("NEXT:"))
713 return Check::CheckNext;
715 if (Rest.startswith("NOT:"))
716 return Check::CheckNot;
718 if (Rest.startswith("DAG:"))
719 return Check::CheckDAG;
721 if (Rest.startswith("LABEL:"))
722 return Check::CheckLabel;
724 return Check::CheckNone;
727 // From the given position, find the next character after the word.
728 static size_t SkipWord(StringRef Str, size_t Loc) {
729 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
734 // Try to find the first match in buffer for any prefix. If a valid match is
735 // found, return that prefix and set its type and location. If there are almost
736 // matches (e.g. the actual prefix string is found, but is not an actual check
737 // string), but no valid match, return an empty string and set the position to
738 // resume searching from. If no partial matches are found, return an empty
739 // string and the location will be StringRef::npos. If one prefix is a substring
740 // of another, the maximal match should be found. e.g. if "A" and "AA" are
741 // prefixes then AA-CHECK: should match the second one.
742 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
743 Check::CheckType &CheckTy,
745 StringRef FirstPrefix;
746 size_t FirstLoc = StringRef::npos;
747 size_t SearchLoc = StringRef::npos;
748 Check::CheckType FirstTy = Check::CheckNone;
750 CheckTy = Check::CheckNone;
751 CheckLoc = StringRef::npos;
753 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
755 StringRef Prefix(*I);
756 size_t PrefixLoc = Buffer.find(Prefix);
758 if (PrefixLoc == StringRef::npos)
761 // Track where we are searching for invalid prefixes that look almost right.
762 // We need to only advance to the first partial match on the next attempt
763 // since a partial match could be a substring of a later, valid prefix.
764 // Need to skip to the end of the word, otherwise we could end up
765 // matching a prefix in a substring later.
766 if (PrefixLoc < SearchLoc)
767 SearchLoc = SkipWord(Buffer, PrefixLoc);
769 // We only want to find the first match to avoid skipping some.
770 if (PrefixLoc > FirstLoc)
772 // If one matching check-prefix is a prefix of another, choose the
774 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
777 StringRef Rest = Buffer.drop_front(PrefixLoc);
778 // Make sure we have actually found the prefix, and not a word containing
779 // it. This should also prevent matching the wrong prefix when one is a
780 // substring of another.
781 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
782 FirstTy = Check::CheckNone;
784 FirstTy = FindCheckType(Rest, Prefix);
786 FirstLoc = PrefixLoc;
787 FirstPrefix = Prefix;
790 // If the first prefix is invalid, we should continue the search after it.
791 if (FirstTy == Check::CheckNone) {
792 CheckLoc = SearchLoc;
801 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
802 unsigned &LineNumber,
803 Check::CheckType &CheckTy,
805 while (!Buffer.empty()) {
806 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
807 // If we found a real match, we are done.
808 if (!Prefix.empty()) {
809 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
813 // We didn't find any almost matches either, we are also done.
814 if (CheckLoc == StringRef::npos)
817 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
819 // Advance to the last possible match we found and try again.
820 Buffer = Buffer.drop_front(CheckLoc + 1);
826 /// ReadCheckFile - Read the check file, which specifies the sequence of
827 /// expected strings. The strings are added to the CheckStrings vector.
828 /// Returns true in case of an error, false otherwise.
829 static bool ReadCheckFile(SourceMgr &SM,
830 std::vector<CheckString> &CheckStrings) {
831 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
832 MemoryBuffer::getFileOrSTDIN(CheckFilename);
833 if (std::error_code EC = FileOrErr.getError()) {
834 errs() << "Could not open check file '" << CheckFilename
835 << "': " << EC.message() << '\n';
839 // If we want to canonicalize whitespace, strip excess whitespace from the
840 // buffer containing the CHECK lines. Remove DOS style line endings.
841 MemoryBuffer *F = CanonicalizeInputFile(std::move(FileOrErr.get()),
842 NoCanonicalizeWhiteSpace);
844 SM.AddNewSourceBuffer(F, SMLoc());
846 // Find all instances of CheckPrefix followed by : in the file.
847 StringRef Buffer = F->getBuffer();
849 std::vector<Pattern> ImplicitNegativeChecks;
850 for (const auto &PatternString : ImplicitCheckNot) {
851 // Create a buffer with fake command line content in order to display the
852 // command line option responsible for the specific implicit CHECK-NOT.
853 std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
854 std::string Suffix = "'";
855 MemoryBuffer *CmdLine = MemoryBuffer::getMemBufferCopy(
856 Prefix + PatternString + Suffix, "command line");
857 StringRef PatternInBuffer =
858 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
859 SM.AddNewSourceBuffer(CmdLine, SMLoc());
861 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
862 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
863 "IMPLICIT-CHECK", SM, 0);
867 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
869 // LineNumber keeps track of the line on which CheckPrefix instances are
871 unsigned LineNumber = 1;
874 Check::CheckType CheckTy;
877 // See if a prefix occurs in the memory buffer.
878 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
882 if (UsedPrefix.empty())
885 Buffer = Buffer.drop_front(PrefixLoc);
887 // Location to use for error messages.
888 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
890 // PrefixLoc points to the start of the prefix. Skip to the end.
891 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
893 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
894 // leading and trailing whitespace.
895 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
897 // Scan ahead to the end of line.
898 size_t EOL = Buffer.find_first_of("\n\r");
900 // Remember the location of the start of the pattern, for diagnostics.
901 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
903 // Parse the pattern.
905 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
908 // Verify that CHECK-LABEL lines do not define or use variables
909 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
910 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
912 "found '" + UsedPrefix + "-LABEL:'"
913 " with variable definition or use");
917 Buffer = Buffer.substr(EOL);
919 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
920 if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
921 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
923 "found '" + UsedPrefix + "-NEXT:' without previous '"
924 + UsedPrefix + ": line");
928 // Handle CHECK-DAG/-NOT.
929 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
930 DagNotMatches.push_back(P);
934 // Okay, add the string we captured to the output vector and move on.
935 CheckStrings.push_back(CheckString(P,
939 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
940 DagNotMatches = ImplicitNegativeChecks;
943 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
944 // prefix as a filler for the error message.
945 if (!DagNotMatches.empty()) {
946 CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
948 SMLoc::getFromPointer(Buffer.data()),
950 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
953 if (CheckStrings.empty()) {
954 errs() << "error: no check strings found with prefix"
955 << (CheckPrefixes.size() > 1 ? "es " : " ");
956 for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
957 StringRef Prefix(CheckPrefixes[I]);
958 errs() << '\'' << Prefix << ":'";
970 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
971 const Pattern &Pat, StringRef Buffer,
972 StringMap<StringRef> &VariableTable) {
973 // Otherwise, we have an error, emit an error message.
974 SM.PrintMessage(Loc, SourceMgr::DK_Error,
975 "expected string not found in input");
977 // Print the "scanning from here" line. If the current position is at the
978 // end of a line, advance to the start of the next line.
979 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
981 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
982 "scanning from here");
984 // Allow the pattern to print additional information if desired.
985 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
988 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
990 StringMap<StringRef> &VariableTable) {
991 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
994 /// CountNumNewlinesBetween - Count the number of newlines in the specified
996 static unsigned CountNumNewlinesBetween(StringRef Range,
997 const char *&FirstNewLine) {
998 unsigned NumNewLines = 0;
1000 // Scan for newline.
1001 Range = Range.substr(Range.find_first_of("\n\r"));
1002 if (Range.empty()) return NumNewLines;
1006 // Handle \n\r and \r\n as a single newline.
1007 if (Range.size() > 1 &&
1008 (Range[1] == '\n' || Range[1] == '\r') &&
1009 (Range[0] != Range[1]))
1010 Range = Range.substr(1);
1011 Range = Range.substr(1);
1013 if (NumNewLines == 1)
1014 FirstNewLine = Range.begin();
1018 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1019 bool IsLabelScanMode, size_t &MatchLen,
1020 StringMap<StringRef> &VariableTable) const {
1022 std::vector<const Pattern *> NotStrings;
1024 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1025 // bounds; we have not processed variable definitions within the bounded block
1026 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1027 // over the block again (including the last CHECK-LABEL) in normal mode.
1028 if (!IsLabelScanMode) {
1029 // Match "dag strings" (with mixed "not strings" if any).
1030 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1031 if (LastPos == StringRef::npos)
1032 return StringRef::npos;
1035 // Match itself from the last position after matching CHECK-DAG.
1036 StringRef MatchBuffer = Buffer.substr(LastPos);
1037 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1038 if (MatchPos == StringRef::npos) {
1039 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1040 return StringRef::npos;
1042 MatchPos += LastPos;
1044 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1046 if (!IsLabelScanMode) {
1047 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1049 // If this check is a "CHECK-NEXT", verify that the previous match was on
1050 // the previous line (i.e. that there is one newline between them).
1051 if (CheckNext(SM, SkippedRegion))
1052 return StringRef::npos;
1054 // If this match had "not strings", verify that they don't exist in the
1056 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1057 return StringRef::npos;
// CheckNext - Verify the CHECK-NEXT constraint for this check string.
//
// Buffer is the text between the end of the previous match (Buffer.data())
// and the start of this match (Buffer.end()); both ends are used as
// diagnostic locations below.  Checks whose CheckTy is not Check::CheckNext
// leave through the early exit.  Otherwise the line breaks in Buffer are
// counted: zero means the match is on the same line as the previous one, and
// any count other than one means it is not on the immediately following
// line.  Both cases print an error at Loc followed by explanatory notes.
//
// NOTE(review): the return statements (and one line of the assert
// expression) are elided from this listing.  The caller treats a true
// result as failure, so presumably true is returned after a diagnostic and
// false otherwise -- confirm against the complete file.
1063 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1064 if (CheckTy != Check::CheckNext)
1067 // Count the number of newlines between the previous match and this one.
1068 assert(Buffer.data() !=
1070 SM.FindBufferContainingLoc(
1071 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1072 "CHECK-NEXT can't be the first check in a file");
1074 const char *FirstNewLine = nullptr;
1075 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
// No line break at all between the two matches.
1077 if (NumNewLines == 0) {
1078 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1079 "-NEXT: is on the same line as previous match");
1080 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1081 SourceMgr::DK_Note, "'next' match was here");
1082 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1083 "previous match ended here");
// More than one line break: additionally point at the location reported by
// CountNumNewlinesBetween through FirstNewLine.
1087 if (NumNewLines != 1) {
1088 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1089 "-NEXT: is not on the line after the previous match");
1090 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1091 SourceMgr::DK_Note, "'next' match was here");
1092 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1093 "previous match ended here");
1094 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1095 "non-matching line after previous match is here");
// CheckNot - Verify that none of the CHECK-NOT patterns in NotStrings
// matches inside Buffer.
//
// Each pattern is asserted to be of type Check::CheckNot and matched against
// Buffer.  A pattern that does not match is skipped; for one that does, an
// error is printed at the match location (Buffer.data() + Pos) together with
// a note at the location of the pattern itself.
//
// NOTE(review): the return statements are elided from this listing.  Callers
// treat a true result as failure, so presumably true is returned right after
// the diagnostics and false once every pattern has been tried -- confirm
// against the complete file.
1102 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1103 const std::vector<const Pattern *> &NotStrings,
1104 StringMap<StringRef> &VariableTable) const {
1105 for (unsigned ChunkNo = 0, e = NotStrings.size();
1106 ChunkNo != e; ++ChunkNo) {
1107 const Pattern *Pat = NotStrings[ChunkNo];
1108 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1110 size_t MatchLen = 0;
1111 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
// No occurrence of this pattern: move on to the next one.
1113 if (Pos == StringRef::npos) continue;
1115 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1116 SourceMgr::DK_Error,
1117 Prefix + "-NOT: string occurred!");
1118 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1119 Prefix + "-NOT: pattern specified here");
1126 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1127 std::vector<const Pattern *> &NotStrings,
1128 StringMap<StringRef> &VariableTable) const {
1129 if (DagNotStrings.empty())
1133 size_t StartPos = LastPos;
1135 for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1136 ChunkNo != e; ++ChunkNo) {
1137 const Pattern &Pat = DagNotStrings[ChunkNo];
1139 assert((Pat.getCheckTy() == Check::CheckDAG ||
1140 Pat.getCheckTy() == Check::CheckNot) &&
1141 "Invalid CHECK-DAG or CHECK-NOT!");
1143 if (Pat.getCheckTy() == Check::CheckNot) {
1144 NotStrings.push_back(&Pat);
1148 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1150 size_t MatchLen = 0, MatchPos;
1152 // CHECK-DAG always matches from the start.
1153 StringRef MatchBuffer = Buffer.substr(StartPos);
1154 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1155 // With a group of CHECK-DAGs, a single mismatching means the match on
1156 // that group of CHECK-DAGs fails immediately.
1157 if (MatchPos == StringRef::npos) {
1158 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1159 return StringRef::npos;
1161 // Re-calc it as the offset relative to the start of the original string.
1162 MatchPos += StartPos;
1164 if (!NotStrings.empty()) {
1165 if (MatchPos < LastPos) {
1167 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1168 SourceMgr::DK_Error,
1169 Prefix + "-DAG: found a match of CHECK-DAG"
1170 " reordering across a CHECK-NOT");
1171 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1173 Prefix + "-DAG: the farthest match of CHECK-DAG"
1175 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1176 Prefix + "-NOT: the crossed pattern specified"
1178 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1179 Prefix + "-DAG: the reordered pattern specified"
1181 return StringRef::npos;
1183 // All subsequent CHECK-DAGs should be matched from the farthest
1184 // position of all precedent CHECK-DAGs (including this one.)
1186 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1187 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1189 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1190 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1191 return StringRef::npos;
1192 // Clear "not strings".
1196 // Update the last position with CHECK-DAG matches.
1197 LastPos = std::max(MatchPos + MatchLen, LastPos);
1203 // A check prefix must contain only alphanumeric, hyphens and underscores.
1204 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1205 Regex Validator("^[a-zA-Z0-9_-]*$");
1206 return Validator.match(CheckPrefix);
// ValidateCheckPrefixes - Vet every prefix held in the CheckPrefixes option.
//
// Each prefix is inserted into a StringSet and run through
// ValidateCheckPrefix; the visible conditions react to a failed insertion
// and to a failed validation.  main() reports "Supplied check-prefix is
// invalid!" when this function returns false.
//
// NOTE(review): this listing elides the rest of the loop header, the test
// announced by the "Reject empty prefixes" comment, and every return
// statement.  Presumably each failing condition returns false and the
// function returns true otherwise -- confirm against the complete file.
1209 static bool ValidateCheckPrefixes() {
1210 StringSet<> PrefixSet;
1212 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1214 StringRef Prefix(*I);
1216 // Reject empty prefixes.
// presumably a failed insert means the prefix was supplied more than once
1220 if (!PrefixSet.insert(Prefix))
// Reject prefixes containing characters outside the allowed set.
1223 if (!ValidateCheckPrefix(Prefix))
1230 // I don't think there's a way to specify an initial value for cl::list,
1231 // so if nothing was specified, add the default
1232 static void AddCheckPrefixIfNeeded() {
1233 if (CheckPrefixes.empty())
1234 CheckPrefixes.push_back("CHECK");
// main - Entry point of the tool.
//
// Parses the command line, validates the check prefixes, reads the check
// strings from the check file, loads and canonicalizes the input file, and
// then matches the check strings against the input one CHECK-LABEL-delimited
// region at a time.  The visible exit path returns 1 if any check failed and
// 0 otherwise; the file header additionally documents exit status 2 for
// errors.
//
// NOTE(review): this listing is lossy -- the embedded numbering skips many
// lines, so the SourceMgr declaration, the early-exit returns, the
// declaration that receives the canonicalized buffer, the loop headers and
// the statements that set hasError are not visible here.  Confirm the
// details against the complete file.
1237 int main(int argc, char **argv) {
1238 sys::PrintStackTraceOnErrorSignal();
1239 PrettyStackTraceProgram X(argc, argv);
1240 cl::ParseCommandLineOptions(argc, argv);
// Refuse to run with prefixes that fail validation.
1242 if (!ValidateCheckPrefixes()) {
1243 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1244 "start with a letter and contain only alphanumeric characters, "
1245 "hyphens and underscores\n";
// Install the default "CHECK" prefix when none was given.
1249 AddCheckPrefixIfNeeded();
1253 // Read the expected strings from the check file.
1254 std::vector<CheckString> CheckStrings;
1255 if (ReadCheckFile(SM, CheckStrings))
1258 // Open the file to check and add it to SourceMgr.
1259 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1260 MemoryBuffer::getFileOrSTDIN(InputFilename);
1261 if (std::error_code EC = FileOrErr.getError()) {
1262 errs() << "Could not open input file '" << InputFilename
1263 << "': " << EC.message() << '\n';
1266 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
// An empty input is reported as an error unless AllowEmptyInput is set.
1268 if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1269 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1273 // Remove duplicate spaces in the input file if requested.
1274 // Remove DOS style line endings.
1276 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1278 SM.AddNewSourceBuffer(F, SMLoc());
1280 /// VariableTable - This holds all the current filecheck variables.
1281 StringMap<StringRef> VariableTable;
1283 // Check that we have all of the expected strings, in order, in the input
// file.
1285 StringRef Buffer = F->getBuffer();
1287 bool hasError = false;
// i indexes the next check string to verify; j is used to look ahead for a
// CHECK-LABEL entry; e is the number of check strings.
1289 unsigned i = 0, j = 0, e = CheckStrings.size();
1292 StringRef CheckRegion;
1294 CheckRegion = Buffer;
1296 const CheckString &CheckLabelStr = CheckStrings[j];
1297 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1302 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1303 size_t MatchLabelLen = 0;
1304 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1305 MatchLabelLen, VariableTable);
1306 if (MatchLabelPos == StringRef::npos) {
// The region to verify runs up to and including the label match; the
// remaining input starts right after it.
1311 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1312 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1316 for ( ; i != j; ++i) {
1317 const CheckString &CheckStr = CheckStrings[i];
1319 // Check each string within the scanned region, including a second check
1320 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1321 size_t MatchLen = 0;
1322 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1325 if (MatchPos == StringRef::npos) {
// Continue matching after the text consumed by this check.
1331 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1338 return hasError ? 1 : 0;