1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content. This is useful for regression tests etc.
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
17 //===----------------------------------------------------------------------===//
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
34 #include <system_error>
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51 cl::desc("Do not treat all horizontal whitespace as equivalent"));
53 static cl::list<std::string> ImplicitCheckNot(
55 cl::desc("Add an implicit negative check with this pattern to every\n"
56 "positive check. This can be used to ensure that no instances of\n"
57 "this pattern occur which are not matched by a positive pattern"),
58 cl::value_desc("pattern"));
60 typedef cl::list<std::string>::const_iterator prefix_iterator;
62 //===----------------------------------------------------------------------===//
63 // Pattern Handling Code.
64 //===----------------------------------------------------------------------===//
75 /// MatchEOF - When set, this pattern only matches the end of file. This is
76 /// used for trailing CHECK-NOTs.
84 Check::CheckType CheckTy;
86 /// FixedStr - If non-empty, this pattern is a fixed string match with the
87 /// specified fixed string.
90 /// RegEx - If non-empty, this is a regex pattern.
93 /// \brief The number of the line this pattern was read from.
96 /// VariableUses - Entries in this vector map to uses of a variable in the
97 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
98 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
99 /// value of bar at offset 3.
100 std::vector<std::pair<StringRef, unsigned> > VariableUses;
102 /// VariableDefs - Maps definitions of variables to their parenthesized
104 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
105 std::map<StringRef, unsigned> VariableDefs;
109 Pattern(Check::CheckType Ty)
112 /// getLoc - Return the location in source code.
113 SMLoc getLoc() const { return PatternLoc; }
115 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
116 /// which prefix is being matched, SM provides the SourceMgr used for error
117 /// reports, and LineNumber is the line number in the input file from which
118 /// the pattern string was read. Returns true in case of an error, false
120 bool ParsePattern(StringRef PatternStr,
123 unsigned LineNumber);
125 /// Match - Match the pattern string against the input buffer Buffer. This
126 /// returns the position that is matched or npos if there is no match. If
127 /// there is a match, the size of the matched string is returned in MatchLen.
129 /// The VariableTable StringMap provides the current values of filecheck
130 /// variables and is updated if this match defines new values.
131 size_t Match(StringRef Buffer, size_t &MatchLen,
132 StringMap<StringRef> &VariableTable) const;
134 /// PrintFailureInfo - Print additional information about a failure to match
135 /// involving this pattern.
136 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
137 const StringMap<StringRef> &VariableTable) const;
139 bool hasVariable() const { return !(VariableUses.empty() &&
140 VariableDefs.empty()); }
142 Check::CheckType getCheckTy() const { return CheckTy; }
145 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
146 void AddBackrefToRegEx(unsigned BackrefNum);
148 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
149 /// matching this pattern at the start of \arg Buffer; a distance of zero
150 /// should correspond to a perfect match.
151 unsigned ComputeMatchDistance(StringRef Buffer,
152 const StringMap<StringRef> &VariableTable) const;
154 /// \brief Evaluates expression and stores the result to \p Value.
155 /// \return true on success. false when the expression has invalid syntax.
156 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
158 /// \brief Finds the closing sequence of a regex variable usage or
159 /// definition. Str has to point in the beginning of the definition
160 /// (right after the opening sequence).
161 /// \return offset of the closing sequence within Str, or npos if it was not
163 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
167 bool Pattern::ParsePattern(StringRef PatternStr,
170 unsigned LineNumber) {
171 this->LineNumber = LineNumber;
172 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
174 // Ignore trailing whitespace.
175 while (!PatternStr.empty() &&
176 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
177 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
179 // Check that there is something on the line.
180 if (PatternStr.empty()) {
181 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
182 "found empty check string with prefix '" +
187 // Check to see if this is a fixed string, or if it has regex pieces.
188 if (PatternStr.size() < 2 ||
189 (PatternStr.find("{{") == StringRef::npos &&
190 PatternStr.find("[[") == StringRef::npos)) {
191 FixedStr = PatternStr;
195 // Paren value #0 is for the fully matched string. Any new parenthesized
196 // values add from there.
197 unsigned CurParen = 1;
199 // Otherwise, there is at least one regex piece. Build up the regex pattern
200 // by escaping scary characters in fixed strings, building up one big regex.
201 while (!PatternStr.empty()) {
203 if (PatternStr.startswith("{{")) {
204 // This is the start of a regex match. Scan for the }}.
205 size_t End = PatternStr.find("}}");
206 if (End == StringRef::npos) {
207 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
209 "found start of regex string with no end '}}'");
213 // Enclose {{}} patterns in parens just like [[]] even though we're not
214 // capturing the result for any purpose. This is required in case the
215 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
216 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
220 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
224 PatternStr = PatternStr.substr(End+2);
228 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
229 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
230 // second form is [[foo]] which is a reference to foo. The variable name
231 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
232 // it. This is to catch some common errors.
233 if (PatternStr.startswith("[[")) {
234 // Find the closing bracket pair ending the match. End is going to be an
235 // offset relative to the beginning of the match string.
236 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
238 if (End == StringRef::npos) {
239 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
241 "invalid named regex reference, no ]] found");
245 StringRef MatchStr = PatternStr.substr(2, End);
246 PatternStr = PatternStr.substr(End+4);
248 // Get the regex name (e.g. "foo").
249 size_t NameEnd = MatchStr.find(':');
250 StringRef Name = MatchStr.substr(0, NameEnd);
253 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
254 "invalid name in named regex: empty name");
258 // Verify that the name/expression is well formed. FileCheck currently
259 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
260 // is relaxed, more strict check is performed in \c EvaluateExpression.
261 bool IsExpression = false;
262 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
263 if (i == 0 && Name[i] == '@') {
264 if (NameEnd != StringRef::npos) {
265 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
267 "invalid name in named regex definition");
273 if (Name[i] != '_' && !isalnum(Name[i]) &&
274 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
275 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
276 SourceMgr::DK_Error, "invalid name in named regex");
281 // Name can't start with a digit.
282 if (isdigit(static_cast<unsigned char>(Name[0]))) {
283 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
284 "invalid name in named regex");
289 if (NameEnd == StringRef::npos) {
290 // Handle variables that were defined earlier on the same line by
291 // emitting a backreference.
292 if (VariableDefs.find(Name) != VariableDefs.end()) {
293 unsigned VarParenNum = VariableDefs[Name];
294 if (VarParenNum < 1 || VarParenNum > 9) {
295 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
297 "Can't back-reference more than 9 variables");
300 AddBackrefToRegEx(VarParenNum);
302 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
307 // Handle [[foo:.*]].
308 VariableDefs[Name] = CurParen;
312 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
318 // Handle fixed string matches.
319 // Find the end, which is the start of the next regex.
320 size_t FixedMatchEnd = PatternStr.find("{{");
321 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
322 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
323 PatternStr = PatternStr.substr(FixedMatchEnd);
329 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
333 if (!R.isValid(Error)) {
334 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
335 "invalid regex: " + Error);
339 RegExStr += RS.str();
340 CurParen += R.getNumMatches();
344 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
345 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
346 std::string Backref = std::string("\\") +
347 std::string(1, '0' + BackrefNum);
351 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
352 // The only supported expression is @LINE([\+-]\d+)?
353 if (!Expr.startswith("@LINE"))
355 Expr = Expr.substr(StringRef("@LINE").size());
359 Expr = Expr.substr(1);
360 else if (Expr[0] != '-')
362 if (Expr.getAsInteger(10, Offset))
365 Value = llvm::itostr(LineNumber + Offset);
369 /// Match - Match the pattern string against the input buffer Buffer. This
370 /// returns the position that is matched or npos if there is no match. If
371 /// there is a match, the size of the matched string is returned in MatchLen.
372 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
373 StringMap<StringRef> &VariableTable) const {
374 // If this is the EOF pattern, match it immediately.
375 if (CheckTy == Check::CheckEOF) {
377 return Buffer.size();
380 // If this is a fixed string pattern, just match it now.
381 if (!FixedStr.empty()) {
382 MatchLen = FixedStr.size();
383 return Buffer.find(FixedStr);
388 // If there are variable uses, we need to create a temporary string with the
390 StringRef RegExToMatch = RegExStr;
392 if (!VariableUses.empty()) {
395 unsigned InsertOffset = 0;
396 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
399 if (VariableUses[i].first[0] == '@') {
400 if (!EvaluateExpression(VariableUses[i].first, Value))
401 return StringRef::npos;
403 StringMap<StringRef>::iterator it =
404 VariableTable.find(VariableUses[i].first);
405 // If the variable is undefined, return an error.
406 if (it == VariableTable.end())
407 return StringRef::npos;
409 // Look up the value and escape it so that we can put it into the regex.
410 Value += Regex::escape(it->second);
413 // Plop it into the regex at the adjusted offset.
414 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
415 Value.begin(), Value.end());
416 InsertOffset += Value.size();
419 // Match the newly constructed regex.
420 RegExToMatch = TmpStr;
424 SmallVector<StringRef, 4> MatchInfo;
425 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
426 return StringRef::npos;
428 // Successful regex match.
429 assert(!MatchInfo.empty() && "Didn't get any match");
430 StringRef FullMatch = MatchInfo[0];
432 // If this defines any variables, remember their values.
433 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
434 E = VariableDefs.end();
436 assert(I->second < MatchInfo.size() && "Internal paren error");
437 VariableTable[I->first] = MatchInfo[I->second];
440 MatchLen = FullMatch.size();
441 return FullMatch.data()-Buffer.data();
444 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
445 const StringMap<StringRef> &VariableTable) const {
446 // Just compute the number of matching characters. For regular expressions, we
447 // just compare against the regex itself and hope for the best.
449 // FIXME: One easy improvement here is have the regex lib generate a single
450 // example regular expression which matches, and use that as the example
452 StringRef ExampleString(FixedStr);
453 if (ExampleString.empty())
454 ExampleString = RegExStr;
456 // Only compare up to the first line in the buffer, or the string size.
457 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
458 BufferPrefix = BufferPrefix.split('\n').first;
459 return BufferPrefix.edit_distance(ExampleString);
462 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
463 const StringMap<StringRef> &VariableTable) const{
464 // If this was a regular expression using variables, print the current
466 if (!VariableUses.empty()) {
467 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
468 SmallString<256> Msg;
469 raw_svector_ostream OS(Msg);
470 StringRef Var = VariableUses[i].first;
473 if (EvaluateExpression(Var, Value)) {
474 OS << "with expression \"";
475 OS.write_escaped(Var) << "\" equal to \"";
476 OS.write_escaped(Value) << "\"";
478 OS << "uses incorrect expression \"";
479 OS.write_escaped(Var) << "\"";
482 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
484 // Check for undefined variable references.
485 if (it == VariableTable.end()) {
486 OS << "uses undefined variable \"";
487 OS.write_escaped(Var) << "\"";
489 OS << "with variable \"";
490 OS.write_escaped(Var) << "\" equal to \"";
491 OS.write_escaped(it->second) << "\"";
495 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
500 // Attempt to find the closest/best fuzzy match. Usually an error happens
501 // because some string in the output didn't exactly match. In these cases, we
502 // would like to show the user a best guess at what "should have" matched, to
503 // save them having to actually check the input manually.
504 size_t NumLinesForward = 0;
505 size_t Best = StringRef::npos;
506 double BestQuality = 0;
508 // Use an arbitrary 4k limit on how far we will search.
509 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
510 if (Buffer[i] == '\n')
513 // Patterns have leading whitespace stripped, so skip whitespace when
514 // looking for something which looks like a pattern.
515 if (Buffer[i] == ' ' || Buffer[i] == '\t')
518 // Compute the "quality" of this match as an arbitrary combination of the
519 // match distance and the number of lines skipped to get to this match.
520 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
521 double Quality = Distance + (NumLinesForward / 100.);
523 if (Quality < BestQuality || Best == StringRef::npos) {
525 BestQuality = Quality;
529 // Print the "possible intended match here" line if we found something
530 // reasonable and not equal to what we showed in the "scanning from here"
532 if (Best && Best != StringRef::npos && BestQuality < 50) {
533 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
534 SourceMgr::DK_Note, "possible intended match here");
536 // FIXME: If we wanted to be really friendly we would show why the match
537 // failed, as it can be hard to spot simple one character differences.
541 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
542 // Offset keeps track of the current offset within the input Str
544 // [...] Nesting depth
545 size_t BracketDepth = 0;
547 while (!Str.empty()) {
548 if (Str.startswith("]]") && BracketDepth == 0)
550 if (Str[0] == '\\') {
551 // Backslash escapes the next char within regexes, so skip them both.
562 if (BracketDepth == 0) {
563 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
565 "missing closing \"]\" for regex variable");
576 return StringRef::npos;
580 //===----------------------------------------------------------------------===//
582 //===----------------------------------------------------------------------===//
584 /// CheckString - This is a check that we found in the input file.
586 /// Pat - The pattern to match.
589 /// Prefix - Which prefix name this check matched.
592 /// Loc - The location in the match file that the check string was specified.
595 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
596 /// as opposed to a CHECK: directive.
597 Check::CheckType CheckTy;
599 /// DagNotStrings - These are all of the strings that are disallowed from
600 /// occurring between this match string and the previous one (or start of
602 std::vector<Pattern> DagNotStrings;
605 CheckString(const Pattern &P,
609 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
611 /// Check - Match check string and its "not strings" and/or "dag strings".
612 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
613 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
615 /// CheckNext - Verify there is a single line in the given buffer.
616 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
618 /// CheckNot - Verify there's no "not strings" in the given buffer.
619 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
620 const std::vector<const Pattern *> &NotStrings,
621 StringMap<StringRef> &VariableTable) const;
623 /// CheckDag - Match "dag strings" and their mixed "not strings".
624 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
625 std::vector<const Pattern *> &NotStrings,
626 StringMap<StringRef> &VariableTable) const;
629 /// Canonicalize whitespaces in the input file. Line endings are replaced
630 /// with UNIX-style '\n'.
632 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
633 /// characters to a single space.
634 static MemoryBuffer *CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
635 bool PreserveHorizontal) {
636 SmallString<128> NewFile;
637 NewFile.reserve(MB->getBufferSize());
639 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
641 // Eliminate trailing dosish \r.
642 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
646 // If current char is not a horizontal whitespace or if horizontal
647 // whitespace canonicalization is disabled, dump it to output as is.
648 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
649 NewFile.push_back(*Ptr);
653 // Otherwise, add one space and advance over neighboring space.
654 NewFile.push_back(' ');
655 while (Ptr+1 != End &&
656 (Ptr[1] == ' ' || Ptr[1] == '\t'))
660 return MemoryBuffer::getMemBufferCopy(NewFile.str(),
661 MB->getBufferIdentifier());
/// Return true if \p c counts as part of a word when scanning for check
/// prefixes: an alphanumeric character, '-' or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers are only defined for values representable as
  // unsigned char (or EOF), so a possibly negative char must be converted
  // before the call.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
668 // Get the size of the prefix extension.
669 static size_t CheckTypeSize(Check::CheckType Ty) {
671 case Check::CheckNone:
674 case Check::CheckPlain:
675 return sizeof(":") - 1;
677 case Check::CheckNext:
678 return sizeof("-NEXT:") - 1;
680 case Check::CheckNot:
681 return sizeof("-NOT:") - 1;
683 case Check::CheckDAG:
684 return sizeof("-DAG:") - 1;
686 case Check::CheckLabel:
687 return sizeof("-LABEL:") - 1;
689 case Check::CheckEOF:
690 llvm_unreachable("Should not be using EOF size");
693 llvm_unreachable("Bad check type");
696 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
697 char NextChar = Buffer[Prefix.size()];
699 // Verify that the : is present after the prefix.
701 return Check::CheckPlain;
704 return Check::CheckNone;
706 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
707 if (Rest.startswith("NEXT:"))
708 return Check::CheckNext;
710 if (Rest.startswith("NOT:"))
711 return Check::CheckNot;
713 if (Rest.startswith("DAG:"))
714 return Check::CheckDAG;
716 if (Rest.startswith("LABEL:"))
717 return Check::CheckLabel;
719 return Check::CheckNone;
722 // From the given position, find the next character after the word.
723 static size_t SkipWord(StringRef Str, size_t Loc) {
724 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
729 // Try to find the first match in buffer for any prefix. If a valid match is
730 // found, return that prefix and set its type and location. If there are almost
731 // matches (e.g. the actual prefix string is found, but is not an actual check
732 // string), but no valid match, return an empty string and set the position to
733 // resume searching from. If no partial matches are found, return an empty
734 // string and the location will be StringRef::npos. If one prefix is a substring
735 // of another, the maximal match should be found. e.g. if "A" and "AA" are
736 // prefixes then AA-CHECK: should match the second one.
737 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
738 Check::CheckType &CheckTy,
740 StringRef FirstPrefix;
741 size_t FirstLoc = StringRef::npos;
742 size_t SearchLoc = StringRef::npos;
743 Check::CheckType FirstTy = Check::CheckNone;
745 CheckTy = Check::CheckNone;
746 CheckLoc = StringRef::npos;
748 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
750 StringRef Prefix(*I);
751 size_t PrefixLoc = Buffer.find(Prefix);
753 if (PrefixLoc == StringRef::npos)
756 // Track where we are searching for invalid prefixes that look almost right.
757 // We need to only advance to the first partial match on the next attempt
758 // since a partial match could be a substring of a later, valid prefix.
759 // Need to skip to the end of the word, otherwise we could end up
760 // matching a prefix in a substring later.
761 if (PrefixLoc < SearchLoc)
762 SearchLoc = SkipWord(Buffer, PrefixLoc);
764 // We only want to find the first match to avoid skipping some.
765 if (PrefixLoc > FirstLoc)
767 // If one matching check-prefix is a prefix of another, choose the
769 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
772 StringRef Rest = Buffer.drop_front(PrefixLoc);
773 // Make sure we have actually found the prefix, and not a word containing
774 // it. This should also prevent matching the wrong prefix when one is a
775 // substring of another.
776 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
777 FirstTy = Check::CheckNone;
779 FirstTy = FindCheckType(Rest, Prefix);
781 FirstLoc = PrefixLoc;
782 FirstPrefix = Prefix;
785 // If the first prefix is invalid, we should continue the search after it.
786 if (FirstTy == Check::CheckNone) {
787 CheckLoc = SearchLoc;
796 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
797 unsigned &LineNumber,
798 Check::CheckType &CheckTy,
800 while (!Buffer.empty()) {
801 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
802 // If we found a real match, we are done.
803 if (!Prefix.empty()) {
804 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
808 // We didn't find any almost matches either, we are also done.
809 if (CheckLoc == StringRef::npos)
812 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
814 // Advance to the last possible match we found and try again.
815 Buffer = Buffer.drop_front(CheckLoc + 1);
821 /// ReadCheckFile - Read the check file, which specifies the sequence of
822 /// expected strings. The strings are added to the CheckStrings vector.
823 /// Returns true in case of an error, false otherwise.
824 static bool ReadCheckFile(SourceMgr &SM,
825 std::vector<CheckString> &CheckStrings) {
826 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
827 MemoryBuffer::getFileOrSTDIN(CheckFilename);
828 if (std::error_code EC = FileOrErr.getError()) {
829 errs() << "Could not open check file '" << CheckFilename
830 << "': " << EC.message() << '\n';
834 // If we want to canonicalize whitespace, strip excess whitespace from the
835 // buffer containing the CHECK lines. Remove DOS style line endings.
836 MemoryBuffer *F = CanonicalizeInputFile(std::move(FileOrErr.get()),
837 NoCanonicalizeWhiteSpace);
839 SM.AddNewSourceBuffer(F, SMLoc());
841 // Find all instances of CheckPrefix followed by : in the file.
842 StringRef Buffer = F->getBuffer();
844 std::vector<Pattern> ImplicitNegativeChecks;
845 for (const auto &PatternString : ImplicitCheckNot) {
846 // Create a buffer with fake command line content in order to display the
847 // command line option responsible for the specific implicit CHECK-NOT.
848 std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
849 std::string Suffix = "'";
850 MemoryBuffer *CmdLine = MemoryBuffer::getMemBufferCopy(
851 Prefix + PatternString + Suffix, "command line");
852 StringRef PatternInBuffer =
853 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
854 SM.AddNewSourceBuffer(CmdLine, SMLoc());
856 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
857 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
858 "IMPLICIT-CHECK", SM, 0);
862 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
864 // LineNumber keeps track of the line on which CheckPrefix instances are
866 unsigned LineNumber = 1;
869 Check::CheckType CheckTy;
872 // See if a prefix occurs in the memory buffer.
873 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
877 if (UsedPrefix.empty())
880 Buffer = Buffer.drop_front(PrefixLoc);
882 // Location to use for error messages.
883 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
885 // PrefixLoc is to the start of the prefix. Skip to the end.
886 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
888 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
889 // leading and trailing whitespace.
890 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
892 // Scan ahead to the end of line.
893 size_t EOL = Buffer.find_first_of("\n\r");
895 // Remember the location of the start of the pattern, for diagnostics.
896 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
898 // Parse the pattern.
900 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
903 // Verify that CHECK-LABEL lines do not define or use variables
904 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
905 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
907 "found '" + UsedPrefix + "-LABEL:'"
908 " with variable definition or use");
912 Buffer = Buffer.substr(EOL);
914 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
915 if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
916 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
918 "found '" + UsedPrefix + "-NEXT:' without previous '"
919 + UsedPrefix + ": line");
923 // Handle CHECK-DAG/-NOT.
924 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
925 DagNotMatches.push_back(P);
929 // Okay, add the string we captured to the output vector and move on.
930 CheckStrings.push_back(CheckString(P,
934 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
935 DagNotMatches = ImplicitNegativeChecks;
938 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
939 // prefix as a filler for the error message.
940 if (!DagNotMatches.empty()) {
941 CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
943 SMLoc::getFromPointer(Buffer.data()),
945 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
948 if (CheckStrings.empty()) {
949 errs() << "error: no check strings found with prefix"
950 << (CheckPrefixes.size() > 1 ? "es " : " ");
951 for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
952 StringRef Prefix(CheckPrefixes[I]);
953 errs() << '\'' << Prefix << ":'";
965 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
966 const Pattern &Pat, StringRef Buffer,
967 StringMap<StringRef> &VariableTable) {
968 // Otherwise, we have an error, emit an error message.
969 SM.PrintMessage(Loc, SourceMgr::DK_Error,
970 "expected string not found in input");
972 // Print the "scanning from here" line. If the current position is at the
973 // end of a line, advance to the start of the next line.
974 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
976 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
977 "scanning from here");
979 // Allow the pattern to print additional information if desired.
980 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
983 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
985 StringMap<StringRef> &VariableTable) {
986 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
989 /// CountNumNewlinesBetween - Count the number of newlines in the specified
991 static unsigned CountNumNewlinesBetween(StringRef Range,
992 const char *&FirstNewLine) {
993 unsigned NumNewLines = 0;
996 Range = Range.substr(Range.find_first_of("\n\r"));
997 if (Range.empty()) return NumNewLines;
1001 // Handle \n\r and \r\n as a single newline.
1002 if (Range.size() > 1 &&
1003 (Range[1] == '\n' || Range[1] == '\r') &&
1004 (Range[0] != Range[1]))
1005 Range = Range.substr(1);
1006 Range = Range.substr(1);
1008 if (NumNewLines == 1)
1009 FirstNewLine = Range.begin();
1013 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1014 bool IsLabelScanMode, size_t &MatchLen,
1015 StringMap<StringRef> &VariableTable) const {
1017 std::vector<const Pattern *> NotStrings;
1019 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1020 // bounds; we have not processed variable definitions within the bounded block
1021 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1022 // over the block again (including the last CHECK-LABEL) in normal mode.
1023 if (!IsLabelScanMode) {
1024 // Match "dag strings" (with mixed "not strings" if any).
1025 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1026 if (LastPos == StringRef::npos)
1027 return StringRef::npos;
1030 // Match itself from the last position after matching CHECK-DAG.
1031 StringRef MatchBuffer = Buffer.substr(LastPos);
1032 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1033 if (MatchPos == StringRef::npos) {
1034 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1035 return StringRef::npos;
1037 MatchPos += LastPos;
1039 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1041 if (!IsLabelScanMode) {
1042 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1044 // If this check is a "CHECK-NEXT", verify that the previous match was on
1045 // the previous line (i.e. that there is one newline between them).
1046 if (CheckNext(SM, SkippedRegion))
1047 return StringRef::npos;
1049 // If this match had "not strings", verify that they don't exist in the
1051 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1052 return StringRef::npos;
1058 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1059 if (CheckTy != Check::CheckNext)
1062 // Count the number of newlines between the previous match and this one.
1063 assert(Buffer.data() !=
1065 SM.FindBufferContainingLoc(
1066 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1067 "CHECK-NEXT can't be the first check in a file");
1069 const char *FirstNewLine = nullptr;
1070 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1072 if (NumNewLines == 0) {
1073 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1074 "-NEXT: is on the same line as previous match");
1075 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1076 SourceMgr::DK_Note, "'next' match was here");
1077 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1078 "previous match ended here");
1082 if (NumNewLines != 1) {
1083 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1084 "-NEXT: is not on the line after the previous match");
1085 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1086 SourceMgr::DK_Note, "'next' match was here");
1087 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1088 "previous match ended here");
1089 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1090 "non-matching line after previous match is here");
1097 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1098 const std::vector<const Pattern *> &NotStrings,
1099 StringMap<StringRef> &VariableTable) const {
1100 for (unsigned ChunkNo = 0, e = NotStrings.size();
1101 ChunkNo != e; ++ChunkNo) {
1102 const Pattern *Pat = NotStrings[ChunkNo];
1103 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1105 size_t MatchLen = 0;
1106 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1108 if (Pos == StringRef::npos) continue;
1110 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1111 SourceMgr::DK_Error,
1112 Prefix + "-NOT: string occurred!");
1113 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1114 Prefix + "-NOT: pattern specified here");
1121 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1122 std::vector<const Pattern *> &NotStrings,
1123 StringMap<StringRef> &VariableTable) const {
1124 if (DagNotStrings.empty())
1128 size_t StartPos = LastPos;
1130 for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1131 ChunkNo != e; ++ChunkNo) {
1132 const Pattern &Pat = DagNotStrings[ChunkNo];
1134 assert((Pat.getCheckTy() == Check::CheckDAG ||
1135 Pat.getCheckTy() == Check::CheckNot) &&
1136 "Invalid CHECK-DAG or CHECK-NOT!");
1138 if (Pat.getCheckTy() == Check::CheckNot) {
1139 NotStrings.push_back(&Pat);
1143 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1145 size_t MatchLen = 0, MatchPos;
1147 // CHECK-DAG always matches from the start.
1148 StringRef MatchBuffer = Buffer.substr(StartPos);
1149 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1150 // With a group of CHECK-DAGs, a single mismatching means the match on
1151 // that group of CHECK-DAGs fails immediately.
1152 if (MatchPos == StringRef::npos) {
1153 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1154 return StringRef::npos;
1156 // Re-calc it as the offset relative to the start of the original string.
1157 MatchPos += StartPos;
1159 if (!NotStrings.empty()) {
1160 if (MatchPos < LastPos) {
1162 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1163 SourceMgr::DK_Error,
1164 Prefix + "-DAG: found a match of CHECK-DAG"
1165 " reordering across a CHECK-NOT");
1166 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1168 Prefix + "-DAG: the farthest match of CHECK-DAG"
1170 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1171 Prefix + "-NOT: the crossed pattern specified"
1173 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1174 Prefix + "-DAG: the reordered pattern specified"
1176 return StringRef::npos;
1178 // All subsequent CHECK-DAGs should be matched from the farthest
1179 // position of all precedent CHECK-DAGs (including this one.)
1181 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1182 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1184 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1185 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1186 return StringRef::npos;
1187 // Clear "not strings".
1191 // Update the last position with CHECK-DAG matches.
1192 LastPos = std::max(MatchPos + MatchLen, LastPos);
1198 // A check prefix must contain only alphanumeric, hyphens and underscores.
1199 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1200 Regex Validator("^[a-zA-Z0-9_-]*$");
1201 return Validator.match(CheckPrefix);
1204 static bool ValidateCheckPrefixes() {
1205 StringSet<> PrefixSet;
1207 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1209 StringRef Prefix(*I);
1211 // Reject empty prefixes.
1215 if (!PrefixSet.insert(Prefix))
1218 if (!ValidateCheckPrefix(Prefix))
1225 // I don't think there's a way to specify an initial value for cl::list,
1226 // so if nothing was specified, add the default
1227 static void AddCheckPrefixIfNeeded() {
1228 if (CheckPrefixes.empty())
1229 CheckPrefixes.push_back("CHECK");
1232 int main(int argc, char **argv) {
1233 sys::PrintStackTraceOnErrorSignal();
1234 PrettyStackTraceProgram X(argc, argv);
1235 cl::ParseCommandLineOptions(argc, argv);
1237 if (!ValidateCheckPrefixes()) {
1238 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1239 "start with a letter and contain only alphanumeric characters, "
1240 "hyphens and underscores\n";
1244 AddCheckPrefixIfNeeded();
1248 // Read the expected strings from the check file.
1249 std::vector<CheckString> CheckStrings;
1250 if (ReadCheckFile(SM, CheckStrings))
1253 // Open the file to check and add it to SourceMgr.
1254 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1255 MemoryBuffer::getFileOrSTDIN(InputFilename);
1256 if (std::error_code EC = FileOrErr.getError()) {
1257 errs() << "Could not open input file '" << InputFilename
1258 << "': " << EC.message() << '\n';
1261 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1263 if (File->getBufferSize() == 0) {
1264 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1268 // Remove duplicate spaces in the input file if requested.
1269 // Remove DOS style line endings.
1271 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1273 SM.AddNewSourceBuffer(F, SMLoc());
1275 /// VariableTable - This holds all the current filecheck variables.
1276 StringMap<StringRef> VariableTable;
1278 // Check that we have all of the expected strings, in order, in the input
1280 StringRef Buffer = F->getBuffer();
1282 bool hasError = false;
1284 unsigned i = 0, j = 0, e = CheckStrings.size();
1287 StringRef CheckRegion;
1289 CheckRegion = Buffer;
1291 const CheckString &CheckLabelStr = CheckStrings[j];
1292 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1297 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1298 size_t MatchLabelLen = 0;
1299 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1300 MatchLabelLen, VariableTable);
1301 if (MatchLabelPos == StringRef::npos) {
1306 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1307 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1311 for ( ; i != j; ++i) {
1312 const CheckString &CheckStr = CheckStrings[i];
1314 // Check each string within the scanned region, including a second check
1315 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1316 size_t MatchLen = 0;
1317 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1320 if (MatchPos == StringRef::npos) {
1326 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1333 return hasError ? 1 : 0;