1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content. This is useful for regression tests etc.
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
17 //===----------------------------------------------------------------------===//
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/MemoryBuffer.h"
21 #include "llvm/Support/PrettyStackTrace.h"
22 #include "llvm/Support/Regex.h"
23 #include "llvm/Support/SourceMgr.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "llvm/System/Signals.h"
// Command-line options.

// Required positional argument: the name of the check file.
28 static cl::opt<std::string>
29 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
// "input-file" option: the file to check. It is initialized to "-".
31 static cl::opt<std::string>
32 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
33 cl::init("-"), cl::value_desc("filename"));
// "check-prefix" option: the prefix that is searched for in the check file.
// It is initialized to "CHECK".
35 static cl::opt<std::string>
36 CheckPrefix("check-prefix", cl::init("CHECK"),
37 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
// "strict-whitespace" option: the check file and the input file are passed
// through CanonicalizeInputFile only when this option is not set.
// NOTE(review): the declarator line for this option is not visible in this
// listing (the embedded numbering jumps from 37 to 40); it is used as a
// boolean elsewhere in the file -- confirm against the upstream source.
40 NoCanonicalizeWhiteSpace("strict-whitespace",
41 cl::desc("Do not treat all horizontal whitespace as equivalent"));
43 //===----------------------------------------------------------------------===//
44 // Pattern Handling Code.
45 //===----------------------------------------------------------------------===//
/// PatternChunk - Record the chunk text S and whether it is to be treated as
/// a regex (isRE) rather than as a fixed string.
51 PatternChunk(StringRef S, bool isRE) : Str(S), isRegEx(isRE) {}
/// Match - Search Buffer for this chunk. The return value is the offset of
/// the match within Buffer and MatchLen receives the length of the match.
/// The regex path returns StringRef::npos when nothing matches.
53 size_t Match(StringRef Buffer, size_t &MatchLen) const {
// NOTE(review): the test that selects between the two paths below is not
// visible in this listing (the embedded numbering skips 54 and 58-60);
// presumably it checks isRegEx -- confirm against the upstream source.
55 // Fixed string match.
// MatchLen is set to the chunk length before the search, whatever find()
// goes on to return.
56 MatchLen = Str.size();
57 return Buffer.find(Str);
// Regex path: a Regex is constructed from Str on every call, and MatchInfo is
// handed to match() to collect the result.
61 SmallVector<StringRef, 4> MatchInfo;
62 if (!Regex(Str, Regex::Sub|Regex::Newline).match(Buffer, &MatchInfo))
63 return StringRef::npos;
65 // Successful regex match.
66 assert(!MatchInfo.empty() && "Didn't get any match");
// Entry 0 is used as the text of the whole match.
67 StringRef FullMatch = MatchInfo[0];
// The returned offset is the distance from the start of Buffer to the start
// of the matched text.
69 MatchLen = FullMatch.size();
70 return FullMatch.data()-Buffer.data();
75 /// Chunks - The pattern chunks to match, in order. Each PatternChunk carries
76 /// a flag: false means a fixed string match, true means a regex match.
77 SmallVector<PatternChunk, 4> Chunks;
/// ParsePattern - Split PatternStr into fixed-string chunks and "{{...}}"
/// regex chunks and append them to Chunks. Problems are reported through SM.
/// NOTE(review): the return statements are not visible in this listing;
/// ReadCheckFile tests the result in an `if`, so presumably true means
/// failure -- confirm against the upstream source.
82 bool ParsePattern(StringRef PatternStr, SourceMgr &SM);
84 /// Match - Match the pattern string against the input buffer Buffer. This
85 /// returns the position that is matched or npos if there is no match. If
86 /// there is a match, the size of the matched string is returned in MatchLen.
87 size_t Match(StringRef Buffer, size_t &MatchLen) const;
/// ParsePattern - Break PatternStr into fixed-string pieces and "{{...}}"
/// regex pieces and append one PatternChunk per piece to Chunks. An empty
/// pattern, an unterminated "{{" and an invalid regex are each reported
/// through SM.
/// NOTE(review): the return statements are not visible in this listing (the
/// embedded numbering has gaps after each diagnostic); presumably the function
/// returns true after a diagnostic and false otherwise -- confirm.
90 bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
91 // Ignore trailing whitespace (spaces and tabs only).
92 while (!PatternStr.empty() &&
93 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
94 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
96 // Check that there is something on the line.
97 if (PatternStr.empty()) {
98 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
99 "found empty check string with prefix '"+CheckPrefix+":'",
104 // Scan the pattern to break it into regex and non-regex pieces.
105 while (!PatternStr.empty()) {
106 // Handle fixed string matches: anything that does not start with "{{".
107 if (PatternStr.size() < 2 ||
108 PatternStr[0] != '{' || PatternStr[1] != '{') {
109 // Find the end, which is the start of the next regex.
// NOTE(review): when there is no further "{{", find() is expected to return
// npos and substr() to clamp, so the rest of the string becomes one fixed
// chunk and PatternStr becomes empty -- confirm StringRef::substr semantics.
110 size_t FixedMatchEnd = PatternStr.find("{{");
112 Chunks.push_back(PatternChunk(PatternStr.substr(0, FixedMatchEnd),false));
113 PatternStr = PatternStr.substr(FixedMatchEnd);
117 // Otherwise, this is the start of a regex match. Scan for the }}.
118 size_t End = PatternStr.find("}}");
119 if (End == StringRef::npos) {
120 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
121 "found start of regex string with no end '}}'", "error");
// The regex text starts after the two-character "{{" opener; End-2 is the
// number of characters between the delimiters. R is built here only so that
// the text can be validated; the chunk stored below keeps the text itself.
125 Regex R(PatternStr.substr(2, End-2));
127 if (!R.isValid(Error)) {
128 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2),
129 "invalid regex: " + Error, "error");
// Store the regex text and continue after the closing "}}".
133 Chunks.push_back(PatternChunk(PatternStr.substr(2, End-2), true));
134 PatternStr = PatternStr.substr(End+2);
140 /// Match - Match the pattern string against the input buffer Buffer. This
141 /// returns the position that is matched or npos if there is no match. If
142 /// there is a match, the size of the matched string is returned in MatchLen.
143 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const {
144 size_t FirstMatch = StringRef::npos;
// Each iteration attempts to match every chunk, in order, against the
// current Buffer.
147 while (!Buffer.empty()) {
148 StringRef MatchAttempt = Buffer;
150 unsigned ChunkNo = 0, e = Chunks.size();
151 for (; ChunkNo != e; ++ChunkNo) {
152 size_t ThisMatch, ThisLength = StringRef::npos;
153 ThisMatch = Chunks[ChunkNo].Match(MatchAttempt, ThisLength);
155 // What we do depends on whether this is the first chunk or not. If
156 // this is the first chunk, it doesn't need to match at the start of
// MatchAttempt.
159 // If the first match fails then this pattern will never match in
// Buffer.
161 if (ThisMatch == StringRef::npos)
// Remember where the first chunk matched and continue from that point.
164 FirstMatch = ThisMatch;
165 MatchAttempt = MatchAttempt.substr(FirstMatch);
169 // If this chunk didn't match, then the entire pattern didn't match from
170 // FirstMatch, try later in the buffer.
171 if (ThisMatch == StringRef::npos)
174 // Ok, if the match didn't match at the beginning of MatchAttempt, then we
175 // have something like "ABC{{DEF}}" and something was in-between. Reject
// the match.
180 // Otherwise, match the string and move to the next chunk.
// NOTE(review): MatchLen is accumulated with +=, and no initialization or
// per-attempt reset is visible in this listing -- confirm that it is zeroed
// before each new attempt, otherwise lengths from failed attempts carry over.
181 MatchLen += ThisLength;
182 MatchAttempt = MatchAttempt.substr(ThisLength);
185 // If the whole thing matched, we win.
189 // Otherwise, try matching again after FirstMatch to see if this pattern
190 // matches later in the buffer.
// NOTE(review): once Buffer has been shortened here, the FirstMatch found on
// a later iteration is an offset into the shortened Buffer. No visible line
// converts it back to an offset into the caller's buffer -- confirm that the
// returned position is correct when a retry has happened.
191 Buffer = Buffer.substr(FirstMatch+1);
194 // If we ran out of stuff to scan, then we didn't match.
195 return StringRef::npos;
199 //===----------------------------------------------------------------------===//
201 //===----------------------------------------------------------------------===//
203 /// CheckString - This is a check that we found in the check file.
205 /// Pat - The pattern to match.
208 /// Loc - The location in the match file that the check string was specified.
211 /// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed
212 /// to a CHECK: directive).
215 /// NotStrings - These are all of the strings that are disallowed from
216 /// occurring between this match string and the previous one (or start of
/// file).
218 std::vector<std::pair<SMLoc, Pattern> > NotStrings;
/// Construct a check from its pattern P, its location L and whether it is a
/// CHECK-NEXT directive. NotStrings is not set here; ReadCheckFile swaps the
/// pending CHECK-NOT patterns into it after construction.
220 CheckString(const Pattern &P, SMLoc L, bool isCheckNext)
221 : Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
224 /// CanonicalizeInputFile - Remove duplicate horizontal space from the specified
225 /// memory buffer, free it, and return a new one.
/// Every run of spaces and/or tabs is replaced by a single space; all other
/// characters are copied unchanged. The new buffer is created with the
/// buffer identifier of MB.
226 static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
227 SmallVector<char, 16> NewFile;
// Each input character contributes at most one output character, so the
// input size is enough room for the result.
228 NewFile.reserve(MB->getBufferSize());
230 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
232 // If the current character is not horizontal whitespace, copy it through.
233 if (*Ptr != ' ' && *Ptr != '\t') {
234 NewFile.push_back(*Ptr);
238 // Otherwise, add one space and advance over neighboring space. A tab is
// therefore emitted as a space as well.
239 NewFile.push_back(' ');
240 while (Ptr+1 != End &&
241 (Ptr[1] == ' ' || Ptr[1] == '\t'))
245 // Free the old buffer and return a new one.
// NOTE(review): the statement that releases MB is not visible in this
// listing (the embedded numbering skips 246 and 250-255) -- confirm.
247 MemoryBuffer::getMemBufferCopy(NewFile.data(),
248 NewFile.data() + NewFile.size(),
249 MB->getBufferIdentifier());
256 /// ReadCheckFile - Read the check file, which specifies the sequence of
257 /// expected strings. The strings are added to the CheckStrings vector.
/// CHECK-NOT patterns are queued separately and attached to the next regular
/// check string as its NotStrings. Problems are reported through SM or
/// errs().
/// NOTE(review): the return statements are not visible in this listing; main
/// tests the result in an `if`, so presumably true means failure -- confirm.
258 static bool ReadCheckFile(SourceMgr &SM,
259 std::vector<CheckString> &CheckStrings) {
260 // Open the check file, and tell SourceMgr about it.
261 std::string ErrorStr;
263 MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr);
265 errs() << "Could not open check file '" << CheckFilename << "': "
270 // If we want to canonicalize whitespace, strip excess whitespace from the
271 // buffer containing the CHECK lines.
272 if (!NoCanonicalizeWhiteSpace)
273 F = CanonicalizeInputFile(F);
275 SM.AddNewSourceBuffer(F, SMLoc());
277 // Find all instances of CheckPrefix followed by : in the file.
278 StringRef Buffer = F->getBuffer();
// CHECK-NOT patterns seen since the last regular check string.
280 std::vector<std::pair<SMLoc, Pattern> > NotMatches;
283 // See if Prefix occurs in the memory buffer.
284 Buffer = Buffer.substr(Buffer.find(CheckPrefix));
286 // If we didn't find a match, we're done.
// Remember where the prefix starts; it is used as the location of the
// CHECK-NEXT diagnostic below.
290 const char *CheckPrefixStart = Buffer.data();
292 // When we find a check prefix, keep track of whether we find CHECK:,
// CHECK-NEXT: or CHECK-NOT:.
294 bool IsCheckNext = false, IsCheckNot = false;
296 // Verify that the : is present after the prefix.
// NOTE(review): unlike the two branches below, this one indexes
// Buffer[CheckPrefix.size()] without first checking that Buffer is longer
// than the prefix. If the prefix is the last thing in the buffer this reads
// one past the end of the StringRef -- confirm and guard if needed.
297 if (Buffer[CheckPrefix.size()] == ':') {
298 Buffer = Buffer.substr(CheckPrefix.size()+1);
// NOTE(review): "-NEXT:" is 6 characters but 7 are skipped after the prefix
// (and 6 for the 5-character "-NOT:"), so the character right after the colon
// is dropped too. That is harmless when it is a space, but it would eat the
// first pattern character otherwise -- confirm this is intended.
299 } else if (Buffer.size() > CheckPrefix.size()+6 &&
300 memcmp(Buffer.data()+CheckPrefix.size(), "-NEXT:", 6) == 0) {
301 Buffer = Buffer.substr(CheckPrefix.size()+7);
303 } else if (Buffer.size() > CheckPrefix.size()+5 &&
304 memcmp(Buffer.data()+CheckPrefix.size(), "-NOT:", 5) == 0) {
305 Buffer = Buffer.substr(CheckPrefix.size()+6);
// Not a recognized directive: drop one character so that this occurrence of
// the prefix is no longer at the front of Buffer.
308 Buffer = Buffer.substr(1);
312 // Okay, we found the prefix, yay. Remember the rest of the line, but
313 // ignore leading and trailing whitespace.
314 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
316 // Scan ahead to the end of line.
317 size_t EOL = Buffer.find_first_of("\n\r");
319 // Parse the pattern.
321 if (P.ParsePattern(Buffer.substr(0, EOL), SM))
// Advance Buffer to the end-of-line position found above.
324 Buffer = Buffer.substr(EOL);
327 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
// NOTE(review): the message opens a quote before the second CheckPrefix but
// the literal ": line" never closes it -- confirm whether "': line" was meant.
328 if (IsCheckNext && CheckStrings.empty()) {
329 SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
330 "found '"+CheckPrefix+"-NEXT:' without previous '"+
331 CheckPrefix+ ": line", "error");
// Queue the CHECK-NOT pattern, using the current Buffer position as its
// location.
337 NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()),
343 // Okay, add the string we captured to the output vector and move on.
// NOTE(review): Buffer was already advanced to the end of the line above, so
// the location recorded here points at the end of the check line rather than
// at the pattern text -- confirm this is the intended diagnostic location.
344 CheckStrings.push_back(CheckString(P,
345 SMLoc::getFromPointer(Buffer.data()),
// Hand the queued CHECK-NOT patterns to the check string that was just added.
// The swap leaves NotMatches holding that element's previous NotStrings.
347 std::swap(NotMatches, CheckStrings.back().NotStrings);
// A check file that contains no check strings is reported as an error.
350 if (CheckStrings.empty()) {
351 errs() << "error: no check strings found with prefix '" << CheckPrefix
// CHECK-NOT patterns still queued here have no following check string to be
// attached to.
356 if (!NotMatches.empty()) {
357 errs() << "error: '" << CheckPrefix
358 << "-NOT:' not supported after last check line.\n";
/// PrintCheckFailed - Report that CheckStr was not found. This prints the
/// "expected string not found in input" message at the location of the check
/// string, followed by a "scanning from here" message that points into the
/// buffer that was being searched.
365 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
367 // Emit the primary message at the location of the check string.
368 SM.PrintMessage(CheckStr.Loc, "expected string not found in input",
371 // Print the "scanning from here" line. If the current position is at the
372 // end of a line, advance to the start of the next line.
// All leading spaces, tabs and line breaks are skipped, so the message points
// at the first character that is none of those.
373 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
375 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here",
379 /// CountNumNewlinesBetween - Count the number of newlines in the specified
/// range. Both '\n' and '\r' are treated as line breaks.
381 static unsigned CountNumNewlinesBetween(StringRef Range) {
382 unsigned NumNewLines = 0;
// Skip ahead to the next '\n' or '\r'. If none is left, the range becomes
// empty and the count is returned.
// NOTE(review): the loop header and the increment of NumNewLines are not
// visible in this listing (the embedded numbering skips 383-384 and 387-389)
// -- confirm against the upstream source.
385 Range = Range.substr(Range.find_first_of("\n\r"));
386 if (Range.empty()) return NumNewLines;
390 // Handle \n\r and \r\n as a single newline.
// When the next character is the other line-break character, one extra
// character is dropped so that the pair is consumed together.
391 if (Range.size() > 1 &&
392 (Range[1] == '\n' || Range[1] == '\r') &&
393 (Range[0] != Range[1]))
394 Range = Range.substr(1);
395 Range = Range.substr(1);
/// main - Read the check strings from the check file, then verify that each
/// one matches, in order, in the input file. CHECK-NEXT placement and
/// CHECK-NOT exclusions are verified as part of the same pass.
/// NOTE(review): the return statements are not visible in this listing; the
/// file header documents exit status 2 on error, 1 on mismatch and 0 on
/// success -- confirm against the upstream source.
399 int main(int argc, char **argv) {
400 sys::PrintStackTraceOnErrorSignal();
401 PrettyStackTraceProgram X(argc, argv);
402 cl::ParseCommandLineOptions(argc, argv);
406 // Read the expected strings from the check file.
407 std::vector<CheckString> CheckStrings;
408 if (ReadCheckFile(SM, CheckStrings))
411 // Open the file to check and add it to SourceMgr.
412 std::string ErrorStr;
414 MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr);
416 errs() << "Could not open input file '" << InputFilename << "': "
421 // Remove duplicate spaces in the input file unless strict whitespace
// handling was requested.
422 if (!NoCanonicalizeWhiteSpace)
423 F = CanonicalizeInputFile(F);
425 SM.AddNewSourceBuffer(F, SMLoc());
427 // Check that we have all of the expected strings, in order, in the input
// file.
429 StringRef Buffer = F->getBuffer();
// LastMatch is the end of the previous match. It starts at the beginning of
// the input buffer.
431 const char *LastMatch = Buffer.data();
433 for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) {
434 const CheckString &CheckStr = CheckStrings[StrNo];
// Keep the pre-match position for the "scanning from here" diagnostic.
436 StringRef SearchFrom = Buffer;
438 // Find StrNo in the file.
// NOTE(review): on a failed match, Match returns npos and substr(npos) is
// expected to yield an empty StringRef, which the test below relies on --
// confirm StringRef::substr semantics.
440 Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen));
442 // If we didn't find a match, reject the input.
443 if (Buffer.empty()) {
444 PrintCheckFailed(SM, CheckStr, SearchFrom);
// The text between the end of the previous match and the start of this one.
448 StringRef SkippedRegion(LastMatch, Buffer.data()-LastMatch);
450 // If this check is a "CHECK-NEXT", verify that the previous match was on
451 // the previous line (i.e. that there is one newline between them).
452 if (CheckStr.IsCheckNext) {
453 // Count the number of newlines between the previous match and this one.
454 assert(LastMatch != F->getBufferStart() &&
455 "CHECK-NEXT can't be the first check in a file");
457 unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion);
// Zero newlines: both matches are on the same line.
458 if (NumNewLines == 0) {
459 SM.PrintMessage(CheckStr.Loc,
460 CheckPrefix+"-NEXT: is on the same line as previous match",
462 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
463 "'next' match was here", "note");
464 SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
465 "previous match was here", "note");
// Any other count than one: this match is not on the line directly after the
// previous match.
469 if (NumNewLines != 1) {
470 SM.PrintMessage(CheckStr.Loc,
472 "-NEXT: is not on the line after the previous match",
474 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
475 "'next' match was here", "note");
476 SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
477 "previous match was here", "note");
482 // If this match had "not strings", verify that they don't exist in the
// skipped region.
// NOTE(review): the declaration of the MatchLen passed below is not visible
// in this listing (the embedded numbering skips 485). If it is the same
// variable as the MatchLen of the main match, the step over the matched text
// at the end of the loop would use a clobbered value -- confirm it is a
// separate local.
484 for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); ChunkNo != e; ++ChunkNo) {
486 size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, MatchLen);
487 if (Pos == StringRef::npos) continue;
// Pos is an offset into SkippedRegion, which starts at LastMatch.
489 SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
490 CheckPrefix+"-NOT: string occurred!", "error");
491 SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
492 CheckPrefix+"-NOT: pattern specified here", "note");
497 // Otherwise, everything is good. Step over the matched text and remember
498 // the position after the match as the end of the last match.
499 Buffer = Buffer.substr(MatchLen);
500 LastMatch = Buffer.data();