Skip to content

Commit

Permalink
add and document regex support for FileCheck. You can now do stuff like:
Browse files Browse the repository at this point in the history
; CHECK: movl {{%e[a-z][xi]}}, %eax

or whatever.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82717 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
lattner committed Sep 24, 2009
1 parent fe03dbe commit 5287008
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 15 deletions.
34 changes: 34 additions & 0 deletions docs/TestingGuide.html
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,40 @@

</div>

<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"><a
name="FileCheck-Matching">FileCheck Pattern Matching Syntax</a></div>

<div class="doc_text">

<p>The CHECK: and CHECK-NOT: directives both take a pattern to match. For most
uses of FileCheck, fixed string matching is perfectly sufficient. For some
things, a more flexible form of matching is desired. To support this, FileCheck
allows you to specify regular expressions in matching strings, surrounded by
double braces: <b>{{yourregex}}</b>. Because we want to use fixed string
matching for a majority of what we do, FileCheck has been designed to support
mixing and matching fixed string matching with regular expressions. This allows
you to write things like this:</p>

<div class="doc_code">
<pre>
; CHECK: movhpd <b>{{[0-9]+}}</b>(%esp), <b>{{%xmm[0-7]}}</b>
</pre>
</div>

<p>In this case, any offset from the ESP register will be allowed, and any xmm
register will be allowed.</p>

<p>Because regular expressions are enclosed with double braces, they are
visually distinct, and you don't need to use escape characters within the double
braces like you would in C. In the rare case that you want to match double
braces explicitly from the input, you can use something ugly like
<b>{{[{][{]}}</b> as your pattern.</p>

</div>



<!-- _______________________________________________________________________ -->
<div class="doc_subsection"><a name="dgvars">Variables and
substitutions</a></div>
Expand Down
9 changes: 7 additions & 2 deletions include/llvm/Support/Regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <string>

struct llvm_regex;

namespace llvm {
class StringRef;
template<typename T> class SmallVectorImpl;

class Regex {
public:
enum {
Expand Down Expand Up @@ -54,6 +57,8 @@ namespace llvm {
/// Matches.
/// For this feature to be enabled you must construct the regex using
/// Regex("...", Regex::Sub) constructor.
///
/// This returns true on a successful match.
bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
private:
struct llvm_regex *preg;
Expand Down
5 changes: 3 additions & 2 deletions lib/Support/Regex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
#include "llvm/Support/Regex.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "regex_impl.h"
#include <string>
using namespace llvm;

Regex::Regex(const StringRef &regex, unsigned Flags) {
unsigned flags = 0;
preg = new struct llvm_regex;
preg = new llvm_regex();
preg->re_endp = regex.end();
if (Flags & IgnoreCase)
flags |= REG_ICASE;
Expand Down Expand Up @@ -60,7 +61,7 @@ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
}

// pmatch needs to have at least one element.
SmallVector<llvm_regmatch_t, 2> pm;
SmallVector<llvm_regmatch_t, 8> pm;
pm.resize(nmatch > 0 ? nmatch : 1);
pm[0].rm_so = 0;
pm[0].rm_eo = String.size();
Expand Down
1 change: 1 addition & 0 deletions unittests/Support/RegexTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "gtest/gtest.h"
#include "llvm/Support/Regex.h"
#include "llvm/ADT/SmallVector.h"
#include <cstring>

using namespace llvm;
Expand Down
127 changes: 116 additions & 11 deletions utils/FileCheck/FileCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
Expand All @@ -44,8 +45,9 @@ NoCanonicalizeWhiteSpace("strict-whitespace",
//===----------------------------------------------------------------------===//

class Pattern {
/// Str - The string to match.
StringRef Str;
/// Chunks - The pattern chunks to match. If the bool is false, it is a fixed
/// string match, if it is true, it is a regex match.
SmallVector<std::pair<StringRef, bool>, 4> Chunks;
public:

Pattern() { }
Expand All @@ -55,10 +57,7 @@ class Pattern {
/// Match - Match the pattern string against the input buffer Buffer. This
/// returns the position that is matched or npos if there is no match. If
/// there is a match, the size of the matched string is returned in MatchLen.
size_t Match(StringRef Buffer, size_t &MatchLen) const {
MatchLen = Str.size();
return Buffer.find(Str);
}
size_t Match(StringRef Buffer, size_t &MatchLen) const;
};

bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
Expand All @@ -74,11 +73,117 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
"error");
return true;
}

// Scan the pattern to break it into regex and non-regex pieces.
while (!PatternStr.empty()) {
// Handle fixed string matches.
if (PatternStr.size() < 2 ||
PatternStr[0] != '{' || PatternStr[1] != '{') {
// Find the end, which is the start of the next regex.
size_t FixedMatchEnd = PatternStr.find("{{");

Chunks.push_back(std::make_pair(PatternStr.substr(0, FixedMatchEnd),
false));
PatternStr = PatternStr.substr(FixedMatchEnd);
continue;
}

// Otherwise, this is the start of a regex match. Scan for the }}.
size_t End = PatternStr.find("}}");
if (End == StringRef::npos) {
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
"found start of regex string with no end '}}'", "error");
return true;
}

Regex R(PatternStr.substr(2, End-2));
std::string Error;
if (!R.isValid(Error)) {
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2),
"invalid regex: " + Error, "error");
return true;
}

Chunks.push_back(std::make_pair(PatternStr.substr(2, End-2), true));
PatternStr = PatternStr.substr(End+2);
}

return false;
}

/// Match - Match the pattern string against the input buffer Buffer.  This
/// returns the position of the match, measured from the start of the Buffer
/// that was passed in, or StringRef::npos if there is no match.  If there is
/// a match, the size of the matched string is returned in MatchLen; when there
/// is no match MatchLen is left at 0.
///
/// The pattern is a sequence of chunks (fixed strings and regexes).  The first
/// chunk may match anywhere in the buffer; every following chunk must match
/// immediately after the previous one.
size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const {
  MatchLen = 0;

  // A pattern with no chunks can never match anything.
  if (Chunks.empty())
    return StringRef::npos;

  // Buffer is trimmed from the front every time an attempt fails, so remember
  // how far it has moved from the start of the caller's buffer in order to
  // report the match position in the caller's coordinates.
  size_t BufferOffset = 0;

  SmallVector<StringRef, 4> MatchInfo;

  while (!Buffer.empty()) {
    StringRef MatchAttempt = Buffer;

    // Per-attempt state: where the first chunk matched (relative to Buffer)
    // and how many characters this attempt has consumed so far.  These must
    // not carry over from a previous, failed attempt.
    size_t FirstMatch = StringRef::npos;
    size_t AttemptLen = 0;

    unsigned ChunkNo = 0, e = Chunks.size();
    for (; ChunkNo != e; ++ChunkNo) {
      StringRef ChunkStr = Chunks[ChunkNo].first;

      size_t ThisMatch = StringRef::npos;
      size_t ThisLength = StringRef::npos;
      if (!Chunks[ChunkNo].second) {
        // Fixed string match.
        ThisMatch = MatchAttempt.find(ChunkStr);
        ThisLength = ChunkStr.size();
      } else {
        // Regex match.  Start from a clean capture list so that a stale entry
        // can never be mistaken for the result of this match.
        MatchInfo.clear();
        if (Regex(ChunkStr, Regex::Sub).match(MatchAttempt, &MatchInfo)) {
          assert(!MatchInfo.empty() && "Didn't get any match");
          StringRef FullMatch = MatchInfo[0];

          ThisMatch = FullMatch.data()-MatchAttempt.data();
          ThisLength = FullMatch.size();
        }
      }

      // What we do next depends on whether this is the first chunk or not.
      // The first chunk doesn't have to match at the start of MatchAttempt.
      if (ChunkNo == 0) {
        // If the first chunk fails to match then this pattern will never
        // match in Buffer.
        if (ThisMatch == StringRef::npos)
          return StringRef::npos;

        FirstMatch = ThisMatch;
        MatchAttempt = MatchAttempt.substr(FirstMatch);
        ThisMatch = 0;
      }

      // If this chunk didn't match, then the entire pattern didn't match from
      // FirstMatch, try later in the buffer.
      if (ThisMatch == StringRef::npos)
        break;

      // If the chunk didn't match at the very beginning of MatchAttempt, then
      // we have something like "ABC{{DEF}}" and something was in-between.
      // Reject the match.
      if (ThisMatch != 0)
        break;

      // Otherwise, consume the matched text and move on to the next chunk.
      AttemptLen += ThisLength;
      MatchAttempt = MatchAttempt.substr(ThisLength);
    }

    // If the whole thing matched, we win.
    if (ChunkNo == e) {
      MatchLen = AttemptLen;
      return BufferOffset + FirstMatch;
    }

    // Otherwise, try matching again after FirstMatch to see if this pattern
    // matches later in the buffer.
    Buffer = Buffer.substr(FirstMatch+1);
    BufferOffset += FirstMatch+1;
  }

  // If we ran out of stuff to scan, then we didn't match.
  return StringRef::npos;
}


Expand Down Expand Up @@ -367,14 +472,14 @@ int main(int argc, char **argv) {

// If this match had "not strings", verify that they don't exist in the
// skipped region.
for (unsigned i = 0, e = CheckStr.NotStrings.size(); i != e; ++i) {
for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); ChunkNo != e; ++ChunkNo) {
size_t MatchLen = 0;
size_t Pos = CheckStr.NotStrings[i].second.Match(SkippedRegion, MatchLen);
size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, MatchLen);
if (Pos == StringRef::npos) continue;

SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
CheckPrefix+"-NOT: string occurred!", "error");
SM.PrintMessage(CheckStr.NotStrings[i].first,
SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
CheckPrefix+"-NOT: pattern specified here", "note");
return 1;
}
Expand Down

0 comments on commit 5287008

Please sign in to comment.