//===----------------------------------------------------------------------===//
#include "llvm/Support/Regex.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallVector.h"
#include "regex_impl.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include <string>
using namespace llvm;
-Regex::Regex(const StringRef &regex, unsigned Flags) {
+/// Compile \p regex into the llvm_regex object held in `preg`.
+///
+/// The Regex flag bits in \p Flags are translated into their REG_* equivalents
+/// (IgnoreCase -> REG_ICASE, Newline -> REG_NEWLINE, and REG_EXTENDED unless
+/// BasicRegex is set). The result of llvm_regcomp is stored in `error` rather
+/// than reported from here.
+Regex::Regex(StringRef regex, unsigned Flags) {
unsigned flags = 0;
preg = new llvm_regex();
+ // The pattern end is handed over explicitly; REG_PEND is passed below,
+ // presumably so that re_endp (not a terminating NUL) bounds the pattern.
preg->re_endp = regex.end();
if (Flags & IgnoreCase)
flags |= REG_ICASE;
- if (Flags & NoSub) {
- flags |= REG_NOSUB;
- sub = false;
- } else {
- sub = true;
- }
if (Flags & Newline)
flags |= REG_NEWLINE;
- error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
+ // Extended syntax is the default; BasicRegex opts out of REG_EXTENDED.
+ if (!(Flags & BasicRegex))
+ flags |= REG_EXTENDED;
+ error = llvm_regcomp(preg, regex.data(), flags|REG_PEND);
+}
+
+/// Release the compiled pattern held in `preg`, if there is one.
+Regex::~Regex() {
+  // Nothing to free or delete when no pattern object is held.
+  if (!preg)
+    return;
+
+  llvm_regfree(preg);
+  delete preg;
}
+/// Report whether the stored compile status `error` is zero.
+///
+/// Returns true and leaves \p Error untouched when `error` is zero. Otherwise
+/// fills \p Error with the message produced by llvm_regerror and returns false.
bool Regex::isValid(std::string &Error) {
if (!error)
return true;
-
- size_t len = llvm_regerror(error, preg, NULL, 0);
- Error.resize(len);
+ // First call passes no buffer; its return value is used as the size needed.
+ size_t len = llvm_regerror(error, preg, nullptr, 0);
+
+ // Presumably len counts the terminating NUL (as POSIX regerror does), which
+ // is why the string itself is sized to len - 1 characters -- TODO confirm.
+ Error.resize(len - 1);
llvm_regerror(error, preg, &Error[0], len);
return false;
}
-Regex::~Regex() {
- llvm_regfree(preg);
- delete preg;
+/// Number of parenthesized matches in the compiled pattern, as recorded in
+/// its `re_nsub` field. Only meaningful for a valid regex.
+unsigned Regex::getNumMatches() const {
+  const unsigned NumGroups = preg->re_nsub;
+  return NumGroups;
}
-bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
+/// Match this regex against \p String.
+///
+/// When \p Matches is non-null it is cleared and refilled with
+/// `preg->re_nsub + 1` entries, one per slot of the `pm` array; a slot whose
+/// start offset is -1 yields an empty StringRef.
+/// NOTE(review): the matcher call and the no-match path are not among the
+/// lines visible in this hunk.
+bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
- if (Matches) {
- assert(sub && "Substring matching requested but pattern compiled without");
- Matches->clear();
- }
-
// pmatch needs to have at least one element.
SmallVector<llvm_regmatch_t, 8> pm;
pm.resize(nmatch > 0 ? nmatch : 1);
// There was a match.
if (Matches) { // match position requested
+ // Drop whatever the caller's vector held before appending the new entries.
+ Matches->clear();
+
for (unsigned i = 0; i != nmatch; ++i) {
if (pm[i].rm_so == -1) {
// this group didn't match
Matches->push_back(StringRef());
continue;
}
- assert(pm[i].rm_eo > pm[i].rm_so);
+ // Equal offsets (a zero-length match) are accepted, hence >= and not >.
+ assert(pm[i].rm_eo >= pm[i].rm_so);
Matches->push_back(StringRef(String.data()+pm[i].rm_so,
pm[i].rm_eo-pm[i].rm_so));
}
return true;
}
+
+/// Replace the first match of this regex in \p String with \p Repl.
+///
+/// \p Repl may contain backslash escapes: "\t" and "\n" produce a tab and a
+/// newline, a run of decimal digits is a backreference into the match list,
+/// and any other escaped character stands for itself. If \p Error is non-null
+/// it is emptied first and then receives the first problem found (a trailing
+/// backslash or an invalid backreference). When nothing matches, \p String is
+/// returned unchanged.
+std::string Regex::sub(StringRef Repl, StringRef String,
+                       std::string *Error) {
+  // Start from a clean error slate when the caller asked for diagnostics.
+  if (Error && !Error->empty())
+    Error->clear();
+
+  SmallVector<StringRef, 8> Groups;
+  if (!match(String, &Groups))
+    return String.str();
+
+  // Groups[0] is the whole match; everything in front of it is copied as-is.
+  const StringRef Whole = Groups[0];
+  std::string Out(String.begin(), Whole.begin());
+
+  // Walk the replacement text one backslash escape at a time.
+  while (!Repl.empty()) {
+    std::pair<StringRef, StringRef> Pieces = Repl.split('\\');
+    const StringRef Literal = Pieces.first;
+    const StringRef Rest = Pieces.second;
+
+    // Text up to the next backslash is literal.
+    Out += Literal;
+
+    // Nothing after the split point: either the replacement is exhausted, or
+    // it ended in a lone backslash (the literal part is then shorter than
+    // what was left of the replacement).
+    if (Rest.empty()) {
+      const bool EndedInBackslash = Repl.size() != Literal.size();
+      if (EndedInBackslash && Error && Error->empty())
+        *Error = "replacement string contained trailing backslash";
+      break;
+    }
+
+    // FIXME: We should have a StringExtras function for mapping C99 escapes.
+    Repl = Rest;
+    const char Esc = Repl[0];
+
+    // A run of decimal digits names a backreference.
+    if (Esc >= '0' && Esc <= '9') {
+      StringRef Digits = Repl.slice(0, Repl.find_first_not_of("0123456789"));
+      Repl = Repl.substr(Digits.size());
+
+      unsigned Index;
+      const bool Parsed = !Digits.getAsInteger(10, Index);
+      if (Parsed && Index < Groups.size())
+        Out += Groups[Index];
+      else if (Error && Error->empty())
+        *Error = ("invalid backreference string '" + Twine(Digits) + "'").str();
+      continue;
+    }
+
+    // Single-character escapes; anything unrecognized quotes itself.
+    char Translated = Esc;
+    if (Esc == 't')
+      Translated = '\t';
+    else if (Esc == 'n')
+      Translated = '\n';
+    Out += Translated;
+    Repl = Repl.substr(1);
+  }
+
+  // Finally, whatever followed the match in the input.
+  Out.append(Whole.end(), String.end());
+
+  return Out;
+}
+
+// These are the special characters matched in functions like "p_ere_exp".
+// The table is consulted by Regex::isLiteralERE() and Regex::escape() to
+// decide whether a character is a regex metacharacter.
+static const char RegexMetachars[] = "()^$|*+?.[]\\{}";
+
+/// Return true when \p Str contains none of the characters listed in
+/// RegexMetachars.
+bool Regex::isLiteralERE(StringRef Str) {
+  // The metacharacter list was derived from our regex implementation in
+  // regcomp.c and double checked against the POSIX extended regular
+  // expression specification.
+  const size_t FirstMeta = Str.find_first_of(RegexMetachars);
+  return FirstMeta == StringRef::npos;
+}
+
+/// Build a pattern from \p String by putting a backslash in front of every
+/// character listed in RegexMetachars; all other characters, including an
+/// embedded '\0', are copied through unchanged.
+std::string Regex::escape(StringRef String) {
+  // Search a length-bounded view of the table. strchr() also "finds" the
+  // table's terminating NUL, which made an embedded '\0' in String look like
+  // a metacharacter and earned it a spurious backslash.
+  const StringRef Metachars(RegexMetachars);
+
+  std::string RegexStr;
+  // Every input character is copied, so this is a lower bound on the result.
+  RegexStr.reserve(String.size());
+  for (char C : String) {
+    if (Metachars.find(C) != StringRef::npos)
+      RegexStr += '\\';
+    RegexStr += C;
+  }
+
+  return RegexStr;
+}