[ADT] Rewrite the StringRef::find implementation to be simpler, clearer,

author Chandler Carruth <chandlerc@gmail.com>

Thu, 10 Sep 2015 11:17:49 +0000 (11:17 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Thu, 10 Sep 2015 11:17:49 +0000 (11:17 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Thu, 10 Sep 2015 11:17:49 +0000 (11:17 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Thu, 10 Sep 2015 11:17:49 +0000 (11:17 +0000)
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp

index 88f920479dd2fefe7ca58547136fa239ce35ac31..7ecff2964c517f25274740f12adc8a9b7b114e18 100644 (file)
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -140,37 +140,44 @@ std::string StringRef::upper() const {
  /// \return - The index of the first occurrence of \arg Str, or npos if not
  /// found.
  size_t StringRef::find(StringRef Str, size_t From) const {
  /// \return - The index of the first occurrence of \arg Str, or npos if not
  /// found.
  size_t StringRef::find(StringRef Str, size_t From) const {
+  if (From > Length)
+    return npos;
+
+  const char *Needle = Str.data();
    size_t N = Str.size();
    size_t N = Str.size();
-  if (N > Length)
+  if (N == 0)
+    return From;
+
+  size_t Size = Length - From;
+  if (Size < N)
      return npos;
  
      return npos;
  
+  const char *Start = Data + From;
+  const char *Stop = Start + (Size - N + 1);
+
    // For short haystacks or unsupported needles fall back to the naive algorithm
    // For short haystacks or unsupported needles fall back to the naive algorithm
-  if (Length < 16 || N > 255 || N == 0) {
-    for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
-      if (substr(i, N).equals(Str))
-        return i;
+  if (Size < 16 || N > 255) {
+    do {
+      if (std::memcmp(Start, Needle, N) == 0)
+        return Start - Data;
+      ++Start;
+    } while (Start < Stop);
      return npos;
    }
  
      return npos;
    }
  
-  if (From >= Length)
-    return npos;
-
    // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
    uint8_t BadCharSkip[256];
    std::memset(BadCharSkip, N, 256);
    for (unsigned i = 0; i != N-1; ++i)
      BadCharSkip[(uint8_t)Str[i]] = N-1-i;
  
    // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
    uint8_t BadCharSkip[256];
    std::memset(BadCharSkip, N, 256);
    for (unsigned i = 0; i != N-1; ++i)
      BadCharSkip[(uint8_t)Str[i]] = N-1-i;
  
-  unsigned Len = Length-From, Pos = From;
-  while (Len >= N) {
-    if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
-      return Pos;
+  do {
+    if (std::memcmp(Start, Needle, N) == 0)
+      return Start - Data;
  
      // Otherwise skip the appropriate number of bytes.
  
      // Otherwise skip the appropriate number of bytes.
-    uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
-    Len -= Skip;
-    Pos += Skip;
-  }
+    Start += BadCharSkip[(uint8_t)Start[N-1]];
+  } while (Start < Stop);
  
    return npos;
  }
  
    return npos;
  }
author	Chandler Carruth <chandlerc@gmail.com>
	Thu, 10 Sep 2015 11:17:49 +0000 (11:17 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Thu, 10 Sep 2015 11:17:49 +0000 (11:17 +0000)