Avoid O(n*m) complexity in StringRef::find_first(_not)_of(StringRef).
authorBenjamin Kramer <benny.kra@googlemail.com>
Mon, 23 Aug 2010 18:16:08 +0000 (18:16 +0000)
committerBenjamin Kramer <benny.kra@googlemail.com>
Mon, 23 Aug 2010 18:16:08 +0000 (18:16 +0000)
- Cache used characters in a bitset to reduce memory overhead to just 32 bytes.
- On my core2 this code is faster except when the checked string was very short
  (smaller than the list of delimiters).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@111817 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/ADT/StringRef.h
lib/Support/StringRef.cpp

index a1be1e70c2acd569e6bf593fde8cb07e46688d42..8386d3ee428b1401643d4c6c3eba60b30ade39a1 100644 (file)
@@ -238,7 +238,7 @@ namespace llvm {
     /// find_first_of - Find the first character in the string that is in \arg
     /// Chars, or npos if not found.
     ///
-    /// Note: O(size() * Chars.size())
+    /// Note: O(size() + Chars.size())
     size_type find_first_of(StringRef Chars, size_t From = 0) const;
 
     /// find_first_not_of - Find the first character in the string that is not
@@ -248,7 +248,7 @@ namespace llvm {
     /// find_first_not_of - Find the first character in the string that is not
     /// in the string \arg Chars, or npos if not found.
     ///
-    /// Note: O(size() * Chars.size())
+    /// Note: O(size() + Chars.size())
     size_type find_first_not_of(StringRef Chars, size_t From = 0) const;
 
     /// @}
index ca0f518a88b6273b13ca47ee41886aa1ccda7331..40b732c62db6249d816994e3f8ef29b19a52c113 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/APInt.h"
+#include <bitset>
 
 using namespace llvm;
 
@@ -153,11 +154,15 @@ size_t StringRef::rfind(StringRef Str) const {
 /// find_first_of - Find the first character in the string that is in \arg
 /// Chars, or npos if not found.
 ///
-/// Note: O(size() * Chars.size())
+/// Note: O(size() + Chars.size())
 StringRef::size_type StringRef::find_first_of(StringRef Chars,
                                               size_t From) const {
+  std::bitset<1 << CHAR_BIT> CharBits;
+  for (size_type i = 0; i != Chars.size(); ++i)
+    CharBits.set((unsigned char)Chars[i]);
+
   for (size_type i = min(From, Length), e = Length; i != e; ++i)
-    if (Chars.find(Data[i]) != npos)
+    if (CharBits.test((unsigned char)Data[i]))
       return i;
   return npos;
 }
@@ -174,11 +179,15 @@ StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
 /// find_first_not_of - Find the first character in the string that is not
 /// in the string \arg Chars, or npos if not found.
 ///
-/// Note: O(size() * Chars.size())
+/// Note: O(size() + Chars.size())
 StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
                                                   size_t From) const {
+  std::bitset<1 << CHAR_BIT> CharBits;
+  for (size_type i = 0; i != Chars.size(); ++i)
+    CharBits.set((unsigned char)Chars[i]);
+
   for (size_type i = min(From, Length), e = Length; i != e; ++i)
-    if (Chars.find(Data[i]) == npos)
+    if (!CharBits.test((unsigned char)Data[i]))
       return i;
   return npos;
 }