summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
040f4f6)
and tremendously less reliant on the optimizer to fix things.
The code is always necessarily looking for the entire length of the
string when doing the equality tests in this find implementation, but it
previously was needlessly re-checking the size each time among other
annoyances.
By writing this so simply an ddirectly in terms of memcmp, it also is
about 8x faster in a debug build, which in turn makes FileCheck about 2x
faster in 'ninja check-llvm'. This saves about 8% of the time for
FileCheck-heavy parts of the test suite like the x86 backend tests.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@247269
91177308-0d34-0410-b5e6-
96231b3b80d8
/// \return - The index of the first occurrence of \arg Str, or npos if not
/// found.
size_t StringRef::find(StringRef Str, size_t From) const {
/// \return - The index of the first occurrence of \arg Str, or npos if not
/// found.
size_t StringRef::find(StringRef Str, size_t From) const {
+ if (From > Length)
+ return npos;
+
+ const char *Needle = Str.data();
+ if (N == 0)
+ return From;
+
+ size_t Size = Length - From;
+ if (Size < N)
+ const char *Start = Data + From;
+ const char *Stop = Start + (Size - N + 1);
+
// For short haystacks or unsupported needles fall back to the naive algorithm
// For short haystacks or unsupported needles fall back to the naive algorithm
- if (Length < 16 || N > 255 || N == 0) {
- for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
- if (substr(i, N).equals(Str))
- return i;
+ if (Size < 16 || N > 255) {
+ do {
+ if (std::memcmp(Start, Needle, N) == 0)
+ return Start - Data;
+ ++Start;
+ } while (Start < Stop);
- if (From >= Length)
- return npos;
-
// Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
uint8_t BadCharSkip[256];
std::memset(BadCharSkip, N, 256);
for (unsigned i = 0; i != N-1; ++i)
BadCharSkip[(uint8_t)Str[i]] = N-1-i;
// Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
uint8_t BadCharSkip[256];
std::memset(BadCharSkip, N, 256);
for (unsigned i = 0; i != N-1; ++i)
BadCharSkip[(uint8_t)Str[i]] = N-1-i;
- unsigned Len = Length-From, Pos = From;
- while (Len >= N) {
- if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
- return Pos;
+ do {
+ if (std::memcmp(Start, Needle, N) == 0)
+ return Start - Data;
// Otherwise skip the appropriate number of bytes.
// Otherwise skip the appropriate number of bytes.
- uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
- Len -= Skip;
- Pos += Skip;
- }
+ Start += BadCharSkip[(uint8_t)Start[N-1]];
+ } while (Start < Stop);