From: Lucian Grijincu Date: Tue, 18 Sep 2012 07:55:16 +0000 (-0700) Subject: folly: Range: implement find_first_of and optimize qfind(Range, char) X-Git-Tag: v0.22.0~1182 X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=commitdiff_plain;h=0010b30e696ac1e68bb3eb7992c328fd3214437c folly: Range: implement find_first_of and optimize qfind(Range, char) Summary: implement ##find_first_of## and optimize ##Range.find(char)## ============================================================================ folly/test/RangeBenchmark.cpp relative time/iter iters/s ============================================================================ LongFindSingleCharDirect 2.76ms 362.63 LongFindSingleCharRange 15.88% 17.37ms 57.58 ShortFindSingleCharDirect 53.41fs 18.72T ShortFindSingleCharRange 0.00% 29.22ns 34.22M ============================================================================ Test Plan: - added new tests - ran all folly tests fbconfig -r folly/ && mkk runtests_opt Reviewed By: tudorb@fb.com FB internal diff: D576720 --- diff --git a/folly/Range.h b/folly/Range.h index ac316bc5..ae4310f4 100644 --- a/folly/Range.h +++ b/folly/Range.h @@ -22,6 +22,8 @@ #include "folly/FBString.h" #include +#include +#include #include #include #include @@ -37,24 +39,33 @@ namespace folly { template class Range; /** -Finds the first occurrence of needle in haystack. The algorithm is on -average faster than O(haystack.size() * needle.size()) but not as fast -as Boyer-Moore. On the upside, it does not do any upfront -preprocessing and does not allocate memory. + * Finds the first occurrence of needle in haystack. The algorithm is on + * average faster than O(haystack.size() * needle.size()) but not as fast + * as Boyer-Moore. On the upside, it does not do any upfront + * preprocessing and does not allocate memory. */ template inline size_t qfind(const Range & haystack, const Range & needle); /** -Finds the first occurrence of needle in haystack. The result is the -offset reported to the beginning of haystack, or string::npos if -needle wasn't found. + * Finds the first occurrence of needle in haystack. The result is the + * offset reported to the beginning of haystack, or string::npos if + * needle wasn't found. */ template size_t qfind(const Range & haystack, const typename Range::value_type& needle); + +/** + * Finds the first occurrence of any element of needle in + * haystack. The algorithm is O(haystack.size() * needle.size()). + */ +template +inline size_t qfind_first_of(const Range & haystack, + const Range & needle); + /** * Small internal helper - returns the value just before an iterator. */ @@ -109,7 +120,7 @@ public: typedef typename std::iterator_traits::reference reference; typedef std::char_traits traits_type; - static const size_type npos = -1; + static const size_type npos = std::string::npos; // Works for all iterators Range() : b_(), e_() { @@ -348,10 +359,12 @@ public: return ret == npos ? ret : ret + pos; } + // Works only for Range which have Range(Iter) ctor size_type find(const Iter s) const { return qfind(*this, Range(s)); } + // Works only for Range which have Range(Iter) ctor size_type find(const Iter s, size_t pos) const { if (pos > size()) return std::string::npos; size_type ret = qfind(subpiece(pos), Range(s)); @@ -368,6 +381,38 @@ public: return ret == npos ? ret : ret + pos; } + size_type find_first_of(Range needles) const { + return qfind_first_of(*this, needles); + } + + size_type find_first_of(Range needles, size_t pos) const { + if (pos > size()) return std::string::npos; + size_type ret = qfind_first_of(pos ? subpiece(pos) : *this, needles); + return ret == npos ? ret : ret + pos; + } + + // Works only for Range which have Range(Iter) ctor + size_type find_first_of(Iter needles) const { + return find_first_of(Range(needles)); + } + + // Works only for Range which have Range(Iter) ctor + size_type find_first_of(Iter needles, size_t pos) const { + return find_first_of(Range(needles), pos); + } + + size_type find_first_of(Iter needles, size_t pos, size_t n) const { + return find_first_of(Range(needles, n), pos); + } + + size_type find_first_of(value_type c) const { + return find(c); + } + + size_type find_first_of(value_type c, size_t pos) const { + return find(c, pos); + } + void swap(Range& rhs) { std::swap(b_, rhs.b_); std::swap(e_, rhs.e_); @@ -547,6 +592,16 @@ size_t qfind(const Range& haystack, return std::string::npos; } +template +size_t qfind_first_of(const Range & haystack, + const Range & needle, + Comp eq) { + auto ret = std::find_first_of(haystack.begin(), haystack.end(), + needle.begin(), needle.end(), + eq); + return ret == haystack.end() ? std::string::npos : ret - haystack.begin(); +} + struct AsciiCaseSensitive { bool operator()(char lhs, char rhs) const { return lhs == rhs; @@ -571,7 +626,31 @@ size_t qfind(const Range& haystack, template size_t qfind(const Range& haystack, const typename Range::value_type& needle) { - return qfind(haystack, makeRange(&needle, &needle + 1)); + auto pos = std::find(haystack.begin(), haystack.end(), needle); + return pos == haystack.end() ? std::string::npos : pos - haystack.data(); +} + +// specialization for StringPiece +template <> +inline size_t qfind(const Range& haystack, const char& needle) { + auto pos = static_cast( + ::memchr(haystack.data(), needle, haystack.size())); + return pos == nullptr ? std::string::npos : pos - haystack.data(); +} + +// specialization for ByteRange +template <> +inline size_t qfind(const Range& haystack, + const unsigned char& needle) { + auto pos = static_cast( + ::memchr(haystack.data(), needle, haystack.size())); + return pos == nullptr ? std::string::npos : pos - haystack.data(); +} + +template +size_t qfind_first_of(const Range& haystack, + const Range& needle) { + return qfind_first_of(haystack, needle, asciiCaseSensitive); } } // !namespace folly diff --git a/folly/test/RangeFindBenchmark.cpp b/folly/test/RangeFindBenchmark.cpp new file mode 100644 index 00000000..868618f4 --- /dev/null +++ b/folly/test/RangeFindBenchmark.cpp @@ -0,0 +1,69 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/Range.h" +#include "folly/Benchmark.h" +#include "folly/Foreach.h" +#include +#include +#include + +using namespace folly; +using namespace std; + +namespace { + +std::string str; + +void initStr(int len) { + cout << "string length " << len << ':' << endl; + str.clear(); + str.reserve(len + 1); + str.append(len, 'a'); + str.append(1, 'b'); +} + +} // anonymous namespace + +BENCHMARK(FindSingleCharMemchr, n) { + StringPiece haystack(str); + FOR_EACH_RANGE (i, 0, n) { + doNotOptimizeAway(haystack.find('b')); + char x = haystack[0]; + doNotOptimizeAway(&x); + } +} + +BENCHMARK_RELATIVE(FindSingleCharRange, n) { + char c = 'b'; + StringPiece haystack(str); + folly::StringPiece needle(&c, &c + 1); + FOR_EACH_RANGE (i, 0, n) { + doNotOptimizeAway(haystack.find(needle)); + char x = haystack[0]; + doNotOptimizeAway(&x); + } +} + +int main(int argc, char** argv) { + google::ParseCommandLineFlags(&argc, &argv, true); + + for (int len : {10, 256, 10*1024, 10*1024*1024}) { + initStr(len); + runBenchmarks(); + } + return 0; +} diff --git a/folly/test/RangeTest.cpp b/folly/test/RangeTest.cpp index 65ff9e04..e1b055ef 100644 --- a/folly/test/RangeTest.cpp +++ b/folly/test/RangeTest.cpp @@ -86,6 +86,59 @@ TEST(StringPiece, All) { EXPECT_EQ(s.toString().find("notfound", 55), StringPiece::npos); EXPECT_EQ(s.find("z", s.size()), StringPiece::npos); EXPECT_EQ(s.find("z", 55), StringPiece::npos); + // empty needle + EXPECT_EQ(s.find(""), std::string().find("")); + EXPECT_EQ(s.find(""), 0); + + // single char finds + EXPECT_EQ(s.find('b'), 3); + EXPECT_EQ(s.find('b', 3), 3); + EXPECT_EQ(s.find('b', 4), 6); + EXPECT_EQ(s.find('o', 2), 2); + EXPECT_EQ(s.find('y'), StringPiece::npos); + EXPECT_EQ(s.find('y', 1), StringPiece::npos); + EXPECT_EQ(s.find('o', 4), StringPiece::npos); // starting position too far + // starting pos that is obviously past the end -- This works for std::string + EXPECT_EQ(s.toString().find('y', 55), StringPiece::npos); + EXPECT_EQ(s.find('z', s.size()), StringPiece::npos); + EXPECT_EQ(s.find('z', 55), StringPiece::npos); + // null char + EXPECT_EQ(s.find('\0'), std::string().find('\0')); + EXPECT_EQ(s.find('\0'), StringPiece::npos); + + // find_first_of + s.reset(foobarbaz, strlen(foobarbaz)); + EXPECT_EQ(s.find_first_of("bar"), 3); + EXPECT_EQ(s.find_first_of("ba", 3), 3); + EXPECT_EQ(s.find_first_of("ba", 4), 4); + EXPECT_EQ(s.find_first_of("xyxy"), StringPiece::npos); + EXPECT_EQ(s.find_first_of("xyxy", 1), StringPiece::npos); + // starting position too far + EXPECT_EQ(s.find_first_of("foo", 4), StringPiece::npos); + // starting pos that is obviously past the end -- This works for std::string + EXPECT_EQ(s.toString().find_first_of("xyxy", 55), StringPiece::npos); + EXPECT_EQ(s.find_first_of("z", s.size()), StringPiece::npos); + EXPECT_EQ(s.find_first_of("z", 55), StringPiece::npos); + // empty needle. Note that this returns npos, while find() returns 0! + EXPECT_EQ(s.find_first_of(""), std::string().find_first_of("")); + EXPECT_EQ(s.find_first_of(""), StringPiece::npos); + + // single char find_first_ofs + EXPECT_EQ(s.find_first_of('b'), 3); + EXPECT_EQ(s.find_first_of('b', 3), 3); + EXPECT_EQ(s.find_first_of('b', 4), 6); + EXPECT_EQ(s.find_first_of('o', 2), 2); + EXPECT_EQ(s.find_first_of('y'), StringPiece::npos); + EXPECT_EQ(s.find_first_of('y', 1), StringPiece::npos); + // starting position too far + EXPECT_EQ(s.find_first_of('o', 4), StringPiece::npos); + // starting pos that is obviously past the end -- This works for std::string + EXPECT_EQ(s.toString().find_first_of('y', 55), StringPiece::npos); + EXPECT_EQ(s.find_first_of('z', s.size()), StringPiece::npos); + EXPECT_EQ(s.find_first_of('z', 55), StringPiece::npos); + // null char + EXPECT_EQ(s.find_first_of('\0'), std::string().find_first_of('\0')); + EXPECT_EQ(s.find_first_of('\0'), StringPiece::npos); // just "barbaz" s.reset(foobarbaz + 3, strlen(foobarbaz + 3));