/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "folly/FBString.h"
#include <glog/logging.h>
+#include <algorithm>
+#include <cstring>
#include <iostream>
#include <string>
#include <stdexcept>
#include <boost/utility/enable_if.hpp>
#include <boost/type_traits.hpp>
#include <bits/c++config.h>
+#include "folly/CpuId.h"
#include "folly/Traits.h"
namespace folly {
template <class T> class Range;
/**
-Finds the first occurrence of needle in haystack. The algorithm is on
-average faster than O(haystack.size() * needle.size()) but not as fast
-as Boyer-Moore. On the upside, it does not do any upfront
-preprocessing and does not allocate memory.
+ * Finds the first occurrence of needle in haystack. The algorithm is on
+ * average faster than O(haystack.size() * needle.size()) but not as fast
+ * as Boyer-Moore. On the upside, it does not do any upfront
+ * preprocessing and does not allocate memory.
*/
template <class T>
inline size_t qfind(const Range<T> & haystack,
const Range<T> & needle);
/**
-Finds the first occurrence of needle in haystack. The result is the
-offset reported to the beginning of haystack, or string::npos if
-needle wasn't found.
+ * Finds the first occurrence of needle in haystack. The result is the
+ * offset reported to the beginning of haystack, or string::npos if
+ * needle wasn't found.
*/
template <class T>
size_t qfind(const Range<T> & haystack,
const typename Range<T>::value_type& needle);
+
+/**
+ * Finds the first occurrence of any element of needle in
+ * haystack. The algorithm is O(haystack.size() * needle.size()).
+ */
+template <class T>
+inline size_t qfind_first_of(const Range<T> & haystack,
+ const Range<T> & needle);
+
/**
* Small internal helper - returns the value just before an iterator.
*/
typedef typename std::iterator_traits<Iter>::reference reference;
typedef std::char_traits<value_type> traits_type;
- static const size_type npos = -1;
+ static const size_type npos;
// Works for all iterators
Range() : b_(), e_() {
}
-private:
- static bool reachable(Iter b, Iter e, std::forward_iterator_tag) {
- for (; b != e; ++b) {
- LOG_EVERY_N(INFO, 100000) << __FILE__ ":" << __LINE__
- << " running reachability test ("
- << google::COUNTER << " iterations)...";
- }
- return true;
- }
-
- static bool reachable(Iter b, Iter e, std::random_access_iterator_tag) {
- return b <= e;
- }
-
public:
// Works for all iterators
- Range(Iter start, Iter end)
- : b_(start), e_(end) {
- assert(reachable(b_, e_,
- typename std::iterator_traits<Iter>::iterator_category()));
+ Range(Iter start, Iter end) : b_(start), e_(end) {
}
// Works only for random-access iterators
// Allow implicit conversion from Range<const char*> (aka StringPiece) to
// Range<const unsigned char*> (aka ByteRange), as they're both frequently
- // used to represent ranges of bytes.
- template <typename std::enable_if<
- (std::is_same<Iter, const unsigned char*>::value), int>::type = 0>
- /* implicit */ Range(const Range<const char*>& other)
+ // used to represent ranges of bytes. Allow explicit conversion in the other
+ // direction.
+ template <class OtherIter, typename std::enable_if<
+ (std::is_same<Iter, const unsigned char*>::value &&
+ std::is_same<OtherIter, const char*>::value), int>::type = 0>
+ /* implicit */ Range(const Range<OtherIter>& other)
: b_(reinterpret_cast<const unsigned char*>(other.begin())),
e_(reinterpret_cast<const unsigned char*>(other.end())) {
}
+ template <class OtherIter, typename std::enable_if<
+ (std::is_same<Iter, const char*>::value &&
+ std::is_same<OtherIter, const unsigned char*>::value), int>::type = 0>
+ explicit Range(const Range<OtherIter>& other)
+ : b_(reinterpret_cast<const char*>(other.begin())),
+ e_(reinterpret_cast<const char*>(other.end())) {
+ }
+
void clear() {
b_ = Iter();
e_ = Iter();
return ret == npos ? ret : ret + pos;
}
+ // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
size_type find(const Iter s) const {
return qfind(*this, Range(s));
}
+ // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
size_type find(const Iter s, size_t pos) const {
if (pos > size()) return std::string::npos;
size_type ret = qfind(subpiece(pos), Range(s));
return ret == npos ? ret : ret + pos;
}
+ size_type find_first_of(Range needles) const {
+ return qfind_first_of(*this, needles);
+ }
+
+ size_type find_first_of(Range needles, size_t pos) const {
+ if (pos > size()) return std::string::npos;
+ size_type ret = qfind_first_of(subpiece(pos), needles);
+ return ret == npos ? ret : ret + pos;
+ }
+
+ // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
+ size_type find_first_of(Iter needles) const {
+ return find_first_of(Range(needles));
+ }
+
+ // Works only for Range<const (unsigned) char*> which have Range(Iter) ctor
+ size_type find_first_of(Iter needles, size_t pos) const {
+ return find_first_of(Range(needles), pos);
+ }
+
+ size_type find_first_of(Iter needles, size_t pos, size_t n) const {
+ return find_first_of(Range(needles, n), pos);
+ }
+
+ size_type find_first_of(value_type c) const {
+ return find(c);
+ }
+
+ size_type find_first_of(value_type c, size_t pos) const {
+ return find(c, pos);
+ }
+
void swap(Range& rhs) {
std::swap(b_, rhs.b_);
std::swap(e_, rhs.e_);
};
template <class Iter>
-const typename Range<Iter>::size_type Range<Iter>::npos;
+const typename Range<Iter>::size_type Range<Iter>::npos = std::string::npos;
template <class T>
void swap(Range<T>& lhs, Range<T>& rhs) {
return std::string::npos;
}
+namespace detail {
+
+size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
+ const StringPiece& needles);
+
+#if FOLLY_HAVE_EMMINTRIN_H
+size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
+ const StringPiece& needles);
+
+inline size_t qfind_first_byte_of(const StringPiece& haystack,
+ const StringPiece& needles) {
+ static auto const qfind_first_byte_of_fn =
+ folly::CpuId().sse42() ? qfind_first_byte_of_sse42
+ : qfind_first_byte_of_nosse;
+ return qfind_first_byte_of_fn(haystack, needles);
+}
+
+#else
+inline size_t qfind_first_byte_of(const StringPiece& haystack,
+ const StringPiece& needles) {
+ return qfind_first_byte_of_nosse(haystack, needles);
+}
+#endif // FOLLY_HAVE_EMMINTRIN_H
+
+} // namespace detail
+
+template <class T, class Comp>
+size_t qfind_first_of(const Range<T> & haystack,
+ const Range<T> & needles,
+ Comp eq) {
+ auto ret = std::find_first_of(haystack.begin(), haystack.end(),
+ needles.begin(), needles.end(),
+ eq);
+ return ret == haystack.end() ? std::string::npos : ret - haystack.begin();
+}
+
struct AsciiCaseSensitive {
bool operator()(char lhs, char rhs) const {
return lhs == rhs;
template <class T>
size_t qfind(const Range<T>& haystack,
const typename Range<T>::value_type& needle) {
- return qfind(haystack, makeRange(&needle, &needle + 1));
+ auto pos = std::find(haystack.begin(), haystack.end(), needle);
+ return pos == haystack.end() ? std::string::npos : pos - haystack.data();
+}
+
+// specialization for StringPiece
+template <>
+inline size_t qfind(const Range<const char*>& haystack, const char& needle) {
+ auto pos = static_cast<const char*>(
+ ::memchr(haystack.data(), needle, haystack.size()));
+ return pos == nullptr ? std::string::npos : pos - haystack.data();
+}
+
+// specialization for ByteRange
+template <>
+inline size_t qfind(const Range<const unsigned char*>& haystack,
+ const unsigned char& needle) {
+ auto pos = static_cast<const unsigned char*>(
+ ::memchr(haystack.data(), needle, haystack.size()));
+ return pos == nullptr ? std::string::npos : pos - haystack.data();
}
+template <class T>
+size_t qfind_first_of(const Range<T>& haystack,
+ const Range<T>& needles) {
+ return qfind_first_of(haystack, needles, asciiCaseSensitive);
+}
+
+// specialization for StringPiece
+template <>
+inline size_t qfind_first_of(const Range<const char*>& haystack,
+ const Range<const char*>& needles) {
+ return detail::qfind_first_byte_of(haystack, needles);
+}
+
+// specialization for ByteRange
+template <>
+inline size_t qfind_first_of(const Range<const unsigned char*>& haystack,
+ const Range<const unsigned char*>& needles) {
+ return detail::qfind_first_byte_of(StringPiece(haystack),
+ StringPiece(needles));
+}
} // !namespace folly
FOLLY_ASSUME_FBVECTOR_COMPATIBLE_1(folly::Range);