/*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#ifndef FOLLY_RANGE_H_
#define FOLLY_RANGE_H_
-#include "folly/folly-config.h"
-#include "folly/FBString.h"
-#include <glog/logging.h>
+#include <folly/Portability.h>
+#include <folly/FBString.h>
#include <algorithm>
+#include <boost/operators.hpp>
#include <cstring>
+#include <glog/logging.h>
#include <iosfwd>
-#include <string>
#include <stdexcept>
+#include <string>
#include <type_traits>
-#include <boost/operators.hpp>
+
+// libc++ doesn't provide this header, nor does msvc
+#ifdef FOLLY_HAVE_BITS_CXXCONFIG_H
+// This file appears in two locations: inside fbcode and in the
+// libstdc++ source code (when embedding fbstring as std::string).
+// To aid in this schizophrenic use, two macros are defined in
+// c++config.h:
+// _LIBSTDCXX_FBSTRING - Set inside libstdc++. This is useful to
+// gate use inside fbcode v. libstdc++
#include <bits/c++config.h>
-#include "folly/CpuId.h"
-#include "folly/Traits.h"
+#endif
+
+#include <folly/CpuId.h>
+#include <folly/Traits.h>
+#include <folly/Likely.h>
+
+// Ignore shadowing warnings within this file, so includers can use -Wshadow.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
namespace folly {
* as Boyer-Moore. On the upside, it does not do any upfront
* preprocessing and does not allocate memory.
*/
-template <class T>
+template <class T, class Comp = std::equal_to<typename Range<T>::value_type>>
inline size_t qfind(const Range<T> & haystack,
- const Range<T> & needle);
+ const Range<T> & needle,
+ Comp eq = Comp());
/**
* Finds the first occurrence of needle in haystack. The result is the
size_t qfind(const Range<T> & haystack,
const typename Range<T>::value_type& needle);
+/**
+ * Finds the last occurrence of needle in haystack. The result is the
+ * offset reported to the beginning of haystack, or string::npos if
+ * needle wasn't found.
+ */
+template <class T>
+size_t rfind(const Range<T> & haystack,
+ const typename Range<T>::value_type& needle);
+
/**
* Finds the first occurrence of any element of needle in
typename std::iterator_traits<Iter>::reference>::type
value_type;
typedef typename std::iterator_traits<Iter>::reference reference;
- typedef std::char_traits<value_type> traits_type;
+ typedef std::char_traits<typename std::remove_const<value_type>::type>
+ traits_type;
static const size_type npos;
Range(Iter start, size_t size)
: b_(start), e_(start + size) { }
+#if FOLLY_HAVE_CONSTEXPR_STRLEN
+ // Works only for Range<const char*>
+ /* implicit */ constexpr Range(Iter str)
+ : b_(str), e_(str + strlen(str)) {}
+#else
// Works only for Range<const char*>
/* implicit */ Range(Iter str)
- : b_(str), e_(b_ + strlen(str)) {}
+ : b_(str), e_(str + strlen(str)) {}
+#endif
// Works only for Range<const char*>
/* implicit */ Range(const std::string& str)
: b_(str.data()), e_(b_ + str.size()) {}
+
// Works only for Range<const char*>
Range(const std::string& str, std::string::size_type startFrom) {
- CHECK_LE(startFrom, str.size());
+ if (UNLIKELY(startFrom > str.size())) {
+ throw std::out_of_range("index out of range");
+ }
b_ = str.data() + startFrom;
e_ = str.data() + str.size();
}
Range(const std::string& str,
std::string::size_type startFrom,
std::string::size_type size) {
- CHECK_LE(startFrom + size, str.size());
+ if (UNLIKELY(startFrom > str.size())) {
+ throw std::out_of_range("index out of range");
+ }
b_ = str.data() + startFrom;
- e_ = b_ + size;
+ if (str.size() - startFrom < size) {
+ e_ = str.data() + str.size();
+ } else {
+ e_ = b_ + size;
+ }
}
Range(const Range<Iter>& str,
size_t startFrom,
size_t size) {
- CHECK_LE(startFrom + size, str.size());
+ if (UNLIKELY(startFrom > str.size())) {
+ throw std::out_of_range("index out of range");
+ }
b_ = str.b_ + startFrom;
- e_ = b_ + size;
+ if (str.size() - startFrom < size) {
+ e_ = str.e_;
+ } else {
+ e_ = b_ + size;
+ }
}
// Works only for Range<const char*>
/* implicit */ Range(const fbstring& str)
: b_(str.data()), e_(b_ + str.size()) { }
// Works only for Range<const char*>
Range(const fbstring& str, fbstring::size_type startFrom) {
- CHECK_LE(startFrom, str.size());
+ if (UNLIKELY(startFrom > str.size())) {
+ throw std::out_of_range("index out of range");
+ }
b_ = str.data() + startFrom;
e_ = str.data() + str.size();
}
// Works only for Range<const char*>
Range(const fbstring& str, fbstring::size_type startFrom,
fbstring::size_type size) {
- CHECK_LE(startFrom + size, str.size());
+ if (UNLIKELY(startFrom > str.size())) {
+ throw std::out_of_range("index out of range");
+ }
b_ = str.data() + startFrom;
- e_ = b_ + size;
+ if (str.size() - startFrom < size) {
+ e_ = str.data() + str.size();
+ } else {
+ e_ = b_ + size;
+ }
}
// Allow implicit conversion from Range<const char*> (aka StringPiece) to
// direction.
template <class OtherIter, typename std::enable_if<
(std::is_same<Iter, const unsigned char*>::value &&
- std::is_same<OtherIter, const char*>::value), int>::type = 0>
+ (std::is_same<OtherIter, const char*>::value ||
+ std::is_same<OtherIter, char*>::value)), int>::type = 0>
/* implicit */ Range(const Range<OtherIter>& other)
: b_(reinterpret_cast<const unsigned char*>(other.begin())),
e_(reinterpret_cast<const unsigned char*>(other.end())) {
}
+ template <class OtherIter, typename std::enable_if<
+ (std::is_same<Iter, unsigned char*>::value &&
+ std::is_same<OtherIter, char*>::value), int>::type = 0>
+ /* implicit */ Range(const Range<OtherIter>& other)
+ : b_(reinterpret_cast<unsigned char*>(other.begin())),
+ e_(reinterpret_cast<unsigned char*>(other.end())) {
+ }
+
template <class OtherIter, typename std::enable_if<
(std::is_same<Iter, const char*>::value &&
- std::is_same<OtherIter, const unsigned char*>::value), int>::type = 0>
+ (std::is_same<OtherIter, const unsigned char*>::value ||
+ std::is_same<OtherIter, unsigned char*>::value)), int>::type = 0>
explicit Range(const Range<OtherIter>& other)
: b_(reinterpret_cast<const char*>(other.begin())),
e_(reinterpret_cast<const char*>(other.end())) {
}
+ template <class OtherIter, typename std::enable_if<
+ (std::is_same<Iter, char*>::value &&
+ std::is_same<OtherIter, unsigned char*>::value), int>::type = 0>
+ explicit Range(const Range<OtherIter>& other)
+ : b_(reinterpret_cast<char*>(other.begin())),
+ e_(reinterpret_cast<char*>(other.end())) {
+ }
+
+ // Allow implicit conversion from Range<From> to Range<To> if From is
+ // implicitly convertible to To.
+ template <class OtherIter, typename std::enable_if<
+ (!std::is_same<Iter, OtherIter>::value &&
+ std::is_convertible<OtherIter, Iter>::value), int>::type = 0>
+ /* implicit */ Range(const Range<OtherIter>& other)
+ : b_(other.begin()),
+ e_(other.end()) {
+ }
+
+ // Allow explicit conversion from Range<From> to Range<To> if From is
+ // explicitly convertible to To.
+ template <class OtherIter, typename std::enable_if<
+ (!std::is_same<Iter, OtherIter>::value &&
+ !std::is_convertible<OtherIter, Iter>::value &&
+ std::is_constructible<Iter, const OtherIter&>::value), int>::type = 0>
+ explicit Range(const Range<OtherIter>& other)
+ : b_(other.begin()),
+ e_(other.end()) {
+ }
+
void clear() {
b_ = Iter();
e_ = Iter();
}
value_type& operator[](size_t i) {
- CHECK_GT(size(), i);
+ DCHECK_GT(size(), i);
return b_[i];
}
const value_type& operator[](size_t i) const {
- CHECK_GT(size(), i);
+ DCHECK_GT(size(), i);
return b_[i];
}
}
void advance(size_type n) {
- CHECK_LE(n, size());
+ if (UNLIKELY(n > size())) {
+ throw std::out_of_range("index out of range");
+ }
b_ += n;
}
void subtract(size_type n) {
- CHECK_LE(n, size());
+ if (UNLIKELY(n > size())) {
+ throw std::out_of_range("index out of range");
+ }
e_ -= n;
}
Range subpiece(size_type first,
size_type length = std::string::npos) const {
- CHECK_LE(first, size());
+ if (UNLIKELY(first > size())) {
+ throw std::out_of_range("index out of range");
+ }
return Range(b_ + first,
std::min<std::string::size_type>(length, size() - first));
}
return qfind(*this, c);
}
+ size_type rfind(value_type c) const {
+ return folly::rfind(*this, c);
+ }
+
size_type find(value_type c, size_t pos) const {
if (pos > size()) return std::string::npos;
size_type ret = qfind(subpiece(pos), c);
return find(c, pos);
}
+ /**
+ * Determine whether the range contains the given subrange or item.
+ *
+ * Note: Call find() directly if the index is needed.
+ */
+ bool contains(const Range& other) const {
+ return find(other) != std::string::npos;
+ }
+
+ bool contains(const value_type& other) const {
+ return find(other) != std::string::npos;
+ }
+
void swap(Range& rhs) {
std::swap(b_, rhs.b_);
std::swap(e_, rhs.e_);
}
+ /**
+ * Does this Range start with another range?
+ */
+ bool startsWith(const Range& other) const {
+ return size() >= other.size() && subpiece(0, other.size()) == other;
+ }
+ bool startsWith(value_type c) const {
+ return !empty() && front() == c;
+ }
+
+ /**
+ * Does this Range end with another range?
+ */
+ bool endsWith(const Range& other) const {
+ return size() >= other.size() && subpiece(size() - other.size()) == other;
+ }
+ bool endsWith(value_type c) const {
+ return !empty() && back() == c;
+ }
+
+ /**
+ * Remove the given prefix and return true if the range starts with the given
+ * prefix; return false otherwise.
+ */
+ bool removePrefix(const Range& prefix) {
+ return startsWith(prefix) && (b_ += prefix.size(), true);
+ }
+ bool removePrefix(value_type prefix) {
+ return startsWith(prefix) && (++b_, true);
+ }
+
+ /**
+ * Remove the given suffix and return true if the range ends with the given
+ * suffix; return false otherwise.
+ */
+ bool removeSuffix(const Range& suffix) {
+ return endsWith(suffix) && (e_ -= suffix.size(), true);
+ }
+ bool removeSuffix(value_type suffix) {
+ return endsWith(suffix) && (--e_, true);
+ }
+
+ /**
+ * Splits this `Range` `[b, e)` in the position `i` dictated by the next
+ * occurence of `delimiter`.
+ *
+ * Returns a new `Range` `[b, i)` and adjusts this range to start right after
+ * the delimiter's position. This range will be empty if the delimiter is not
+ * found. If called on an empty `Range`, both this and the returned `Range`
+ * will be empty.
+ *
+ * Example:
+ *
+ * folly::StringPiece s("sample string for split_next");
+ * auto p = s.split_step(' ');
+ *
+ * // prints "string for split_next"
+ * cout << s << endl;
+ *
+ * // prints "sample"
+ * cout << p << endl;
+ *
+ * Example 2:
+ *
+ * void tokenize(StringPiece s, char delimiter) {
+ * while (!s.empty()) {
+ * cout << s.split_step(delimiter);
+ * }
+ * }
+ *
+ * @author: Marcelo Juchem <marcelo@fb.com>
+ */
+ Range split_step(value_type delimiter) {
+ auto i = std::find(b_, e_, delimiter);
+ Range result(b_, i);
+
+ b_ = i == e_ ? e_ : std::next(i);
+
+ return result;
+ }
+
+ Range split_step(Range delimiter) {
+ auto i = find(delimiter);
+ Range result(b_, i == std::string::npos ? size() : i);
+
+ b_ = result.end() == e_ ? e_ : std::next(result.end(), delimiter.size());
+
+ return result;
+ }
+
+ /**
+ * Convenience method that calls `split_step()` and passes the result to a
+ * functor, returning whatever the functor does. Any additional arguments
+ * `args` passed to this function are perfectly forwarded to the functor.
+ *
+ * Say you have a functor with this signature:
+ *
+ * Foo fn(Range r) { }
+ *
+ * `split_step()`'s return type will be `Foo`. It works just like:
+ *
+ * auto result = fn(myRange.split_step(' '));
+ *
+ * A functor returning `void` is also supported.
+ *
+ * Example:
+ *
+ * void do_some_parsing(folly::StringPiece s) {
+ * auto version = s.split_step(' ', [&](folly::StringPiece x) {
+ * if (x.empty()) {
+ * throw std::invalid_argument("empty string");
+ * }
+ * return std::strtoull(x.begin(), x.end(), 16);
+ * });
+ *
+ * // ...
+ * }
+ *
+ * struct Foo {
+ * void parse(folly::StringPiece s) {
+ * s.split_step(' ', parse_field, bar, 10);
+ * s.split_step('\t', parse_field, baz, 20);
+ *
+ * auto const kludge = [](folly::StringPiece x, int &out, int def) {
+ * if (x == "null") {
+ * out = 0;
+ * } else {
+ * parse_field(x, out, def);
+ * }
+ * };
+ *
+ * s.split_step('\t', kludge, gaz);
+ * s.split_step(' ', kludge, foo);
+ * }
+ *
+ * private:
+ * int bar;
+ * int baz;
+ * int gaz;
+ * int foo;
+ *
+ * static parse_field(folly::StringPiece s, int &out, int def) {
+ * try {
+ * out = folly::to<int>(s);
+ * } catch (std::exception const &) {
+ * value = def;
+ * }
+ * }
+ * };
+ *
+ * @author: Marcelo Juchem <marcelo@fb.com>
+ */
+ template <typename TProcess, typename... Args>
+ auto split_step(value_type delimiter, TProcess &&process, Args &&...args)
+ -> decltype(process(std::declval<Range>(), std::forward<Args>(args)...))
+ { return process(split_step(delimiter), std::forward<Args>(args)...); }
+
+ template <typename TProcess, typename... Args>
+ auto split_step(Range delimiter, TProcess &&process, Args &&...args)
+ -> decltype(process(std::declval<Range>(), std::forward<Args>(args)...))
+ { return process(split_step(delimiter), std::forward<Args>(args)...); }
+
private:
Iter b_, e_;
};
* Create a range from two iterators, with type deduction.
*/
template <class Iter>
-Range<Iter> makeRange(Iter first, Iter last) {
+Range<Iter> range(Iter first, Iter last) {
return Range<Iter>(first, last);
}
+/*
+ * Creates a range to reference the contents of a contiguous-storage container.
+ */
+// Use pointers for types with '.data()' member
+template <class Collection,
+ class T = typename std::remove_pointer<
+ decltype(std::declval<Collection>().data())>::type>
+Range<T*> range(Collection&& v) {
+ return Range<T*>(v.data(), v.data() + v.size());
+}
+
+template <class T, size_t n>
+Range<T*> range(T (&array)[n]) {
+ return Range<T*>(array, array + n);
+}
+
typedef Range<const char*> StringPiece;
+typedef Range<char*> MutableStringPiece;
typedef Range<const unsigned char*> ByteRange;
+typedef Range<unsigned char*> MutableByteRange;
std::ostream& operator<<(std::ostream& os, const StringPiece& piece);
size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
const StringPiece& needles);
-#if FOLLY_HAVE_EMMINTRIN_H
+#if FOLLY_HAVE_EMMINTRIN_H && __GNUC_PREREQ(4, 6)
size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
const StringPiece& needles);
}
};
+/**
+ * Check if two ascii characters are case insensitive equal.
+ * The difference between the lower/upper case characters are the 6-th bit.
+ * We also check they are alpha chars, in case of xor = 32.
+ */
struct AsciiCaseInsensitive {
bool operator()(char lhs, char rhs) const {
- return toupper(lhs) == toupper(rhs);
+ char k = lhs ^ rhs;
+ if (k == 0) return true;
+ if (k != 32) return false;
+ k = lhs | rhs;
+ return (k >= 'a' && k <= 'z');
}
};
template <class T>
size_t qfind(const Range<T>& haystack,
- const Range<T>& needle) {
- return qfind(haystack, needle, asciiCaseSensitive);
+ const typename Range<T>::value_type& needle) {
+ auto pos = std::find(haystack.begin(), haystack.end(), needle);
+ return pos == haystack.end() ? std::string::npos : pos - haystack.data();
}
template <class T>
-size_t qfind(const Range<T>& haystack,
+size_t rfind(const Range<T>& haystack,
const typename Range<T>::value_type& needle) {
- auto pos = std::find(haystack.begin(), haystack.end(), needle);
- return pos == haystack.end() ? std::string::npos : pos - haystack.data();
+ for (auto i = haystack.size(); i-- > 0; ) {
+ if (haystack[i] == needle) {
+ return i;
+ }
+ }
+ return std::string::npos;
}
// specialization for StringPiece
return pos == nullptr ? std::string::npos : pos - haystack.data();
}
+#if FOLLY_HAVE_MEMRCHR
+template <>
+inline size_t rfind(const Range<const char*>& haystack, const char& needle) {
+ auto pos = static_cast<const char*>(
+ ::memrchr(haystack.data(), needle, haystack.size()));
+ return pos == nullptr ? std::string::npos : pos - haystack.data();
+}
+#endif
+
// specialization for ByteRange
template <>
inline size_t qfind(const Range<const unsigned char*>& haystack,
return pos == nullptr ? std::string::npos : pos - haystack.data();
}
+#if FOLLY_HAVE_MEMRCHR
+template <>
+inline size_t rfind(const Range<const unsigned char*>& haystack,
+ const unsigned char& needle) {
+ auto pos = static_cast<const unsigned char*>(
+ ::memrchr(haystack.data(), needle, haystack.size()));
+ return pos == nullptr ? std::string::npos : pos - haystack.data();
+}
+#endif
+
template <class T>
size_t qfind_first_of(const Range<T>& haystack,
const Range<T>& needles) {
}
} // !namespace folly
+#pragma GCC diagnostic pop
+
FOLLY_ASSUME_FBVECTOR_COMPATIBLE_1(folly::Range);
#endif // FOLLY_RANGE_H_