/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef FOLLY_STRING_INL_H_
-#define FOLLY_STRING_INL_H_
+#pragma once
-#include <stdexcept>
#include <iterator>
+#include <stdexcept>
+
+#include <folly/CppAttributes.h>
-#ifndef FOLLY_BASE_STRING_H_
+#ifndef FOLLY_STRING_H_
#error This file may only be included from String.h
#endif
// an octal escape sequence, or 'P' if the character is printable and
// should be printed as is.
extern const char cEscapeTable[];
-} // namespace detail
+} // namespace detail
template <class String>
void cEscape(StringPiece str, String& out) {
if (e == 'P') { // printable
++p;
} else if (e == 'O') { // octal
- out.append(&*last, p - last);
+ out.append(&*last, size_t(p - last));
esc[1] = '0' + ((v >> 6) & 7);
esc[2] = '0' + ((v >> 3) & 7);
esc[3] = '0' + (v & 7);
++p;
last = p;
} else { // special 1-character escape
- out.append(&*last, p - last);
+ out.append(&*last, size_t(p - last));
esc[1] = e;
out.append(esc, 2);
++p;
last = p;
}
}
- out.append(&*last, p - last);
+ out.append(&*last, size_t(p - last));
}
namespace detail {
// Map from the character code to the hex value, or 16 if invalid hex char.
extern const unsigned char hexTable[];
-} // namespace detail
+} // namespace detail
template <class String>
void cUnescape(StringPiece str, String& out, bool strict) {
out.append(&*last, p - last);
}
+namespace detail {
+// Map from character code to escape mode:
+// 0 = pass through
+// 1 = unused
+// 2 = pass through in PATH mode
+// 3 = space, replace with '+' in QUERY mode
+// 4 = percent-encode
+//
+// uriEscape() indexes this table with the character converted to
+// unsigned char, and copies the character unchanged when its entry is <= the
+// numeric value of the UriEscapeMode it was given.
+extern const unsigned char uriEscapeTable[];
+} // namespace detail
+
+// Appends a URI-escaped copy of `str` to `out`. `out` is not cleared first.
+//
+// Each input byte is classified through detail::uriEscapeTable:
+//  - entry <= numeric value of `mode`: the byte is copied unchanged;
+//  - otherwise, entry 3 while mode is UriEscapeMode::QUERY: '+' is emitted;
+//  - otherwise: '%' followed by two lowercase hex digits of the byte.
+//
+// String must provide size(), reserve(), append(const char*, size_t) and
+// push_back(char), since those are the calls made below.
+template <class String>
+void uriEscape(StringPiece str, String& out, UriEscapeMode mode) {
+ static const char hexValues[] = "0123456789abcdef";
+ // Scratch buffer for one "%XY" sequence; only the two digits change.
+ char esc[3];
+ esc[0] = '%';
+ // Preallocate assuming that 25% of the input string will be escaped
+ out.reserve(out.size() + str.size() + 3 * (str.size() / 4));
+ auto p = str.begin();
+ auto last = p; // start of the current run that has not been copied to out yet
+ // We advance over runs of passthrough characters and copy them in one go;
+ // this is faster than calling push_back repeatedly.
+ // Table entries greater than this value are not passed through.
+ unsigned char minEncode = static_cast<unsigned char>(mode);
+ while (p != str.end()) {
+ char c = *p;
+ unsigned char v = static_cast<unsigned char>(c);
+ unsigned char discriminator = detail::uriEscapeTable[v];
+ if (LIKELY(discriminator <= minEncode)) {
+ // Passthrough: just extend the pending run.
+ ++p;
+ } else if (mode == UriEscapeMode::QUERY && discriminator == 3) {
+ // Flush the pending run, then emit '+' in place of this character.
+ out.append(&*last, size_t(p - last));
+ out.push_back('+');
+ ++p;
+ last = p;
+ } else {
+ // Flush the pending run, then emit the byte as %XY (high nibble first).
+ out.append(&*last, size_t(p - last));
+ esc[1] = hexValues[v >> 4];
+ esc[2] = hexValues[v & 0x0f];
+ out.append(esc, 3);
+ ++p;
+ last = p;
+ }
+ }
+ // Copy whatever passthrough run is still pending at the end of the input.
+ out.append(&*last, size_t(p - last));
+}
+
+// Appends the URI-unescaped form of `str` to `out`. `out` is not cleared
+// first.
+//
+//  - "%XY", where X and Y are accepted by detail::hexTable, becomes the
+//    single byte (X << 4) | Y.
+//  - '+' becomes ' ' when mode is UriEscapeMode::QUERY; in any other mode it
+//    is copied unchanged.
+//  - Every other character is copied unchanged.
+//
+// Throws std::invalid_argument if a '%' is followed by fewer than two
+// characters, or if either of the two characters maps to 16 in
+// detail::hexTable. Nothing is rolled back on throw, so `out` may already
+// hold the output produced for the part of `str` before the bad sequence.
+template <class String>
+void uriUnescape(StringPiece str, String& out, UriEscapeMode mode) {
+ out.reserve(out.size() + str.size());
+ auto p = str.begin();
+ auto last = p; // start of the current run that has not been copied to out yet
+ // We advance over runs of passthrough characters and copy them in one go;
+ // this is faster than calling push_back repeatedly.
+ while (p != str.end()) {
+ char c = *p;
+ switch (c) {
+ case '%':
+ {
+ // Need the '%' plus two more characters before reading p[1] and p[2].
+ if (UNLIKELY(std::distance(p, str.end()) < 3)) {
+ throw std::invalid_argument("incomplete percent encode sequence");
+ }
+ auto h1 = detail::hexTable[static_cast<unsigned char>(p[1])];
+ auto h2 = detail::hexTable[static_cast<unsigned char>(p[2])];
+ // A table value of 16 marks a character that is not a hex digit.
+ if (UNLIKELY(h1 == 16 || h2 == 16)) {
+ throw std::invalid_argument("invalid percent encode sequence");
+ }
+ // Flush the pending run, then emit the decoded byte.
+ out.append(&*last, size_t(p - last));
+ out.push_back((h1 << 4) | h2);
+ p += 3;
+ last = p;
+ break;
+ }
+ case '+':
+ if (mode == UriEscapeMode::QUERY) {
+ // Flush the pending run, then emit a space in place of the '+'.
+ out.append(&*last, size_t(p - last));
+ out.push_back(' ');
+ ++p;
+ last = p;
+ break;
+ }
+ // else fallthrough
+ FOLLY_FALLTHROUGH;
+ default:
+ // Passthrough: just extend the pending run.
+ ++p;
+ break;
+ }
+ }
+ // Copy whatever passthrough run is still pending at the end of the input.
+ out.append(&*last, size_t(p - last));
+}
+
namespace detail {
/*
return *s.start();
}
-/*
- * These output conversion templates allow us to support multiple
- * output string types, even when we are using an arbitrary
- * OutputIterator.
- */
-template<class OutStringT> struct OutputConverter {};
-
-template<> struct OutputConverter<std::string> {
- std::string operator()(StringPiece sp) const {
- return sp.toString();
- }
-};
-
-template<> struct OutputConverter<fbstring> {
- fbstring operator()(StringPiece sp) const {
- return sp.toFbstring();
- }
-};
-
-template<> struct OutputConverter<StringPiece> {
- StringPiece operator()(StringPiece sp) const { return sp; }
-};
-
/*
* Shared implementation for all the split() overloads.
*
*
* @param ignoreEmpty iff true, don't copy empty segments to output
*/
-template<class OutStringT, class DelimT, class OutputIterator>
+template <class OutStringT, class DelimT, class OutputIterator>
void internalSplit(DelimT delim, StringPiece sp, OutputIterator out,
bool ignoreEmpty) {
- assert(sp.start() != nullptr);
+ assert(sp.empty() || sp.start() != nullptr);
const char* s = sp.start();
const size_t strSize = sp.size();
const size_t dSize = delimSize(delim);
- OutputConverter<OutStringT> conv;
-
if (dSize > strSize || dSize == 0) {
if (!ignoreEmpty || strSize > 0) {
- *out++ = conv(sp);
+ *out++ = to<OutStringT>(sp);
}
return;
}
ignoreEmpty);
}
- int tokenStartPos = 0;
- int tokenSize = 0;
- for (int i = 0; i <= strSize - dSize; ++i) {
+ size_t tokenStartPos = 0;
+ size_t tokenSize = 0;
+ for (size_t i = 0; i <= strSize - dSize; ++i) {
if (atDelim(&s[i], delim)) {
if (!ignoreEmpty || tokenSize > 0) {
- *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize));
+ *out++ = to<OutStringT>(sp.subpiece(tokenStartPos, tokenSize));
}
tokenStartPos = i + dSize;
++tokenSize;
}
}
-
+ tokenSize = strSize - tokenStartPos;
if (!ignoreEmpty || tokenSize > 0) {
- tokenSize = strSize - tokenStartPos;
- *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize));
+ *out++ = to<OutStringT>(sp.subpiece(tokenStartPos, tokenSize));
}
}
+// Converts a delimiter into the form handed to the split helpers: any
+// delimiter type other than char is wrapped in a StringPiece.
-template<class String> StringPiece prepareDelim(const String& s) {
+template <class String> StringPiece prepareDelim(const String& s) {
return StringPiece(s);
}
+// A single-character delimiter is returned unchanged.
inline char prepareDelim(char c) { return c; }
+// Terminal case of the fixed-arity split: stores the remaining input in the
+// last output.
+//
+// When `exact` is true, returns false (leaving `output` untouched) if the
+// remaining input still contains the delimiter. Otherwise `input` is
+// converted with folly::to<OutputType> and assigned to `output`, and true is
+// returned.
+//
+// When `exact` is false the static_assert requires OutputType to be
+// StringPiece or a type for which IsSomeString is true.
+template <bool exact, class Delim, class OutputType>
+bool splitFixed(const Delim& delimiter, StringPiece input, OutputType& output) {
+ static_assert(
+ exact || std::is_same<OutputType, StringPiece>::value ||
+ IsSomeString<OutputType>::value,
+ "split<false>() requires that the last argument be a string type");
+ if (exact && UNLIKELY(std::string::npos != input.find(delimiter))) {
+ return false;
+ }
+ output = folly::to<OutputType>(input);
+ return true;
+}
+
+// Recursive case of the fixed-arity split: peels off the piece before the
+// first delimiter for `outHead` and recurses on the rest for `outTail...`.
+//
+// Returns false if `input` contains no delimiter or if splitting the rest
+// fails. The tail is processed before `outHead` is assigned, and every
+// `return false` path is reached before any assignment, so a false return
+// leaves all outputs untouched.
+// NOTE(review): an exception thrown by folly::to for an earlier output would
+// leave later outputs already assigned -- confirm whether callers care.
+template <bool exact, class Delim, class OutputType, class... OutputTypes>
+bool splitFixed(
+ const Delim& delimiter,
+ StringPiece input,
+ OutputType& outHead,
+ OutputTypes&... outTail) {
+ size_t cut = input.find(delimiter);
+ if (UNLIKELY(cut == std::string::npos)) {
+ return false;
+ }
+ // head is everything before the delimiter; tail starts just past it.
+ StringPiece head(input.begin(), input.begin() + cut);
+ StringPiece tail(input.begin() + cut + detail::delimSize(delimiter),
+ input.end());
+ if (LIKELY(splitFixed<exact>(delimiter, tail, outTail...))) {
+ outHead = folly::to<OutputType>(head);
+ return true;
+ }
+ return false;
}
+} // namespace detail
+
//////////////////////////////////////////////////////////////////////
-template<class Delim, class String, class OutputType>
+template <class Delim, class String, class OutputType>
void split(const Delim& delimiter,
const String& input,
std::vector<OutputType>& out,
ignoreEmpty);
}
-template<class Delim, class String, class OutputType>
+template <class Delim, class String, class OutputType>
void split(const Delim& delimiter,
const String& input,
fbvector<OutputType>& out,
- bool ignoreEmpty = false) {
+ bool ignoreEmpty) {
detail::internalSplit<OutputType>(
detail::prepareDelim(delimiter),
StringPiece(input),
ignoreEmpty);
}
-template<class OutputValueType, class Delim, class String,
- class OutputIterator>
+template <
+ class OutputValueType,
+ class Delim,
+ class String,
+ class OutputIterator>
void splitTo(const Delim& delimiter,
const String& input,
OutputIterator out,
ignoreEmpty);
}
+// Splits `input` on `delimiter` into a fixed number of outputs, one per
+// argument in `outputs...`, and returns whether the split succeeded.
+//
+// This overload participates in overload resolution only when
+// AllConvertible<OutputTypes...>::value is true and at least one output is
+// given. The delimiter is passed through detail::prepareDelim and the work is
+// done by detail::splitFixed<exact>, whose result is returned as-is.
+template <bool exact, class Delim, class... OutputTypes>
+typename std::enable_if<
+ AllConvertible<OutputTypes...>::value && sizeof...(OutputTypes) >= 1,
+ bool>::type
+split(const Delim& delimiter, StringPiece input, OutputTypes&... outputs) {
+ return detail::splitFixed<exact>(
+ detail::prepareDelim(delimiter), input, outputs...);
+}
+
namespace detail {
+/*
+ * If a type can have its string size determined cheaply, we can more
+ * efficiently append it in a loop (see internalJoinAppend). Note that the
+ * struct need not conform to the std::string API completely (e.g. it does not
+ * need to implement append()).
+ *
+ * value is true when IsSomeString<T>::value is true or T is StringPiece.
+ */
+template <class T> struct IsSizableString {
+ enum { value = IsSomeString<T>::value
+ || std::is_same<T, StringPiece>::value };
+};
+
+// Applies IsSizableString to the value_type of Iterator; used below to
+// select between the internalJoin overloads.
template <class Iterator>
-struct IsStringContainerIterator :
- IsSomeString<typename std::iterator_traits<Iterator>::value_type> {
+struct IsSizableStringContainerIterator :
+ IsSizableString<typename std::iterator_traits<Iterator>::value_type> {
};
template <class Delim, class Iterator, class String>
}
template <class Delim, class Iterator, class String>
-typename std::enable_if<IsStringContainerIterator<Iterator>::value>::type
+typename std::enable_if<IsSizableStringContainerIterator<Iterator>::value>::type
internalJoin(Delim delimiter,
Iterator begin,
Iterator end,
}
template <class Delim, class Iterator, class String>
-typename std::enable_if<!IsStringContainerIterator<Iterator>::value>::type
+typename
+std::enable_if<!IsSizableStringContainerIterator<Iterator>::value>::type
internalJoin(Delim delimiter,
Iterator begin,
Iterator end,
internalJoinAppend(delimiter, begin, end, output);
}
-} // namespace detail
+} // namespace detail
template <class Delim, class Iterator, class String>
void join(const Delim& delimiter,
output);
}
-template <class String1, class String2>
-void backslashify(const String1& input, String2& output, bool hex_style) {
+template <class OutputString>
+void backslashify(
+ folly::StringPiece input,
+ OutputString& output,
+ bool hex_style) {
static const char hexValues[] = "0123456789abcdef";
output.clear();
output.reserve(3 * input.size());
if (hex_style) {
hex_append = true;
} else {
- if (c == '\r') output += 'r';
- else if (c == '\n') output += 'n';
- else if (c == '\t') output += 't';
- else if (c == '\a') output += 'a';
- else if (c == '\b') output += 'b';
- else if (c == '\0') output += '0';
- else if (c == '\\') output += '\\';
- else {
+ if (c == '\r') {
+ output += 'r';
+ } else if (c == '\n') {
+ output += 'n';
+ } else if (c == '\t') {
+ output += 't';
+ } else if (c == '\a') {
+ output += 'a';
+ } else if (c == '\b') {
+ output += 'b';
+ } else if (c == '\0') {
+ output += '0';
+ } else if (c == '\\') {
+ output += '\\';
+ } else {
hex_append = true;
}
}
template <class String1, class String2>
void humanify(const String1& input, String2& output) {
- int numUnprintable = 0;
- int numPrintablePrefix = 0;
+ size_t numUnprintable = 0;
+ size_t numPrintablePrefix = 0;
for (unsigned char c : input) {
if (c < 0x20 || c > 0x7e || c == '\\') {
++numUnprintable;
}
}
-template<class InputString, class OutputString>
+template <class InputString, class OutputString>
bool hexlify(const InputString& input, OutputString& output,
- bool append_output=false) {
- if (!append_output) output.clear();
+ bool append_output) {
+ if (!append_output) {
+ output.clear();
+ }
static char hexValues[] = "0123456789abcdef";
- int j = output.size();
+ auto j = output.size();
output.resize(2 * input.size() + output.size());
- for (int i = 0; i < input.size(); ++i) {
+ for (size_t i = 0; i < input.size(); ++i) {
int ch = input[i];
output[j++] = hexValues[(ch >> 4) & 0xf];
output[j++] = hexValues[ch & 0xf];
return true;
}
-template<class InputString, class OutputString>
+template <class InputString, class OutputString>
bool unhexlify(const InputString& input, OutputString& output) {
if (input.size() % 2 != 0) {
return false;
}
output.resize(input.size() / 2);
int j = 0;
- auto unhex = [](char c) -> int {
- return c >= '0' && c <= '9' ? c - '0' :
- c >= 'A' && c <= 'F' ? c - 'A' + 10 :
- c >= 'a' && c <= 'f' ? c - 'a' + 10 :
- -1;
- };
-
- for (int i = 0; i < input.size(); i += 2) {
- int highBits = unhex(input[i]);
- int lowBits = unhex(input[i + 1]);
- if (highBits < 0 || lowBits < 0) {
+
+ for (size_t i = 0; i < input.size(); i += 2) {
+ int highBits = detail::hexTable[static_cast<uint8_t>(input[i])];
+ int lowBits = detail::hexTable[static_cast<uint8_t>(input[i + 1])];
+ if ((highBits | lowBits) & 0x10) {
+ // One of the characters wasn't a hex digit
return false;
}
output[j++] = (highBits << 4) + lowBits;
*/
size_t hexDumpLine(const void* ptr, size_t offset, size_t size,
std::string& line);
-} // namespace detail
+} // namespace detail
template <class OutIt>
void hexDump(const void* ptr, size_t size, OutIt out) {
}
}
-} // namespace folly
-
-#endif /* FOLLY_STRING_INL_H_ */
-
+} // namespace folly