X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=blobdiff_plain;f=folly%2FString-inl.h;h=8baf8f85690c6fe5970f1903c9735a81b153e0ba;hp=b9f7c23700580fc875ec3ed8ae77e9baa80b6c22;hb=22d531a8fe503001a51672750dc09daae252fbf6;hpb=5c77fedbef46995a71ffa268c9fcaf49efddd01b diff --git a/folly/String-inl.h b/folly/String-inl.h index b9f7c237..8baf8f85 100644 --- a/folly/String-inl.h +++ b/folly/String-inl.h @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +14,14 @@ * limitations under the License. */ -#ifndef FOLLY_STRING_INL_H_ -#define FOLLY_STRING_INL_H_ +#pragma once #include #include -#ifndef FOLLY_BASE_STRING_H_ +#include + +#ifndef FOLLY_STRING_H_ #error This file may only be included from String.h #endif @@ -51,7 +52,7 @@ void cEscape(StringPiece str, String& out) { if (e == 'P') { // printable ++p; } else if (e == 'O') { // octal - out.append(&*last, p - last); + out.append(&*last, size_t(p - last)); esc[1] = '0' + ((v >> 6) & 7); esc[2] = '0' + ((v >> 3) & 7); esc[3] = '0' + (v & 7); @@ -59,14 +60,14 @@ void cEscape(StringPiece str, String& out) { ++p; last = p; } else { // special 1-character escape - out.append(&*last, p - last); + out.append(&*last, size_t(p - last)); esc[1] = e; out.append(esc, 2); ++p; last = p; } } - out.append(&*last, p - last); + out.append(&*last, size_t(p - last)); } namespace detail { @@ -149,6 +150,95 @@ void cUnescape(StringPiece str, String& out, bool strict) { out.append(&*last, p - last); } +namespace detail { +// Map from character code to escape mode: +// 0 = pass through +// 1 = unused +// 2 = pass through in PATH mode +// 3 = space, replace with '+' in QUERY mode +// 4 = percent-encode +extern const unsigned char uriEscapeTable[]; +} // namespace detail + +template +void uriEscape(StringPiece str, String& out, UriEscapeMode mode) { + static const char hexValues[] = "0123456789abcdef"; + char esc[3]; + esc[0] = '%'; + // Preallocate assuming that 25% of the input string will be escaped + out.reserve(out.size() + str.size() + 3 * (str.size() / 4)); + auto p = str.begin(); + auto last = p; // last regular character + // We advance over runs of passthrough characters and copy them in one go; + // this is faster than calling push_back repeatedly. + unsigned char minEncode = static_cast(mode); + while (p != str.end()) { + char c = *p; + unsigned char v = static_cast(c); + unsigned char discriminator = detail::uriEscapeTable[v]; + if (LIKELY(discriminator <= minEncode)) { + ++p; + } else if (mode == UriEscapeMode::QUERY && discriminator == 3) { + out.append(&*last, size_t(p - last)); + out.push_back('+'); + ++p; + last = p; + } else { + out.append(&*last, size_t(p - last)); + esc[1] = hexValues[v >> 4]; + esc[2] = hexValues[v & 0x0f]; + out.append(esc, 3); + ++p; + last = p; + } + } + out.append(&*last, size_t(p - last)); +} + +template +void uriUnescape(StringPiece str, String& out, UriEscapeMode mode) { + out.reserve(out.size() + str.size()); + auto p = str.begin(); + auto last = p; + // We advance over runs of passthrough characters and copy them in one go; + // this is faster than calling push_back repeatedly. + while (p != str.end()) { + char c = *p; + switch (c) { + case '%': + { + if (UNLIKELY(std::distance(p, str.end()) < 3)) { + throw std::invalid_argument("incomplete percent encode sequence"); + } + auto h1 = detail::hexTable[static_cast(p[1])]; + auto h2 = detail::hexTable[static_cast(p[2])]; + if (UNLIKELY(h1 == 16 || h2 == 16)) { + throw std::invalid_argument("invalid percent encode sequence"); + } + out.append(&*last, size_t(p - last)); + out.push_back((h1 << 4) | h2); + p += 3; + last = p; + break; + } + case '+': + if (mode == UriEscapeMode::QUERY) { + out.append(&*last, size_t(p - last)); + out.push_back(' '); + ++p; + last = p; + break; + } + // else fallthrough + FOLLY_FALLTHROUGH; + default: + ++p; + break; + } + } + out.append(&*last, size_t(p - last)); +} + namespace detail { /* @@ -176,29 +266,6 @@ inline char delimFront(StringPiece s) { return *s.start(); } -/* - * These output conversion templates allow us to support multiple - * output string types, even when we are using an arbitrary - * OutputIterator. - */ -template struct OutputConverter {}; - -template<> struct OutputConverter { - std::string operator()(StringPiece sp) const { - return sp.toString(); - } -}; - -template<> struct OutputConverter { - fbstring operator()(StringPiece sp) const { - return sp.toFbstring(); - } -}; - -template<> struct OutputConverter { - StringPiece operator()(StringPiece sp) const { return sp; } -}; - /* * Shared implementation for all the split() overloads. * @@ -211,17 +278,15 @@ template<> struct OutputConverter { template void internalSplit(DelimT delim, StringPiece sp, OutputIterator out, bool ignoreEmpty) { - assert(sp.start() != nullptr); + assert(sp.empty() || sp.start() != nullptr); const char* s = sp.start(); const size_t strSize = sp.size(); const size_t dSize = delimSize(delim); - OutputConverter conv; - if (dSize > strSize || dSize == 0) { if (!ignoreEmpty || strSize > 0) { - *out++ = conv(sp); + *out++ = to(sp); } return; } @@ -231,12 +296,12 @@ void internalSplit(DelimT delim, StringPiece sp, OutputIterator out, ignoreEmpty); } - int tokenStartPos = 0; - int tokenSize = 0; - for (int i = 0; i <= strSize - dSize; ++i) { + size_t tokenStartPos = 0; + size_t tokenSize = 0; + for (size_t i = 0; i <= strSize - dSize; ++i) { if (atDelim(&s[i], delim)) { if (!ignoreEmpty || tokenSize > 0) { - *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize)); + *out++ = to(sp.subpiece(tokenStartPos, tokenSize)); } tokenStartPos = i + dSize; @@ -246,10 +311,9 @@ void internalSplit(DelimT delim, StringPiece sp, OutputIterator out, ++tokenSize; } } - + tokenSize = strSize - tokenStartPos; if (!ignoreEmpty || tokenSize > 0) { - tokenSize = strSize - tokenStartPos; - *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize)); + *out++ = to(sp.subpiece(tokenStartPos, tokenSize)); } } @@ -258,6 +322,39 @@ template StringPiece prepareDelim(const String& s) { } inline char prepareDelim(char c) { return c; } +template +bool splitFixed(const Delim& delimiter, StringPiece input, OutputType& output) { + static_assert( + exact || std::is_same::value || + IsSomeString::value, + "split() requires that the last argument be a string type"); + if (exact && UNLIKELY(std::string::npos != input.find(delimiter))) { + return false; + } + output = folly::to(input); + return true; +} + +template +bool splitFixed( + const Delim& delimiter, + StringPiece input, + OutputType& outHead, + OutputTypes&... outTail) { + size_t cut = input.find(delimiter); + if (UNLIKELY(cut == std::string::npos)) { + return false; + } + StringPiece head(input.begin(), input.begin() + cut); + StringPiece tail(input.begin() + cut + detail::delimSize(delimiter), + input.end()); + if (LIKELY(splitFixed(delimiter, tail, outTail...))) { + outHead = folly::to(head); + return true; + } + return false; +} + } ////////////////////////////////////////////////////////////////////// @@ -299,11 +396,31 @@ void splitTo(const Delim& delimiter, ignoreEmpty); } +template +typename std::enable_if< + AllConvertible::value && sizeof...(OutputTypes) >= 1, + bool>::type +split(const Delim& delimiter, StringPiece input, OutputTypes&... outputs) { + return detail::splitFixed( + detail::prepareDelim(delimiter), input, outputs...); +} + namespace detail { +/* + * If a type can have its string size determined cheaply, we can more + * efficiently append it in a loop (see internalJoinAppend). Note that the + * struct need not conform to the std::string api completely (ex. does not need + * to implement append()). + */ +template struct IsSizableString { + enum { value = IsSomeString::value + || std::is_same::value }; +}; + template -struct IsStringContainerIterator : - IsSomeString::value_type> { +struct IsSizableStringContainerIterator : + IsSizableString::value_type> { }; template @@ -324,7 +441,7 @@ void internalJoinAppend(Delim delimiter, } template -typename std::enable_if::value>::type +typename std::enable_if::value>::type internalJoin(Delim delimiter, Iterator begin, Iterator end, @@ -344,7 +461,8 @@ internalJoin(Delim delimiter, } template -typename std::enable_if::value>::type +typename +std::enable_if::value>::type internalJoin(Delim delimiter, Iterator begin, Iterator end, @@ -407,8 +525,8 @@ void backslashify(const String1& input, String2& output, bool hex_style) { template void humanify(const String1& input, String2& output) { - int numUnprintable = 0; - int numPrintablePrefix = 0; + size_t numUnprintable = 0; + size_t numPrintablePrefix = 0; for (unsigned char c : input) { if (c < 0x20 || c > 0x7e || c == '\\') { ++numUnprintable; @@ -455,9 +573,9 @@ bool hexlify(const InputString& input, OutputString& output, if (!append_output) output.clear(); static char hexValues[] = "0123456789abcdef"; - int j = output.size(); + auto j = output.size(); output.resize(2 * input.size() + output.size()); - for (int i = 0; i < input.size(); ++i) { + for (size_t i = 0; i < input.size(); ++i) { int ch = input[i]; output[j++] = hexValues[(ch >> 4) & 0xf]; output[j++] = hexValues[ch & 0xf]; @@ -472,17 +590,12 @@ bool unhexlify(const InputString& input, OutputString& output) { } output.resize(input.size() / 2); int j = 0; - auto unhex = [](char c) -> int { - return c >= '0' && c <= '9' ? c - '0' : - c >= 'A' && c <= 'F' ? c - 'A' + 10 : - c >= 'a' && c <= 'f' ? c - 'a' + 10 : - -1; - }; - - for (int i = 0; i < input.size(); i += 2) { - int highBits = unhex(input[i]); - int lowBits = unhex(input[i + 1]); - if (highBits < 0 || lowBits < 0) { + + for (size_t i = 0; i < input.size(); i += 2) { + int highBits = detail::hexTable[static_cast(input[i])]; + int lowBits = detail::hexTable[static_cast(input[i + 1])]; + if ((highBits | lowBits) & 0x10) { + // One of the characters wasn't a hex digit return false; } output[j++] = (highBits << 4) + lowBits; @@ -510,6 +623,3 @@ void hexDump(const void* ptr, size_t size, OutIt out) { } } // namespace folly - -#endif /* FOLLY_STRING_INL_H_ */ -