X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FString-inl.h;h=f6b5ba993dffbba4d2cc318db4c269f314a0833d;hb=a91971707da2903e1d28a58ac18ae369df7fbf53;hp=4bff753e0ab757b291a9a6154a6bf173f31b321c;hpb=5df0c9714e235fc704cfd03aea833fe8163d0058;p=folly.git diff --git a/folly/String-inl.h b/folly/String-inl.h index 4bff753e..f6b5ba99 100644 --- a/folly/String-inl.h +++ b/folly/String-inl.h @@ -1,5 +1,5 @@ /* - * Copyright 2012 Facebook, Inc. + * Copyright 2017 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,13 +14,14 @@ * limitations under the License. */ -#ifndef FOLLY_STRING_INL_H_ -#define FOLLY_STRING_INL_H_ +#pragma once -#include #include +#include + +#include -#ifndef FOLLY_BASE_STRING_H_ +#ifndef FOLLY_STRING_H_ #error This file may only be included from String.h #endif @@ -32,7 +33,7 @@ namespace detail { // an octal escape sequence, or 'P' if the character is printable and // should be printed as is. extern const char cEscapeTable[]; -} // namespace detail +} // namespace detail template void cEscape(StringPiece str, String& out) { @@ -51,7 +52,7 @@ void cEscape(StringPiece str, String& out) { if (e == 'P') { // printable ++p; } else if (e == 'O') { // octal - out.append(&*last, p - last); + out.append(&*last, size_t(p - last)); esc[1] = '0' + ((v >> 6) & 7); esc[2] = '0' + ((v >> 3) & 7); esc[3] = '0' + (v & 7); @@ -59,14 +60,14 @@ void cEscape(StringPiece str, String& out) { ++p; last = p; } else { // special 1-character escape - out.append(&*last, p - last); + out.append(&*last, size_t(p - last)); esc[1] = e; out.append(esc, 2); ++p; last = p; } } - out.append(&*last, p - last); + out.append(&*last, size_t(p - last)); } namespace detail { @@ -79,7 +80,7 @@ extern const char cUnescapeTable[]; // Map from the character code to the hex value, or 16 if invalid hex char. extern const unsigned char hexTable[]; -} // namespace detail +} // namespace detail template void cUnescape(StringPiece str, String& out, bool strict) { @@ -149,6 +150,95 @@ void cUnescape(StringPiece str, String& out, bool strict) { out.append(&*last, p - last); } +namespace detail { +// Map from character code to escape mode: +// 0 = pass through +// 1 = unused +// 2 = pass through in PATH mode +// 3 = space, replace with '+' in QUERY mode +// 4 = percent-encode +extern const unsigned char uriEscapeTable[]; +} // namespace detail + +template +void uriEscape(StringPiece str, String& out, UriEscapeMode mode) { + static const char hexValues[] = "0123456789abcdef"; + char esc[3]; + esc[0] = '%'; + // Preallocate assuming that 25% of the input string will be escaped + out.reserve(out.size() + str.size() + 3 * (str.size() / 4)); + auto p = str.begin(); + auto last = p; // last regular character + // We advance over runs of passthrough characters and copy them in one go; + // this is faster than calling push_back repeatedly. + unsigned char minEncode = static_cast(mode); + while (p != str.end()) { + char c = *p; + unsigned char v = static_cast(c); + unsigned char discriminator = detail::uriEscapeTable[v]; + if (LIKELY(discriminator <= minEncode)) { + ++p; + } else if (mode == UriEscapeMode::QUERY && discriminator == 3) { + out.append(&*last, size_t(p - last)); + out.push_back('+'); + ++p; + last = p; + } else { + out.append(&*last, size_t(p - last)); + esc[1] = hexValues[v >> 4]; + esc[2] = hexValues[v & 0x0f]; + out.append(esc, 3); + ++p; + last = p; + } + } + out.append(&*last, size_t(p - last)); +} + +template +void uriUnescape(StringPiece str, String& out, UriEscapeMode mode) { + out.reserve(out.size() + str.size()); + auto p = str.begin(); + auto last = p; + // We advance over runs of passthrough characters and copy them in one go; + // this is faster than calling push_back repeatedly. + while (p != str.end()) { + char c = *p; + switch (c) { + case '%': + { + if (UNLIKELY(std::distance(p, str.end()) < 3)) { + throw std::invalid_argument("incomplete percent encode sequence"); + } + auto h1 = detail::hexTable[static_cast(p[1])]; + auto h2 = detail::hexTable[static_cast(p[2])]; + if (UNLIKELY(h1 == 16 || h2 == 16)) { + throw std::invalid_argument("invalid percent encode sequence"); + } + out.append(&*last, size_t(p - last)); + out.push_back((h1 << 4) | h2); + p += 3; + last = p; + break; + } + case '+': + if (mode == UriEscapeMode::QUERY) { + out.append(&*last, size_t(p - last)); + out.push_back(' '); + ++p; + last = p; + break; + } + // else fallthrough + FOLLY_FALLTHROUGH; + default: + ++p; + break; + } + } + out.append(&*last, size_t(p - last)); +} + namespace detail { /* @@ -176,29 +266,6 @@ inline char delimFront(StringPiece s) { return *s.start(); } -/* - * These output conversion templates allow us to support multiple - * output string types, even when we are using an arbitrary - * OutputIterator. - */ -template struct OutputConverter {}; - -template<> struct OutputConverter { - std::string operator()(StringPiece sp) const { - return sp.toString(); - } -}; - -template<> struct OutputConverter { - fbstring operator()(StringPiece sp) const { - return sp.toFbstring(); - } -}; - -template<> struct OutputConverter { - StringPiece operator()(StringPiece sp) const { return sp; } -}; - /* * Shared implementation for all the split() overloads. * @@ -208,20 +275,18 @@ template<> struct OutputConverter { * * @param ignoreEmpty iff true, don't copy empty segments to output */ -template +template void internalSplit(DelimT delim, StringPiece sp, OutputIterator out, bool ignoreEmpty) { - assert(sp.start() != nullptr); + assert(sp.empty() || sp.start() != nullptr); const char* s = sp.start(); const size_t strSize = sp.size(); const size_t dSize = delimSize(delim); - OutputConverter conv; - if (dSize > strSize || dSize == 0) { if (!ignoreEmpty || strSize > 0) { - *out++ = conv(sp); + *out++ = to(sp); } return; } @@ -231,12 +296,12 @@ void internalSplit(DelimT delim, StringPiece sp, OutputIterator out, ignoreEmpty); } - int tokenStartPos = 0; - int tokenSize = 0; - for (int i = 0; i <= strSize - dSize; ++i) { + size_t tokenStartPos = 0; + size_t tokenSize = 0; + for (size_t i = 0; i <= strSize - dSize; ++i) { if (atDelim(&s[i], delim)) { if (!ignoreEmpty || tokenSize > 0) { - *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize)); + *out++ = to(sp.subpiece(tokenStartPos, tokenSize)); } tokenStartPos = i + dSize; @@ -246,23 +311,55 @@ void internalSplit(DelimT delim, StringPiece sp, OutputIterator out, ++tokenSize; } } - + tokenSize = strSize - tokenStartPos; if (!ignoreEmpty || tokenSize > 0) { - tokenSize = strSize - tokenStartPos; - *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize)); + *out++ = to(sp.subpiece(tokenStartPos, tokenSize)); } } -template StringPiece prepareDelim(const String& s) { +template StringPiece prepareDelim(const String& s) { return StringPiece(s); } inline char prepareDelim(char c) { return c; } +template +bool splitFixed(const Delim& delimiter, StringPiece input, OutputType& output) { + static_assert( + exact || std::is_same::value || + IsSomeString::value, + "split() requires that the last argument be a string type"); + if (exact && UNLIKELY(std::string::npos != input.find(delimiter))) { + return false; + } + output = folly::to(input); + return true; +} + +template +bool splitFixed( + const Delim& delimiter, + StringPiece input, + OutputType& outHead, + OutputTypes&... outTail) { + size_t cut = input.find(delimiter); + if (UNLIKELY(cut == std::string::npos)) { + return false; + } + StringPiece head(input.begin(), input.begin() + cut); + StringPiece tail(input.begin() + cut + detail::delimSize(delimiter), + input.end()); + if (LIKELY(splitFixed(delimiter, tail, outTail...))) { + outHead = folly::to(head); + return true; + } + return false; } +} // namespace detail + ////////////////////////////////////////////////////////////////////// -template +template void split(const Delim& delimiter, const String& input, std::vector& out, @@ -274,11 +371,11 @@ void split(const Delim& delimiter, ignoreEmpty); } -template +template void split(const Delim& delimiter, const String& input, fbvector& out, - bool ignoreEmpty = false) { + bool ignoreEmpty) { detail::internalSplit( detail::prepareDelim(delimiter), StringPiece(input), @@ -286,8 +383,11 @@ void split(const Delim& delimiter, ignoreEmpty); } -template +template < + class OutputValueType, + class Delim, + class String, + class OutputIterator> void splitTo(const Delim& delimiter, const String& input, OutputIterator out, @@ -299,11 +399,31 @@ void splitTo(const Delim& delimiter, ignoreEmpty); } +template +typename std::enable_if< + AllConvertible::value && sizeof...(OutputTypes) >= 1, + bool>::type +split(const Delim& delimiter, StringPiece input, OutputTypes&... outputs) { + return detail::splitFixed( + detail::prepareDelim(delimiter), input, outputs...); +} + namespace detail { +/* + * If a type can have its string size determined cheaply, we can more + * efficiently append it in a loop (see internalJoinAppend). Note that the + * struct need not conform to the std::string api completely (ex. does not need + * to implement append()). + */ +template struct IsSizableString { + enum { value = IsSomeString::value + || std::is_same::value }; +}; + template -struct IsStringContainerIterator : - IsSomeString::value_type> { +struct IsSizableStringContainerIterator : + IsSizableString::value_type> { }; template @@ -324,7 +444,7 @@ void internalJoinAppend(Delim delimiter, } template -typename std::enable_if::value>::type +typename std::enable_if::value>::type internalJoin(Delim delimiter, Iterator begin, Iterator end, @@ -344,7 +464,8 @@ internalJoin(Delim delimiter, } template -typename std::enable_if::value>::type +typename +std::enable_if::value>::type internalJoin(Delim delimiter, Iterator begin, Iterator end, @@ -356,7 +477,7 @@ internalJoin(Delim delimiter, internalJoinAppend(delimiter, begin, end, output); } -} // namespace detail +} // namespace detail template void join(const Delim& delimiter, @@ -370,8 +491,11 @@ void join(const Delim& delimiter, output); } -template -void backslashify(const String1& input, String2& output, bool hex_style) { +template +void backslashify( + folly::StringPiece input, + OutputString& output, + bool hex_style) { static const char hexValues[] = "0123456789abcdef"; output.clear(); output.reserve(3 * input.size()); @@ -383,14 +507,21 @@ void backslashify(const String1& input, String2& output, bool hex_style) { if (hex_style) { hex_append = true; } else { - if (c == '\r') output += 'r'; - else if (c == '\n') output += 'n'; - else if (c == '\t') output += 't'; - else if (c == '\a') output += 'a'; - else if (c == '\b') output += 'b'; - else if (c == '\0') output += '0'; - else if (c == '\\') output += '\\'; - else { + if (c == '\r') { + output += 'r'; + } else if (c == '\n') { + output += 'n'; + } else if (c == '\t') { + output += 't'; + } else if (c == '\a') { + output += 'a'; + } else if (c == '\b') { + output += 'b'; + } else if (c == '\0') { + output += '0'; + } else if (c == '\\') { + output += '\\'; + } else { hex_append = true; } } @@ -407,8 +538,8 @@ void backslashify(const String1& input, String2& output, bool hex_style) { template void humanify(const String1& input, String2& output) { - int numUnprintable = 0; - int numPrintablePrefix = 0; + size_t numUnprintable = 0; + size_t numPrintablePrefix = 0; for (unsigned char c : input) { if (c < 0x20 || c > 0x7e || c == '\\') { ++numUnprintable; @@ -449,15 +580,17 @@ void humanify(const String1& input, String2& output) { } } -template +template bool hexlify(const InputString& input, OutputString& output, - bool append_output=false) { - if (!append_output) output.clear(); + bool append_output) { + if (!append_output) { + output.clear(); + } static char hexValues[] = "0123456789abcdef"; - int j = output.size(); + auto j = output.size(); output.resize(2 * input.size() + output.size()); - for (int i = 0; i < input.size(); ++i) { + for (size_t i = 0; i < input.size(); ++i) { int ch = input[i]; output[j++] = hexValues[(ch >> 4) & 0xf]; output[j++] = hexValues[ch & 0xf]; @@ -465,24 +598,19 @@ bool hexlify(const InputString& input, OutputString& output, return true; } -template +template bool unhexlify(const InputString& input, OutputString& output) { if (input.size() % 2 != 0) { return false; } output.resize(input.size() / 2); int j = 0; - auto unhex = [](char c) -> int { - return c >= '0' && c <= '9' ? c - '0' : - c >= 'A' && c <= 'F' ? c - 'A' + 10 : - c >= 'a' && c <= 'f' ? c - 'a' + 10 : - -1; - }; - - for (int i = 0; i < input.size(); i += 2) { - int highBits = unhex(input[i]); - int lowBits = unhex(input[i + 1]); - if (highBits < 0 || lowBits < 0) { + + for (size_t i = 0; i < input.size(); i += 2) { + int highBits = detail::hexTable[static_cast(input[i])]; + int lowBits = detail::hexTable[static_cast(input[i + 1])]; + if ((highBits | lowBits) & 0x10) { + // One of the characters wasn't a hex digit return false; } output[j++] = (highBits << 4) + lowBits; @@ -497,7 +625,7 @@ namespace detail { */ size_t hexDumpLine(const void* ptr, size_t offset, size_t size, std::string& line); -} // namespace detail +} // namespace detail template void hexDump(const void* ptr, size_t size, OutIt out) { @@ -509,7 +637,4 @@ void hexDump(const void* ptr, size_t size, OutIt out) { } } -} // namespace folly - -#endif /* FOLLY_STRING_INL_H_ */ - +} // namespace folly