X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=folly%2FString-inl.h;h=27b02a614f96f4fffe949cd6ccdfc6bc19a279c0;hb=4ebfdff303924da5636e512ef7268612cba7c803;hp=02d376d82f06e02fd4b64b30f6758705091cc0ca;hpb=f3f96c69481d5dd078e3563b13bf05903dba6517;p=folly.git diff --git a/folly/String-inl.h b/folly/String-inl.h index 02d376d8..27b02a61 100644 --- a/folly/String-inl.h +++ b/folly/String-inl.h @@ -1,5 +1,5 @@ /* - * Copyright 2012 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -149,6 +149,95 @@ void cUnescape(StringPiece str, String& out, bool strict) { out.append(&*last, p - last); } +namespace detail { +// Map from character code to escape mode: +// 0 = pass through +// 1 = unused +// 2 = pass through in PATH mode +// 3 = space, replace with '+' in QUERY mode +// 4 = percent-encode +extern const unsigned char uriEscapeTable[]; +} // namespace detail + +template +void uriEscape(StringPiece str, String& out, UriEscapeMode mode) { + static const char hexValues[] = "0123456789abcdef"; + char esc[3]; + esc[0] = '%'; + // Preallocate assuming that 25% of the input string will be escaped + out.reserve(out.size() + str.size() + 3 * (str.size() / 4)); + auto p = str.begin(); + auto last = p; // last regular character + // We advance over runs of passthrough characters and copy them in one go; + // this is faster than calling push_back repeatedly. + unsigned char minEncode = static_cast(mode); + while (p != str.end()) { + char c = *p; + unsigned char v = static_cast(c); + unsigned char discriminator = detail::uriEscapeTable[v]; + if (LIKELY(discriminator <= minEncode)) { + ++p; + } else if (mode == UriEscapeMode::QUERY && discriminator == 3) { + out.append(&*last, p - last); + out.push_back('+'); + ++p; + last = p; + } else { + out.append(&*last, p - last); + esc[1] = hexValues[v >> 4]; + esc[2] = hexValues[v & 0x0f]; + out.append(esc, 3); + ++p; + last = p; + } + } + out.append(&*last, p - last); +} + +template +void uriUnescape(StringPiece str, String& out, UriEscapeMode mode) { + out.reserve(out.size() + str.size()); + auto p = str.begin(); + auto last = p; + // We advance over runs of passthrough characters and copy them in one go; + // this is faster than calling push_back repeatedly. + while (p != str.end()) { + char c = *p; + unsigned char v = static_cast(v); + switch (c) { + case '%': + { + if (UNLIKELY(std::distance(p, str.end()) < 3)) { + throw std::invalid_argument("incomplete percent encode sequence"); + } + auto h1 = detail::hexTable[static_cast(p[1])]; + auto h2 = detail::hexTable[static_cast(p[2])]; + if (UNLIKELY(h1 == 16 || h2 == 16)) { + throw std::invalid_argument("invalid percent encode sequence"); + } + out.append(&*last, p - last); + out.push_back((h1 << 4) | h2); + p += 3; + last = p; + break; + } + case '+': + if (mode == UriEscapeMode::QUERY) { + out.append(&*last, p - last); + out.push_back(' '); + ++p; + last = p; + break; + } + // else fallthrough + default: + ++p; + break; + } + } + out.append(&*last, p - last); +} + namespace detail { /* @@ -211,7 +300,7 @@ template<> struct OutputConverter { template void internalSplit(DelimT delim, StringPiece sp, OutputIterator out, bool ignoreEmpty) { - assert(sp.start() != nullptr); + assert(sp.empty() || sp.start() != nullptr); const char* s = sp.start(); const size_t strSize = sp.size(); @@ -231,9 +320,9 @@ void internalSplit(DelimT delim, StringPiece sp, OutputIterator out, ignoreEmpty); } - int tokenStartPos = 0; - int tokenSize = 0; - for (int i = 0; i <= strSize - dSize; ++i) { + size_t tokenStartPos = 0; + size_t tokenSize = 0; + for (size_t i = 0; i <= strSize - dSize; ++i) { if (atDelim(&s[i], delim)) { if (!ignoreEmpty || tokenSize > 0) { *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize)); @@ -246,9 +335,8 @@ void internalSplit(DelimT delim, StringPiece sp, OutputIterator out, ++tokenSize; } } - + tokenSize = strSize - tokenStartPos; if (!ignoreEmpty || tokenSize > 0) { - tokenSize = strSize - tokenStartPos; *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize)); } } @@ -258,6 +346,50 @@ template StringPiece prepareDelim(const String& s) { } inline char prepareDelim(char c) { return c; } +template +struct convertTo { + template + static Dst from(const Src& src) { return folly::to(src); } + static Dst from(const Dst& src) { return src; } +}; + +template +typename std::enable_if::value, bool>::type +splitFixed(const Delim& delimiter, + StringPiece input, + OutputType& out) { + if (exact && UNLIKELY(std::string::npos != input.find(delimiter))) { + return false; + } + out = convertTo::from(input); + return true; +} + +template +typename std::enable_if::value, bool>::type +splitFixed(const Delim& delimiter, + StringPiece input, + OutputType& outHead, + OutputTypes&... outTail) { + size_t cut = input.find(delimiter); + if (UNLIKELY(cut == std::string::npos)) { + return false; + } + StringPiece head(input.begin(), input.begin() + cut); + StringPiece tail(input.begin() + cut + detail::delimSize(delimiter), + input.end()); + if (LIKELY(splitFixed(delimiter, tail, outTail...))) { + outHead = convertTo::from(head); + return true; + } + return false; +} + } ////////////////////////////////////////////////////////////////////// @@ -278,7 +410,7 @@ template void split(const Delim& delimiter, const String& input, fbvector& out, - bool ignoreEmpty = false) { + bool ignoreEmpty) { detail::internalSplit( detail::prepareDelim(delimiter), StringPiece(input), @@ -299,11 +431,38 @@ void splitTo(const Delim& delimiter, ignoreEmpty); } +template +typename std::enable_if::value, bool>::type +split(const Delim& delimiter, + StringPiece input, + OutputType& outHead, + OutputTypes&... outTail) { + return detail::splitFixed( + detail::prepareDelim(delimiter), + input, + outHead, + outTail...); +} + namespace detail { +/* + * If a type can have its string size determined cheaply, we can more + * efficiently append it in a loop (see internalJoinAppend). Note that the + * struct need not conform to the std::string api completely (ex. does not need + * to implement append()). + */ +template struct IsSizableString { + enum { value = IsSomeString::value + || std::is_same::value }; +}; + template -struct IsStringContainerIterator : - IsSomeString::value_type> { +struct IsSizableStringContainerIterator : + IsSizableString::value_type> { }; template @@ -312,6 +471,11 @@ void internalJoinAppend(Delim delimiter, Iterator end, String& output) { assert(begin != end); + if (std::is_same::value && + delimSize(delimiter) == 1) { + internalJoinAppend(delimFront(delimiter), begin, end, output); + return; + } toAppend(*begin, &output); while (++begin != end) { toAppend(delimiter, *begin, &output); @@ -319,7 +483,7 @@ void internalJoinAppend(Delim delimiter, } template -typename std::enable_if::value>::type +typename std::enable_if::value>::type internalJoin(Delim delimiter, Iterator begin, Iterator end, @@ -339,7 +503,8 @@ internalJoin(Delim delimiter, } template -typename std::enable_if::value>::type +typename +std::enable_if::value>::type internalJoin(Delim delimiter, Iterator begin, Iterator end, @@ -446,13 +611,13 @@ void humanify(const String1& input, String2& output) { template bool hexlify(const InputString& input, OutputString& output, - bool append_output=false) { + bool append_output) { if (!append_output) output.clear(); static char hexValues[] = "0123456789abcdef"; - int j = output.size(); + auto j = output.size(); output.resize(2 * input.size() + output.size()); - for (int i = 0; i < input.size(); ++i) { + for (size_t i = 0; i < input.size(); ++i) { int ch = input[i]; output[j++] = hexValues[(ch >> 4) & 0xf]; output[j++] = hexValues[ch & 0xf]; @@ -474,7 +639,7 @@ bool unhexlify(const InputString& input, OutputString& output) { -1; }; - for (int i = 0; i < input.size(); i += 2) { + for (size_t i = 0; i < input.size(); i += 2) { int highBits = unhex(input[i]); int lowBits = unhex(input[i + 1]); if (highBits < 0 || lowBits < 0) { @@ -507,4 +672,3 @@ void hexDump(const void* ptr, size_t size, OutIt out) { } // namespace folly #endif /* FOLLY_STRING_INL_H_ */ -