X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=blobdiff_plain;f=folly%2FString-inl.h;h=8baf8f85690c6fe5970f1903c9735a81b153e0ba;hp=b9f7c23700580fc875ec3ed8ae77e9baa80b6c22;hb=22d531a8fe503001a51672750dc09daae252fbf6;hpb=5c77fedbef46995a71ffa268c9fcaf49efddd01b

diff --git a/folly/String-inl.h b/folly/String-inl.h
index b9f7c237..8baf8f85 100644
--- a/folly/String-inl.h
+++ b/folly/String-inl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2013 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
-#ifndef FOLLY_STRING_INL_H_
-#define FOLLY_STRING_INL_H_
+#pragma once
 
 #include <stdexcept>
 #include <iterator>
 
-#ifndef FOLLY_BASE_STRING_H_
+#include <folly/CppAttributes.h>
+
+#ifndef FOLLY_STRING_H_
 #error This file may only be included from String.h
 #endif
 
@@ -51,7 +52,7 @@ void cEscape(StringPiece str, String& out) {
     if (e == 'P') {  // printable
       ++p;
     } else if (e == 'O') {  // octal
-      out.append(&*last, p - last);
+      out.append(&*last, size_t(p - last));
       esc[1] = '0' + ((v >> 6) & 7);
       esc[2] = '0' + ((v >> 3) & 7);
       esc[3] = '0' + (v & 7);
@@ -59,14 +60,14 @@ void cEscape(StringPiece str, String& out) {
       ++p;
       last = p;
     } else {  // special 1-character escape
-      out.append(&*last, p - last);
+      out.append(&*last, size_t(p - last));
       esc[1] = e;
       out.append(esc, 2);
       ++p;
       last = p;
     }
   }
-  out.append(&*last, p - last);
+  out.append(&*last, size_t(p - last));
 }
 
 namespace detail {
@@ -149,6 +150,95 @@ void cUnescape(StringPiece str, String& out, bool strict) {
   out.append(&*last, p - last);
 }
 
+namespace detail {
+// Map from character code to escape mode:
+// 0 = pass through
+// 1 = unused
+// 2 = pass through in PATH mode
+// 3 = space, replace with '+' in QUERY mode
+// 4 = percent-encode
+extern const unsigned char uriEscapeTable[];
+}  // namespace detail
+
+template <class String>
+void uriEscape(StringPiece str, String& out, UriEscapeMode mode) {
+  static const char hexValues[] = "0123456789abcdef";
+  char esc[3];
+  esc[0] = '%';
+  // Preallocate assuming that 25% of the input string will be escaped
+  out.reserve(out.size() + str.size() + 3 * (str.size() / 4));
+  auto p = str.begin();
+  auto last = p;  // last regular character
+  // We advance over runs of passthrough characters and copy them in one go;
+  // this is faster than calling push_back repeatedly.
+  unsigned char minEncode = static_cast<unsigned char>(mode);
+  while (p != str.end()) {
+    char c = *p;
+    unsigned char v = static_cast<unsigned char>(c);
+    unsigned char discriminator = detail::uriEscapeTable[v];
+    if (LIKELY(discriminator <= minEncode)) {
+      ++p;
+    } else if (mode == UriEscapeMode::QUERY && discriminator == 3) {
+      out.append(&*last, size_t(p - last));
+      out.push_back('+');
+      ++p;
+      last = p;
+    } else {
+      out.append(&*last, size_t(p - last));
+      esc[1] = hexValues[v >> 4];
+      esc[2] = hexValues[v & 0x0f];
+      out.append(esc, 3);
+      ++p;
+      last = p;
+    }
+  }
+  out.append(&*last, size_t(p - last));
+}
+
+template <class String>
+void uriUnescape(StringPiece str, String& out, UriEscapeMode mode) {
+  out.reserve(out.size() + str.size());
+  auto p = str.begin();
+  auto last = p;
+  // We advance over runs of passthrough characters and copy them in one go;
+  // this is faster than calling push_back repeatedly.
+  while (p != str.end()) {
+    char c = *p;
+    switch (c) {
+    case '%':
+      {
+        if (UNLIKELY(std::distance(p, str.end()) < 3)) {
+          throw std::invalid_argument("incomplete percent encode sequence");
+        }
+        auto h1 = detail::hexTable[static_cast<unsigned char>(p[1])];
+        auto h2 = detail::hexTable[static_cast<unsigned char>(p[2])];
+        if (UNLIKELY(h1 == 16 || h2 == 16)) {
+          throw std::invalid_argument("invalid percent encode sequence");
+        }
+        out.append(&*last, size_t(p - last));
+        out.push_back((h1 << 4) | h2);
+        p += 3;
+        last = p;
+        break;
+      }
+    case '+':
+      if (mode == UriEscapeMode::QUERY) {
+        out.append(&*last, size_t(p - last));
+        out.push_back(' ');
+        ++p;
+        last = p;
+        break;
+      }
+      // else fallthrough
+      FOLLY_FALLTHROUGH;
+    default:
+      ++p;
+      break;
+    }
+  }
+  out.append(&*last, size_t(p - last));
+}
+
 namespace detail {
 
 /*
@@ -176,29 +266,6 @@ inline char delimFront(StringPiece s) {
   return *s.start();
 }
 
-/*
- * These output conversion templates allow us to support multiple
- * output string types, even when we are using an arbitrary
- * OutputIterator.
- */
-template<class OutStringT> struct OutputConverter {};
-
-template<> struct OutputConverter<std::string> {
-  std::string operator()(StringPiece sp) const {
-    return sp.toString();
-  }
-};
-
-template<> struct OutputConverter<fbstring> {
-  fbstring operator()(StringPiece sp) const {
-    return sp.toFbstring();
-  }
-};
-
-template<> struct OutputConverter<StringPiece> {
-  StringPiece operator()(StringPiece sp) const { return sp; }
-};
-
 /*
  * Shared implementation for all the split() overloads.
  *
@@ -211,17 +278,15 @@ template<> struct OutputConverter<StringPiece> {
 template<class OutStringT, class DelimT, class OutputIterator>
 void internalSplit(DelimT delim, StringPiece sp, OutputIterator out,
     bool ignoreEmpty) {
-  assert(sp.start() != nullptr);
+  assert(sp.empty() || sp.start() != nullptr);
 
   const char* s = sp.start();
   const size_t strSize = sp.size();
   const size_t dSize = delimSize(delim);
 
-  OutputConverter<OutStringT> conv;
-
   if (dSize > strSize || dSize == 0) {
     if (!ignoreEmpty || strSize > 0) {
-      *out++ = conv(sp);
+      *out++ = to<OutStringT>(sp);
     }
     return;
   }
@@ -231,12 +296,12 @@ void internalSplit(DelimT delim, StringPiece sp, OutputIterator out,
       ignoreEmpty);
   }
 
-  int tokenStartPos = 0;
-  int tokenSize = 0;
-  for (int i = 0; i <= strSize - dSize; ++i) {
+  size_t tokenStartPos = 0;
+  size_t tokenSize = 0;
+  for (size_t i = 0; i <= strSize - dSize; ++i) {
     if (atDelim(&s[i], delim)) {
       if (!ignoreEmpty || tokenSize > 0) {
-        *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize));
+        *out++ = to<OutStringT>(sp.subpiece(tokenStartPos, tokenSize));
       }
 
       tokenStartPos = i + dSize;
@@ -246,10 +311,9 @@ void internalSplit(DelimT delim, StringPiece sp, OutputIterator out,
       ++tokenSize;
     }
   }
-
+  tokenSize = strSize - tokenStartPos;
   if (!ignoreEmpty || tokenSize > 0) {
-    tokenSize = strSize - tokenStartPos;
-    *out++ = conv(StringPiece(&s[tokenStartPos], tokenSize));
+    *out++ = to<OutStringT>(sp.subpiece(tokenStartPos, tokenSize));
   }
 }
 
@@ -258,6 +322,39 @@ template<class String> StringPiece prepareDelim(const String& s) {
 }
 inline char prepareDelim(char c) { return c; }
 
+template <bool exact, class Delim, class OutputType>
+bool splitFixed(const Delim& delimiter, StringPiece input, OutputType& output) {
+  static_assert(
+      exact || std::is_same<OutputType, StringPiece>::value ||
+          IsSomeString<OutputType>::value,
+      "split<false>() requires that the last argument be a string type");
+  if (exact && UNLIKELY(std::string::npos != input.find(delimiter))) {
+    return false;
+  }
+  output = folly::to<OutputType>(input);
+  return true;
+}
+
+template <bool exact, class Delim, class OutputType, class... OutputTypes>
+bool splitFixed(
+    const Delim& delimiter,
+    StringPiece input,
+    OutputType& outHead,
+    OutputTypes&... outTail) {
+  size_t cut = input.find(delimiter);
+  if (UNLIKELY(cut == std::string::npos)) {
+    return false;
+  }
+  StringPiece head(input.begin(), input.begin() + cut);
+  StringPiece tail(input.begin() + cut + detail::delimSize(delimiter),
+                   input.end());
+  if (LIKELY(splitFixed<exact>(delimiter, tail, outTail...))) {
+    outHead = folly::to<OutputType>(head);
+    return true;
+  }
+  return false;
+}
+
 }
 
 //////////////////////////////////////////////////////////////////////
@@ -299,11 +396,31 @@ void splitTo(const Delim& delimiter,
     ignoreEmpty);
 }
 
+template <bool exact, class Delim, class... OutputTypes>
+typename std::enable_if<
+    AllConvertible<OutputTypes...>::value && sizeof...(OutputTypes) >= 1,
+    bool>::type
+split(const Delim& delimiter, StringPiece input, OutputTypes&... outputs) {
+  return detail::splitFixed<exact>(
+      detail::prepareDelim(delimiter), input, outputs...);
+}
+
 namespace detail {
 
+/*
+ * If a type can have its string size determined cheaply, we can more
+ * efficiently append it in a loop (see internalJoinAppend). Note that the
+ * struct need not conform to the std::string api completely (ex. does not need
+ * to implement append()).
+ */
+template <class T> struct IsSizableString {
+  enum { value = IsSomeString<T>::value
+         || std::is_same<T, StringPiece>::value };
+};
+
 template <class Iterator>
-struct IsStringContainerIterator :
-  IsSomeString<typename std::iterator_traits<Iterator>::value_type> {
+struct IsSizableStringContainerIterator :
+  IsSizableString<typename std::iterator_traits<Iterator>::value_type> {
 };
 
 template <class Delim, class Iterator, class String>
@@ -324,7 +441,7 @@ void internalJoinAppend(Delim delimiter,
 }
 
 template <class Delim, class Iterator, class String>
-typename std::enable_if<IsStringContainerIterator<Iterator>::value>::type
+typename std::enable_if<IsSizableStringContainerIterator<Iterator>::value>::type
 internalJoin(Delim delimiter,
              Iterator begin,
              Iterator end,
@@ -344,7 +461,8 @@ internalJoin(Delim delimiter,
 }
 
 template <class Delim, class Iterator, class String>
-typename std::enable_if<!IsStringContainerIterator<Iterator>::value>::type
+typename
+std::enable_if<!IsSizableStringContainerIterator<Iterator>::value>::type
 internalJoin(Delim delimiter,
              Iterator begin,
              Iterator end,
@@ -407,8 +525,8 @@ void backslashify(const String1& input, String2& output, bool hex_style) {
 
 template <class String1, class String2>
 void humanify(const String1& input, String2& output) {
-  int numUnprintable = 0;
-  int numPrintablePrefix = 0;
+  size_t numUnprintable = 0;
+  size_t numPrintablePrefix = 0;
   for (unsigned char c : input) {
     if (c < 0x20 || c > 0x7e || c == '\\') {
       ++numUnprintable;
@@ -455,9 +573,9 @@ bool hexlify(const InputString& input, OutputString& output,
   if (!append_output) output.clear();
 
   static char hexValues[] = "0123456789abcdef";
-  int j = output.size();
+  auto j = output.size();
   output.resize(2 * input.size() + output.size());
-  for (int i = 0; i < input.size(); ++i) {
+  for (size_t i = 0; i < input.size(); ++i) {
     int ch = input[i];
     output[j++] = hexValues[(ch >> 4) & 0xf];
     output[j++] = hexValues[ch & 0xf];
@@ -472,17 +590,12 @@ bool unhexlify(const InputString& input, OutputString& output) {
   }
   output.resize(input.size() / 2);
   int j = 0;
-  auto unhex = [](char c) -> int {
-    return c >= '0' && c <= '9' ? c - '0' :
-           c >= 'A' && c <= 'F' ? c - 'A' + 10 :
-           c >= 'a' && c <= 'f' ? c - 'a' + 10 :
-           -1;
-  };
-
-  for (int i = 0; i < input.size(); i += 2) {
-    int highBits = unhex(input[i]);
-    int lowBits = unhex(input[i + 1]);
-    if (highBits < 0 || lowBits < 0) {
+
+  for (size_t i = 0; i < input.size(); i += 2) {
+    int highBits = detail::hexTable[static_cast<uint8_t>(input[i])];
+    int lowBits = detail::hexTable[static_cast<uint8_t>(input[i + 1])];
+    if ((highBits | lowBits) & 0x10) {
+      // One of the characters wasn't a hex digit
       return false;
     }
     output[j++] = (highBits << 4) + lowBits;
@@ -510,6 +623,3 @@ void hexDump(const void* ptr, size_t size, OutIt out) {
 }
 
 }  // namespace folly
-
-#endif /* FOLLY_STRING_INL_H_ */
-