Faster unhexlify
author: Tom Jackson <tjackson@fb.com>
Fri, 18 Nov 2016 02:14:14 +0000 (18:14 -0800)
committer: Facebook Github Bot <facebook-github-bot-bot@fb.com>
Fri, 18 Nov 2016 02:24:27 +0000 (18:24 -0800)
Summary:
Using the already-available lookup table instead of a chain of branches improves read throughput from ~750MB/s to 2.1GB/s in benchmarks.

```
before:        (time)   (rate)
BM_unhexlify   1.39ns  719.26M
after:
BM_unhexlify 470.59ps    2.13G
```

Reviewed By: philippv

Differential Revision: D4201352

fbshipit-source-id: 8393e066c45c402ccb8b537fdb25417e8e6a9511

folly/String-inl.h
folly/test/StringBenchmark.cpp

index 2bf0104a6259f113b825f22cf484e2a4234b8392..1732b50ce3d42feaddc691f01c584ed2cd957dc9 100644 (file)
@@ -587,17 +587,12 @@ bool unhexlify(const InputString& input, OutputString& output) {
   }
   output.resize(input.size() / 2);
   int j = 0;
-  auto unhex = [](char c) -> int {
-    return c >= '0' && c <= '9' ? c - '0' :
-           c >= 'A' && c <= 'F' ? c - 'A' + 10 :
-           c >= 'a' && c <= 'f' ? c - 'a' + 10 :
-           -1;
-  };
 
   for (size_t i = 0; i < input.size(); i += 2) {
-    int highBits = unhex(input[i]);
-    int lowBits = unhex(input[i + 1]);
-    if (highBits < 0 || lowBits < 0) {
+    int highBits = detail::hexTable[static_cast<uint8_t>(input[i])];
+    int lowBits = detail::hexTable[static_cast<uint8_t>(input[i + 1])];
+    if ((highBits | lowBits) & 0x10) {
+      // One of the characters wasn't a hex digit
       return false;
     }
     output[j++] = (highBits << 4) + lowBits;
index 67790d4dee8a5f1eb15811d209b9b151f0fa74a7..d6b57161fbea59ad5a3d9e59dd87c9eb09d7a56d 100644 (file)
@@ -17,8 +17,9 @@
 #include <folly/String.h>
 
 #include <boost/algorithm/string.hpp>
-#include <cstdarg>
 #include <folly/Benchmark.h>
+#include <folly/Random.h>
+#include <cstdarg>
 #include <random>
 
 using namespace folly;
@@ -102,6 +103,10 @@ fbstring uriUnescapedString;
 const size_t kURIBmStringLength = 256;
 const uint32_t kURIPassThroughPercentage = 50;
 
+fbstring hexlifyInput;
+fbstring hexlifyOutput;
+const size_t kHexlifyLength = 1024;
+
 void initBenchmark() {
   std::mt19937 rnd;
 
@@ -145,6 +150,11 @@ void initBenchmark() {
   }
 
   uribmEscapedString = uriEscape<fbstring>(uribmString);
+
+  // hexlify
+  hexlifyInput.resize(kHexlifyLength);
+  Random::secureRandom(&hexlifyInput[0], kHexlifyLength);
+  folly::hexlify(hexlifyInput, hexlifyOutput);
 }
 
 BENCHMARK(BM_cEscape, iters) {
@@ -175,6 +185,18 @@ BENCHMARK(BM_uriUnescape, iters) {
   }
 }
 
+BENCHMARK(BM_unhexlify, iters) {
+  // iters/sec = hex chars read per sec (decoded output is half as many bytes)
+  std::string unhexed;
+  folly::StringPiece hex = hexlifyOutput;
+  for (; iters >= hex.size(); iters -= hex.size()) {
+    folly::unhexlify(hex, unhexed);
+  }
+  iters -= iters % 2; // round down to an even number of chars
+  hex = hex.subpiece(0, iters);
+  folly::unhexlify(hex, unhexed);
+}
+
 } // namespace
 
 //////////////////////////////////////////////////////////////////////