Move and refactor code

author Chip Turner <chip@fb.com>

Fri, 8 Jun 2012 21:53:51 +0000 (14:53 -0700)

committer Tudor Bosman <tudorb@fb.com>

Thu, 14 Jun 2012 19:35:46 +0000 (12:35 -0700)
author Chip Turner <chip@fb.com>
Fri, 8 Jun 2012 21:53:51 +0000 (14:53 -0700)
committer Tudor Bosman <tudorb@fb.com>
Thu, 14 Jun 2012 19:35:46 +0000 (12:35 -0700)
diff --git a/folly/Hash.h b/folly/Hash.h

index a52da9c634e6ff0206b17e327b852896281ed786..a6e330465a9b323f859155e9527687c5a620d3ae 100644 (file)
--- a/folly/Hash.h
+++ b/folly/Hash.h
@@ -17,9 +17,10 @@
  #ifndef FOLLY_BASE_HASH_H_
  #define FOLLY_BASE_HASH_H_
  
-#include <stdint.h>
  #include <cstring>
+#include <stdint.h>
  #include <string>
+#include <utility>
  
  /*
   * Various hashing functions.
@@ -27,6 +28,40 @@
  
  namespace folly { namespace hash {
  
+// This is a general-purpose way to create a single hash from multiple
+// hashable objects. It relies on std::hash<T> being available for all
+// relevant types and combines those hashes in an order-dependent way
+// to yield a new hash.
+
+// Never used, but gcc demands it.
+inline size_t hash_combine() {
+  return 0;
+}
+
+// This is the Hash128to64 function from Google's cityhash (available
+// under the MIT License).  We use it to reduce multiple 64 bit hashes
+// into a single hash.
+inline size_t hash_128_to_64(const size_t upper, const size_t lower) {
+  // Murmur-inspired hashing.
+  const size_t kMul = 0x9ddfea08eb382d69ULL;
+  size_t a = (lower ^ upper) * kMul;
+  a ^= (a >> 47);
+  size_t b = (upper ^ a) * kMul;
+  b ^= (b >> 47);
+  b *= kMul;
+  return b;
+}
+
+template <typename T, typename... Ts>
+size_t hash_combine(const T& t, const Ts&... ts) {
+  size_t seed = std::hash<T>()(t);
+  if (sizeof...(ts) == 0) {
+    return seed;
+  }
+  size_t remainder = hash_combine(ts...);
+  return hash_128_to_64(seed, remainder);
+}
+
  //////////////////////////////////////////////////////////////////////
  
  /*
@@ -240,4 +275,26 @@ template<> struct hasher<uint64_t> {
  
  } // namespace folly
  
+// Custom hash functions.
+namespace std {
+  // Hash function for pairs. Requires default hash functions for both
+  // items in the pair.
+  template <typename T1, typename T2>
+  class hash<std::pair<T1, T2> > {
+  public:
+    size_t operator()(const std::pair<T1, T2>& x) const {
+      return folly::hash::hash_combine(x.first, x.second);
+    }
+  };
+
+  // Same as above, but for arbitrary tuples.
+  template <typename... Ts>
+  class hash<std::tuple<Ts...> > {
+  public:
+    size_t operator()(const Ts&... ts) const {
+      return folly::hash::hash_combine(ts...);
+    }
+  };
+} // namespace std
+
  #endif
diff --git a/folly/Logging.h b/folly/Logging.h

new file mode 100644 (file)

index 0000000..160ad8e
--- /dev/null
+++ b/folly/Logging.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2012 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_LOGGING_H_
+#define FOLLY_LOGGING_H_
+
+#include <time.h>
+#include <glog/logging.h>
+
+#ifndef FB_LOG_EVERY_MS
+/**
+ * Issues a LOG(severity) no more often that every
+ * milliseconds. Example:
+ *
+ * FB_LOG_EVERY_MS(INFO, 10000) << "At least ten seconds passed"
+ *   " since you last saw this.";
+ *
+ * The implementation uses for statements to introduce variables in
+ * a nice way that doesn't mess surrounding statements.
+ */
+#define FB_LOG_EVERY_MS(severity, milliseconds)                         \
+  for (bool LOG_EVERY_MS_once = true; LOG_EVERY_MS_once; )              \
+    for (const ::clock_t LOG_EVERY_MS_now = ::clock(); LOG_EVERY_MS_once; ) \
+      for (static ::clock_t LOG_EVERY_MS_last; LOG_EVERY_MS_once;       \
+           LOG_EVERY_MS_once = false)                                   \
+        if (1000 * (LOG_EVERY_MS_now - LOG_EVERY_MS_last) <             \
+            (milliseconds) * CLOCKS_PER_SEC) {}                         \
+        else                                                            \
+          (LOG_EVERY_MS_last = LOG_EVERY_MS_now, LOG(severity))
+
+#endif
+
+#endif  // FOLLY_LOGGING_H_
diff --git a/folly/Makefile.am b/folly/Makefile.am

index 68100f931c312403788d918a2c39e4e1342d3f1a..49573d0d909c01939a99d59838447005e32c9a00 100644 (file)
--- a/folly/Makefile.am
+++ b/folly/Makefile.am
@@ -52,6 +52,7 @@ nobase_follyinclude_HEADERS = \
         IntrusiveList.h \
         json.h \
         Likely.h \
+       Logging.h \
         Malloc.h \
         MapUtil.h \
         PackedSyncPtr.h \
diff --git a/folly/String-inl.h b/folly/String-inl.h

index af4c4dcfd38d9a9c71769df8ddfba41bdfff9e03..3829f1f872fe951cbc6d36bd7508e553be8c9502 100644 (file)
--- a/folly/String-inl.h
+++ b/folly/String-inl.h
@@ -298,6 +298,126 @@ void splitTo(const Delim& delimiter,
      ignoreEmpty);
  }
  
+template <class String1, class String2>
+void backslashify(const String1& input, String2& output, bool hex_style) {
+  static const char hexValues[] = "0123456789abcdef";
+  output.clear();
+  output.reserve(3 * input.size());
+  for (unsigned char c : input) {
+    // less than space or greater than '~' are considered unprintable
+    if (c < 0x20 || c > 0x7e || c == '\\') {
+      bool hex_append = false;
+      output.push_back('\\');
+      if (hex_style) {
+        hex_append = true;
+      } else {
+        if (c == '\r') output += 'r';
+        else if (c == '\n') output += 'n';
+        else if (c == '\t') output += 't';
+        else if (c == '\a') output += 'a';
+        else if (c == '\b') output += 'b';
+        else if (c == '\0') output += '0';
+        else if (c == '\\') output += '\\';
+        else {
+          hex_append = true;
+        }
+      }
+      if (hex_append) {
+        output.push_back('x');
+        output.push_back(hexValues[(c >> 4) & 0xf]);
+        output.push_back(hexValues[c & 0xf]);
+      }
+    } else {
+      output += c;
+    }
+  }
+}
+
+template <class String1, class String2>
+void humanify(const String1& input, String2& output) {
+  int numUnprintable = 0;
+  int numPrintablePrefix = 0;
+  for (unsigned char c : input) {
+    if (c < 0x20 || c > 0x7e || c == '\\') {
+      ++numUnprintable;
+    }
+    if (numUnprintable == 0) {
+      ++numPrintablePrefix;
+    }
+  }
+
+  // hexlify doubles a string's size; backslashify can potentially
+  // explode it by 4x.  Now, the printable range of the ascii
+  // "spectrum" is around 95 out of 256 values, so a "random" binary
+  // string should be around 60% unprintable.  We use a 50% hueristic
+  // here, so if a string is 60% unprintable, then we just use hex
+  // output.  Otherwise we backslash.
+  //
+  // UTF8 is completely ignored; as a result, utf8 characters will
+  // likely be \x escaped (since most common glyphs fit in two bytes).
+  // This is a tradeoff of complexity/speed instead of a convenience
+  // that likely would rarely matter.  Moreover, this function is more
+  // about displaying underlying bytes, not about displaying glyphs
+  // from languages.
+  if (numUnprintable == 0) {
+    output = input;
+  } else if (5 * numUnprintable >= 3 * input.size()) {
+    // However!  If we have a "meaningful" prefix of printable
+    // characters, say 20% of the string, we backslashify under the
+    // assumption viewing the prefix as ascii is worth blowing the
+    // output size up a bit.
+    if (5 * numPrintablePrefix >= input.size()) {
+      backslashify(input, output);
+    } else {
+      output = "0x";
+      hexlify(input, output, true /* append output */);
+    }
+  } else {
+    backslashify(input, output);
+  }
+}
+
+template<class InputString, class OutputString>
+bool hexlify(const InputString& input, OutputString& output,
+             bool append_output=false) {
+  if (!append_output) output.clear();
+
+  static char hexValues[] = "0123456789abcdef";
+  int j = output.size();
+  output.resize(2 * input.size() + output.size());
+  for (int i = 0; i < input.size(); ++i) {
+    int ch = input[i];
+    output[j++] = hexValues[(ch >> 4) & 0xf];
+    output[j++] = hexValues[ch & 0xf];
+  }
+  return true;
+}
+
+template<class InputString, class OutputString>
+bool unhexlify(const InputString& input, OutputString& output) {
+  if (input.size() % 2 != 0) {
+    return false;
+  }
+  output.resize(input.size() / 2);
+  int j = 0;
+  auto unhex = [](char c) -> int {
+    return c >= '0' && c <= '9' ? c - '0' :
+           c >= 'A' && c <= 'F' ? c - 'A' + 10 :
+           c >= 'a' && c <= 'f' ? c - 'a' + 10 :
+           -1;
+  };
+
+  for (int i = 0; i < input.size(); i += 2) {
+    int highBits = unhex(input[i]);
+    int lowBits = unhex(input[i + 1]);
+    if (highBits < 0 || lowBits < 0) {
+      return false;
+    }
+    output[j++] = (highBits << 4) + lowBits;
+  }
+  return true;
+}
+
  namespace detail {
  /**
   * Hex-dump at most 16 bytes starting at offset from a memory area of size
diff --git a/folly/String.h b/folly/String.h

index 57dfe876be99ffd5a10fabe7a1dd37213771806d..c193054ab28f83a73bca0bf122f8b3563d2430d9 100644 (file)
--- a/folly/String.h
+++ b/folly/String.h
@@ -128,6 +128,74 @@ void stringPrintf(std::string* out, const char* fmt, ...)
  std::string& stringAppendf(std::string* output, const char* format, ...)
    __attribute__ ((format (printf, 2, 3)));
  
+/**
+ * Backslashify a string, that is, replace non-printable characters
+ * with C-style (but NOT C compliant) "\xHH" encoding.  If hex_style
+ * is false, then shorthand notations like "\0" will be used instead
+ * of "\x00" for the most common backslash cases.
+ *
+ * There are two forms, one returning the input string, and one
+ * creating output in the specified output string.
+ *
+ * This is mainly intended for printing to a terminal, so it is not
+ * particularly optimized.
+ *
+ * Do *not* use this in situations where you expect to be able to feed
+ * the string to a C or C++ compiler, as there are nuances with how C
+ * parses such strings that lead to failures.  This is for display
+ * purposed only.  If you want a string you can embed for use in C or
+ * C++, use cEscape instead.  This function is for display purposes
+ * only.
+ */
+template <class String1, class String2>
+void backslashify(const String1& input, String2& output, bool hex_style=false);
+
+template <class String>
+String backslashify(const String& input, bool hex_style=false) {
+  String output;
+  backslashify(input, output, hex_style);
+  return output;
+}
+
+/**
+ * Take a string and "humanify" it -- that is, make it look better.
+ * Since "better" is subjective, caveat emptor.  The basic approach is
+ * to count the number of unprintable characters.  If there are none,
+ * then the output is the input.  If there are relatively few, or if
+ * there is a long "enough" prefix of printable characters, use
+ * backslashify.  If it is mostly binary, then simply hex encode.
+ *
+ * This is an attempt to make a computer smart, and so likely is wrong
+ * most of the time.
+ */
+template <class String1, class String2>
+void humanify(const String1& input, String2& output);
+
+template <class String>
+String humanify(const String& input) {
+  String output;
+  humanify(input, output);
+  return output;
+}
+
+/**
+ * Same functionality as Python's binascii.hexlify.  Returns true
+ * on successful conversion.
+ *
+ * If append_output is true, append data to the output rather than
+ * replace it.
+ */
+template<class InputString, class OutputString>
+bool hexlify(const InputString& input, OutputString& output,
+             bool append=false);
+
+/**
+ * Same functionality as Python's binascii.unhexlify.  Returns true
+ * on successful conversion.
+ */
+template<class InputString, class OutputString>
+bool unhexlify(const InputString& input, OutputString& output);
+
  /*
   * A pretty-printer for numbers that appends suffixes of units of the
   * given type.  It prints 4 sig-figs of value with the most
diff --git a/folly/test/HashTest.cpp b/folly/test/HashTest.cpp

index 6750eab7170779a06f832d189ed6d6382db7975e..f06f8b84495c52523330bed7a5274c720a074424 100644 (file)
--- a/folly/test/HashTest.cpp
+++ b/folly/test/HashTest.cpp
@@ -135,3 +135,7 @@ TEST(Hash, hasher) {
    m.insert(std::make_pair(4, 5));
    EXPECT_EQ(get_default(m, 4), 5);
  }
+
+TEST(Hash, hash_combine) {
+  EXPECT_NE(hash_combine(1, 2), hash_combine(2, 1));
+}
diff --git a/folly/test/StringTest.cpp b/folly/test/StringTest.cpp

index eab3912bdce7af7400158ba9bb6937cf622fa413..32de31947228423017a93b3fb9400949d3d28cde 100644 (file)
--- a/folly/test/StringTest.cpp
+++ b/folly/test/StringTest.cpp
@@ -604,6 +604,83 @@ TEST(Split, pieces_fbvector) {
    piecesTest<folly::fbvector>();
  }
  
+TEST(String, hexlify) {
+  string input1 = "0123";
+  string output1;
+  EXPECT_TRUE(hexlify(input1, output1));
+  EXPECT_EQ(output1, "30313233");
+
+  fbstring input2 = "abcdefg";
+  input2[1] = 0;
+  input2[3] = 0xff;
+  input2[5] = 0xb6;
+  fbstring output2;
+  EXPECT_TRUE(hexlify(input2, output2));
+  EXPECT_EQ(output2, "610063ff65b667");
+}
+
+TEST(String, unhexlify) {
+  string input1 = "30313233";
+  string output1;
+  EXPECT_TRUE(unhexlify(input1, output1));
+  EXPECT_EQ(output1, "0123");
+
+  fbstring input2 = "610063ff65b667";
+  fbstring output2;
+  EXPECT_TRUE(unhexlify(input2, output2));
+  EXPECT_EQ(output2.size(), 7);
+  EXPECT_EQ(output2[0], 'a');
+  EXPECT_EQ(output2[1], 0);
+  EXPECT_EQ(output2[2], 'c');
+  EXPECT_EQ(output2[3] & 0xff, 0xff);
+  EXPECT_EQ(output2[4], 'e');
+  EXPECT_EQ(output2[5] & 0xff, 0xb6);
+  EXPECT_EQ(output2[6], 'g');
+
+  string input3 = "x";
+  string output3;
+  EXPECT_FALSE(unhexlify(input3, output3));
+
+  string input4 = "xy";
+  string output4;
+  EXPECT_FALSE(unhexlify(input4, output4));
+}
+
+TEST(String, backslashify) {
+  EXPECT_EQ("abc", string("abc"));
+  EXPECT_EQ("abc", backslashify(string("abc")));
+  EXPECT_EQ("abc\\r", backslashify(string("abc\r")));
+  EXPECT_EQ("abc\\x0d", backslashify(string("abc\r"), true));
+  EXPECT_EQ("\\0\\0", backslashify(string(2, '\0')));
+}
+
+TEST(String, humanify) {
+  // Simple cases; output is obvious.
+  EXPECT_EQ("abc", humanify(string("abc")));
+  EXPECT_EQ("abc\\\\r", humanify(string("abc\\r")));
+  EXPECT_EQ("0xff", humanify(string("\xff")));
+  EXPECT_EQ("abc\\xff", humanify(string("abc\xff")));
+  EXPECT_EQ("abc\\b", humanify(string("abc\b")));
+  EXPECT_EQ("0x00", humanify(string(1, '\0')));
+  EXPECT_EQ("0x0000", humanify(string(2, '\0')));
+
+
+  // Mostly printable, so backslash!  80, 60, and 40% printable, respectively
+  EXPECT_EQ("aaaa\\xff", humanify(string("aaaa\xff")));
+  EXPECT_EQ("aaa\\xff\\xff", humanify(string("aaa\xff\xff")));
+  EXPECT_EQ("aa\\xff\\xff\\xff", humanify(string("aa\xff\xff\xff")));
+
+  // 20% printable, and the printable portion isn't the prefix; hexify!
+  EXPECT_EQ("0xff61ffffff", humanify(string("\xff" "a\xff\xff\xff")));
+
+  // Same as previous, except swap first two chars; prefix is
+  // printable and within the threshold, so backslashify.
+  EXPECT_EQ("a\\xff\\xff\\xff\\xff", humanify(string("a\xff\xff\xff\xff")));
+
+  // Just too much unprintable; hex, despite prefix.
+  EXPECT_EQ("0x61ffffffffff", humanify(string("a\xff\xff\xff\xff\xff")));
+}
+
  //////////////////////////////////////////////////////////////////////
  
  BENCHMARK(splitOnSingleChar, iters) {
author	Chip Turner <chip@fb.com>
	Fri, 8 Jun 2012 21:53:51 +0000 (14:53 -0700)
committer	Tudor Bosman <tudorb@fb.com>
	Thu, 14 Jun 2012 19:35:46 +0000 (12:35 -0700)
folly/Hash.h		patch \| blob \| history
folly/Logging.h	[new file with mode: 0644]	patch \| blob
folly/Makefile.am		patch \| blob \| history
folly/String-inl.h		patch \| blob \| history
folly/String.h		patch \| blob \| history
folly/test/HashTest.cpp		patch \| blob \| history
folly/test/StringTest.cpp		patch \| blob \| history