unsplit
author Andrew Tulloch <tulloch@fb.com>
Tue, 5 Feb 2013 00:54:37 +0000 (16:54 -0800)
committer Jordan DeLong <jdelong@fb.com>
Tue, 19 Mar 2013 00:04:36 +0000 (17:04 -0700)
Summary:
1. Incorporates @tjackson's offline comments.
2. See docstrings and examples for basic usage.
3. The use case this covers (for me and others) is where I have some map or
vector of elements and want to form a string representation of it (for
logging, fb303 exported values, etc.). Various uses have existed in fbcode (e.g.
UP2X shard representations), and this seemed like a useful utility.

Test Plan: unit tests.

Reviewed By: tjackson@fb.com

FB internal diff: D696794

folly/experimental/StringGen-inl.h
folly/experimental/StringGen.h
folly/experimental/test/GenBenchmark.cpp
folly/experimental/test/GenTest.cpp

index 005381a1bdab9e8a6cdd3ac34021fae3fdca475f..c3fc4d1214548091e444006f3cd65bc88c5da56e 100644 (file)
@@ -18,6 +18,8 @@
 #error This file may only be included from folly/experimental/StringGen.h
 #endif
 
+#include "folly/Conv.h"
+#include "folly/String.h"
 #include "folly/io/IOBuf.h"
 
 namespace folly {
@@ -151,8 +153,67 @@ class SplitStringSource : public GenImpl<StringPiece, SplitStringSource> {
   }
 };
 
+/**
+ * Unsplit - Joins the tokens from a generator into a newly constructed
+ * `Output` string by delegating to UnsplitBuffer.  This is the inverse of
+ * `split` above.
+ * This type is primarily used through the 'unsplit' function.
+ */
+template<class Delimiter,
+         class Output>
+class Unsplit : public Operator<Unsplit<Delimiter, Output>> {
+  Delimiter delimiter_;  // copy of the delimiter passed to the constructor
+ public:
+  Unsplit(const Delimiter& delimiter)
+    : delimiter_(delimiter) {
+  }
+
+  template<class Source,
+           class Value>
+  Output compose(const GenImpl<Value, Source>& source) const {
+    Output outputBuffer;  // default-constructed; returned once filled
+    UnsplitBuffer<Delimiter, Output> unsplitter(delimiter_, &outputBuffer);
+    unsplitter.compose(source);  // appends every token to outputBuffer
+    return outputBuffer;
+  }
+};
+
+/**
+ * UnsplitBuffer - Joins tokens from a generator and appends them to a
+ * caller-supplied buffer.  Only the pointer is stored, so the buffer must
+ * remain valid until compose() has run; a null pointer fails the CHECK.
+ * This type is primarily used through the 'unsplit' function.
+ */
+template<class Delimiter,
+         class OutputBuffer>
+class UnsplitBuffer : public Operator<UnsplitBuffer<Delimiter, OutputBuffer>> {
+  Delimiter delimiter_;  // copy of the delimiter passed to the constructor
+  OutputBuffer* outputBuffer_;  // destination buffer; never deleted here
+ public:
+  UnsplitBuffer(const Delimiter& delimiter, OutputBuffer* outputBuffer)
+    : delimiter_(delimiter)
+    , outputBuffer_(outputBuffer) {
+    CHECK(outputBuffer);  // a null destination is a caller error
+  }
+
+  template<class Source,
+           class Value>
+  void compose(const GenImpl<Value, Source>& source) const {
+    // If the output buffer is empty, we skip inserting the delimiter for the
+    // first element (and only the first, even if that element is empty).
+    bool skipDelim = outputBuffer_->empty();  // sampled once, up front
+    source | [&](Value v) {
+      if (skipDelim) {  // first token into an initially empty buffer
+        skipDelim = false;
+        toAppend(std::forward<Value>(v), outputBuffer_);
+      } else {  // every other token: delimiter first, then the token
+        toAppend(delimiter_, std::forward<Value>(v), outputBuffer_);
+      }
+    };
+  }
+};
+
 
 }  // namespace detail
 }  // namespace gen
 }  // namespace folly
-
index c614cce56b5428a100e423b4506f48e010773a88..aed03c59d3a5111d5ef1ba3747d2c648d23d6d94 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2013 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,12 @@ namespace gen {
 namespace detail {
 class StringResplitter;
 class SplitStringSource;
+
+template<class Delimiter, class Output>
+class Unsplit;
+
+template<class Delimiter, class OutputBuffer>
+class UnsplitBuffer;
 }  // namespace detail
 
 /**
@@ -46,6 +52,48 @@ S split(const StringPiece& source, char delimiter) {
   return S(source, delimiter);
 }
 
+/*
+ * Returns an operator that joins a sequence of tokens into an `Output`
+ * string (folly::fbstring by default), separated by the chosen delimiter.
+ * E.g.
+ *   fbstring result = split("a,b,c", ',') | unsplit(',');
+ *   assert(result == "a,b,c");
+ *
+ *   std::string result = split("a,b,c", ',') | unsplit<std::string>(' ');
+ *   assert(result == "a b c");
+ */
+
+
+// NOTE: The template arguments are reversed to allow the user to cleanly
+// specify the output type while still inferring the type of the delimiter.
+template<class Output = folly::fbstring,
+         class Delimiter,
+         class Unsplit = detail::Unsplit<Delimiter, Output>>
+Unsplit unsplit(const Delimiter& delimiter) {
+  return Unsplit(delimiter);
+}
+
+/*
+ * Joins a sequence of tokens into a string, appending them to the output
+ * buffer.  If the output buffer is empty, an initial delimiter will not be
+ * inserted at the start.
+ * outputBuffer must not be null.
+ * E.g.
+ *   std::string buffer;
+ *   split("a,b,c", ',') | unsplit(',', &buffer);
+ *   assert(buffer == "a,b,c");
+ *
+ *   std::string anotherBuffer("initial");
+ *   split("a,b,c", ',') | unsplit(',', &anotherBuffer);
+ *   assert(anotherBuffer == "initial,a,b,c");
+ */
+template<class Delimiter,
+         class OutputBuffer,
+         class UnsplitBuffer = detail::UnsplitBuffer<Delimiter, OutputBuffer>>
+UnsplitBuffer unsplit(const Delimiter& delimiter, OutputBuffer* outputBuffer) {
+  return UnsplitBuffer(delimiter, outputBuffer);
+}
+
 }  // namespace gen
 }  // namespace folly
 
index 956fb756f6df11800f52176b1b1715318029b756..d9949117f21d1debdc903cea43638bd5917e9def 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2013 Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,6 +39,12 @@ static vector<int> testVector =
     seq(1, testSize.load())
   | mapped([](int) { return rand(); })
   | as<vector>();
+
+static vector<fbstring> testStrVector =
+    seq(1, testSize.load())
+  | eachTo<fbstring>()  // each integer of the sequence becomes an fbstring
+  | as<vector>();  // shared input of the StringUnsplit_* benchmarks
+
 static vector<vector<int>> testVectorVector =
     seq(1, 100)
   | map([](int i) {
@@ -386,6 +392,76 @@ BENCHMARK_RELATIVE(StringSplit_Gen_Take, iters) {
 
 BENCHMARK_DRAW_LINE()
 
+BENCHMARK(StringUnsplit_Old, iters) {  // baseline: join() into a fresh string
+  size_t s = 0;  // total joined length, handed to doNotOptimizeAway below
+  while (iters--) {
+    fbstring joined;  // new destination on every iteration
+    join(',', testStrVector, joined);
+    s += joined.size();
+  }
+  folly::doNotOptimizeAway(s);
+}
+
+BENCHMARK_RELATIVE(StringUnsplit_Old_ReusedBuffer, iters) {
+  size_t s = 0;  // total joined length, handed to doNotOptimizeAway below
+  fbstring joined;  // one destination shared by all iterations
+  while (iters--) {
+    joined.clear();  // start each join() from an empty buffer
+    join(',', testStrVector, joined);
+    s += joined.size();
+  }
+  folly::doNotOptimizeAway(s);
+}
+
+BENCHMARK_RELATIVE(StringUnsplit_Gen, iters) {
+  size_t s = 0;  // total joined length, handed to doNotOptimizeAway below
+  StringPiece line(kLine);  // NOTE(review): never used in this benchmark
+  while (iters--) {
+    fbstring joined = from(testStrVector) | unsplit(',');  // fresh result
+    s += joined.size();
+  }
+  folly::doNotOptimizeAway(s);
+}
+
+BENCHMARK_RELATIVE(StringUnsplit_Gen_ReusedBuffer, iters) {
+  size_t s = 0;  // total joined length, handed to doNotOptimizeAway below
+  fbstring buffer;  // one destination shared by all iterations
+  while (iters--) {
+    buffer.clear();  // empty buffer, so no leading delimiter is written
+    from(testStrVector) | unsplit(',', &buffer);
+    s += buffer.size();
+  }
+  folly::doNotOptimizeAway(s);
+}
+
+BENCHMARK_DRAW_LINE()
+
+void StringUnsplit_Gen(size_t iters, size_t joinSize) {
+  std::vector<fbstring> v;  // joinSize strings to be joined
+  BENCHMARK_SUSPEND {  // setup block; presumably excluded from the timing
+    FOR_EACH_RANGE(i, 0, joinSize) {
+      v.push_back(to<fbstring>(rand()));  // rand() value converted to text
+    }
+  }
+  size_t s = 0;  // total joined length, handed to doNotOptimizeAway below
+  fbstring buffer;  // one destination shared by all iterations
+  while (iters--) {
+    buffer.clear();  // empty buffer, so no leading delimiter is written
+    from(v) | unsplit(',', &buffer);
+    s += buffer.size();
+  }
+  folly::doNotOptimizeAway(s);
+}
+
+BENCHMARK_DRAW_LINE()  // NOTE(review): no benchmark since the last DRAW_LINE
+
+BENCHMARK_PARAM(StringUnsplit_Gen, 1000)  // second argument is joinSize
+BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 2000)
+BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 4000)
+BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 8000)
+
+BENCHMARK_DRAW_LINE()
+
 BENCHMARK(ByLine_Pipes, iters) {
   std::thread thread;
   int rfd;
@@ -424,43 +500,54 @@ BENCHMARK(ByLine_Pipes, iters) {
 // ============================================================================
 // folly/experimental/test/GenBenchmark.cpp        relative  time/iter  iters/s
 // ============================================================================
-// Sum_Basic_NoGen                                            293.77ns    3.40M
-// Sum_Basic_Gen                                    100.24%   293.08ns    3.41M
+// Sum_Basic_NoGen                                            354.70ns    2.82M
+// Sum_Basic_Gen                                     95.88%   369.92ns    2.70M
+// ----------------------------------------------------------------------------
+// Sum_Vector_NoGen                                           211.89ns    4.72M
+// Sum_Vector_Gen                                    97.49%   217.35ns    4.60M
+// ----------------------------------------------------------------------------
+// Count_Vector_NoGen                                          13.93us   71.78K
+// Count_Vector_Gen                                 106.38%    13.10us   76.36K
+// ----------------------------------------------------------------------------
+// Fib_Sum_NoGen                                                4.54us  220.07K
+// Fib_Sum_Gen                                       45.81%     9.92us  100.82K
+// Fib_Sum_Gen_Static                               100.00%     4.54us  220.05K
 // ----------------------------------------------------------------------------
-// Sum_Vector_NoGen                                           199.09ns    5.02M
-// Sum_Vector_Gen                                    98.57%   201.98ns    4.95M
+// VirtualGen_0Virtual                                         12.03us   83.14K
+// VirtualGen_1Virtual                               32.89%    36.57us   27.34K
+// VirtualGen_2Virtual                               24.98%    48.15us   20.77K
+// VirtualGen_3Virtual                               17.82%    67.49us   14.82K
 // ----------------------------------------------------------------------------
-// Count_Vector_NoGen                                          12.40us   80.66K
-// Count_Vector_Gen                                 103.07%    12.03us   83.13K
+// Concat_NoGen                                                 1.92us  520.46K
+// Concat_Gen                                       102.79%     1.87us  534.97K
 // ----------------------------------------------------------------------------
-// Fib_Sum_NoGen                                                3.65us  274.29K
-// Fib_Sum_Gen                                       41.95%     8.69us  115.06K
-// Fib_Sum_Gen_Static                                86.10%     4.23us  236.15K
+// Composed_NoGen                                             545.64ns    1.83M
+// Composed_Gen                                      99.65%   547.55ns    1.83M
+// Composed_GenRegular                               99.64%   547.62ns    1.83M
 // ----------------------------------------------------------------------------
-// VirtualGen_0Virtual                                         10.10us   99.03K
-// VirtualGen_1Virtual                               29.67%    34.04us   29.38K
-// VirtualGen_2Virtual                               20.53%    49.19us   20.33K
-// VirtualGen_3Virtual                               15.22%    66.36us   15.07K
+// StringResplitter_Big                                       120.88us    8.27K
+// StringResplitter_Small                            14.39%   839.94us    1.19K
 // ----------------------------------------------------------------------------
-// Concat_NoGen                                                 2.33us  428.35K
-// Concat_Gen                                        85.36%     2.74us  365.62K
+// StringSplit_Old                                            421.09ns    2.37M
+// StringSplit_Gen_Vector                            97.73%   430.87ns    2.32M
 // ----------------------------------------------------------------------------
-// Composed_NoGen                                             552.78ns    1.81M
-// Composed_Gen                                     100.48%   550.14ns    1.82M
-// Composed_GenRegular                              100.60%   549.50ns    1.82M
+// StringSplit_Old_ReuseVector                                 80.25ns   12.46M
+// StringSplit_Gen_ReuseVector                       98.99%    81.07ns   12.34M
+// StringSplit_Gen                                  117.23%    68.45ns   14.61M
+// StringSplit_Gen_Take                             115.23%    69.64ns   14.36M
 // ----------------------------------------------------------------------------
-// StringResplitter_Big                                       118.40us    8.45K
-// StringResplitter_Small                            12.96%   913.23us    1.10K
+// StringUnsplit_Old                                           34.45us   29.02K
+// StringUnsplit_Old_ReusedBuffer                   100.37%    34.33us   29.13K
+// StringUnsplit_Gen                                106.27%    32.42us   30.84K
+// StringUnsplit_Gen_ReusedBuffer                   105.61%    32.62us   30.65K
 // ----------------------------------------------------------------------------
-// StringSplit_Old                                            567.61ns    1.76M
-// StringSplit_Gen_Vector                           146.52%   387.41ns    2.58M
 // ----------------------------------------------------------------------------
-// StringSplit_Old_ReuseVector                                 74.90ns   13.35M
-// StringSplit_Gen_ReuseVector                      112.29%    66.71ns   14.99M
-// StringSplit_Gen                                  122.42%    61.18ns   16.34M
-// StringSplit_Gen_Take                             134.49%    55.70ns   17.95M
+// StringUnsplit_Gen(1000)                                     32.20us   31.06K
+// StringUnsplit_Gen(2000)                           49.41%    65.17us   15.34K
+// StringUnsplit_Gen(4000)                           22.75%   141.52us    7.07K
+// StringUnsplit_Gen(8000)                           11.20%   287.53us    3.48K
 // ----------------------------------------------------------------------------
-// ByLine_Pipes                                               131.18ns    7.62M
+// ByLine_Pipes                                               126.58ns    7.90M
 // ============================================================================
 
 int main(int argc, char *argv[]) {
index a4d1a77eef422f5f4ebbba98f08774412516bfe0..355cd41def88e0613b703dbc5370dc8f4bb60ad0 100644 (file)
@@ -719,6 +719,41 @@ TEST(StringGen, Resplit) {
   }
 }
 
+template<typename F>
+void runUnsplitSuite(F fn) {  // calls fn once with each sample input below
+  fn("hello, world");
+  fn("hello,world,goodbye");
+  fn(" ");  // no comma at all
+  fn("");  // empty input
+  fn(", ");  // input begins with the delimiter
+  fn(", a, b,c");
+}
+
+TEST(StringGen, Unsplit) {
+
+  auto basicFn = [](const StringPiece& s) {  // split then unsplit gives s back
+    EXPECT_EQ(split(s, ',') | unsplit(','), s);
+  };
+
+  auto existingBuffer = [](const StringPiece& s) {  // buffer starts non-empty
+    folly::fbstring buffer("asdf");
+    split(s, ',') | unsplit(',', &buffer);
+    auto expected = folly::to<folly::fbstring>(
+        "asdf", s.empty() ? "" : ",", s);  // empty s: buffer left unchanged
+    EXPECT_EQ(buffer, expected);
+  };
+
+  auto emptyBuffer = [](const StringPiece& s) {  // buffer starts empty
+    std::string buffer;
+    split(s, ',') | unsplit(',', &buffer);
+    EXPECT_EQ(buffer, s);  // no delimiter may precede the first token
+  };
+
+  runUnsplitSuite(basicFn);
+  runUnsplitSuite(existingBuffer);
+  runUnsplitSuite(emptyBuffer);
+}
+
 TEST(FileGen, ByLine) {
   auto collect = eachTo<std::string>() | as<vector>();
   test::TemporaryFile file("ByLine");