eachToTuple<Types...>()
authorTom Jackson <tjackson@fb.com>
Wed, 29 May 2013 01:17:37 +0000 (18:17 -0700)
committerOwen Yamauchi <oyamauchi@fb.com>
Mon, 3 Jun 2013 19:23:32 +0000 (12:23 -0700)
Summary: For parsing records. `eachToPair` might be a worthwhile specialization.

Test Plan: Unit tests, benchmarks maybe?

Reviewed By: antoine@fb.com

FB internal diff: D827441

folly/experimental/Gen.h
folly/experimental/StringGen-inl.h
folly/experimental/StringGen.h
folly/experimental/test/GenBenchmark.cpp
folly/experimental/test/GenTest.cpp

index 264ebf665af46aae7198e99c8136079ce52101a0..47fab1976a81a51b788cc1660e52fb5f04855be3 100644 (file)
@@ -220,6 +220,15 @@ class To {
   }
 };
 
+// Specialization to allow String->StringPiece conversion
+// The call operator takes a StringPiece and hands the same value back; no
+// parsing or conversion is performed for this target type.
+template <>
+class To<StringPiece> {
+ public:
+  StringPiece operator()(StringPiece src) const {
+    return src;
+  }
+};
+
 namespace detail {
 
 template<class Self>
index 8ddab93af6cd67fe8f04fcfe910afeda33fa593a..a763dda0ec1a3572149d2f422d83bbbd3d76133d 100644 (file)
@@ -216,6 +216,48 @@ class UnsplitBuffer : public Operator<UnsplitBuffer<Delimiter, OutputBuffer>> {
 };
 
 
+/**
+ * Hack for static for-like constructs
+ *
+ * Returns its argument unchanged. The second, unnamed template parameter is
+ * never used by the body; it gives callers a place to name a parameter pack
+ * (e.g. passthrough<StringPiece&, Targets>(expr)...), so that 'expr' is
+ * repeated once per element of the pack.
+ */
+template<class Target, class=void>
+inline Target passthrough(Target target) { return target; }
+
+/**
+ * SplitTo - For splitting a record and immediately converting it to a
+ * target container type. Primarily used through the 'eachToTuple' and
+ * 'eachToPair' helpers, like so:
+ *
+ *  auto config
+ *    = split("1:a 2:b", ' ')
+ *    | eachToTuple<int, string>(':')
+ *    | as<vector<tuple<int, string>>>();
+ *
+ * operator() splits one line on the stored delimiter into
+ * sizeof...(Targets) fields, converts each field with the To<> functor of
+ * the corresponding target type, and constructs TargetContainer from the
+ * converted values. It throws std::runtime_error("field count mismatch")
+ * when split() reports failure.
+ */
+template<class TargetContainer,
+         class Delimiter,
+         class... Targets>
+class SplitTo {
+  Delimiter delimiter_;
+ public:
+  explicit SplitTo(Delimiter delimiter)
+    : delimiter_(delimiter) {}
+
+  TargetContainer operator()(StringPiece line) const {
+    int i = 0;
+    StringPiece fields[sizeof...(Targets)];
+    // HACK(tjackson): Used for referencing fields[] corresponding to variadic
+    // template parameters.
+    auto eatField = [&]() -> StringPiece& { return fields[i++]; };
+    // NOTE(review): both pack expansions below sit in call argument lists,
+    // where C++ leaves the evaluation order unspecified. The field <-> target
+    // pairing therefore depends on the compiler calling eatField() in the
+    // same order for both expansions -- confirm on each supported toolchain.
+    if (!split(delimiter_,
+               line,
+               detail::passthrough<StringPiece&, Targets>(eatField())...)) {
+      throw std::runtime_error("field count mismatch");
+    }
+    // Rewind so the second expansion walks fields[] from the start again.
+    i = 0;
+    return TargetContainer(To<Targets>()(eatField())...);
+  }
+};
+
 }  // namespace detail
+
 }  // namespace gen
 }  // namespace folly
index 14e5115d12bd0598865b6088dd654a7654912054..e4b02d9adfe18ecfa8d93b4be5e63cfc91902ba5 100644 (file)
@@ -18,6 +18,7 @@
 #define FOLLY_STRINGGEN_H_
 
 #include "folly/Range.h"
+#include "folly/experimental/Gen.h"
 
 namespace folly {
 namespace gen {
@@ -31,6 +32,12 @@ class Unsplit;
 
 template<class Delimiter, class OutputBuffer>
 class UnsplitBuffer;
+
+template<class TargetContainer,
+         class Delimiter,
+         class... Targets>
+class SplitTo;
+
 }  // namespace detail
 
 /**
@@ -96,7 +103,7 @@ Unsplit unsplit(const char* delimiter) {
 template<class Delimiter,
          class OutputBuffer,
          class UnsplitBuffer = detail::UnsplitBuffer<Delimiter, OutputBuffer>>
-UnsplitBuffer unsplit(const Delimiter& delimiter, OutputBuffer* outputBuffer) {
+UnsplitBuffer unsplit(Delimiter delimiter, OutputBuffer* outputBuffer) {
   return UnsplitBuffer(delimiter, outputBuffer);
 }
 
@@ -106,6 +113,40 @@ UnsplitBuffer unsplit(const char* delimiter, OutputBuffer* outputBuffer) {
   return UnsplitBuffer(delimiter, outputBuffer);
 }
 
+
+/**
+ * eachToTuple<Targets...>(delim) - single-character delimiter overload.
+ *
+ * Returns a Map operator wrapping a detail::SplitTo that produces a
+ * std::tuple<Targets...> from each input value, using 'delim' as the field
+ * delimiter.
+ */
+template<class... Targets>
+detail::Map<detail::SplitTo<std::tuple<Targets...>, char, Targets...>>
+eachToTuple(char delim) {
+  return detail::Map<
+    detail::SplitTo<std::tuple<Targets...>, char, Targets...>>(
+    detail::SplitTo<std::tuple<Targets...>, char, Targets...>(delim));
+}
+
+/**
+ * eachToTuple<Targets...>(delim) - string delimiter overload.
+ *
+ * Returns a Map operator wrapping a detail::SplitTo that produces a
+ * std::tuple<Targets...> from each input value. The SplitTo stores its
+ * delimiter as an fbstring, so 'delim' is converted explicitly with
+ * to<fbstring>() rather than relying on an implicit StringPiece -> fbstring
+ * conversion; this mirrors eachToPair(StringPiece).
+ */
+template<class... Targets>
+detail::Map<detail::SplitTo<std::tuple<Targets...>, fbstring, Targets...>>
+eachToTuple(StringPiece delim) {
+  return detail::Map<
+    detail::SplitTo<std::tuple<Targets...>, fbstring, Targets...>>(
+    detail::SplitTo<std::tuple<Targets...>, fbstring, Targets...>(
+      to<fbstring>(delim)));
+}
+
+/**
+ * eachToPair<First, Second>(delim) - single-character delimiter overload.
+ *
+ * Returns a Map operator wrapping a detail::SplitTo that produces a
+ * std::pair<First, Second> from each input value, using 'delim' as the field
+ * delimiter.
+ */
+template<class First, class Second>
+detail::Map<detail::SplitTo<std::pair<First, Second>, char, First, Second>>
+eachToPair(char delim) {
+  return detail::Map<
+    detail::SplitTo<std::pair<First, Second>, char, First, Second>>(
+    detail::SplitTo<std::pair<First, Second>, char, First, Second>(delim));
+}
+
+/**
+ * eachToPair<First, Second>(delim) - string delimiter overload.
+ *
+ * Returns a Map operator wrapping a detail::SplitTo that produces a
+ * std::pair<First, Second> from each input value. 'delim' is converted with
+ * to<fbstring>() before being handed to the SplitTo, whose delimiter type is
+ * fbstring.
+ */
+template<class First, class Second>
+detail::Map<detail::SplitTo<std::pair<First, Second>, fbstring, First, Second>>
+eachToPair(StringPiece delim) {
+  return detail::Map<
+    detail::SplitTo<std::pair<First, Second>, fbstring, First, Second>>(
+    detail::SplitTo<std::pair<First, Second>, fbstring, First, Second>(
+      to<fbstring>(delim)));
+}
+
 }  // namespace gen
 }  // namespace folly
 
index 20516a215f354eb91486cc30d760607a66420c6e..2b03ca0f600dc661e53220c7c142b053478c799d 100644 (file)
@@ -490,8 +490,6 @@ void StringUnsplit_Gen(size_t iters, size_t joinSize) {
   folly::doNotOptimizeAway(s);
 }
 
-BENCHMARK_DRAW_LINE()
-
 BENCHMARK_PARAM(StringUnsplit_Gen, 1000)
 BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 2000)
 BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 4000)
@@ -499,6 +497,84 @@ BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 8000)
 
 BENCHMARK_DRAW_LINE()
 
+// Input shared by the Records_* benchmarks below: one line per generated
+// value i, holding "i i*i i*i*i" separated by single spaces, with the lines
+// joined by '\n'. Presumably 1000 lines (seq(1, 1000) looks inclusive --
+// TODO confirm), which would match the 'i += 1000' stride in those benchmarks.
+fbstring records
+= seq<size_t>(1, 1000)
+  | mapped([](size_t i) {
+      return folly::to<fbstring>(i, ' ', i * i, ' ', i * i * i);
+    })
+  | unsplit('\n');
+
+// Baseline for the Records_* group: parses every line of 'records' with
+// eachToTuple and sums the second field.
+BENCHMARK(Records_EachToTuple, iters) {
+  size_t s = 0;
+  // Each pass walks all of 'records', so the counter advances by 1000.
+  for (size_t i = 0; i < iters; i += 1000) {
+    s += split(records, '\n')
+       | eachToTuple<int, size_t, StringPiece>(' ')
+       | get<1>()
+       | sum;
+  }
+  folly::doNotOptimizeAway(s);
+}
+
+// Hand-written equivalent of Records_EachToTuple: splits each line into a
+// vector<StringPiece> and converts the fields manually. The vector is
+// declared once outside the loop and cleared per line, so it is reused
+// across all lines.
+BENCHMARK_RELATIVE(Records_VectorStringPieceReused, iters) {
+  size_t s = 0;
+  std::vector<StringPiece> fields;
+  for (size_t i = 0; i < iters; i += 1000) {
+    s += split(records, '\n')
+       | mapped([&](StringPiece line) {
+           fields.clear();
+           folly::split(' ', line, fields);
+           CHECK(fields.size() == 3);
+           return std::make_tuple(
+             folly::to<int>(fields[0]),
+             folly::to<size_t>(fields[1]),
+             StringPiece(fields[2]));
+         })
+       | get<1>()
+       | sum;
+  }
+  folly::doNotOptimizeAway(s);
+}
+
+// Same as Records_VectorStringPieceReused, except that a fresh
+// vector<StringPiece> is constructed inside the lambda for every line.
+BENCHMARK_RELATIVE(Records_VectorStringPiece, iters) {
+  size_t s = 0;
+  for (size_t i = 0; i < iters; i += 1000) {
+    s += split(records, '\n')
+       | mapped([](StringPiece line) {
+           std::vector<StringPiece> fields;
+           folly::split(' ', line, fields);
+           CHECK(fields.size() == 3);
+           return std::make_tuple(
+             folly::to<int>(fields[0]),
+             folly::to<size_t>(fields[1]),
+             StringPiece(fields[2]));
+         })
+       | get<1>()
+       | sum;
+  }
+  folly::doNotOptimizeAway(s);
+}
+
+// Same as Records_VectorStringPiece, except that the fields are split into a
+// per-line vector<std::string> instead of a vector<StringPiece>.
+BENCHMARK_RELATIVE(Records_VectorString, iters) {
+  size_t s = 0;
+  for (size_t i = 0; i < iters; i += 1000) {
+    s += split(records, '\n')
+       | mapped([](StringPiece line) {
+           std::vector<std::string> fields;
+           folly::split(' ', line, fields);
+           CHECK(fields.size() == 3);
+           // NOTE(review): the third tuple element is built from fields[2],
+           // a std::string owned by this lambda-local vector. Assuming
+           // StringPiece is a non-owning view (confirm), it must not be read
+           // after the lambda returns; only element 1 is used below.
+           return std::make_tuple(
+             folly::to<int>(fields[0]),
+             folly::to<size_t>(fields[1]),
+             StringPiece(fields[2]));
+         })
+       | get<1>()
+       | sum;
+  }
+  folly::doNotOptimizeAway(s);
+}
+
+BENCHMARK_DRAW_LINE()
+
 BENCHMARK(ByLine_Pipes, iters) {
   std::thread thread;
   int rfd;
@@ -532,59 +608,66 @@ BENCHMARK(ByLine_Pipes, iters) {
   }
 }
 
-// Results from a dual core Xeon L5520 @ 2.27GHz:
-//
 // ============================================================================
 // folly/experimental/test/GenBenchmark.cpp        relative  time/iter  iters/s
 // ============================================================================
-// Sum_Basic_NoGen                                            354.70ns    2.82M
-// Sum_Basic_Gen                                     95.88%   369.92ns    2.70M
+// Sum_Basic_NoGen                                            374.39ns    2.67M
+// Sum_Basic_Gen                                    101.05%   370.48ns    2.70M
+// ----------------------------------------------------------------------------
+// Sum_Vector_NoGen                                           198.84ns    5.03M
+// Sum_Vector_Gen                                    98.14%   202.60ns    4.94M
+// ----------------------------------------------------------------------------
+// Member                                                       4.56us  219.11K
+// MapMember                                        400.21%     1.14us  876.89K
 // ----------------------------------------------------------------------------
-// Sum_Vector_NoGen                                           211.89ns    4.72M
-// Sum_Vector_Gen                                    97.49%   217.35ns    4.60M
+// Count_Vector_NoGen                                          13.99us   71.47K
+// Count_Vector_Gen                                 106.73%    13.11us   76.28K
 // ----------------------------------------------------------------------------
-// Count_Vector_NoGen                                          13.93us   71.78K
-// Count_Vector_Gen                                 106.38%    13.10us   76.36K
+// Fib_Sum_NoGen                                                4.27us  234.07K
+// Fib_Sum_Gen                                       43.18%     9.90us  101.06K
+// Fib_Sum_Gen_Static                                92.08%     4.64us  215.53K
 // ----------------------------------------------------------------------------
-// Fib_Sum_NoGen                                                4.54us  220.07K
-// Fib_Sum_Gen                                       45.81%     9.92us  100.82K
-// Fib_Sum_Gen_Static                               100.00%     4.54us  220.05K
+// VirtualGen_0Virtual                                         12.07us   82.83K
+// VirtualGen_1Virtual                               32.46%    37.19us   26.89K
+// VirtualGen_2Virtual                               24.36%    49.55us   20.18K
+// VirtualGen_3Virtual                               18.16%    66.49us   15.04K
 // ----------------------------------------------------------------------------
-// VirtualGen_0Virtual                                         12.03us   83.14K
-// VirtualGen_1Virtual                               32.89%    36.57us   27.34K
-// VirtualGen_2Virtual                               24.98%    48.15us   20.77K
-// VirtualGen_3Virtual                               17.82%    67.49us   14.82K
+// Concat_NoGen                                                 1.90us  527.40K
+// Concat_Gen                                        86.73%     2.19us  457.39K
 // ----------------------------------------------------------------------------
-// Concat_NoGen                                                 1.92us  520.46K
-// Concat_Gen                                       102.79%     1.87us  534.97K
+// Composed_NoGen                                             546.18ns    1.83M
+// Composed_Gen                                     100.41%   543.93ns    1.84M
+// Composed_GenRegular                              100.42%   543.92ns    1.84M
 // ----------------------------------------------------------------------------
-// Composed_NoGen                                             545.64ns    1.83M
-// Composed_Gen                                      99.65%   547.55ns    1.83M
-// Composed_GenRegular                               99.64%   547.62ns    1.83M
+// Sample                                                     146.68ms     6.82
 // ----------------------------------------------------------------------------
-// StringResplitter_Big                                       120.88us    8.27K
-// StringResplitter_Small                            14.39%   839.94us    1.19K
+// StringResplitter_Big                                       124.80us    8.01K
+// StringResplitter_Small                            15.11%   825.74us    1.21K
 // ----------------------------------------------------------------------------
-// StringSplit_Old                                            421.09ns    2.37M
-// StringSplit_Gen_Vector                            97.73%   430.87ns    2.32M
+// StringSplit_Old                                            393.49ns    2.54M
+// StringSplit_Gen_Vector                           121.47%   323.93ns    3.09M
 // ----------------------------------------------------------------------------
-// StringSplit_Old_ReuseVector                                 80.25ns   12.46M
-// StringSplit_Gen_ReuseVector                       98.99%    81.07ns   12.34M
-// StringSplit_Gen                                  117.23%    68.45ns   14.61M
-// StringSplit_Gen_Take                             115.23%    69.64ns   14.36M
+// StringSplit_Old_ReuseVector                                 80.77ns   12.38M
+// StringSplit_Gen_ReuseVector                      102.02%    79.17ns   12.63M
+// StringSplit_Gen                                  123.78%    65.25ns   15.32M
+// StringSplit_Gen_Take                             123.44%    65.43ns   15.28M
 // ----------------------------------------------------------------------------
-// StringUnsplit_Old                                           34.45us   29.02K
-// StringUnsplit_Old_ReusedBuffer                   100.37%    34.33us   29.13K
-// StringUnsplit_Gen                                106.27%    32.42us   30.84K
-// StringUnsplit_Gen_ReusedBuffer                   105.61%    32.62us   30.65K
+// StringUnsplit_Old                                           29.36us   34.06K
+// StringUnsplit_Old_ReusedBuffer                   100.25%    29.29us   34.14K
+// StringUnsplit_Gen                                103.38%    28.40us   35.21K
+// StringUnsplit_Gen_ReusedBuffer                   109.85%    26.73us   37.41K
 // ----------------------------------------------------------------------------
+// StringUnsplit_Gen(1000)                                     32.30us   30.96K
+// StringUnsplit_Gen(2000)                           49.75%    64.93us   15.40K
+// StringUnsplit_Gen(4000)                           24.74%   130.60us    7.66K
+// StringUnsplit_Gen(8000)                           12.31%   262.35us    3.81K
 // ----------------------------------------------------------------------------
-// StringUnsplit_Gen(1000)                                     32.20us   31.06K
-// StringUnsplit_Gen(2000)                           49.41%    65.17us   15.34K
-// StringUnsplit_Gen(4000)                           22.75%   141.52us    7.07K
-// StringUnsplit_Gen(8000)                           11.20%   287.53us    3.48K
+// Records_EachToTuple                                         75.03ns   13.33M
+// Records_VectorStringPieceReused                   81.79%    91.74ns   10.90M
+// Records_VectorStringPiece                         36.47%   205.77ns    4.86M
+// Records_VectorString                              12.90%   581.70ns    1.72M
 // ----------------------------------------------------------------------------
-// ByLine_Pipes                                               126.58ns    7.90M
+// ByLine_Pipes                                               121.68ns    8.22M
 // ============================================================================
 
 int main(int argc, char *argv[]) {
index d732e97e46f6f7d884d4821c1830b9fdc60fce83..ddb7bf8a74ed60ce29a629706b1d740724172d96 100644 (file)
@@ -1001,6 +1001,105 @@ TEST(StringGen, EmptyResplit) {
   }
 }
 
+// Exercises eachToTuple with a char delimiter: successful conversions to
+// several target types, plus the error path for a wrong number of fields.
+TEST(StringGen, EachToTuple) {
+  {
+    // Mixed target types
+    auto lines = "2:1.414:yo 3:1.732:hi";
+    auto actual
+      = split(lines, ' ')
+      | eachToTuple<int, double, std::string>(':')
+      | as<vector>();
+    vector<tuple<int, double, std::string>> expected {
+      make_tuple(2, 1.414, "yo"),
+      make_tuple(3, 1.732, "hi"),
+    };
+    EXPECT_EQ(expected, actual);
+  }
+  {
+    // Single target; the delimiter never occurs in the input
+    auto lines = "2 3";
+    auto actual
+      = split(lines, ' ')
+      | eachToTuple<int>(',')
+      | as<vector>();
+    vector<tuple<int>> expected {
+      make_tuple(2),
+      make_tuple(3),
+    };
+    EXPECT_EQ(expected, actual);
+  }
+  {
+    // StringPiece target
+    auto lines = "1:cat 2:dog";
+    auto actual
+      = split(lines, ' ')
+      | eachToTuple<int, StringPiece>(':')
+      | as<vector>();
+    vector<tuple<int, StringPiece>> expected {
+      make_tuple(1, "cat"),
+      make_tuple(2, "dog"),
+    };
+    EXPECT_EQ(expected, actual);
+  }
+  {
+    // Empty field
+    auto lines = "2:tjackson:4 3::5";
+    auto actual
+      = split(lines, ' ')
+      | eachToTuple<int, fbstring, int>(':')
+      | as<vector>();
+    vector<tuple<int, fbstring, int>> expected {
+      make_tuple(2, "tjackson", 4),
+      make_tuple(3, "", 5),
+    };
+    EXPECT_EQ(expected, actual);
+  }
+  {
+    // Excess fields
+    auto lines = "1:2 3:4:5";
+    EXPECT_THROW((split(lines, ' ')
+                    | eachToTuple<int, int>(':')
+                    | as<vector>()),
+                 std::runtime_error);
+  }
+  {
+    // Missing fields
+    auto lines = "1:2:3 4:5";
+    EXPECT_THROW((split(lines, ' ')
+                    | eachToTuple<int, int, int>(':')
+                    | as<vector>()),
+                 std::runtime_error);
+  }
+}
+
+// Exercises both eachToPair overloads (char and string delimiter) by
+// collecting the resulting pairs into a std::map.
+TEST(StringGen, EachToPair) {
+  {
+    // char delimiters
+    auto lines = "2:1.414 3:1.732";
+    auto actual
+      = split(lines, ' ')
+      | eachToPair<int, double>(':')
+      | as<std::map<int, double>>();
+    // Listed in a different order than the input; std::map orders by key,
+    // so the listing order does not affect the comparison.
+    std::map<int, double> expected {
+      { 3, 1.732 },
+      { 2, 1.414 },
+    };
+    EXPECT_EQ(expected, actual);
+  }
+  {
+    // string delimiters
+    auto lines = "ab=>cd ef=>gh";
+    auto actual
+      = split(lines, ' ')
+      | eachToPair<string, string>("=>")
+      | as<std::map<string, string>>();
+    std::map<string, string> expected {
+      { "ab", "cd" },
+      { "ef", "gh" },
+    };
+    EXPECT_EQ(expected, actual);
+  }
+}
+
+
 TEST(StringGen, Resplit) {
   auto collect = eachTo<std::string>() | as<vector>();
   {