From d6ed81ca6f520767f841780f779bfb5922455662 Mon Sep 17 00:00:00 2001 From: Marcelo Juchem Date: Mon, 27 Jan 2014 15:45:38 -0800 Subject: [PATCH] split_step - tokenization make simple in folly::Range Summary: this is a simple method that allows a folly::Range `[b, e)` to be split on a character at position `i` (where b <= i < e) in an incremental manner. @override-unit-failures Test Plan: unit tests added Reviewed By: andrei.alexandrescu@fb.com FB internal diff: D1145631 --- folly/Range.h | 89 +++++++++++- folly/test/RangeTest.cpp | 297 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 383 insertions(+), 3 deletions(-) diff --git a/folly/Range.h b/folly/Range.h index 1d3d0dd0..6aba60be 100644 --- a/folly/Range.h +++ b/folly/Range.h @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -508,6 +508,93 @@ public: return endsWith(suffix) && (--e_, true); } + /** + * Splits this `Range` `[b, e)` in the position `i` dictated by the next + * occurence of `delimiter`. + * + * Returns a new `Range` `[b, i)` and adjusts this range to start right after + * the delimiter's position. This range will be empty if the delimiter is not + * found. If called on an empty `Range`, both this and the returned `Range` + * will be empty. + * + * Example: + * + * folly::StringPiece s("sample string for split_next"); + * auto p = s.split_step(' '); + * + * // prints "sample" + * cout << s << endl; + * + * // prints "string for split_next" + * cout << p << endl; + * + * Example 2: + * + * void tokenize(StringPiece s, char delimiter) { + * while (!s.empty()) { + * cout << s.split_step(delimiter); + * } + * } + * + * @author: Marcelo Juchem + */ + Range split_step(value_type delimiter) { + auto i = std::find(b_, e_, delimiter); + Range result(b_, i); + + b_ = i == e_ ? e_ : std::next(i); + + return result; + } + + Range split_step(Range delimiter) { + auto i = find(delimiter); + Range result(b_, i == std::string::npos ? size() : i); + + b_ = result.end() == e_ ? e_ : std::next(result.end(), delimiter.size()); + + return result; + } + + /** + * Convenience method that calls `split_step()` and passes the result to a + * functor, returning whatever the functor does. + * + * Say you have a functor with this signature: + * + * Foo fn(Range r) { } + * + * `split_step()`'s return type will be `Foo`. It works just like: + * + * auto result = fn(myRange.split_step(' ')); + * + * A functor returning `void` is also supported. + * + * Example: + * + * void do_some_parsing(folly::StringPiece s) { + * auto version = s.split_step(' ', [&](folly::StringPiece x) { + * if (x.empty()) { + * throw std::invalid_argument("empty string"); + * } + * return std::strtoull(x.begin(), x.end(), 16); + * }); + * + * // ... + * } + * + * @author: Marcelo Juchem + */ + template + auto split_step(value_type delimiter, TProcess &&process) + -> decltype(process(std::declval())) + { return process(split_step(delimiter)); } + + template + auto split_step(Range delimiter, TProcess &&process) + -> decltype(process(std::declval())) + { return process(split_step(delimiter)); } + private: Iter b_, e_; }; diff --git a/folly/test/RangeTest.cpp b/folly/test/RangeTest.cpp index 7859b6b8..ee6a0e1b 100644 --- a/folly/test/RangeTest.cpp +++ b/folly/test/RangeTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,9 @@ #include "folly/Range.h" #include -#include +#include #include +#include #include #include #include @@ -422,6 +423,298 @@ TEST(StringPiece, SuffixEmpty) { EXPECT_EQ("", a); } +TEST(StringPiece, split_step_char_delimiter) { + // 0 1 2 + // 012345678901234567890123456 + auto const s = "this is just a test string"; + auto const e = std::next(s, std::strlen(s)); + EXPECT_EQ('\0', *e); + + folly::StringPiece p(s); + EXPECT_EQ(s, p.begin()); + EXPECT_EQ(e, p.end()); + + auto x = p.split_step(' '); + EXPECT_EQ(std::next(s, 5), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("this", x); + + x = p.split_step(' '); + EXPECT_EQ(std::next(s, 8), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("is", x); + + x = p.split_step('u'); + EXPECT_EQ(std::next(s, 10), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("j", x); + + x = p.split_step(' '); + EXPECT_EQ(std::next(s, 13), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("st", x); + + x = p.split_step(' '); + EXPECT_EQ(std::next(s, 14), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("", x); + + x = p.split_step(' '); + EXPECT_EQ(std::next(s, 16), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("a", x); + + x = p.split_step(' '); + EXPECT_EQ(std::next(s, 21), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("test", x); + + x = p.split_step(' '); + EXPECT_EQ(e, p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("string", x); + + x = p.split_step(' '); + EXPECT_EQ(e, p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("", x); +} + +TEST(StringPiece, split_step_range_delimiter) { + // 0 1 2 3 + // 0123456789012345678901234567890123 + auto const s = "this is just a test string"; + auto const e = std::next(s, std::strlen(s)); + EXPECT_EQ('\0', *e); + + folly::StringPiece p(s); + EXPECT_EQ(s, p.begin()); + EXPECT_EQ(e, p.end()); + + auto x = p.split_step(" "); + EXPECT_EQ(std::next(s, 6), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("this", x); + + x = p.split_step(" "); + EXPECT_EQ(std::next(s, 10), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("is", x); + + x = p.split_step("u"); + EXPECT_EQ(std::next(s, 12), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("j", x); + + x = p.split_step(" "); + EXPECT_EQ(std::next(s, 16), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("st", x); + + x = p.split_step(" "); + EXPECT_EQ(std::next(s, 18), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("", x); + + x = p.split_step(" "); + EXPECT_EQ(std::next(s, 21), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("a", x); + + x = p.split_step(" "); + EXPECT_EQ(std::next(s, 28), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ(" test", x); + + x = p.split_step(" "); + EXPECT_EQ(e, p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("string", x); + + x = p.split_step(" "); + EXPECT_EQ(e, p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("", x); + + x = p.split_step(" "); + EXPECT_EQ(e, p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("", x); +} + +void split_step_with_process_noop(folly::StringPiece) {} + +TEST(StringPiece, split_step_with_process_char_delimiter) { + // 0 1 2 + // 012345678901234567890123456 + auto const s = "this is just a test string"; + auto const e = std::next(s, std::strlen(s)); + EXPECT_EQ('\0', *e); + + folly::StringPiece p(s); + EXPECT_EQ(s, p.begin()); + EXPECT_EQ(e, p.end()); + + EXPECT_EQ(1, (p.split_step(' ', [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 5), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("this", x); + return 1; + }))); + + EXPECT_EQ(2, (p.split_step(' ', [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 8), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("is", x); + return 2; + }))); + + EXPECT_EQ(3, (p.split_step('u', [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 10), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("j", x); + return 3; + }))); + + EXPECT_EQ(4, (p.split_step(' ', [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 13), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("st", x); + return 4; + }))); + + EXPECT_EQ(5, (p.split_step(' ', [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 14), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("", x); + return 5; + }))); + + EXPECT_EQ(6, (p.split_step(' ', [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 16), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("a", x); + return 6; + }))); + + EXPECT_EQ(7, (p.split_step(' ', [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 21), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("test", x); + return 7; + }))); + + EXPECT_EQ(8, (p.split_step(' ', [&](folly::StringPiece x) { + EXPECT_EQ(e, p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("string", x); + return 8; + }))); + + EXPECT_EQ(9, (p.split_step(' ', [&](folly::StringPiece x) { + EXPECT_EQ(e, p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("", x); + return 9; + }))); + + EXPECT_TRUE((std::is_same< + void, + decltype(p.split_step(' ', split_step_with_process_noop)) + >::value)); + + EXPECT_NO_THROW(p.split_step(' ', split_step_with_process_noop)); +} + +TEST(StringPiece, split_step_with_process_range_delimiter) { + // 0 1 2 3 + // 0123456789012345678901234567890123 + auto const s = "this is just a test string"; + auto const e = std::next(s, std::strlen(s)); + EXPECT_EQ('\0', *e); + + folly::StringPiece p(s); + EXPECT_EQ(s, p.begin()); + EXPECT_EQ(e, p.end()); + + EXPECT_EQ(1, (p.split_step(" ", [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 6), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("this", x); + return 1; + }))); + + EXPECT_EQ(2, (p.split_step(" ", [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 10), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("is", x); + return 2; + }))); + + EXPECT_EQ(3, (p.split_step("u", [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 12), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("j", x); + return 3; + }))); + + EXPECT_EQ(4, (p.split_step(" ", [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 16), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("st", x); + return 4; + }))); + + EXPECT_EQ(5, (p.split_step(" ", [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 18), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("", x); + return 5; + }))); + + EXPECT_EQ(6, (p.split_step(" ", [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 21), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("a", x); + return 6; + }))); + + EXPECT_EQ(7, (p.split_step(" ", [&](folly::StringPiece x) { + EXPECT_EQ(std::next(s, 28), p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ(" test", x); + return 7; + }))); + + EXPECT_EQ(8, (p.split_step(" ", [&](folly::StringPiece x) { + EXPECT_EQ(e, p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("string", x); + return 8; + }))); + + EXPECT_EQ(9, (p.split_step(" ", [&](folly::StringPiece x) { + EXPECT_EQ(e, p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("", x); + return 9; + }))); + + EXPECT_EQ(10, (p.split_step(" ", [&](folly::StringPiece x) { + EXPECT_EQ(e, p.begin()); + EXPECT_EQ(e, p.end()); + EXPECT_EQ("", x); + return 10; + }))); + + EXPECT_TRUE((std::is_same< + void, + decltype(p.split_step(' ', split_step_with_process_noop)) + >::value)); + + EXPECT_NO_THROW(p.split_step(' ', split_step_with_process_noop)); +} + TEST(qfind, UInt32_Ranges) { vector a({1, 2, 3, 260, 5}); vector b({2, 3, 4}); -- 2.34.1