From: Marc Celani Date: Sun, 16 Mar 2014 03:04:47 +0000 (-0700) Subject: folly::merge() - std::merge() with stronger guarantees (probably same implementation... X-Git-Tag: v0.22.0~643 X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=cd3fcbcf5f7bbf06c5cae424b15fdc02e24407f5;p=folly.git folly::merge() - std::merge() with stronger guarantees (probably same implementation in practice) Summary: std::merge() does not guarantee the ordering when equal elements belong in two ranges (comparator(it_a, it_b) == comparator(it_b, it_a) == 0). For maps, it is important that we can specify the ordering (see array_merge in php, where we guarantee which array's value will be present in the output if a key is present in both inputs). Also removes folly::merge that is specific to sorted_vector_map since this will not be needed. NOTE: I expect this to break feed, will fix in a separate non-folly diff. Test Plan: This implementation is directly ripped from cppreference.com, but unit tests are added nonetheless. Specifically, one is added where the output is a std::map to demonstrate its usefulness. Reviewed By: delong.j@fb.com FB internal diff: D1223401 @override-unit-failures --- diff --git a/folly/Merge.h b/folly/Merge.h new file mode 100644 index 00000000..6598c682 --- /dev/null +++ b/folly/Merge.h @@ -0,0 +1,86 @@ +/* + * Copyright 2014 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * folly::merge() is an implementation of std::merge with one additional + * guarantee: if the input ranges overlap, the order that values *from the two + * different ranges* appear in the output is well defined (std::merge only + * guarantees relative ordering is maintained within a single input range). + * This semantic is very useful when the output container removes duplicates + * (such as std::map), because on ties the element from a is always written + * before the element from b. + * + * ex. Let's say we have two vector<pair<int, int>> as input, and we are + * merging into a vector<pair<int, int>>. The comparator returns true if the + * first argument has a lesser 'first' value in the pair. + * + * a = {{1, 1}, {2, 2}, {3, 3}}; + * b = {{1, 2}, {2, 3}}; + * + * folly::merge<...>(a.begin(), a.end(), b.begin(), b.end(), outputIter) is + * guaranteed to produce {{1, 1}, {1, 2}, {2, 2}, {2, 3}, {3, 3}}. That is, + * if comp(it_a, it_b) == comp(it_b, it_a) == false, we first insert the element + * from a. + */ + +#ifndef FOLLY_MERGE_H_ +#define FOLLY_MERGE_H_ + +#include <algorithm> + +namespace folly { + +template<class InputIt1, class InputIt2, class OutputIt, class Compare> +OutputIt merge(InputIt1 first1, InputIt1 last1, + InputIt2 first2, InputIt2 last2, + OutputIt d_first, Compare comp) { + for (; first1 != last1; ++d_first) { + if (first2 == last2) { + return std::copy(first1, last1, d_first); + } + if (comp(*first2, *first1)) { + *d_first = *first2; + ++first2; + } else { + *d_first = *first1; + ++first1; + } + } + return std::copy(first2, last2, d_first); +} + +template<class InputIt1, class InputIt2, class OutputIt> +OutputIt merge(InputIt1 first1, InputIt1 last1, + InputIt2 first2, InputIt2 last2, + OutputIt d_first) { + for (; first1 != last1; ++d_first) { + if (first2 == last2) { + return std::copy(first1, last1, d_first); + } + if (*first2 < *first1) { + *d_first = *first2; + ++first2; + } else { + *d_first = *first1; + ++first1; + } + } + return std::copy(first2, last2, d_first); +} + +} + +#endif diff --git a/folly/sorted_vector_types.h b/folly/sorted_vector_types.h index 4deaac57..e8486055 100644 --- 
a/folly/sorted_vector_types.h +++ b/folly/sorted_vector_types.h @@ -617,71 +617,6 @@ inline void swap(sorted_vector_map& a, return a.swap(b); } -/* - * Efficiently moves all elements from b into a by taking advantage of sorted - * inputs. Any keys that belong to both a and b will have the value from b. - * Assumes that C and A can be constructed using the default constructor. - * - * std::merge cannot be used for this use case because in the event of equal - * keys belonging to both a and b, it undefined which element will be inserted - * into the output map last (and therefore be present in the map). - */ -template -inline void merge(sorted_vector_map& a, - sorted_vector_map& b) { - auto size = a.size(); - auto it_a = a.begin(); - auto it_b = b.begin(); - while (it_a != a.end() && it_b != b.end()) { - auto comp = a.key_comp()(it_a->first, it_b->first); - if (!comp) { - if (!a.key_comp()(it_b->first, it_a->first)) { - ++it_a; - ++it_b; - } else { - ++size; - ++it_b; - } - } else { - ++it_a; - } - } - if (it_b != b.end()) { - size += b.end() - it_b; - } - - sorted_vector_map c; - c.reserve(size); - it_a = a.begin(); - it_b = b.begin(); - while (it_a != a.end() && it_b != b.end()) { - auto comp = a.key_comp()(it_a->first, it_b->first); - if (!comp) { - if (!a.key_comp()(it_b->first, it_a->first)) { - c.insert(c.end(), std::move(*it_b)); - ++it_a; - ++it_b; - } else { - c.insert(c.end(), std::move(*it_b)); - ++it_b; - } - } else { - c.insert(c.end(), std::move(*it_a)); - ++it_a; - } - } - while (it_a != a.end()) { - c.insert(c.end(), std::move(*it_a)); - ++it_a; - } - while (it_b != b.end()) { - c.insert(c.end(), std::move(*it_b)); - ++it_b; - } - a.swap(c); - b.clear(); -} - ////////////////////////////////////////////////////////////////////// } diff --git a/folly/test/MergeTest.cpp b/folly/test/MergeTest.cpp new file mode 100644 index 00000000..a538012d --- /dev/null +++ b/folly/test/MergeTest.cpp @@ -0,0 +1,68 @@ +/* + * Copyright 2014 Facebook, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/Merge.h" +#include <gtest/gtest.h> +#include <map> +#include <vector> + +TEST(MergeTest, NonOverlapping) { + std::vector<int> a = {0, 2, 4, 6}; + std::vector<int> b = {1, 3, 5, 7}; + std::vector<int> c; + + folly::merge(a.begin(), a.end(), + b.begin(), b.end(), + std::back_inserter(c)); + EXPECT_EQ(8, c.size()); + for (int i = 0; i < 8; ++i) { + EXPECT_EQ(i, c[i]); + } +} + +TEST(MergeTest, OverlappingInSingleInputRange) { + std::vector<std::pair<int, int>> a = {{0, 0}, {0, 1}}; + std::vector<std::pair<int, int>> b = {{2, 2}, {3, 3}}; + std::map<int, int> c; + + folly::merge(a.begin(), a.end(), + b.begin(), b.end(), + std::inserter(c, c.begin())); + EXPECT_EQ(3, c.size()); + + // First value is inserted, second is not + EXPECT_EQ(c[0], 0); + + EXPECT_EQ(c[2], 2); + EXPECT_EQ(c[3], 3); +} + +TEST(MergeTest, OverlappingInDifferentInputRange) { + std::vector<std::pair<int, int>> a = {{0, 0}, {1, 1}}; + std::vector<std::pair<int, int>> b = {{0, 2}, {3, 3}}; + std::map<int, int> c; + + folly::merge(a.begin(), a.end(), + b.begin(), b.end(), + std::inserter(c, c.begin())); + EXPECT_EQ(3, c.size()); + + // Value from a is inserted, value from b is not. + EXPECT_EQ(c[0], 0); + + EXPECT_EQ(c[1], 1); + EXPECT_EQ(c[3], 3); +} diff --git a/folly/test/SortedVectorBenchmark.cpp b/folly/test/SortedVectorBenchmark.cpp deleted file mode 100644 index 5a4a7bf9..00000000 --- a/folly/test/SortedVectorBenchmark.cpp +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright 2014 Facebook, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "folly/Format.h" - -#include <gflags/gflags.h> - -#include "folly/sorted_vector_types.h" -#include "folly/Benchmark.h" - -namespace { - -using folly::sorted_vector_map; - -sorted_vector_map<int, int> a; -sorted_vector_map<int, int> b; - -BENCHMARK(merge_by_setting, iters) { - while (iters--) { - // copy to match merge benchmark - auto a_cpy = a; - auto b_cpy = b; - for (const auto& kv : b_cpy) { - a_cpy[kv.first] = kv.second; - } - } -} - -BENCHMARK_RELATIVE(merge, iters) { - while (iters--) { - auto a_cpy = a; - auto b_cpy = b; - merge(a_cpy, b_cpy); - } -} -} - -// Benchmark results on my dev server (Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz) -// -// ============================================================================ -// folly/test/SortedVectorBenchmark.cpp relative time/iter iters/s -// ============================================================================ -// merge_by_setting 482.01us 2.07K -// merge 2809.19% 17.16us 58.28K -// ============================================================================ - -int main(int argc, char *argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - for (int i = 0; i < 1000; ++i) { - a[2 * i] = 2 * i; - b[2 * i + 1] = 2 * i + 1; - } - - folly::runBenchmarks(); - return 0; -} diff --git a/folly/test/sorted_vector_test.cpp b/folly/test/sorted_vector_test.cpp index d03823fe..80038a0a 100644 --- a/folly/test/sorted_vector_test.cpp +++ b/folly/test/sorted_vector_test.cpp 
@@ -301,29 +301,3 @@ TEST(SortedVectorTest, EmptyTest) { EXPECT_TRUE(emptyMap.lower_bound(10) == emptyMap.end()); EXPECT_TRUE(emptyMap.find(10) == emptyMap.end()); } - -TEST(SortedVectorTest, MergeTest) { - sorted_vector_map<int, int> a; - a[0] = 0; - a[1] = 1; - a[5] = 5; - a[10] = 10; - - sorted_vector_map<int, int> b; - b[0] = 10; - b[3] = 13; - b[7] = 17; - b[11] = 111; - - merge(a, b); - - EXPECT_TRUE(b.empty()); - EXPECT_EQ(a.size(), 7); - EXPECT_EQ(a[0], 10); - EXPECT_EQ(a[1], 1); - EXPECT_EQ(a[3], 13); - EXPECT_EQ(a[5], 5); - EXPECT_EQ(a[7], 17); - EXPECT_EQ(a[10], 10); - EXPECT_EQ(a[11], 111); -}