2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
23 #include <folly/Benchmark.h>
24 #include <folly/experimental/EliasFanoCoding.h>
25 #include <folly/experimental/Select64.h>
26 #include <folly/experimental/test/CodingTestUtils.h>
28 using namespace folly::compression;
31 #define EF_TEST_ARCH Default
32 #endif // EF_TEST_ARCH
36 uint8_t slowDefaultNumLowerBits(size_t upperBound, size_t size) {
37 if (size == 0 || upperBound < size) {
40 // floor(log(upperBound / size));
41 return uint8_t(folly::findLastSet(upperBound / size) - 1);
46 TEST(EliasFanoCoding, defaultNumLowerBits) {
47 // Verify that slowDefaultNumLowerBits and optimized
48 // Encoder::defaultNumLowerBits agree.
49 static constexpr size_t kNumIterations = 2500;
50 auto compare = [](size_t upperBound, size_t size) {
51 using Encoder = EliasFanoEncoderV2<size_t>;
52 EXPECT_EQ(int(slowDefaultNumLowerBits(upperBound, size)),
53 int(Encoder::defaultNumLowerBits(upperBound, size)))
54 << upperBound << " " << size;
56 auto batch = [&compare](size_t initialUpperBound) {
57 for (size_t upperBound = initialUpperBound, i = 0;
60 // Test "size" values close to "upperBound".
61 for (size_t size = upperBound, j = 0; j < kNumIterations; ++j, --size) {
62 compare(upperBound, size);
64 // Sample "size" values between [0, upperBound].
65 for (size_t size = upperBound;
66 size > 1 + upperBound / kNumIterations;
67 size -= 1 + upperBound / kNumIterations) {
68 compare(upperBound, size);
70 // Test "size" values close to 0.
71 for (size_t size = 0; size < kNumIterations; ++size) {
72 compare(upperBound, size);
76 batch(std::numeric_limits<size_t>::max());
77 batch(kNumIterations + 1312213123);
78 batch(kNumIterations);
81 std::uniform_int_distribution<size_t> distribution;
82 for (size_t i = 0; i < kNumIterations; ++i) {
83 const auto a = distribution(gen);
84 const auto b = distribution(gen);
85 compare(std::max(a, b), std::min(a, b));
89 class EliasFanoCodingTest : public ::testing::Test {
92 typedef EliasFanoEncoderV2<uint32_t, size_t> Encoder;
93 typedef EliasFanoReader<Encoder> Reader;
94 testEmpty<Reader, Encoder>();
97 template <size_t kSkipQuantum, size_t kForwardQuantum>
99 typedef EliasFanoEncoderV2<
100 uint32_t, uint32_t, kSkipQuantum, kForwardQuantum> Encoder;
101 typedef EliasFanoReader<Encoder, instructions::EF_TEST_ARCH> Reader;
102 testAll<Reader, Encoder>({0});
103 testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
104 testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
108 TEST_F(EliasFanoCodingTest, Empty) {
112 TEST_F(EliasFanoCodingTest, Simple) {
116 TEST_F(EliasFanoCodingTest, SkipPointers) {
120 TEST_F(EliasFanoCodingTest, ForwardPointers) {
124 TEST_F(EliasFanoCodingTest, SkipForwardPointers) {
125 doTestAll<128, 128>();
128 TEST_F(EliasFanoCodingTest, Select64) {
129 typedef instructions::EF_TEST_ARCH instr;
130 constexpr uint64_t kPrime = uint64_t(-59);
131 for (uint64_t x = kPrime, i = 0; i < (1 << 20); x *= kPrime, i += 1) {
132 size_t w = instr::popcount(x);
133 for (size_t k = 0; k < w; ++k) {
134 auto pos = folly::select64<instr>(x, k);
135 CHECK_EQ((x >> pos) & 1, 1);
136 CHECK_EQ(instr::popcount(x & ((uint64_t(1) << pos) - 1)), k);
141 TEST_F(EliasFanoCodingTest, BugLargeGapInUpperBits) { // t16274876
142 typedef EliasFanoEncoderV2<uint32_t, uint32_t, 2, 2> Encoder;
143 typedef EliasFanoReader<Encoder, instructions::EF_TEST_ARCH> Reader;
144 constexpr uint32_t kLargeValue = 127;
146 // Build a list where the upper bits have a large gap after the
147 // first element, so that we need to reposition in the upper bits
148 // using skips to position the iterator on the second element.
149 std::vector<uint32_t> data = {0, kLargeValue};
150 for (uint32_t i = 0; i < kLargeValue; ++i) {
151 data.push_back(data.back() + 1);
153 auto list = Encoder::encode(data.begin(), data.end());
157 ASSERT_TRUE(reader.skipTo(kLargeValue - 1));
158 ASSERT_EQ(kLargeValue, reader.value());
159 ASSERT_EQ(0, reader.previousValue());
167 typedef EliasFanoEncoderV2<uint32_t, uint32_t, 128, 128> Encoder;
168 typedef EliasFanoReader<Encoder> Reader;
170 std::vector<uint32_t> data;
171 std::vector<size_t> order;
173 std::vector<uint32_t> encodeSmallData;
174 std::vector<uint32_t> encodeLargeData;
176 std::vector<std::pair<size_t, size_t>> numLowerBitsInput;
178 typename Encoder::MutableCompressedList list;
183 data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen);
184 list = Encoder::encode(data.begin(), data.end());
186 order.resize(data.size());
187 std::iota(order.begin(), order.end(), size_t());
188 std::shuffle(order.begin(), order.end(), gen);
190 encodeSmallData = generateRandomList(10, 100 * 1000, gen);
191 encodeLargeData = generateRandomList(1000 * 1000, 100 * 1000 * 1000, gen);
193 std::uniform_int_distribution<size_t> distribution;
194 for (size_t i = 0; i < 10000; ++i) {
195 const auto a = distribution(gen);
196 const auto b = distribution(gen);
197 numLowerBitsInput.emplace_back(std::max(a, b), std::min(a, b));
207 BENCHMARK(Next, iters) {
208 bmNext<bm::Reader>(bm::list, bm::data, iters);
211 size_t Skip_ForwardQ128(size_t iters, size_t logAvgSkip) {
212 bmSkip<bm::Reader>(bm::list, bm::data, logAvgSkip, iters);
216 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1, 0)
217 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 2, 1)
218 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 4_pm_1, 2)
219 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 16_pm_4, 4)
220 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 64_pm_16, 6)
221 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 256_pm_64, 8)
222 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1024_pm_256, 10)
224 BENCHMARK(Jump_ForwardQ128, iters) {
225 bmJump<bm::Reader>(bm::list, bm::data, bm::order, iters);
228 BENCHMARK_DRAW_LINE();
230 size_t SkipTo_SkipQ128(size_t iters, size_t logAvgSkip) {
231 bmSkipTo<bm::Reader>(bm::list, bm::data, logAvgSkip, iters);
235 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1, 0)
236 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 2, 1)
237 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 4_pm_1, 2)
238 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 16_pm_4, 4)
239 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 64_pm_16, 6)
240 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 256_pm_64, 8)
241 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1024_pm_256, 10)
243 BENCHMARK(JumpTo_SkipQ128, iters) {
244 bmJumpTo<bm::Reader>(bm::list, bm::data, bm::order, iters);
247 BENCHMARK_DRAW_LINE();
249 BENCHMARK(Encode_10) {
250 auto list = bm::Encoder::encode(bm::encodeSmallData.begin(),
251 bm::encodeSmallData.end());
256 auto list = bm::Encoder::encode(bm::encodeLargeData.begin(),
257 bm::encodeLargeData.end());
261 BENCHMARK_DRAW_LINE();
263 BENCHMARK(defaultNumLowerBits, iters) {
264 using Encoder = EliasFanoEncoderV2<size_t>;
268 const auto& p = bm::numLowerBitsInput[i];
269 folly::doNotOptimizeAway(Encoder::defaultNumLowerBits(p.first, p.second));
270 if (++i == bm::numLowerBitsInput.size()) {
276 BENCHMARK(slowDefaultNumLowerBits, iters) {
279 const auto& p = bm::numLowerBitsInput[i];
280 folly::doNotOptimizeAway(slowDefaultNumLowerBits(p.first, p.second));
281 if (++i == bm::numLowerBitsInput.size()) {
288 Intel(R) Xeon(R) CPU E5-2678 v3 @ 2.50GHz (turbo on),
289 using -DEF_TEST_ARCH Haswell and GCC 4.9 with --bm_min_usec 100000.
290 ============================================================================
291 folly/experimental/test/EliasFanoCodingTest.cpp relative time/iter iters/s
292 ============================================================================
294 Skip_ForwardQ128(1) 3.73ns 267.93M
295 Skip_ForwardQ128(2) 4.89ns 204.34M
296 Skip_ForwardQ128(4_pm_1) 6.86ns 145.79M
297 Skip_ForwardQ128(16_pm_4) 18.92ns 52.85M
298 Skip_ForwardQ128(64_pm_16) 26.56ns 37.66M
299 Skip_ForwardQ128(256_pm_64) 30.12ns 33.20M
300 Skip_ForwardQ128(1024_pm_256) 30.74ns 32.53M
301 Jump_ForwardQ128 30.49ns 32.80M
302 ----------------------------------------------------------------------------
303 SkipTo_SkipQ128(1) 3.86ns 258.96M
304 SkipTo_SkipQ128(2) 7.73ns 129.36M
305 SkipTo_SkipQ128(4_pm_1) 10.29ns 97.18M
306 SkipTo_SkipQ128(16_pm_4) 28.69ns 34.86M
307 SkipTo_SkipQ128(64_pm_16) 39.73ns 25.17M
308 SkipTo_SkipQ128(256_pm_64) 43.45ns 23.01M
309 SkipTo_SkipQ128(1024_pm_256) 44.66ns 22.39M
310 JumpTo_SkipQ128 47.98ns 20.84M
311 ----------------------------------------------------------------------------
312 Encode_10 77.92ns 12.83M
314 ----------------------------------------------------------------------------
315 defaultNumLowerBits 2.20ns 455.01M
316 slowDefaultNumLowerBits 7.90ns 126.59M
317 ============================================================================
320 int main(int argc, char** argv) {
321 testing::InitGoogleTest(&argc, argv);
322 gflags::ParseCommandLineFlags(&argc, &argv, true);
324 auto ret = RUN_ALL_TESTS();
325 if (ret == 0 && FLAGS_benchmark) {
327 folly::runBenchmarks();