Elias-Fano micro-optimizations
[folly.git] / folly / experimental / test / EliasFanoCodingTest.cpp
1 /*
2  * Copyright 2014 Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <algorithm>
18 #include <numeric>
19 #include <random>
20 #include <vector>
21
22 #include <folly/Benchmark.h>
23 #include <folly/experimental/EliasFanoCoding.h>
24 #include <folly/experimental/Select64.h>
25 #include <folly/experimental/test/CodingTestUtils.h>
26
27 using namespace folly::compression;
28
29 #if defined(EF_TEST_NEHALEM)
30 #define EF_TEST_ARCH Nehalem
31 #elif defined(EF_TEST_HASWELL)
32 #define EF_TEST_ARCH Haswell
33 #else
34 #define EF_TEST_ARCH Default
35 #endif
36
37 template <size_t kVersion>
38 struct TestType {
39   static constexpr size_t Version = kVersion;
40 };
41
42 template <class T>
43 class EliasFanoCodingTest : public ::testing::Test {
44  public:
45   void doTestEmpty() {
46     typedef EliasFanoEncoder<uint32_t, size_t, 0, 0, T::Version> Encoder;
47     typedef EliasFanoReader<Encoder> Reader;
48     testEmpty<Reader, Encoder>();
49   }
50
51   template <size_t kSkipQuantum, size_t kForwardQuantum>
52   void doTestAll() {
53     typedef EliasFanoEncoder<
54       uint32_t, uint32_t, kSkipQuantum, kForwardQuantum, T::Version> Encoder;
55     typedef EliasFanoReader<Encoder, instructions::EF_TEST_ARCH> Reader;
56     testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
57     testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
58   }
59 };
60
61 typedef ::testing::Types<TestType<0>, TestType<1>> TestTypes;
62 TYPED_TEST_CASE(EliasFanoCodingTest, TestTypes);
63
64 TYPED_TEST(EliasFanoCodingTest, Empty) {
65   TestFixture::doTestEmpty();
66 }
67
68 TYPED_TEST(EliasFanoCodingTest, Simple) {
69   TestFixture::template doTestAll<0, 0>();
70 }
71
72 TYPED_TEST(EliasFanoCodingTest, SkipPointers) {
73   TestFixture::template doTestAll<128, 0>();
74 }
75
76 TYPED_TEST(EliasFanoCodingTest, ForwardPointers) {
77   TestFixture::template doTestAll<0, 128>();
78 }
79
80 TYPED_TEST(EliasFanoCodingTest, SkipForwardPointers) {
81   TestFixture::template doTestAll<128, 128>();
82 }
83
84 namespace bm {
85
86 constexpr size_t k1M = 1000000;
87 constexpr size_t kVersion = 1;
88
89 typedef EliasFanoEncoder<uint32_t, uint32_t, 128, 128, kVersion> Encoder;
90 typedef EliasFanoReader<Encoder> Reader;
91
92 std::vector<uint32_t> data;
93 std::vector<size_t> order;
94
95 std::vector<uint32_t> encodeSmallData;
96 std::vector<uint32_t> encodeLargeData;
97
98 typename Encoder::CompressedList list;
99
100 void init() {
101   std::mt19937 gen;
102
103   data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen);
104   //data = loadList("/home/philipp/pl_test_dump.txt");
105   list = Encoder::encode(data.begin(), data.end());
106
107   order.resize(data.size());
108   std::iota(order.begin(), order.end(), size_t());
109   std::shuffle(order.begin(), order.end(), gen);
110
111   encodeSmallData = generateRandomList(10, 100 * 1000, gen);
112   encodeLargeData = generateRandomList(1000 * 1000, 100 * 1000 * 1000, gen);
113 }
114
115 void free() {
116   list.free();
117 }
118
119 }  // namespace bm
120
121 BENCHMARK(Next, iters) {
122   bmNext<bm::Reader>(bm::list, bm::data, iters);
123 }
124
125 size_t Skip_ForwardQ128(size_t iters, size_t logAvgSkip) {
126   bmSkip<bm::Reader>(bm::list, bm::data, logAvgSkip, iters);
127   return iters;
128 }
129
130 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1, 0)
131 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 2, 1)
132 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 4_pm_1, 2)
133 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 16_pm_4, 4)
134 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 64_pm_16, 6)
135 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 256_pm_64, 8)
136 BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1024_pm_256, 10)
137
138 BENCHMARK(Jump_ForwardQ128, iters) {
139   bmJump<bm::Reader>(bm::list, bm::data, bm::order, iters);
140 }
141
142 BENCHMARK_DRAW_LINE();
143
144 size_t SkipTo_SkipQ128(size_t iters, size_t logAvgSkip) {
145   bmSkipTo<bm::Reader>(bm::list, bm::data, logAvgSkip, iters);
146   return iters;
147 }
148
149 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1, 0)
150 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 2, 1)
151 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 4_pm_1, 2)
152 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 16_pm_4, 4)
153 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 64_pm_16, 6)
154 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 256_pm_64, 8)
155 BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1024_pm_256, 10)
156
157 BENCHMARK(JumpTo_SkipQ128, iters) {
158   bmJumpTo<bm::Reader>(bm::list, bm::data, bm::order, iters);
159 }
160
161 BENCHMARK_DRAW_LINE();
162
163 BENCHMARK(Encode_10) {
164   auto list = bm::Encoder::encode(bm::encodeSmallData.begin(),
165                                   bm::encodeSmallData.end());
166   list.free();
167 }
168
169 BENCHMARK(Encode) {
170   auto list = bm::Encoder::encode(bm::encodeLargeData.begin(),
171                                   bm::encodeLargeData.end());
172   list.free();
173 }
174
175 BENCHMARK_DRAW_LINE();
176
177 BENCHMARK(Select64, iters) {
178   typedef instructions::EF_TEST_ARCH instr;
179   constexpr uint64_t kPrime = uint64_t(-59);
180   for (uint64_t x = kPrime, i = 0; i < iters; x *= kPrime, i += 1) {
181     size_t w = instr::popcount(x);
182     folly::doNotOptimizeAway(folly::select64<instr>(x, w - 1));
183   }
184 }
185
186 #if 0
187 Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz (turbo off),
188 using instructions::Haswell and GCC 4.9 with --bm_min_usec 100000.
189 ============================================================================
190 folly/experimental/test/EliasFanoCodingTest.cpp relative  time/iter  iters/s
191 ============================================================================
192 Next                                                         2.52ns  397.28M
193 Skip_ForwardQ128(1)                                          3.92ns  255.28M
194 Skip_ForwardQ128(2)                                          5.08ns  197.04M
195 Skip_ForwardQ128(4_pm_1)                                     7.04ns  142.02M
196 Skip_ForwardQ128(16_pm_4)                                   19.68ns   50.82M
197 Skip_ForwardQ128(64_pm_16)                                  27.58ns   36.26M
198 Skip_ForwardQ128(256_pm_64)                                 32.49ns   30.78M
199 Skip_ForwardQ128(1024_pm_256)                               33.39ns   29.95M
200 Jump_ForwardQ128                                            34.05ns   29.37M
201 ----------------------------------------------------------------------------
202 SkipTo_SkipQ128(1)                                           4.42ns  226.49M
203 SkipTo_SkipQ128(2)                                           8.58ns  116.55M
204 SkipTo_SkipQ128(4_pm_1)                                     11.43ns   87.50M
205 SkipTo_SkipQ128(16_pm_4)                                    31.19ns   32.06M
206 SkipTo_SkipQ128(64_pm_16)                                   43.88ns   22.79M
207 SkipTo_SkipQ128(256_pm_64)                                  49.08ns   20.37M
208 SkipTo_SkipQ128(1024_pm_256)                                52.24ns   19.14M
209 JumpTo_SkipQ128                                             54.61ns   18.31M
210 ----------------------------------------------------------------------------
211 Encode_10                                                  117.24ns    8.53M
212 Encode                                                       5.64ms   177.15
213 ----------------------------------------------------------------------------
214 Select64                                                     8.04ns  124.35M
215 ============================================================================
216 #endif
217
218 int main(int argc, char** argv) {
219   testing::InitGoogleTest(&argc, argv);
220   gflags::ParseCommandLineFlags(&argc, &argv, true);
221
222   auto ret = RUN_ALL_TESTS();
223   if (ret == 0 && FLAGS_benchmark) {
224     bm::init();
225     folly::runBenchmarks();
226     bm::free();
227   }
228
229   return ret;
230 }