2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
25 #include <unordered_set>
26 #include <glog/logging.h>
27 #include <gtest/gtest.h>
29 namespace folly { namespace compression {
32 std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId, URNG&& g) {
33 CHECK_LT(n, 2 * maxId);
34 std::uniform_int_distribution<> uid(1, maxId);
35 std::unordered_set<uint32_t> dataset;
36 while (dataset.size() < n) {
37 uint32_t value = uid(g);
38 if (dataset.count(value) == 0) {
39 dataset.insert(value);
43 std::vector<uint32_t> ids(dataset.begin(), dataset.end());
44 std::sort(ids.begin(), ids.end());
48 inline std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) {
50 return generateRandomList(n, maxId, gen);
53 inline std::vector<uint32_t> generateSeqList(uint32_t minId, uint32_t maxId,
55 CHECK_LE(minId, maxId);
57 std::vector<uint32_t> ids;
58 ids.reserve((maxId - minId) / step + 1);
59 for (uint32_t i = minId; i <= maxId; i += step) {
65 inline std::vector<uint32_t> loadList(const std::string& filename) {
66 std::ifstream fin(filename);
67 std::vector<uint32_t> result;
75 // Test previousValue only if Reader has it.
76 template <class... Args>
77 void maybeTestPreviousValue(Args&&...) { }
79 // Make all the arguments template because if the types are not exact,
80 // the above overload will be picked (for example i could be size_t or
82 template <class Vector, class Reader, class Index>
83 auto maybeTestPreviousValue(const Vector& data, Reader& reader, Index i)
84 -> decltype(reader.previousValue(), void()) {
86 EXPECT_EQ(reader.previousValue(), data[i - 1]);
90 template <class Reader, class List>
91 void testNext(const std::vector<uint32_t>& data, const List& list) {
93 EXPECT_FALSE(reader.valid());
95 for (size_t i = 0; i < data.size(); ++i) {
96 EXPECT_TRUE(reader.next());
97 EXPECT_TRUE(reader.valid());
98 EXPECT_EQ(reader.value(), data[i]);
99 EXPECT_EQ(reader.position(), i);
100 maybeTestPreviousValue(data, reader, i);
102 EXPECT_FALSE(reader.next());
103 EXPECT_FALSE(reader.valid());
104 EXPECT_EQ(reader.position(), reader.size());
107 template <class Reader, class List>
108 void testSkip(const std::vector<uint32_t>& data, const List& list,
110 CHECK_GT(skipStep, 0);
113 for (size_t i = skipStep - 1; i < data.size(); i += skipStep) {
114 EXPECT_TRUE(reader.skip(skipStep));
115 EXPECT_TRUE(reader.valid());
116 EXPECT_EQ(reader.value(), data[i]);
117 EXPECT_EQ(reader.position(), i);
118 maybeTestPreviousValue(data, reader, i);
120 EXPECT_FALSE(reader.skip(skipStep));
121 EXPECT_FALSE(reader.valid());
122 EXPECT_EQ(reader.position(), reader.size());
123 EXPECT_FALSE(reader.next());
126 template <class Reader, class List>
127 void testSkip(const std::vector<uint32_t>& data, const List& list) {
128 for (size_t skipStep = 1; skipStep < 25; ++skipStep) {
129 testSkip<Reader, List>(data, list, skipStep);
131 for (size_t skipStep = 25; skipStep <= 500; skipStep += 25) {
132 testSkip<Reader, List>(data, list, skipStep);
136 template <class Reader, class List>
137 void testSkipTo(const std::vector<uint32_t>& data, const List& list,
139 CHECK_GT(skipToStep, 0);
142 const uint32_t delta = std::max<uint32_t>(1, data.back() / skipToStep);
143 uint32_t value = delta;
144 auto it = data.begin();
146 it = std::lower_bound(it, data.end(), value);
147 if (it == data.end()) {
148 EXPECT_FALSE(reader.skipTo(value));
151 EXPECT_TRUE(reader.skipTo(value));
152 EXPECT_TRUE(reader.valid());
153 EXPECT_EQ(reader.value(), *it);
154 EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
155 value = reader.value() + delta;
156 maybeTestPreviousValue(data, reader, std::distance(data.begin(), it));
158 EXPECT_FALSE(reader.valid());
159 EXPECT_EQ(reader.position(), reader.size());
160 EXPECT_FALSE(reader.next());
163 template <class Reader, class List>
164 void testSkipTo(const std::vector<uint32_t>& data, const List& list) {
165 for (size_t steps = 10; steps < 100; steps += 10) {
166 testSkipTo<Reader, List>(data, list, steps);
168 for (size_t steps = 100; steps <= 1000; steps += 100) {
169 testSkipTo<Reader, List>(data, list, steps);
171 testSkipTo<Reader, List>(data, list, std::numeric_limits<size_t>::max());
173 // Skip to the first element.
175 EXPECT_TRUE(reader.skipTo(data[0]));
176 EXPECT_EQ(reader.value(), data[0]);
177 EXPECT_EQ(reader.position(), 0);
180 // Skip past the last element.
182 EXPECT_FALSE(reader.skipTo(data.back() + 1));
183 EXPECT_FALSE(reader.valid());
184 EXPECT_EQ(reader.position(), reader.size());
185 EXPECT_FALSE(reader.next());
188 // Skip to maximum integer.
190 using ValueType = typename Reader::ValueType;
191 EXPECT_FALSE(reader.skipTo(std::numeric_limits<ValueType>::max()));
192 EXPECT_FALSE(reader.valid());
193 EXPECT_EQ(reader.position(), reader.size());
194 EXPECT_FALSE(reader.next());
198 template <class Reader, class List>
199 void testJump(const std::vector<uint32_t>& data, const List& list) {
201 std::vector<size_t> is(data.size());
202 for (size_t i = 0; i < data.size(); ++i) {
205 std::shuffle(is.begin(), is.end(), gen);
206 if (Reader::EncoderType::forwardQuantum == 0) {
207 is.resize(std::min<size_t>(is.size(), 100));
212 EXPECT_TRUE(reader.jump(i));
213 EXPECT_EQ(reader.value(), data[i]);
214 EXPECT_EQ(reader.position(), i);
215 maybeTestPreviousValue(data, reader, i);
217 EXPECT_FALSE(reader.jump(data.size()));
218 EXPECT_FALSE(reader.valid());
219 EXPECT_EQ(reader.position(), reader.size());
222 template <class Reader, class List>
223 void testJumpTo(const std::vector<uint32_t>& data, const List& list) {
224 CHECK(!data.empty());
229 std::uniform_int_distribution<> values(0, data.back());
230 const size_t iters = Reader::EncoderType::skipQuantum == 0 ? 100 : 10000;
231 for (size_t i = 0; i < iters; ++i) {
232 const uint32_t value = values(gen);
233 auto it = std::lower_bound(data.begin(), data.end(), value);
234 CHECK(it != data.end());
235 EXPECT_TRUE(reader.jumpTo(value));
236 EXPECT_EQ(reader.value(), *it);
237 EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
240 EXPECT_TRUE(reader.jumpTo(0));
241 EXPECT_EQ(reader.value(), data[0]);
242 EXPECT_EQ(reader.position(), 0);
244 EXPECT_TRUE(reader.jumpTo(data.back()));
245 EXPECT_EQ(reader.value(), data.back());
246 EXPECT_EQ(reader.position(), reader.size() - 1);
248 EXPECT_FALSE(reader.jumpTo(data.back() + 1));
249 EXPECT_FALSE(reader.valid());
250 EXPECT_EQ(reader.position(), reader.size());
253 template <class Reader, class Encoder>
255 const typename Encoder::ValueType* const data = nullptr;
256 auto list = Encoder::encode(data, data);
259 EXPECT_FALSE(reader.next());
260 EXPECT_EQ(reader.size(), 0);
264 EXPECT_FALSE(reader.skip(1));
265 EXPECT_FALSE(reader.skip(10));
266 EXPECT_FALSE(reader.jump(0));
267 EXPECT_FALSE(reader.jump(10));
271 EXPECT_FALSE(reader.skipTo(1));
272 EXPECT_FALSE(reader.jumpTo(1));
276 template <class Reader, class Encoder>
277 void testAll(const std::vector<uint32_t>& data) {
278 auto list = Encoder::encode(data.begin(), data.end());
279 testNext<Reader>(data, list);
280 testSkip<Reader>(data, list);
281 testSkipTo<Reader>(data, list);
282 testJump<Reader>(data, list);
283 testJumpTo<Reader>(data, list);
287 template <class Reader, class List>
288 void bmNext(const List& list, const std::vector<uint32_t>& data, size_t iters) {
294 for (size_t i = 0; i < iters; ++i) {
295 if (LIKELY(reader.next())) {
296 folly::doNotOptimizeAway(reader.value());
303 template <class Reader, class List>
304 void bmSkip(const List& list,
305 const std::vector<uint32_t>& /* data */,
308 size_t avg = (size_t(1) << logAvgSkip);
309 size_t base = avg - (avg >> 2);
310 size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0;
313 for (size_t i = 0; i < iters; ++i) {
314 size_t skip = base + (i & mask);
315 if (LIKELY(reader.skip(skip))) {
316 folly::doNotOptimizeAway(reader.value());
323 template <class Reader, class List>
324 void bmSkipTo(const List& list, const std::vector<uint32_t>& data,
325 size_t logAvgSkip, size_t iters) {
326 size_t avg = (size_t(1) << logAvgSkip);
327 size_t base = avg - (avg >> 2);
328 size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0;
331 for (size_t i = 0, j = -1; i < iters; ++i) {
332 size_t skip = base + (i & mask);
334 if (j >= data.size()) {
339 reader.skipTo(data[j]);
340 folly::doNotOptimizeAway(reader.value());
344 template <class Reader, class List>
345 void bmJump(const List& list, const std::vector<uint32_t>& data,
346 const std::vector<size_t>& order, size_t iters) {
347 CHECK(!data.empty());
348 CHECK_EQ(data.size(), order.size());
351 for (size_t i = 0, j = 0; i < iters; ++i, ++j) {
352 if (j == order.size()) j = 0;
353 reader.jump(order[j]);
354 folly::doNotOptimizeAway(reader.value());
358 template <class Reader, class List>
359 void bmJumpTo(const List& list, const std::vector<uint32_t>& data,
360 const std::vector<size_t>& order, size_t iters) {
361 CHECK(!data.empty());
362 CHECK_EQ(data.size(), order.size());
365 for (size_t i = 0, j = 0; i < iters; ++i, ++j) {
366 if (j == order.size()) j = 0;
367 reader.jumpTo(data[order[j]]);
368 folly::doNotOptimizeAway(reader.value());