2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/io/Compression.h>
22 #include <unordered_map>
24 #include <boost/noncopyable.hpp>
25 #include <glog/logging.h>
27 #include <folly/Benchmark.h>
28 #include <folly/Hash.h>
29 #include <folly/Random.h>
30 #include <folly/Varint.h>
31 #include <folly/io/IOBufQueue.h>
32 #include <folly/portability/GTest.h>
34 namespace folly { namespace io { namespace test {
36 class DataHolder : private boost::noncopyable {
38 uint64_t hash(size_t size) const;
39 ByteRange data(size_t size) const;
42 explicit DataHolder(size_t sizeLog2);
44 std::unique_ptr<uint8_t[]> data_;
45 mutable std::unordered_map<uint64_t, uint64_t> hashCache_;
48 DataHolder::DataHolder(size_t sizeLog2)
49 : size_(size_t(1) << sizeLog2),
50 data_(new uint8_t[size_]) {
53 uint64_t DataHolder::hash(size_t size) const {
54 CHECK_LE(size, size_);
55 auto p = hashCache_.find(size);
56 if (p != hashCache_.end()) {
60 uint64_t h = folly::hash::fnv64_buf(data_.get(), size);
65 ByteRange DataHolder::data(size_t size) const {
66 CHECK_LE(size, size_);
67 return ByteRange(data_.get(), size);
70 uint64_t hashIOBuf(const IOBuf* buf) {
71 uint64_t h = folly::hash::FNV_64_HASH_START;
72 for (auto& range : *buf) {
73 h = folly::hash::fnv64_buf(range.data(), range.size(), h);
78 class RandomDataHolder : public DataHolder {
80 explicit RandomDataHolder(size_t sizeLog2);
83 RandomDataHolder::RandomDataHolder(size_t sizeLog2)
84 : DataHolder(sizeLog2) {
85 constexpr size_t numThreadsLog2 = 3;
86 constexpr size_t numThreads = size_t(1) << numThreadsLog2;
88 uint32_t seed = randomNumberSeed();
90 std::vector<std::thread> threads;
91 threads.reserve(numThreads);
92 for (size_t t = 0; t < numThreads; ++t) {
94 [this, seed, t, numThreadsLog2, sizeLog2] () {
95 std::mt19937 rng(seed + t);
96 size_t countLog2 = sizeLog2 - numThreadsLog2;
97 size_t start = size_t(t) << countLog2;
98 for (size_t i = 0; i < countLog2; ++i) {
99 this->data_[start + i] = rng();
104 for (auto& t : threads) {
109 class ConstantDataHolder : public DataHolder {
111 explicit ConstantDataHolder(size_t sizeLog2);
114 ConstantDataHolder::ConstantDataHolder(size_t sizeLog2)
115 : DataHolder(sizeLog2) {
116 memset(data_.get(), 'a', size_);
119 constexpr size_t dataSizeLog2 = 27; // 128MiB
120 RandomDataHolder randomDataHolder(dataSizeLog2);
121 ConstantDataHolder constantDataHolder(dataSizeLog2);
123 // The intersection of the provided codecs & those that are compiled in.
124 static std::vector<CodecType> supportedCodecs(std::vector<CodecType> const& v) {
125 std::vector<CodecType> supported;
130 std::back_inserter(supported),
136 // All compiled-in compression codecs.
137 static std::vector<CodecType> availableCodecs() {
138 std::vector<CodecType> codecs;
140 for (size_t i = 0; i < static_cast<size_t>(CodecType::NUM_CODEC_TYPES); ++i) {
141 auto type = static_cast<CodecType>(i);
142 if (hasCodec(type)) {
143 codecs.push_back(type);
150 TEST(CompressionTestNeedsUncompressedLength, Simple) {
151 static const struct { CodecType type; bool needsUncompressedLength; }
153 { CodecType::NO_COMPRESSION, false },
154 { CodecType::LZ4, true },
155 { CodecType::SNAPPY, false },
156 { CodecType::ZLIB, false },
157 { CodecType::LZ4_VARINT_SIZE, false },
158 { CodecType::LZMA2, true },
159 { CodecType::LZMA2_VARINT_SIZE, false },
160 { CodecType::ZSTD, false },
161 { CodecType::GZIP, false },
162 { CodecType::LZ4_FRAME, false },
165 for (auto const& test : expectations) {
166 if (hasCodec(test.type)) {
167 EXPECT_EQ(getCodec(test.type)->needsUncompressedLength(),
168 test.needsUncompressedLength);
173 class CompressionTest
174 : public testing::TestWithParam<std::tr1::tuple<int, int, CodecType>> {
176 void SetUp() override {
177 auto tup = GetParam();
178 uncompressedLength_ = uint64_t(1) << std::tr1::get<0>(tup);
179 chunks_ = std::tr1::get<1>(tup);
180 codec_ = getCodec(std::tr1::get<2>(tup));
183 void runSimpleIOBufTest(const DataHolder& dh);
185 void runSimpleStringTest(const DataHolder& dh);
188 std::unique_ptr<IOBuf> split(std::unique_ptr<IOBuf> data) const;
190 uint64_t uncompressedLength_;
192 std::unique_ptr<Codec> codec_;
195 void CompressionTest::runSimpleIOBufTest(const DataHolder& dh) {
196 const auto original = split(IOBuf::wrapBuffer(dh.data(uncompressedLength_)));
197 const auto compressed = split(codec_->compress(original.get()));
198 if (!codec_->needsUncompressedLength()) {
199 auto uncompressed = codec_->uncompress(compressed.get());
200 EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
201 EXPECT_EQ(dh.hash(uncompressedLength_), hashIOBuf(uncompressed.get()));
204 auto uncompressed = codec_->uncompress(compressed.get(),
205 uncompressedLength_);
206 EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
207 EXPECT_EQ(dh.hash(uncompressedLength_), hashIOBuf(uncompressed.get()));
211 void CompressionTest::runSimpleStringTest(const DataHolder& dh) {
212 const auto original = std::string(
213 reinterpret_cast<const char*>(dh.data(uncompressedLength_).data()),
214 uncompressedLength_);
215 const auto compressed = codec_->compress(original);
216 if (!codec_->needsUncompressedLength()) {
217 auto uncompressed = codec_->uncompress(compressed);
218 EXPECT_EQ(uncompressedLength_, uncompressed.length());
219 EXPECT_EQ(uncompressed, original);
222 auto uncompressed = codec_->uncompress(compressed, uncompressedLength_);
223 EXPECT_EQ(uncompressedLength_, uncompressed.length());
224 EXPECT_EQ(uncompressed, original);
228 // Uniformly split data into (potentially empty) chunks.
229 std::unique_ptr<IOBuf> CompressionTest::split(
230 std::unique_ptr<IOBuf> data) const {
231 if (data->isChained()) {
235 const size_t size = data->computeChainDataLength();
237 std::multiset<size_t> splits;
238 for (size_t i = 1; i < chunks_; ++i) {
239 splits.insert(Random::rand64(size));
242 folly::IOBufQueue result;
245 for (size_t split : splits) {
246 result.append(IOBuf::copyBuffer(data->data() + offset, split - offset));
249 result.append(IOBuf::copyBuffer(data->data() + offset, size - offset));
251 return result.move();
254 TEST_P(CompressionTest, RandomData) {
255 runSimpleIOBufTest(randomDataHolder);
258 TEST_P(CompressionTest, ConstantData) {
259 runSimpleIOBufTest(constantDataHolder);
262 TEST_P(CompressionTest, RandomDataString) {
263 runSimpleStringTest(randomDataHolder);
266 TEST_P(CompressionTest, ConstantDataString) {
267 runSimpleStringTest(constantDataHolder);
270 INSTANTIATE_TEST_CASE_P(
274 testing::Values(0, 1, 12, 22, 25, 27),
275 testing::Values(1, 2, 3, 8, 65),
276 testing::ValuesIn(availableCodecs())));
278 class CompressionVarintTest
279 : public testing::TestWithParam<std::tr1::tuple<int, CodecType>> {
281 void SetUp() override {
282 auto tup = GetParam();
283 uncompressedLength_ = uint64_t(1) << std::tr1::get<0>(tup);
284 codec_ = getCodec(std::tr1::get<1>(tup));
287 void runSimpleTest(const DataHolder& dh);
289 uint64_t uncompressedLength_;
290 std::unique_ptr<Codec> codec_;
293 inline uint64_t oneBasedMsbPos(uint64_t number) {
295 for (; number > 0; ++pos, number >>= 1) {
300 void CompressionVarintTest::runSimpleTest(const DataHolder& dh) {
301 auto original = IOBuf::wrapBuffer(dh.data(uncompressedLength_));
302 auto compressed = codec_->compress(original.get());
306 std::max(uint64_t(9), oneBasedMsbPos(uncompressedLength_)) / 9UL);
307 auto tinyBuf = IOBuf::copyBuffer(compressed->data(),
308 std::min(compressed->length(), breakPoint));
309 compressed->trimStart(breakPoint);
310 tinyBuf->prependChain(std::move(compressed));
311 compressed = std::move(tinyBuf);
313 auto uncompressed = codec_->uncompress(compressed.get());
315 EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
316 EXPECT_EQ(dh.hash(uncompressedLength_), hashIOBuf(uncompressed.get()));
319 TEST_P(CompressionVarintTest, RandomData) {
320 runSimpleTest(randomDataHolder);
323 TEST_P(CompressionVarintTest, ConstantData) {
324 runSimpleTest(constantDataHolder);
327 INSTANTIATE_TEST_CASE_P(
328 CompressionVarintTest,
329 CompressionVarintTest,
331 testing::Values(0, 1, 12, 22, 25, 27),
332 testing::ValuesIn(supportedCodecs({
333 CodecType::LZ4_VARINT_SIZE,
334 CodecType::LZMA2_VARINT_SIZE,
337 class CompressionCorruptionTest : public testing::TestWithParam<CodecType> {
339 void SetUp() override { codec_ = getCodec(GetParam()); }
341 void runSimpleTest(const DataHolder& dh);
343 std::unique_ptr<Codec> codec_;
346 void CompressionCorruptionTest::runSimpleTest(const DataHolder& dh) {
347 constexpr uint64_t uncompressedLength = 42;
348 auto original = IOBuf::wrapBuffer(dh.data(uncompressedLength));
349 auto compressed = codec_->compress(original.get());
351 if (!codec_->needsUncompressedLength()) {
352 auto uncompressed = codec_->uncompress(compressed.get());
353 EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength());
354 EXPECT_EQ(dh.hash(uncompressedLength), hashIOBuf(uncompressed.get()));
357 auto uncompressed = codec_->uncompress(compressed.get(),
359 EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength());
360 EXPECT_EQ(dh.hash(uncompressedLength), hashIOBuf(uncompressed.get()));
363 EXPECT_THROW(codec_->uncompress(compressed.get(), uncompressedLength + 1),
366 // Corrupt the first character
367 ++(compressed->writableData()[0]);
369 if (!codec_->needsUncompressedLength()) {
370 EXPECT_THROW(codec_->uncompress(compressed.get()),
374 EXPECT_THROW(codec_->uncompress(compressed.get(), uncompressedLength),
378 TEST_P(CompressionCorruptionTest, RandomData) {
379 runSimpleTest(randomDataHolder);
382 TEST_P(CompressionCorruptionTest, ConstantData) {
383 runSimpleTest(constantDataHolder);
386 INSTANTIATE_TEST_CASE_P(
387 CompressionCorruptionTest,
388 CompressionCorruptionTest,
390 // NO_COMPRESSION can't detect corruption
391 // LZ4 can't detect corruption reliably (sigh)
396 CodecType::LZ4_FRAME,
400 int main(int argc, char *argv[]) {
401 testing::InitGoogleTest(&argc, argv);
402 gflags::ParseCommandLineFlags(&argc, &argv, true);
404 auto ret = RUN_ALL_TESTS();
406 folly::runBenchmarksOnFlag();