Add Varint-length-prefixed flavor of LZ4
[folly.git] / folly / io / test / CompressionTest.cpp
1 /*
2  * Copyright 2013 Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "folly/io/Compression.h"
18
19 // Yes, tr1, as that's what gtest requires
20 #include <random>
21 #include <thread>
22 #include <tr1/tuple>
23 #include <unordered_map>
24
25 #include <glog/logging.h>
26 #include <gtest/gtest.h>
27
28 #include "folly/Benchmark.h"
29 #include "folly/Hash.h"
30 #include "folly/Random.h"
31 #include "folly/io/IOBufQueue.h"
32
33 namespace folly { namespace io { namespace test {
34
// Size of the shared random input buffer, expressed as a power of two.
constexpr size_t randomDataSizeLog2 = 27;
// Size of the shared random input buffer in bytes: 2^27 = 128MiB.
constexpr size_t randomDataSize =
    static_cast<size_t>(1) << randomDataSizeLog2;

// Input bytes shared by the tests; allocated and filled by
// generateRandomData().
std::unique_ptr<uint8_t[]> randomData;
// Cache used by getRandomDataHash(): prefix length -> hash of that prefix of
// randomData.
std::unordered_map<uint64_t, uint64_t> hashes;
40
41 uint64_t hashIOBuf(const IOBuf* buf) {
42   uint64_t h = folly::hash::FNV_64_HASH_START;
43   for (auto& range : *buf) {
44     h = folly::hash::fnv64_buf(range.data(), range.size(), h);
45   }
46   return h;
47 }
48
49 uint64_t getRandomDataHash(uint64_t size) {
50   auto p = hashes.find(size);
51   if (p != hashes.end()) {
52     return p->second;
53   }
54
55   uint64_t h = folly::hash::fnv64_buf(randomData.get(), size);
56   hashes[size] = h;
57   return h;
58 }
59
60 void generateRandomData() {
61   randomData.reset(new uint8_t[size_t(1) << randomDataSizeLog2]);
62
63   constexpr size_t numThreadsLog2 = 3;
64   constexpr size_t numThreads = size_t(1) << numThreadsLog2;
65
66   uint32_t seed = randomNumberSeed();
67
68   std::vector<std::thread> threads;
69   threads.reserve(numThreads);
70   for (size_t t = 0; t < numThreads; ++t) {
71     threads.emplace_back(
72         [seed, t, numThreadsLog2] () {
73           std::mt19937 rng(seed + t);
74           size_t countLog2 = size_t(1) << (randomDataSizeLog2 - numThreadsLog2);
75           size_t start = size_t(t) << countLog2;
76           for (size_t i = 0; i < countLog2; ++i) {
77             randomData[start + i] = rng();
78           }
79         });
80   }
81
82   for (auto& t : threads) {
83     t.join();
84   }
85 }
86
87 TEST(CompressionTestNeedsUncompressedLength, Simple) {
88   EXPECT_FALSE(getCodec(CodecType::NO_COMPRESSION)->needsUncompressedLength());
89   EXPECT_TRUE(getCodec(CodecType::LZ4)->needsUncompressedLength());
90   EXPECT_FALSE(getCodec(CodecType::SNAPPY)->needsUncompressedLength());
91   EXPECT_FALSE(getCodec(CodecType::ZLIB)->needsUncompressedLength());
92   EXPECT_FALSE(getCodec(CodecType::LZ4_VARINT_SIZE)->needsUncompressedLength());
93 }
94
95 class CompressionTest : public testing::TestWithParam<
96     std::tr1::tuple<int, CodecType>> {
97   protected:
98    void SetUp() {
99      auto tup = GetParam();
100      uncompressedLength_ = uint64_t(1) << std::tr1::get<0>(tup);
101      codec_ = getCodec(std::tr1::get<1>(tup));
102    }
103
104    uint64_t uncompressedLength_;
105    std::unique_ptr<Codec> codec_;
106 };
107
108 TEST_P(CompressionTest, Simple) {
109   auto original = IOBuf::wrapBuffer(randomData.get(), uncompressedLength_);
110   auto compressed = codec_->compress(original.get());
111   if (!codec_->needsUncompressedLength()) {
112     auto uncompressed = codec_->uncompress(compressed.get());
113     EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
114     EXPECT_EQ(getRandomDataHash(uncompressedLength_),
115               hashIOBuf(uncompressed.get()));
116   }
117   {
118     auto uncompressed = codec_->uncompress(compressed.get(),
119                                            uncompressedLength_);
120     EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
121     EXPECT_EQ(getRandomDataHash(uncompressedLength_),
122               hashIOBuf(uncompressed.get()));
123   }
124 }
125
126 INSTANTIATE_TEST_CASE_P(
127     CompressionTest,
128     CompressionTest,
129     testing::Combine(
130         testing::Values(0, 1, 12, 22, int(randomDataSizeLog2)),
131         testing::Values(CodecType::NO_COMPRESSION,
132                         CodecType::LZ4,
133                         CodecType::SNAPPY,
134                         CodecType::ZLIB,
135                         CodecType::LZ4_VARINT_SIZE)));
136
137 class CompressionCorruptionTest : public testing::TestWithParam<CodecType> {
138  protected:
139   void SetUp() {
140     codec_ = getCodec(GetParam());
141   }
142
143   std::unique_ptr<Codec> codec_;
144 };
145
146 TEST_P(CompressionCorruptionTest, Simple) {
147   constexpr uint64_t uncompressedLength = 42;
148   auto original = IOBuf::wrapBuffer(randomData.get(), uncompressedLength);
149   auto compressed = codec_->compress(original.get());
150
151   if (!codec_->needsUncompressedLength()) {
152     auto uncompressed = codec_->uncompress(compressed.get());
153     EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength());
154     EXPECT_EQ(getRandomDataHash(uncompressedLength),
155               hashIOBuf(uncompressed.get()));
156   }
157   {
158     auto uncompressed = codec_->uncompress(compressed.get(),
159                                            uncompressedLength);
160     EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength());
161     EXPECT_EQ(getRandomDataHash(uncompressedLength),
162               hashIOBuf(uncompressed.get()));
163   }
164
165   EXPECT_THROW(codec_->uncompress(compressed.get(), uncompressedLength + 1),
166                std::runtime_error);
167
168   // Corrupt the first character
169   ++(compressed->writableData()[0]);
170
171   if (!codec_->needsUncompressedLength()) {
172     EXPECT_THROW(codec_->uncompress(compressed.get()),
173                  std::runtime_error);
174   }
175
176   EXPECT_THROW(codec_->uncompress(compressed.get(), uncompressedLength),
177                std::runtime_error);
178 }
179
180 INSTANTIATE_TEST_CASE_P(
181     CompressionCorruptionTest,
182     CompressionCorruptionTest,
183     testing::Values(
184         // NO_COMPRESSION can't detect corruption
185         // LZ4 can't detect corruption reliably (sigh)
186         CodecType::SNAPPY,
187         CodecType::ZLIB));
188
189 }}}  // namespaces
190
191 int main(int argc, char *argv[]) {
192   testing::InitGoogleTest(&argc, argv);
193   google::ParseCommandLineFlags(&argc, &argv, true);
194
195   folly::io::test::generateRandomData();  // 4GB
196
197   auto ret = RUN_ALL_TESTS();
198   if (!ret) {
199     folly::runBenchmarksOnFlag();
200   }
201   return ret;
202 }
203