remove eof whitespace lines
[folly.git] / folly / io / test / CompressionTest.cpp
1 /*
2  * Copyright 2014 Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <folly/io/Compression.h>
18
19 #include <random>
20 #include <thread>
21 #include <unordered_map>
22
23 #include <boost/noncopyable.hpp>
24 #include <glog/logging.h>
25 #include <gtest/gtest.h>
26
27 #include <folly/Benchmark.h>
28 #include <folly/Hash.h>
29 #include <folly/Random.h>
30 #include <folly/io/IOBufQueue.h>
31
32 namespace folly { namespace io { namespace test {
33
34 class DataHolder : private boost::noncopyable {
35  public:
36   uint64_t hash(size_t size) const;
37   ByteRange data(size_t size) const;
38
39  protected:
40   explicit DataHolder(size_t sizeLog2);
41   const size_t size_;
42   std::unique_ptr<uint8_t[]> data_;
43   mutable std::unordered_map<uint64_t, uint64_t> hashCache_;
44 };
45
46 DataHolder::DataHolder(size_t sizeLog2)
47   : size_(size_t(1) << sizeLog2),
48     data_(new uint8_t[size_]) {
49 }
50
51 uint64_t DataHolder::hash(size_t size) const {
52   CHECK_LE(size, size_);
53   auto p = hashCache_.find(size);
54   if (p != hashCache_.end()) {
55     return p->second;
56   }
57
58   uint64_t h = folly::hash::fnv64_buf(data_.get(), size);
59   hashCache_[size] = h;
60   return h;
61 }
62
63 ByteRange DataHolder::data(size_t size) const {
64   CHECK_LE(size, size_);
65   return ByteRange(data_.get(), size);
66 }
67
68 uint64_t hashIOBuf(const IOBuf* buf) {
69   uint64_t h = folly::hash::FNV_64_HASH_START;
70   for (auto& range : *buf) {
71     h = folly::hash::fnv64_buf(range.data(), range.size(), h);
72   }
73   return h;
74 }
75
76 class RandomDataHolder : public DataHolder {
77  public:
78   explicit RandomDataHolder(size_t sizeLog2);
79 };
80
81 RandomDataHolder::RandomDataHolder(size_t sizeLog2)
82   : DataHolder(sizeLog2) {
83   constexpr size_t numThreadsLog2 = 3;
84   constexpr size_t numThreads = size_t(1) << numThreadsLog2;
85
86   uint32_t seed = randomNumberSeed();
87
88   std::vector<std::thread> threads;
89   threads.reserve(numThreads);
90   for (size_t t = 0; t < numThreads; ++t) {
91     threads.emplace_back(
92         [this, seed, t, numThreadsLog2, sizeLog2] () {
93           std::mt19937 rng(seed + t);
94           size_t countLog2 = size_t(1) << (sizeLog2 - numThreadsLog2);
95           size_t start = size_t(t) << countLog2;
96           for (size_t i = 0; i < countLog2; ++i) {
97             this->data_[start + i] = rng();
98           }
99         });
100   }
101
102   for (auto& t : threads) {
103     t.join();
104   }
105 }
106
107 class ConstantDataHolder : public DataHolder {
108  public:
109   explicit ConstantDataHolder(size_t sizeLog2);
110 };
111
112 ConstantDataHolder::ConstantDataHolder(size_t sizeLog2)
113   : DataHolder(sizeLog2) {
114   memset(data_.get(), 'a', size_);
115 }
116
117 constexpr size_t dataSizeLog2 = 27;  // 128MiB
118 RandomDataHolder randomDataHolder(dataSizeLog2);
119 ConstantDataHolder constantDataHolder(dataSizeLog2);
120
121 TEST(CompressionTestNeedsUncompressedLength, Simple) {
122   EXPECT_FALSE(getCodec(CodecType::NO_COMPRESSION)->needsUncompressedLength());
123   EXPECT_TRUE(getCodec(CodecType::LZ4)->needsUncompressedLength());
124   EXPECT_FALSE(getCodec(CodecType::SNAPPY)->needsUncompressedLength());
125   EXPECT_FALSE(getCodec(CodecType::ZLIB)->needsUncompressedLength());
126   EXPECT_FALSE(getCodec(CodecType::LZ4_VARINT_SIZE)->needsUncompressedLength());
127   EXPECT_TRUE(getCodec(CodecType::LZMA2)->needsUncompressedLength());
128   EXPECT_FALSE(getCodec(CodecType::LZMA2_VARINT_SIZE)
129     ->needsUncompressedLength());
130 }
131
132 class CompressionTest : public testing::TestWithParam<
133     std::tr1::tuple<int, CodecType>> {
134   protected:
135    void SetUp() {
136      auto tup = GetParam();
137      uncompressedLength_ = uint64_t(1) << std::tr1::get<0>(tup);
138      codec_ = getCodec(std::tr1::get<1>(tup));
139    }
140
141    void runSimpleTest(const DataHolder& dh);
142
143    uint64_t uncompressedLength_;
144    std::unique_ptr<Codec> codec_;
145 };
146
147 void CompressionTest::runSimpleTest(const DataHolder& dh) {
148   auto original = IOBuf::wrapBuffer(dh.data(uncompressedLength_));
149   auto compressed = codec_->compress(original.get());
150   if (!codec_->needsUncompressedLength()) {
151     auto uncompressed = codec_->uncompress(compressed.get());
152     EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
153     EXPECT_EQ(dh.hash(uncompressedLength_), hashIOBuf(uncompressed.get()));
154   }
155   {
156     auto uncompressed = codec_->uncompress(compressed.get(),
157                                            uncompressedLength_);
158     EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
159     EXPECT_EQ(dh.hash(uncompressedLength_), hashIOBuf(uncompressed.get()));
160   }
161 }
162
163 TEST_P(CompressionTest, RandomData) {
164   runSimpleTest(randomDataHolder);
165 }
166
167 TEST_P(CompressionTest, ConstantData) {
168   runSimpleTest(constantDataHolder);
169 }
170
171 INSTANTIATE_TEST_CASE_P(
172     CompressionTest,
173     CompressionTest,
174     testing::Combine(
175         testing::Values(0, 1, 12, 22, 25, 27),
176         testing::Values(CodecType::NO_COMPRESSION,
177                         CodecType::LZ4,
178                         CodecType::SNAPPY,
179                         CodecType::ZLIB,
180                         CodecType::LZ4_VARINT_SIZE,
181                         CodecType::LZMA2,
182                         CodecType::LZMA2_VARINT_SIZE)));
183
184 class CompressionCorruptionTest : public testing::TestWithParam<CodecType> {
185  protected:
186   void SetUp() {
187     codec_ = getCodec(GetParam());
188   }
189
190   void runSimpleTest(const DataHolder& dh);
191
192   std::unique_ptr<Codec> codec_;
193 };
194
195 void CompressionCorruptionTest::runSimpleTest(const DataHolder& dh) {
196   constexpr uint64_t uncompressedLength = 42;
197   auto original = IOBuf::wrapBuffer(dh.data(uncompressedLength));
198   auto compressed = codec_->compress(original.get());
199
200   if (!codec_->needsUncompressedLength()) {
201     auto uncompressed = codec_->uncompress(compressed.get());
202     EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength());
203     EXPECT_EQ(dh.hash(uncompressedLength), hashIOBuf(uncompressed.get()));
204   }
205   {
206     auto uncompressed = codec_->uncompress(compressed.get(),
207                                            uncompressedLength);
208     EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength());
209     EXPECT_EQ(dh.hash(uncompressedLength), hashIOBuf(uncompressed.get()));
210   }
211
212   EXPECT_THROW(codec_->uncompress(compressed.get(), uncompressedLength + 1),
213                std::runtime_error);
214
215   // Corrupt the first character
216   ++(compressed->writableData()[0]);
217
218   if (!codec_->needsUncompressedLength()) {
219     EXPECT_THROW(codec_->uncompress(compressed.get()),
220                  std::runtime_error);
221   }
222
223   EXPECT_THROW(codec_->uncompress(compressed.get(), uncompressedLength),
224                std::runtime_error);
225 }
226
227 TEST_P(CompressionCorruptionTest, RandomData) {
228   runSimpleTest(randomDataHolder);
229 }
230
231 TEST_P(CompressionCorruptionTest, ConstantData) {
232   runSimpleTest(constantDataHolder);
233 }
234
235 INSTANTIATE_TEST_CASE_P(
236     CompressionCorruptionTest,
237     CompressionCorruptionTest,
238     testing::Values(
239         // NO_COMPRESSION can't detect corruption
240         // LZ4 can't detect corruption reliably (sigh)
241         CodecType::SNAPPY,
242         CodecType::ZLIB));
243
244 }}}  // namespaces
245
246 int main(int argc, char *argv[]) {
247   testing::InitGoogleTest(&argc, argv);
248   gflags::ParseCommandLineFlags(&argc, &argv, true);
249
250   auto ret = RUN_ALL_TESTS();
251   if (!ret) {
252     folly::runBenchmarksOnFlag();
253   }
254   return ret;
255 }