Improve CompressionTest
author Tudor Bosman <tudorb@fb.com>
Mon, 18 Nov 2013 18:42:02 +0000 (10:42 -0800)
committer Peter Griess <pgriess@fb.com>
Tue, 26 Nov 2013 15:05:17 +0000 (07:05 -0800)
Summary: Test compressing low-entropy (constant) data as well.

Test Plan: ran it

Reviewed By: tuomas.pelkonen@fb.com

FB internal diff: D1061444

@override-unit-failures

folly/io/IOBuf.h
folly/io/test/CompressionTest.cpp

index a071ba3390f81c3542b1bb9cee2a3a41bfa545a0..03f3e657f1a2700a87fb0db6b59de76265135240 100644 (file)
@@ -313,6 +313,10 @@ class IOBuf {
    * On error, std::bad_alloc will be thrown.
    */
   static std::unique_ptr<IOBuf> wrapBuffer(const void* buf, uint32_t capacity);
+  static std::unique_ptr<IOBuf> wrapBuffer(ByteRange br) {
+    CHECK_LE(br.size(), std::numeric_limits<uint32_t>::max());
+    return wrapBuffer(br.data(), br.size());
+  }
 
   /**
    * Convenience function to create a new IOBuf object that copies data from a
@@ -322,6 +326,12 @@ class IOBuf {
   static std::unique_ptr<IOBuf> copyBuffer(const void* buf, uint32_t size,
                                            uint32_t headroom=0,
                                            uint32_t minTailroom=0);
+  static std::unique_ptr<IOBuf> copyBuffer(ByteRange br,
+                                           uint32_t headroom=0,
+                                           uint32_t minTailroom=0) {
+    CHECK_LE(br.size(), std::numeric_limits<uint32_t>::max());
+    return copyBuffer(br.data(), br.size(), headroom, minTailroom);
+  }
 
   /**
    * Convenience function to create a new IOBuf object that copies data from a
index 57399bea1ee2a8c1e47a388c5d6e1642fa292afb..27a6b9fdccde103bdff6a12a4310d109b221e3e5 100644 (file)
@@ -22,6 +22,7 @@
 #include <tr1/tuple>
 #include <unordered_map>
 
+#include <boost/noncopyable.hpp>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
 
 namespace folly { namespace io { namespace test {
 
-constexpr size_t randomDataSizeLog2 = 27;  // 128MiB
-constexpr size_t randomDataSize = size_t(1) << randomDataSizeLog2;
+class DataHolder : private boost::noncopyable {
+ public:
+  uint64_t hash(size_t size) const;
+  ByteRange data(size_t size) const;
 
-std::unique_ptr<uint8_t[]> randomData;
-std::unordered_map<uint64_t, uint64_t> hashes;
+ protected:
+  explicit DataHolder(size_t sizeLog2);
+  const size_t size_;
+  std::unique_ptr<uint8_t[]> data_;
+  mutable std::unordered_map<uint64_t, uint64_t> hashCache_;
+};
 
-uint64_t hashIOBuf(const IOBuf* buf) {
-  uint64_t h = folly::hash::FNV_64_HASH_START;
-  for (auto& range : *buf) {
-    h = folly::hash::fnv64_buf(range.data(), range.size(), h);
-  }
-  return h;
+DataHolder::DataHolder(size_t sizeLog2)
+  : size_(size_t(1) << sizeLog2),
+    data_(new uint8_t[size_]) {
 }
 
-uint64_t getRandomDataHash(uint64_t size) {
-  auto p = hashes.find(size);
-  if (p != hashes.end()) {
+uint64_t DataHolder::hash(size_t size) const {
+  CHECK_LE(size, size_);
+  auto p = hashCache_.find(size);
+  if (p != hashCache_.end()) {
     return p->second;
   }
 
-  uint64_t h = folly::hash::fnv64_buf(randomData.get(), size);
-  hashes[size] = h;
+  uint64_t h = folly::hash::fnv64_buf(data_.get(), size);
+  hashCache_[size] = h;
   return h;
 }
 
-void generateRandomData() {
-  randomData.reset(new uint8_t[size_t(1) << randomDataSizeLog2]);
+ByteRange DataHolder::data(size_t size) const {
+  CHECK_LE(size, size_);
+  return ByteRange(data_.get(), size);
+}
+
+uint64_t hashIOBuf(const IOBuf* buf) {
+  uint64_t h = folly::hash::FNV_64_HASH_START;
+  for (auto& range : *buf) {
+    h = folly::hash::fnv64_buf(range.data(), range.size(), h);
+  }
+  return h;
+}
 
+class RandomDataHolder : public DataHolder {
+ public:
+  explicit RandomDataHolder(size_t sizeLog2);
+};
+
+RandomDataHolder::RandomDataHolder(size_t sizeLog2)
+  : DataHolder(sizeLog2) {
   constexpr size_t numThreadsLog2 = 3;
   constexpr size_t numThreads = size_t(1) << numThreadsLog2;
 
@@ -69,12 +91,12 @@ void generateRandomData() {
   threads.reserve(numThreads);
   for (size_t t = 0; t < numThreads; ++t) {
     threads.emplace_back(
-        [seed, t, numThreadsLog2] () {
+        [this, seed, t, numThreadsLog2, sizeLog2] () {
           std::mt19937 rng(seed + t);
-          size_t countLog2 = size_t(1) << (randomDataSizeLog2 - numThreadsLog2);
+          size_t countLog2 = size_t(1) << (sizeLog2 - numThreadsLog2);
           size_t start = size_t(t) << countLog2;
           for (size_t i = 0; i < countLog2; ++i) {
-            randomData[start + i] = rng();
+            this->data_[start + i] = rng();
           }
         });
   }
@@ -84,6 +106,20 @@ void generateRandomData() {
   }
 }
 
+class ConstantDataHolder : public DataHolder {
+ public:
+  explicit ConstantDataHolder(size_t sizeLog2);
+};
+
+ConstantDataHolder::ConstantDataHolder(size_t sizeLog2)
+  : DataHolder(sizeLog2) {
+  memset(data_.get(), 'a', size_);
+}
+
+constexpr size_t dataSizeLog2 = 27;  // 128MiB
+RandomDataHolder randomDataHolder(dataSizeLog2);
+ConstantDataHolder constantDataHolder(dataSizeLog2);
+
 TEST(CompressionTestNeedsUncompressedLength, Simple) {
   EXPECT_FALSE(getCodec(CodecType::NO_COMPRESSION)->needsUncompressedLength());
   EXPECT_TRUE(getCodec(CodecType::LZ4)->needsUncompressedLength());
@@ -101,33 +137,41 @@ class CompressionTest : public testing::TestWithParam<
      codec_ = getCodec(std::tr1::get<1>(tup));
    }
 
+   void runSimpleTest(const DataHolder& dh);
+
    uint64_t uncompressedLength_;
    std::unique_ptr<Codec> codec_;
 };
 
-TEST_P(CompressionTest, Simple) {
-  auto original = IOBuf::wrapBuffer(randomData.get(), uncompressedLength_);
+void CompressionTest::runSimpleTest(const DataHolder& dh) {
+  auto original = IOBuf::wrapBuffer(dh.data(uncompressedLength_));
   auto compressed = codec_->compress(original.get());
   if (!codec_->needsUncompressedLength()) {
     auto uncompressed = codec_->uncompress(compressed.get());
     EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
-    EXPECT_EQ(getRandomDataHash(uncompressedLength_),
-              hashIOBuf(uncompressed.get()));
+    EXPECT_EQ(dh.hash(uncompressedLength_), hashIOBuf(uncompressed.get()));
   }
   {
     auto uncompressed = codec_->uncompress(compressed.get(),
                                            uncompressedLength_);
     EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
-    EXPECT_EQ(getRandomDataHash(uncompressedLength_),
-              hashIOBuf(uncompressed.get()));
+    EXPECT_EQ(dh.hash(uncompressedLength_), hashIOBuf(uncompressed.get()));
   }
 }
 
+TEST_P(CompressionTest, RandomData) {
+  runSimpleTest(randomDataHolder);
+}
+
+TEST_P(CompressionTest, ConstantData) {
+  runSimpleTest(constantDataHolder);
+}
+
 INSTANTIATE_TEST_CASE_P(
     CompressionTest,
     CompressionTest,
     testing::Combine(
-        testing::Values(0, 1, 12, 22, int(randomDataSizeLog2)),
+        testing::Values(0, 1, 12, 22, 25, 27),
         testing::Values(CodecType::NO_COMPRESSION,
                         CodecType::LZ4,
                         CodecType::SNAPPY,
@@ -140,26 +184,26 @@ class CompressionCorruptionTest : public testing::TestWithParam<CodecType> {
     codec_ = getCodec(GetParam());
   }
 
+  void runSimpleTest(const DataHolder& dh);
+
   std::unique_ptr<Codec> codec_;
 };
 
-TEST_P(CompressionCorruptionTest, Simple) {
+void CompressionCorruptionTest::runSimpleTest(const DataHolder& dh) {
   constexpr uint64_t uncompressedLength = 42;
-  auto original = IOBuf::wrapBuffer(randomData.get(), uncompressedLength);
+  auto original = IOBuf::wrapBuffer(dh.data(uncompressedLength));
   auto compressed = codec_->compress(original.get());
 
   if (!codec_->needsUncompressedLength()) {
     auto uncompressed = codec_->uncompress(compressed.get());
     EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength());
-    EXPECT_EQ(getRandomDataHash(uncompressedLength),
-              hashIOBuf(uncompressed.get()));
+    EXPECT_EQ(dh.hash(uncompressedLength), hashIOBuf(uncompressed.get()));
   }
   {
     auto uncompressed = codec_->uncompress(compressed.get(),
                                            uncompressedLength);
     EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength());
-    EXPECT_EQ(getRandomDataHash(uncompressedLength),
-              hashIOBuf(uncompressed.get()));
+    EXPECT_EQ(dh.hash(uncompressedLength), hashIOBuf(uncompressed.get()));
   }
 
   EXPECT_THROW(codec_->uncompress(compressed.get(), uncompressedLength + 1),
@@ -177,6 +221,14 @@ TEST_P(CompressionCorruptionTest, Simple) {
                std::runtime_error);
 }
 
+TEST_P(CompressionCorruptionTest, RandomData) {
+  runSimpleTest(randomDataHolder);
+}
+
+TEST_P(CompressionCorruptionTest, ConstantData) {
+  runSimpleTest(constantDataHolder);
+}
+
 INSTANTIATE_TEST_CASE_P(
     CompressionCorruptionTest,
     CompressionCorruptionTest,
@@ -192,8 +244,6 @@ int main(int argc, char *argv[]) {
   testing::InitGoogleTest(&argc, argv);
   google::ParseCommandLineFlags(&argc, &argv, true);
 
-  folly::io::test::generateRandomData();  // 4GB
-
   auto ret = RUN_ALL_TESTS();
   if (!ret) {
     folly::runBenchmarksOnFlag();