Allow decompresion without uncompressedSize
authorNick Terrell <terrelln@fb.com>
Fri, 24 Mar 2017 21:30:45 +0000 (14:30 -0700)
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>
Fri, 24 Mar 2017 21:37:06 +0000 (14:37 -0700)
Summary:
The `LZMA2` codec doesn't allow decompression without passing the uncompressed size.
Every other codec that supports streaming decompression without the uncompressed size by streaming into an IOBuf chain.
Benchmarks show that reducing the `defaultBufferLength` to 256 KiB doesn't slow down decompression of large files, but it speeds up decompression of small files (< 1000 bytes).

Reviewed By: yfeldblum

Differential Revision: D4751571

fbshipit-source-id: 39dbe6754a1ecdc2b7ba3107e9face926d4c98ca

folly/io/Compression.cpp
folly/io/test/CompressionTest.cpp

index 45a4d97..0774912 100644 (file)
@@ -973,7 +973,7 @@ LZMA2Codec::LZMA2Codec(int level, CodecType type) : Codec(type) {
 }
 
 bool LZMA2Codec::doNeedsUncompressedLength() const {
-  return !encodeSize();
+  return false;
 }
 
 uint64_t LZMA2Codec::doMaxUncompressedLength() const {
@@ -1104,27 +1104,25 @@ std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(const IOBuf* data,
   SCOPE_EXIT { lzma_end(&stream); };
 
   // Max 64MiB in one go
-  constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20;    // 64MiB
-  constexpr uint32_t defaultBufferLength = uint32_t(4) << 20;     // 4MiB
+  constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
+  constexpr uint32_t defaultBufferLength = uint32_t(256) << 10; // 256 KiB
 
   folly::io::Cursor cursor(data);
-  uint64_t actualUncompressedLength;
   if (encodeSize()) {
-    actualUncompressedLength = decodeVarintFromCursor(cursor);
+    const uint64_t actualUncompressedLength = decodeVarintFromCursor(cursor);
     if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
         uncompressedLength != actualUncompressedLength) {
       throw std::runtime_error("LZMA2Codec: invalid uncompressed length");
     }
-  } else {
-    actualUncompressedLength = uncompressedLength;
-    DCHECK_NE(actualUncompressedLength, UNKNOWN_UNCOMPRESSED_LENGTH);
+    uncompressedLength = actualUncompressedLength;
   }
 
   auto out = addOutputBuffer(
       &stream,
-      (actualUncompressedLength <= maxSingleStepLength ?
-       actualUncompressedLength :
-       defaultBufferLength));
+      ((uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
+        uncompressedLength <= maxSingleStepLength)
+           ? uncompressedLength
+           : defaultBufferLength));
 
   bool streamEnd = false;
   auto buf = cursor.peekBytes();
@@ -1151,9 +1149,10 @@ std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(const IOBuf* data,
 
   out->prev()->trimEnd(stream.avail_out);
 
-  if (actualUncompressedLength != stream.total_out) {
-    throw std::runtime_error(to<std::string>(
-        "LZMA2Codec: invalid uncompressed length"));
+  if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
+      uncompressedLength != stream.total_out) {
+    throw std::runtime_error(
+        to<std::string>("LZMA2Codec: invalid uncompressed length"));
   }
 
   return out;
index a97b5bc..5cac888 100644 (file)
@@ -155,7 +155,7 @@ TEST(CompressionTestNeedsUncompressedLength, Simple) {
       { CodecType::SNAPPY, false },
       { CodecType::ZLIB, false },
       { CodecType::LZ4_VARINT_SIZE, false },
-      { CodecType::LZMA2, true },
+      { CodecType::LZMA2, false },
       { CodecType::LZMA2_VARINT_SIZE, false },
       { CodecType::ZSTD, false },
       { CodecType::GZIP, false },
@@ -392,6 +392,7 @@ INSTANTIATE_TEST_CASE_P(
         supportedCodecs({
             CodecType::SNAPPY,
             CodecType::ZLIB,
+            CodecType::LZMA2,
             CodecType::ZSTD,
             CodecType::LZ4_FRAME,
         })));