From 481365efcd19b729e41400f442bcd19cb9ea1323 Mon Sep 17 00:00:00 2001 From: Stella Lau Date: Fri, 15 Sep 2017 10:13:08 -0700 Subject: [PATCH] Allow getAutoUncompressionCodec() to have 1 terminal decoder Summary: getAutoUncompressionCodec() currently only allows unambiguous headers. Allow a single "terminal codec" to be called if all other codecs can't uncompress or throw. Reviewed By: terrelln Differential Revision: D5804833 fbshipit-source-id: 057cb6e13a48fea20508d5c028234afddf7435f6 --- folly/io/Compression.cpp | 103 +++++++++++++----- folly/io/Compression.h | 32 ++++-- folly/io/test/CompressionTest.cpp | 171 +++++++++++++++++++++++++++--- 3 files changed, 258 insertions(+), 48 deletions(-) diff --git a/folly/io/Compression.cpp b/folly/io/Compression.cpp index d02a6b3e..0d386525 100644 --- a/folly/io/Compression.cpp +++ b/folly/io/Compression.cpp @@ -1818,8 +1818,11 @@ std::unique_ptr getZlibStreamCodec(int level, CodecType type) { class AutomaticCodec final : public Codec { public: static std::unique_ptr create( - std::vector> customCodecs); - explicit AutomaticCodec(std::vector> customCodecs); + std::vector> customCodecs, + std::unique_ptr terminalCodec); + explicit AutomaticCodec( + std::vector> customCodecs, + std::unique_ptr terminalCodec); std::vector validPrefixes() const override; bool canUncompress(const IOBuf* data, Optional uncompressedLength) @@ -1846,6 +1849,7 @@ class AutomaticCodec final : public Codec { void checkCompatibleCodecs() const; std::vector> codecs_; + std::unique_ptr terminalCodec_; bool needsUncompressedLength_; uint64_t maxUncompressedLength_; }; @@ -1877,38 +1881,70 @@ void AutomaticCodec::addCodecIfSupported(CodecType type) { [&type](std::unique_ptr const& codec) { return codec->type() == type; }); - if (hasCodec(type) && !present) { + bool const isTerminalType = terminalCodec_ && terminalCodec_->type() == type; + if (hasCodec(type) && !present && !isTerminalType) { codecs_.push_back(getCodec(type)); } } /* static */ 
std::unique_ptr AutomaticCodec::create( - std::vector> customCodecs) { - return std::make_unique(std::move(customCodecs)); -} - -AutomaticCodec::AutomaticCodec(std::vector> customCodecs) - : Codec(CodecType::USER_DEFINED), codecs_(std::move(customCodecs)) { + std::vector> customCodecs, + std::unique_ptr terminalCodec) { + return std::make_unique( + std::move(customCodecs), std::move(terminalCodec)); +} + +AutomaticCodec::AutomaticCodec( + std::vector> customCodecs, + std::unique_ptr terminalCodec) + : Codec(CodecType::USER_DEFINED), + codecs_(std::move(customCodecs)), + terminalCodec_(std::move(terminalCodec)) { // Fastest -> slowest - addCodecIfSupported(CodecType::LZ4_FRAME); - addCodecIfSupported(CodecType::ZSTD); - addCodecIfSupported(CodecType::ZLIB); - addCodecIfSupported(CodecType::GZIP); - addCodecIfSupported(CodecType::LZMA2); - addCodecIfSupported(CodecType::BZIP2); + std::array defaultTypes{{ + CodecType::LZ4_FRAME, + CodecType::ZSTD, + CodecType::ZLIB, + CodecType::GZIP, + CodecType::LZMA2, + CodecType::BZIP2, + }}; + + for (auto type : defaultTypes) { + addCodecIfSupported(type); + } + if (kIsDebug) { checkCompatibleCodecs(); } - // Check that none of the codes are are null + + // Check that none of the codecs are null DCHECK(std::none_of( codecs_.begin(), codecs_.end(), [](std::unique_ptr const& codec) { return codec == nullptr; })); + // Check that the terminal codec's type is not duplicated (with the exception + // of USER_DEFINED). 
+ if (terminalCodec_) { + DCHECK(std::none_of( + codecs_.begin(), + codecs_.end(), + [&](std::unique_ptr const& codec) { + return codec->type() != CodecType::USER_DEFINED && + codec->type() == terminalCodec_->type(); + })); + } + + bool const terminalNeedsUncompressedLength = + terminalCodec_ && terminalCodec_->needsUncompressedLength(); needsUncompressedLength_ = std::any_of( - codecs_.begin(), codecs_.end(), [](std::unique_ptr const& codec) { - return codec->needsUncompressedLength(); - }); + codecs_.begin(), + codecs_.end(), + [](std::unique_ptr const& codec) { + return codec->needsUncompressedLength(); + }) || + terminalNeedsUncompressedLength; const auto it = std::max_element( codecs_.begin(), @@ -1917,7 +1953,10 @@ AutomaticCodec::AutomaticCodec(std::vector> customCodecs) return lhs->maxUncompressedLength() < rhs->maxUncompressedLength(); }); DCHECK(it != codecs_.end()); - maxUncompressedLength_ = (*it)->maxUncompressedLength(); + auto const terminalMaxUncompressedLength = + terminalCodec_ ? 
terminalCodec_->maxUncompressedLength() : 0; + maxUncompressedLength_ = + std::max((*it)->maxUncompressedLength(), terminalMaxUncompressedLength); } void AutomaticCodec::checkCompatibleCodecs() const { @@ -1968,11 +2007,23 @@ uint64_t AutomaticCodec::doMaxUncompressedLength() const { std::unique_ptr AutomaticCodec::doUncompress( const IOBuf* data, Optional uncompressedLength) { - for (auto&& codec : codecs_) { - if (codec->canUncompress(data, uncompressedLength)) { - return codec->uncompress(data, uncompressedLength); + try { + for (auto&& codec : codecs_) { + if (codec->canUncompress(data, uncompressedLength)) { + return codec->uncompress(data, uncompressedLength); + } } + } catch (std::exception const&) { + if (!terminalCodec_) { + throw; // Rethrow the in-flight exception unchanged; `throw e;` would slice it to a plain std::exception. + } + } + + // Try terminal codec + if (terminalCodec_) { + return terminalCodec_->uncompress(data, uncompressedLength); } + throw std::runtime_error("AutomaticCodec error: Unknown compressed data"); } @@ -2086,8 +2137,10 @@ std::unique_ptr getStreamCodec(CodecType type, int level) { } std::unique_ptr getAutoUncompressionCodec( - std::vector> customCodecs) { - return AutomaticCodec::create(std::move(customCodecs)); + std::vector> customCodecs, + std::unique_ptr terminalCodec) { + return AutomaticCodec::create( + std::move(customCodecs), std::move(terminalCodec)); } } // namespace io } // namespace folly diff --git a/folly/io/Compression.h b/folly/io/Compression.h index 4013e0a2..345eda82 100644 --- a/folly/io/Compression.h +++ b/folly/io/Compression.h @@ -443,11 +443,28 @@ std::unique_ptr getStreamCodec( * Returns a codec that can uncompress any of the given codec types as well as * {LZ4_FRAME, ZSTD, ZLIB, GZIP, LZMA2, BZIP2}. Appends each default codec to * customCodecs in order, so long as a codec with the same type() isn't already - * present. When uncompress() is called, each codec's canUncompress() is called - * in the order that they are given. Appended default codecs are checked last. 
- * uncompress() is called on the first codec whose canUncompress() returns true. - * An exception is thrown if no codec canUncompress() the data. - * An exception is thrown if the chosen codec's uncompress() throws on the data. + * present in customCodecs or as the terminalCodec. When uncompress() is called, + * each codec's canUncompress() is called in the order that they are given. + * Appended default codecs are checked last. uncompress() is called on the + * first codec whose canUncompress() returns true. + * + * In addition, an optional `terminalCodec` can be provided. This codec's + * uncompress() will be called either when no other codec canUncompress() the + * data or the chosen codec throws an exception on the data. The terminalCodec + * is intended for ambiguous headers, when canUncompress() is false for some + * data it can actually uncompress. The terminalCodec does not need to override + * validPrefixes() or canUncompress() and overriding these functions will have + * no effect on the returned codec's validPrefixes() or canUncompress() + * functions. The terminalCodec's needsUncompressedLength() and + * maxUncompressedLength() will affect the returned codec's respective + * functions. The terminalCodec must not be duplicated in customCodecs. + * + * An exception is thrown if no codec canUncompress() the data and either no + * terminal codec was provided or a terminal codec was provided and it throws on + * the data. + * An exception is thrown if the chosen codec's uncompress() throws on the data + * and either no terminal codec was provided or a terminal codec was provided + * and it also throws on the data. * An exception is thrown if compress() is called on the returned codec. * * Requirements are checked in debug mode and are as follows: @@ -457,9 +474,12 @@ std::unique_ptr getStreamCodec( * 3. No header in headers may be empty. * 4. headers must not contain any duplicate elements. * 5. 
No strict non-empty prefix of any header in headers may be in headers. + * 6. The terminalCodec's type must not be the same as any other codec's type + * (with USER_DEFINED being the exception). */ std::unique_ptr getAutoUncompressionCodec( - std::vector> customCodecs = {}); + std::vector> customCodecs = {}, + std::unique_ptr terminalCodec = {}); /** * Check if a specified codec is supported. diff --git a/folly/io/test/CompressionTest.cpp b/folly/io/test/CompressionTest.cpp index b9db1da0..69a037d8 100644 --- a/folly/io/test/CompressionTest.cpp +++ b/folly/io/test/CompressionTest.cpp @@ -958,17 +958,46 @@ INSTANTIATE_TEST_CASE_P( testing::Values(12, 17, 20), testing::ValuesIn(availableStreamCodecs()))); +namespace { + +// Codec types included in the codec returned by getAutoUncompressionCodec() by +// default. +std::vector autoUncompressionCodecTypes = {{ + CodecType::LZ4_FRAME, + CodecType::ZSTD, + CodecType::ZLIB, + CodecType::GZIP, + CodecType::LZMA2, + CodecType::BZIP2, +}}; + +} // namespace + class AutomaticCodecTest : public testing::TestWithParam { protected: void SetUp() override { - codec_ = getCodec(GetParam()); - auto_ = getAutoUncompressionCodec(); + codecType_ = GetParam(); + codec_ = getCodec(codecType_); + autoType_ = std::any_of( + autoUncompressionCodecTypes.begin(), + autoUncompressionCodecTypes.end(), + [&](CodecType o) { return codecType_ == o; }); + // Add the codec with type codecType_ as the terminal codec if it is not in + // autoUncompressionCodecTypes. + auto_ = getAutoUncompressionCodec({}, getTerminalCodec()); } void runSimpleTest(const DataHolder& dh); + std::unique_ptr getTerminalCodec() { + return (autoType_ ? 
nullptr : getCodec(codecType_)); + } + std::unique_ptr codec_; std::unique_ptr auto_; + CodecType codecType_; + // true if codecType_ is in autoUncompressionCodecTypes + bool autoType_; }; void AutomaticCodecTest::runSimpleTest(const DataHolder& dh) { @@ -1034,10 +1063,17 @@ TEST_P(AutomaticCodecTest, DefaultCodec) { const uint64_t length = 42; std::vector> codecs; codecs.push_back(getCodec(CodecType::ZSTD)); - auto automatic = getAutoUncompressionCodec(std::move(codecs)); + auto automatic = + getAutoUncompressionCodec(std::move(codecs), getTerminalCodec()); auto original = IOBuf::wrapBuffer(constantDataHolder.data(length)); auto compressed = codec_->compress(original.get()); - auto decompressed = automatic->uncompress(compressed.get()); + std::unique_ptr decompressed; + + if (automatic->needsUncompressedLength()) { + decompressed = automatic->uncompress(compressed.get(), length); + } else { + decompressed = automatic->uncompress(compressed.get()); + } EXPECT_EQ(constantDataHolder.hash(length), hashIOBuf(decompressed.get())); } @@ -1096,17 +1132,28 @@ TEST_P(AutomaticCodecTest, CustomCodec) { auto ab = CustomCodec::create("ab", CodecType::ZSTD); std::vector> codecs; codecs.push_back(CustomCodec::create("ab", CodecType::ZSTD)); - auto automatic = getAutoUncompressionCodec(std::move(codecs)); + auto automatic = + getAutoUncompressionCodec(std::move(codecs), getTerminalCodec()); auto original = IOBuf::wrapBuffer(constantDataHolder.data(length)); auto abCompressed = ab->compress(original.get()); - auto abDecompressed = automatic->uncompress(abCompressed.get()); + std::unique_ptr abDecompressed; + if (automatic->needsUncompressedLength()) { + abDecompressed = automatic->uncompress(abCompressed.get(), length); + } else { + abDecompressed = automatic->uncompress(abCompressed.get()); + } EXPECT_TRUE(automatic->canUncompress(abCompressed.get())); EXPECT_FALSE(auto_->canUncompress(abCompressed.get())); EXPECT_EQ(constantDataHolder.hash(length), 
hashIOBuf(abDecompressed.get())); auto compressed = codec_->compress(original.get()); - auto decompressed = automatic->uncompress(compressed.get()); + std::unique_ptr decompressed; + if (automatic->needsUncompressedLength()) { + decompressed = automatic->uncompress(compressed.get(), length); + } else { + decompressed = automatic->uncompress(compressed.get()); + } EXPECT_EQ(constantDataHolder.hash(length), hashIOBuf(decompressed.get())); } @@ -1116,17 +1163,28 @@ TEST_P(AutomaticCodecTest, CustomDefaultCodec) { std::vector> codecs; codecs.push_back(CustomCodec::create("none", CodecType::NO_COMPRESSION)); codecs.push_back(getCodec(CodecType::LZ4_FRAME)); - auto automatic = getAutoUncompressionCodec(std::move(codecs)); + auto automatic = + getAutoUncompressionCodec(std::move(codecs), getTerminalCodec()); auto original = IOBuf::wrapBuffer(constantDataHolder.data(length)); auto noneCompressed = none->compress(original.get()); - auto noneDecompressed = automatic->uncompress(noneCompressed.get()); + std::unique_ptr noneDecompressed; + if (automatic->needsUncompressedLength()) { + noneDecompressed = automatic->uncompress(noneCompressed.get(), length); + } else { + noneDecompressed = automatic->uncompress(noneCompressed.get()); + } EXPECT_TRUE(automatic->canUncompress(noneCompressed.get())); EXPECT_FALSE(auto_->canUncompress(noneCompressed.get())); EXPECT_EQ(constantDataHolder.hash(length), hashIOBuf(noneDecompressed.get())); auto compressed = codec_->compress(original.get()); - auto decompressed = automatic->uncompress(compressed.get()); + std::unique_ptr decompressed; + if (automatic->needsUncompressedLength()) { + decompressed = automatic->uncompress(compressed.get(), length); + } else { + decompressed = automatic->uncompress(compressed.get()); + } EXPECT_EQ(constantDataHolder.hash(length), hashIOBuf(decompressed.get())); } @@ -1143,13 +1201,92 @@ TEST_P(AutomaticCodecTest, canUncompressOneBytes) { INSTANTIATE_TEST_CASE_P( AutomaticCodecTest, AutomaticCodecTest, - 
testing::Values( - CodecType::LZ4_FRAME, - CodecType::ZSTD, - CodecType::ZLIB, - CodecType::GZIP, - CodecType::LZMA2, - CodecType::BZIP2)); + testing::ValuesIn(availableCodecs())); + +namespace { + +// Codec that always "uncompresses" to the same string. +class ConstantCodec : public Codec { + public: + static std::unique_ptr create( + std::string uncompressed, + CodecType type) { + return std::make_unique(std::move(uncompressed), type); + } + explicit ConstantCodec(std::string uncompressed, CodecType type) + : Codec(type), uncompressed_(std::move(uncompressed)) {} + + private: + uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override { + return uncompressedLength; + } + + std::unique_ptr doCompress(const IOBuf*) override { + throw std::runtime_error("ConstantCodec error: compress() not supported."); + } + + std::unique_ptr doUncompress(const IOBuf*, Optional) + override { + return IOBuf::copyBuffer(uncompressed_); + } + + std::string uncompressed_; + std::unique_ptr codec_; +}; + +} // namespace + +class TerminalCodecTest : public testing::TestWithParam { + protected: + void SetUp() override { + codecType_ = GetParam(); + codec_ = getCodec(codecType_); + auto_ = getAutoUncompressionCodec(); + } + + CodecType codecType_; + std::unique_ptr codec_; + std::unique_ptr auto_; +}; + +// Test that the terminal codec's uncompress() function is called when the +// default chosen automatic codec throws. +TEST_P(TerminalCodecTest, uncompressIfDefaultThrows) { + std::string const original = "abc"; + auto const compressed = codec_->compress(original); + + // Sanity check: the automatic codec can uncompress the original string. + auto const uncompressed = auto_->uncompress(compressed); + EXPECT_EQ(uncompressed, original); + + // Truncate the compressed string. 
+ auto const truncated = compressed.substr(0, compressed.size() - 1); + auto const truncatedBuf = + IOBuf::wrapBuffer(truncated.data(), truncated.size()); + EXPECT_TRUE(auto_->canUncompress(truncatedBuf.get())); + EXPECT_ANY_THROW(auto_->uncompress(truncated)); + + // Expect the terminal codec to successfully uncompress the string. + std::unique_ptr terminal = getAutoUncompressionCodec( + {}, ConstantCodec::create("dummyString", CodecType::USER_DEFINED)); + EXPECT_TRUE(terminal->canUncompress(truncatedBuf.get())); + EXPECT_EQ(terminal->uncompress(truncated), "dummyString"); +} + +// If the terminal codec has one of the "default types" automatically added in +// the AutomaticCodec, check that the default codec is no longer added. +TEST_P(TerminalCodecTest, terminalOverridesDefaults) { + std::unique_ptr terminal = getAutoUncompressionCodec( + {}, ConstantCodec::create("dummyString", codecType_)); + std::string const original = "abc"; + auto const compressed = codec_->compress(original); + EXPECT_EQ(terminal->uncompress(compressed), "dummyString"); +} + +INSTANTIATE_TEST_CASE_P( + TerminalCodecTest, + TerminalCodecTest, + testing::ValuesIn(autoUncompressionCodecTypes)); TEST(ValidPrefixesTest, CustomCodec) { std::vector> codecs; -- 2.34.1