From: Nick Terrell Date: Wed, 29 Mar 2017 04:21:02 +0000 (-0700) Subject: AutomaticCodec X-Git-Tag: v2017.04.03.00~12 X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=commitdiff_plain;h=2a60bc196ca0ec395b029ad1e066873fb0676bdb AutomaticCodec Summary: Add codec that does automatic format detection for decompression. The codec returned by `getAutoUncompressionCodec()` can decompress the formats `LZ4_FRAME`, `ZSTD`, `ZLIB`, `GZIP`, and `LZMA2`. Additionally, `getAutoUncompressionCodec()` allows users to pass their own custom codecs; the 5 default codecs are always supported as well (each one is appended unless a codec with the same type is already present). We force the default codecs on users because having a common subset of formats is in general useful, and because we want to be able to add codecs to this set. When compiled in debug mode, the codec checks on construction that all of its codecs are compatible. This helps make sure that users don't accidentally add ambiguous codecs, and if we add a new default codec in the future, tests will fail on ambiguity. Reviewed By: yfeldblum Differential Revision: D4760065 fbshipit-source-id: 9e65844aba9f1ae3d5c7b86643931782c06ab3eb --- diff --git a/folly/io/Compression.cpp b/folly/io/Compression.cpp index c692895f..5b83a6f0 100644 --- a/folly/io/Compression.cpp +++ b/folly/io/Compression.cpp @@ -43,12 +43,15 @@ #include #endif +#include #include #include #include #include #include #include +#include +#include namespace folly { namespace io { @@ -138,6 +141,14 @@ uint64_t Codec::doMaxUncompressedLength() const { return UNLIMITED_UNCOMPRESSED_LENGTH; } +std::vector Codec::validPrefixes() const { + return {}; +} + +bool Codec::canUncompress(const IOBuf*, uint64_t) const { + return false; +} + std::string Codec::doCompressString(const StringPiece data) { const IOBuf inputBuffer{IOBuf::WRAP_BUFFER, data}; auto outputBuffer = doCompress(&inputBuffer); @@ -243,6 +254,39 @@ inline uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) { #endif // FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA +namespace { +/** + * 
Reads sizeof(T) bytes, and returns false if not enough bytes are available. + * Returns true if the first n bytes are equal to prefix when interpreted as + * a little endian T. + */ +template +typename std::enable_if::value, bool>::type +dataStartsWithLE(const IOBuf* data, T prefix, uint64_t n = sizeof(T)) { + DCHECK_GT(n, 0); + DCHECK_LE(n, sizeof(T)); + T value; + Cursor cursor{data}; + if (!cursor.tryReadLE(value)) { + return false; + } + const T mask = n == sizeof(T) ? T(-1) : (T(1) << (8 * n)) - 1; + return prefix == (value & mask); +} + +template +typename std::enable_if::value, std::string>::type +prefixToStringLE(T prefix, uint64_t n = sizeof(T)) { + DCHECK_GT(n, 0); + DCHECK_LE(n, sizeof(T)); + prefix = Endian::little(prefix); + std::string result; + result.resize(n); + memcpy(&result[0], &prefix, n); + return result; +} +} // namespace + #if FOLLY_HAVE_LIBLZ4 /** @@ -394,6 +438,10 @@ class LZ4FrameCodec final : public Codec { explicit LZ4FrameCodec(int level, CodecType type); ~LZ4FrameCodec(); + std::vector validPrefixes() const override; + bool canUncompress(const IOBuf* data, uint64_t uncompressedLength) + const override; + private: std::unique_ptr doCompress(const IOBuf* data) override; std::unique_ptr doUncompress( @@ -414,6 +462,16 @@ class LZ4FrameCodec final : public Codec { return make_unique(level, type); } +static constexpr uint32_t kLZ4FrameMagicLE = 0x184D2204; + +std::vector LZ4FrameCodec::validPrefixes() const { + return {prefixToStringLE(kLZ4FrameMagicLE)}; +} + +bool LZ4FrameCodec::canUncompress(const IOBuf* data, uint64_t) const { + return dataStartsWithLE(data, kLZ4FrameMagicLE); +} + static size_t lz4FrameThrowOnError(size_t code) { if (LZ4F_isError(code)) { throw std::runtime_error( @@ -676,6 +734,10 @@ class ZlibCodec final : public Codec { static std::unique_ptr create(int level, CodecType type); explicit ZlibCodec(int level, CodecType type); + std::vector validPrefixes() const override; + bool canUncompress(const IOBuf* data, 
uint64_t uncompressedLength) + const override; + private: std::unique_ptr doCompress(const IOBuf* data) override; std::unique_ptr doUncompress( @@ -688,6 +750,66 @@ class ZlibCodec final : public Codec { int level_; }; +static constexpr uint16_t kGZIPMagicLE = 0x8B1F; + +std::vector ZlibCodec::validPrefixes() const { + if (type() == CodecType::ZLIB) { + // Zlib streams start with a 2 byte header. + // + // 0 1 + // +---+---+ + // |CMF|FLG| + // +---+---+ + // + // We won't restrict the values of any sub-fields except as described below. + // + // The lowest 4 bits of CMF is the compression method (CM). + // CM == 0x8 is the deflate compression method, which is currently the only + // supported compression method, so any valid prefix must have CM == 0x8. + // + // The lowest 5 bits of FLG is FCHECK. + // FCHECK must be such that the two header bytes are a multiple of 31 when + // interpreted as a big endian 16-bit number. + std::vector result; + // 16 values for the first byte, 8 values for the second byte. + // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK. + result.reserve(132); + // Select all values for the CMF byte that use the deflate algorithm 0x8. + for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) { + // Select all values for the FLG, but leave FCHECK as 0 since it's fixed. + for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) { + uint16_t prefix = first | second; + // Compute FCHECK. + prefix += 31 - (prefix % 31); + result.push_back(prefixToStringLE(Endian::big(prefix))); + // zlib won't produce this, but it is a valid prefix. + if ((prefix & 0x1F) == 31) { + prefix -= 31; + result.push_back(prefixToStringLE(Endian::big(prefix))); + } + } + } + return result; + } else { + // The gzip frame starts with 2 magic bytes. 
+ return {prefixToStringLE(kGZIPMagicLE)}; + } +} + +bool ZlibCodec::canUncompress(const IOBuf* data, uint64_t) const { + if (type() == CodecType::ZLIB) { + uint16_t value; + Cursor cursor{data}; + if (!cursor.tryReadBE(value)) { + return false; + } + // zlib compressed if using deflate and is a multiple of 31. + return (value & 0x0F00) == 0x0800 && value % 31 == 0; + } else { + return dataStartsWithLE(data, kGZIPMagicLE); + } +} + std::unique_ptr ZlibCodec::create(int level, CodecType type) { return make_unique(level, type); } @@ -944,6 +1066,10 @@ class LZMA2Codec final : public Codec { static std::unique_ptr create(int level, CodecType type); explicit LZMA2Codec(int level, CodecType type); + std::vector validPrefixes() const override; + bool canUncompress(const IOBuf* data, uint64_t uncompressedLength) + const override; + private: bool doNeedsUncompressedLength() const override; uint64_t doMaxUncompressedLength() const override; @@ -961,6 +1087,25 @@ class LZMA2Codec final : public Codec { int level_; }; +static constexpr uint64_t kLZMA2MagicLE = 0x005A587A37FD; +static constexpr unsigned kLZMA2MagicBytes = 6; + +std::vector LZMA2Codec::validPrefixes() const { + if (type() == CodecType::LZMA2_VARINT_SIZE) { + return {}; + } + return {prefixToStringLE(kLZMA2MagicLE, kLZMA2MagicBytes)}; +} + +bool LZMA2Codec::canUncompress(const IOBuf* data, uint64_t) const { + if (type() == CodecType::LZMA2_VARINT_SIZE) { + return false; + } + // Returns false for all inputs less than 8 bytes. + // This is okay, because no valid LZMA2 streams are less than 8 bytes. 
+ return dataStartsWithLE(data, kLZMA2MagicLE, kLZMA2MagicBytes); +} + std::unique_ptr LZMA2Codec::create(int level, CodecType type) { return make_unique(level, type); } @@ -1183,6 +1328,10 @@ class ZSTDCodec final : public Codec { static std::unique_ptr create(int level, CodecType); explicit ZSTDCodec(int level, CodecType type); + std::vector validPrefixes() const override; + bool canUncompress(const IOBuf* data, uint64_t uncompressedLength) + const override; + private: bool doNeedsUncompressedLength() const override; std::unique_ptr doCompress(const IOBuf* data) override; @@ -1193,6 +1342,16 @@ class ZSTDCodec final : public Codec { int level_; }; +static constexpr uint32_t kZSTDMagicLE = 0xFD2FB528; + +std::vector ZSTDCodec::validPrefixes() const { + return {prefixToStringLE(kZSTDMagicLE)}; +} + +bool ZSTDCodec::canUncompress(const IOBuf* data, uint64_t) const { + return dataStartsWithLE(data, kZSTDMagicLE); +} + std::unique_ptr ZSTDCodec::create(int level, CodecType type) { return make_unique(level, type); } @@ -1391,6 +1550,160 @@ std::unique_ptr ZSTDCodec::doUncompress( #endif // FOLLY_HAVE_LIBZSTD +/** + * Automatic decompression + */ +class AutomaticCodec final : public Codec { + public: + static std::unique_ptr create( + std::vector> customCodecs); + explicit AutomaticCodec(std::vector> customCodecs); + + std::vector validPrefixes() const override; + bool canUncompress(const IOBuf* data, uint64_t uncompressedLength) + const override; + + private: + bool doNeedsUncompressedLength() const override; + uint64_t doMaxUncompressedLength() const override; + + std::unique_ptr doCompress(const IOBuf*) override { + throw std::runtime_error("AutomaticCodec error: compress() not supported."); + } + std::unique_ptr doUncompress( + const IOBuf* data, + uint64_t uncompressedLength) override; + + void addCodecIfSupported(CodecType type); + + // Throws iff the codecs aren't compatible (very slow) + void checkCompatibleCodecs() const; + + std::vector> codecs_; + bool 
needsUncompressedLength_; + uint64_t maxUncompressedLength_; +}; + +std::vector AutomaticCodec::validPrefixes() const { + std::unordered_set prefixes; + for (const auto& codec : codecs_) { + const auto codecPrefixes = codec->validPrefixes(); + prefixes.insert(codecPrefixes.begin(), codecPrefixes.end()); + } + return std::vector{prefixes.begin(), prefixes.end()}; +} + +bool AutomaticCodec::canUncompress( + const IOBuf* data, + uint64_t uncompressedLength) const { + return std::any_of( + codecs_.begin(), + codecs_.end(), + [data, uncompressedLength](const auto& codec) { + return codec->canUncompress(data, uncompressedLength); + }); +} + +void AutomaticCodec::addCodecIfSupported(CodecType type) { + const bool present = + std::any_of(codecs_.begin(), codecs_.end(), [&type](const auto& codec) { + return codec->type() == type; + }); + if (hasCodec(type) && !present) { + codecs_.push_back(getCodec(type)); + } +} + +/* static */ std::unique_ptr AutomaticCodec::create( + std::vector> customCodecs) { + return make_unique(std::move(customCodecs)); +} + +AutomaticCodec::AutomaticCodec(std::vector> customCodecs) + : Codec(CodecType::USER_DEFINED), codecs_(std::move(customCodecs)) { + // Fastest -> slowest + addCodecIfSupported(CodecType::LZ4_FRAME); + addCodecIfSupported(CodecType::ZSTD); + addCodecIfSupported(CodecType::ZLIB); + addCodecIfSupported(CodecType::GZIP); + addCodecIfSupported(CodecType::LZMA2); + if (kIsDebug) { + checkCompatibleCodecs(); + } + // Check that none of the codecs are null + DCHECK(std::none_of(codecs_.begin(), codecs_.end(), [](const auto& codec) { + return codec == nullptr; + })); + + needsUncompressedLength_ = + std::any_of(codecs_.begin(), codecs_.end(), [](const auto& codec) { + return codec->needsUncompressedLength(); + }); + + const auto it = std::max_element( + codecs_.begin(), codecs_.end(), [](const auto& lhs, const auto& rhs) { + return lhs->maxUncompressedLength() < rhs->maxUncompressedLength(); + }); + DCHECK(it != codecs_.end()); + 
maxUncompressedLength_ = (*it)->maxUncompressedLength(); +} + +void AutomaticCodec::checkCompatibleCodecs() const { + // Keep track of all the possible headers. + std::unordered_set headers; + // The empty header is not allowed. + headers.insert(""); + // Step 1: + // Construct a set of headers and check that none of the headers occur twice. + // Eliminate edge cases. + for (auto&& codec : codecs_) { + const auto codecHeaders = codec->validPrefixes(); + // Codecs without any valid headers are not allowed. + if (codecHeaders.empty()) { + throw std::invalid_argument{ + "AutomaticCodec: validPrefixes() must not be empty."}; + } + // Insert all the headers for the current codec. + const size_t beforeSize = headers.size(); + headers.insert(codecHeaders.begin(), codecHeaders.end()); + // Codecs are not compatible if any header occurred twice. + if (beforeSize + codecHeaders.size() != headers.size()) { + throw std::invalid_argument{ + "AutomaticCodec: Two valid prefixes collide."}; + } + } + // Step 2: + // Check if any strict non-empty prefix of any header is a header. 
+ for (const auto& header : headers) { + for (size_t i = 1; i < header.size(); ++i) { + if (headers.count(header.substr(0, i))) { + throw std::invalid_argument{ + "AutomaticCodec: One valid prefix is a prefix of another valid " + "prefix."}; + } + } + } +} + +bool AutomaticCodec::doNeedsUncompressedLength() const { + return needsUncompressedLength_; +} + +uint64_t AutomaticCodec::doMaxUncompressedLength() const { + return maxUncompressedLength_; +} + +std::unique_ptr AutomaticCodec::doUncompress( + const IOBuf* data, + uint64_t uncompressedLength) { + for (auto&& codec : codecs_) { + if (codec->canUncompress(data, uncompressedLength)) { + return codec->uncompress(data, uncompressedLength); + } + } + throw std::runtime_error("AutomaticCodec error: Unknown compressed data"); +} + } // namespace typedef std::unique_ptr (*CodecFactory)(int, CodecType); @@ -1475,4 +1788,8 @@ std::unique_ptr getCodec(CodecType type, int level) { return codec; } +std::unique_ptr getAutoUncompressionCodec( + std::vector> customCodecs) { + return AutomaticCodec::create(std::move(customCodecs)); +} }} // namespaces diff --git a/folly/io/Compression.h b/folly/io/Compression.h index 6d6ae3a4..c46c6164 100644 --- a/folly/io/Compression.h +++ b/folly/io/Compression.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include @@ -164,6 +166,25 @@ class Codec { protected: explicit Codec(CodecType type); + public: + /** + * Returns a superset of the set of prefixes for which canUncompress() will + * return true. A superset is allowed for optimizations in canUncompress() + * based on other knowledge such as length. None of the prefixes may be empty. + * default: No prefixes. + */ + virtual std::vector validPrefixes() const; + + /** + * Returns true if the codec thinks it can uncompress the data. + * If a codec doesn't have magic bytes at the beginning, like LZ4 and Snappy, + * it can always return false. + * default: Returns false. 
+ */ + virtual bool canUncompress( + const folly::IOBuf* data, + uint64_t uncompressedLength = UNKNOWN_UNCOMPRESSED_LENGTH) const; + private: // default: no limits (save for special value UNKNOWN_UNCOMPRESSED_LENGTH) virtual uint64_t doMaxUncompressedLength() const; @@ -207,6 +228,28 @@ constexpr int COMPRESSION_LEVEL_BEST = -3; std::unique_ptr getCodec(CodecType type, int level = COMPRESSION_LEVEL_DEFAULT); +/** + * Returns a codec that can uncompress any of the given codec types as well as + * {LZ4_FRAME, ZSTD, ZLIB, GZIP, LZMA2}. Appends each default codec to + * customCodecs in order, so long as a codec with the same type() isn't already + * present. When uncompress() is called, each codec's canUncompress() is called + * in the order that they are given. Appended default codecs are checked last. + * uncompress() is called on the first codec whose canUncompress() returns true. + * An exception is thrown if no codec canUncompress() the data. + * An exception is thrown if the chosen codec's uncompress() throws on the data. + * An exception is thrown if compress() is called on the returned codec. + * + * Requirements are checked in debug mode and are as follows: + * Let headers be the concatenation of every codec's validPrefixes(). + * 1. Each codec must override validPrefixes() and canUncompress(). + * 2. No codec's validPrefixes() may be empty. + * 3. No header in headers may be empty. + * 4. headers must not contain any duplicate elements. + * 5. No strict non-empty prefix of any header in headers may be in headers. + */ +std::unique_ptr getAutoUncompressionCodec( + std::vector> customCodecs = {}); + /** * Check if a specified codec is supported. 
*/ diff --git a/folly/io/test/CompressionTest.cpp b/folly/io/test/CompressionTest.cpp index 5cac8887..197d50fd 100644 --- a/folly/io/test/CompressionTest.cpp +++ b/folly/io/test/CompressionTest.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -396,6 +397,256 @@ INSTANTIATE_TEST_CASE_P( CodecType::ZSTD, CodecType::LZ4_FRAME, }))); + +class AutomaticCodecTest : public testing::TestWithParam { + protected: + void SetUp() override { + codec_ = getCodec(GetParam()); + auto_ = getAutoUncompressionCodec(); + } + + void runSimpleTest(const DataHolder& dh); + + std::unique_ptr codec_; + std::unique_ptr auto_; +}; + +void AutomaticCodecTest::runSimpleTest(const DataHolder& dh) { + constexpr uint64_t uncompressedLength = 1000; + auto original = IOBuf::wrapBuffer(dh.data(uncompressedLength)); + auto compressed = codec_->compress(original.get()); + + if (!codec_->needsUncompressedLength()) { + auto uncompressed = auto_->uncompress(compressed.get()); + EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength()); + EXPECT_EQ(dh.hash(uncompressedLength), hashIOBuf(uncompressed.get())); + } + { + auto uncompressed = auto_->uncompress(compressed.get(), uncompressedLength); + EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength()); + EXPECT_EQ(dh.hash(uncompressedLength), hashIOBuf(uncompressed.get())); + } + ASSERT_GE(compressed->computeChainDataLength(), 8); + for (size_t i = 0; i < 8; ++i) { + auto split = compressed->clone(); + auto rest = compressed->clone(); + split->trimEnd(split->length() - i); + rest->trimStart(i); + split->appendChain(std::move(rest)); + auto uncompressed = auto_->uncompress(split.get(), uncompressedLength); + EXPECT_EQ(uncompressedLength, uncompressed->computeChainDataLength()); + EXPECT_EQ(dh.hash(uncompressedLength), hashIOBuf(uncompressed.get())); + } +} + +TEST_P(AutomaticCodecTest, RandomData) { + runSimpleTest(randomDataHolder); +} + +TEST_P(AutomaticCodecTest, ConstantData) { + 
runSimpleTest(constantDataHolder); +} + +TEST_P(AutomaticCodecTest, ValidPrefixes) { + const auto prefixes = codec_->validPrefixes(); + for (const auto& prefix : prefixes) { + EXPECT_FALSE(prefix.empty()); + // Ensure that all strings are at least 8 bytes for LZMA2. + // The bytes after the prefix should be ignored by `canUncompress()`. + IOBuf data{IOBuf::COPY_BUFFER, prefix, 0, 8}; + data.append(8); + EXPECT_TRUE(codec_->canUncompress(&data)); + EXPECT_TRUE(auto_->canUncompress(&data)); + } +} + +TEST_P(AutomaticCodecTest, NeedsUncompressedLength) { + if (codec_->needsUncompressedLength()) { + EXPECT_TRUE(auto_->needsUncompressedLength()); + } +} + +TEST_P(AutomaticCodecTest, maxUncompressedLength) { + EXPECT_LE(codec_->maxUncompressedLength(), auto_->maxUncompressedLength()); +} + +TEST_P(AutomaticCodecTest, DefaultCodec) { + const uint64_t length = 42; + std::vector> codecs; + codecs.push_back(getCodec(CodecType::ZSTD)); + auto automatic = getAutoUncompressionCodec(std::move(codecs)); + auto original = IOBuf::wrapBuffer(constantDataHolder.data(length)); + auto compressed = codec_->compress(original.get()); + auto decompressed = automatic->uncompress(compressed.get()); + + EXPECT_EQ(constantDataHolder.hash(length), hashIOBuf(decompressed.get())); +} + +namespace { +class CustomCodec : public Codec { + public: + static std::unique_ptr create(std::string prefix, CodecType type) { + return make_unique(std::move(prefix), type); + } + explicit CustomCodec(std::string prefix, CodecType type) + : Codec(CodecType::USER_DEFINED), + prefix_(std::move(prefix)), + codec_(getCodec(type)) {} + + private: + std::vector validPrefixes() const override { + return {prefix_}; + } + + bool canUncompress(const IOBuf* data, uint64_t) const override { + auto clone = data->cloneCoalescedAsValue(); + if (clone.length() < prefix_.size()) { + return false; + } + return memcmp(clone.data(), prefix_.data(), prefix_.size()) == 0; + } + + std::unique_ptr doCompress(const IOBuf* data) override 
{ + auto result = IOBuf::copyBuffer(prefix_); + result->appendChain(codec_->compress(data)); + EXPECT_TRUE(canUncompress(result.get(), data->computeChainDataLength())); + return result; + } + + std::unique_ptr doUncompress( + const IOBuf* data, + uint64_t uncompressedLength) override { + EXPECT_TRUE(canUncompress(data, uncompressedLength)); + auto clone = data->cloneCoalescedAsValue(); + clone.trimStart(prefix_.size()); + return codec_->uncompress(&clone, uncompressedLength); + } + + std::string prefix_; + std::unique_ptr codec_; +}; +} + +TEST_P(AutomaticCodecTest, CustomCodec) { + const uint64_t length = 42; + auto ab = CustomCodec::create("ab", CodecType::ZSTD); + std::vector> codecs; + codecs.push_back(CustomCodec::create("ab", CodecType::ZSTD)); + auto automatic = getAutoUncompressionCodec(std::move(codecs)); + auto original = IOBuf::wrapBuffer(constantDataHolder.data(length)); + + auto abCompressed = ab->compress(original.get()); + auto abDecompressed = automatic->uncompress(abCompressed.get()); + EXPECT_TRUE(automatic->canUncompress(abCompressed.get())); + EXPECT_FALSE(auto_->canUncompress(abCompressed.get())); + EXPECT_EQ(constantDataHolder.hash(length), hashIOBuf(abDecompressed.get())); + + auto compressed = codec_->compress(original.get()); + auto decompressed = automatic->uncompress(compressed.get()); + EXPECT_EQ(constantDataHolder.hash(length), hashIOBuf(decompressed.get())); +} + +TEST_P(AutomaticCodecTest, CustomDefaultCodec) { + const uint64_t length = 42; + auto none = CustomCodec::create("none", CodecType::NO_COMPRESSION); + std::vector> codecs; + codecs.push_back(CustomCodec::create("none", CodecType::NO_COMPRESSION)); + codecs.push_back(getCodec(CodecType::LZ4_FRAME)); + auto automatic = getAutoUncompressionCodec(std::move(codecs)); + auto original = IOBuf::wrapBuffer(constantDataHolder.data(length)); + + auto noneCompressed = none->compress(original.get()); + auto noneDecompressed = automatic->uncompress(noneCompressed.get()); + 
EXPECT_TRUE(automatic->canUncompress(noneCompressed.get())); + EXPECT_FALSE(auto_->canUncompress(noneCompressed.get())); + EXPECT_EQ(constantDataHolder.hash(length), hashIOBuf(noneDecompressed.get())); + + auto compressed = codec_->compress(original.get()); + auto decompressed = automatic->uncompress(compressed.get()); + EXPECT_EQ(constantDataHolder.hash(length), hashIOBuf(decompressed.get())); +} + +TEST_P(AutomaticCodecTest, canUncompressOneBytes) { + // No default codec can uncompress a single byte. + IOBuf buf{IOBuf::CREATE, 1}; + buf.append(1); + EXPECT_FALSE(codec_->canUncompress(&buf, 1)); + EXPECT_FALSE(codec_->canUncompress(&buf, Codec::UNKNOWN_UNCOMPRESSED_LENGTH)); + EXPECT_FALSE(auto_->canUncompress(&buf, 1)); + EXPECT_FALSE(auto_->canUncompress(&buf, Codec::UNKNOWN_UNCOMPRESSED_LENGTH)); +} + +INSTANTIATE_TEST_CASE_P( + AutomaticCodecTest, + AutomaticCodecTest, + testing::Values( + CodecType::LZ4_FRAME, + CodecType::ZSTD, + CodecType::ZLIB, + CodecType::GZIP, + CodecType::LZMA2)); + +TEST(ValidPrefixesTest, CustomCodec) { + std::vector> codecs; + codecs.push_back(CustomCodec::create("none", CodecType::NO_COMPRESSION)); + const auto none = getAutoUncompressionCodec(std::move(codecs)); + const auto prefixes = none->validPrefixes(); + const auto it = std::find(prefixes.begin(), prefixes.end(), "none"); + EXPECT_TRUE(it != prefixes.end()); +} + +#define EXPECT_THROW_IF_DEBUG(statement, expected_exception) \ + do { \ + if (kIsDebug) { \ + EXPECT_THROW((statement), expected_exception); \ + } else { \ + EXPECT_NO_THROW((statement)); \ + } \ + } while (false) + +TEST(CheckCompatibleTest, SimplePrefixSecond) { + std::vector> codecs; + codecs.push_back(CustomCodec::create("abc", CodecType::NO_COMPRESSION)); + codecs.push_back(CustomCodec::create("ab", CodecType::NO_COMPRESSION)); + EXPECT_THROW_IF_DEBUG( + getAutoUncompressionCodec(std::move(codecs)), std::invalid_argument); +} + +TEST(CheckCompatibleTest, SimplePrefixFirst) { + std::vector> codecs; + 
codecs.push_back(CustomCodec::create("ab", CodecType::NO_COMPRESSION)); + codecs.push_back(CustomCodec::create("abc", CodecType::NO_COMPRESSION)); + EXPECT_THROW_IF_DEBUG( + getAutoUncompressionCodec(std::move(codecs)), std::invalid_argument); +} + +TEST(CheckCompatibleTest, Empty) { + std::vector> codecs; + codecs.push_back(CustomCodec::create("", CodecType::NO_COMPRESSION)); + EXPECT_THROW_IF_DEBUG( + getAutoUncompressionCodec(std::move(codecs)), std::invalid_argument); +} + +TEST(CheckCompatibleTest, ZstdPrefix) { + std::vector> codecs; + codecs.push_back(CustomCodec::create("\x28\xB5\x2F", CodecType::ZSTD)); + EXPECT_THROW_IF_DEBUG( + getAutoUncompressionCodec(std::move(codecs)), std::invalid_argument); +} + +TEST(CheckCompatibleTest, ZstdDuplicate) { + std::vector> codecs; + codecs.push_back(CustomCodec::create("\x28\xB5\x2F\xFD", CodecType::ZSTD)); + EXPECT_THROW_IF_DEBUG( + getAutoUncompressionCodec(std::move(codecs)), std::invalid_argument); +} + +TEST(CheckCompatibleTest, ZlibIsPrefix) { + std::vector> codecs; + codecs.push_back(CustomCodec::create("\x18\x76zzasdf", CodecType::ZSTD)); + EXPECT_THROW_IF_DEBUG( + getAutoUncompressionCodec(std::move(codecs)), std::invalid_argument); +} }}} // namespaces int main(int argc, char *argv[]) {