/*
 * Copyright 2017 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #include <folly/io/Compression.h>
22 #if LZ4_VERSION_NUMBER >= 10301
27 #include <glog/logging.h>
29 #if FOLLY_HAVE_LIBSNAPPY
31 #include <snappy-sinksource.h>
38 #if FOLLY_HAVE_LIBLZMA
42 #if FOLLY_HAVE_LIBZSTD
46 #include <folly/Bits.h>
47 #include <folly/Conv.h>
48 #include <folly/Memory.h>
49 #include <folly/Portability.h>
50 #include <folly/ScopeGuard.h>
51 #include <folly/Varint.h>
52 #include <folly/io/Cursor.h>
54 #include <unordered_set>
56 namespace folly { namespace io {
58 Codec::Codec(CodecType type) : type_(type) { }
60 // Ensure consistent behavior in the nullptr case
61 std::unique_ptr<IOBuf> Codec::compress(const IOBuf* data) {
62 uint64_t len = data->computeChainDataLength();
64 return IOBuf::create(0);
66 if (len > maxUncompressedLength()) {
67 throw std::runtime_error("Codec: uncompressed length too large");
70 return doCompress(data);
73 std::string Codec::compress(const StringPiece data) {
74 const uint64_t len = data.size();
78 if (len > maxUncompressedLength()) {
79 throw std::runtime_error("Codec: uncompressed length too large");
82 return doCompressString(data);
85 std::unique_ptr<IOBuf> Codec::uncompress(const IOBuf* data,
86 uint64_t uncompressedLength) {
87 if (uncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH) {
88 if (needsUncompressedLength()) {
89 throw std::invalid_argument("Codec: uncompressed length required");
91 } else if (uncompressedLength > maxUncompressedLength()) {
92 throw std::runtime_error("Codec: uncompressed length too large");
96 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
97 uncompressedLength != 0) {
98 throw std::runtime_error("Codec: invalid uncompressed length");
100 return IOBuf::create(0);
103 return doUncompress(data, uncompressedLength);
106 std::string Codec::uncompress(
107 const StringPiece data,
108 uint64_t uncompressedLength) {
109 if (uncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH) {
110 if (needsUncompressedLength()) {
111 throw std::invalid_argument("Codec: uncompressed length required");
113 } else if (uncompressedLength > maxUncompressedLength()) {
114 throw std::runtime_error("Codec: uncompressed length too large");
118 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
119 uncompressedLength != 0) {
120 throw std::runtime_error("Codec: invalid uncompressed length");
125 return doUncompressString(data, uncompressedLength);
128 bool Codec::needsUncompressedLength() const {
129 return doNeedsUncompressedLength();
132 uint64_t Codec::maxUncompressedLength() const {
133 return doMaxUncompressedLength();
136 bool Codec::doNeedsUncompressedLength() const {
140 uint64_t Codec::doMaxUncompressedLength() const {
141 return UNLIMITED_UNCOMPRESSED_LENGTH;
144 std::vector<std::string> Codec::validPrefixes() const {
148 bool Codec::canUncompress(const IOBuf*, uint64_t) const {
152 std::string Codec::doCompressString(const StringPiece data) {
153 const IOBuf inputBuffer{IOBuf::WRAP_BUFFER, data};
154 auto outputBuffer = doCompress(&inputBuffer);
156 output.reserve(outputBuffer->computeChainDataLength());
157 for (auto range : *outputBuffer) {
158 output.append(reinterpret_cast<const char*>(range.data()), range.size());
163 std::string Codec::doUncompressString(
164 const StringPiece data,
165 uint64_t uncompressedLength) {
166 const IOBuf inputBuffer{IOBuf::WRAP_BUFFER, data};
167 auto outputBuffer = doUncompress(&inputBuffer, uncompressedLength);
169 output.reserve(outputBuffer->computeChainDataLength());
170 for (auto range : *outputBuffer) {
171 output.append(reinterpret_cast<const char*>(range.data()), range.size());
181 class NoCompressionCodec final : public Codec {
183 static std::unique_ptr<Codec> create(int level, CodecType type);
184 explicit NoCompressionCodec(int level, CodecType type);
187 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
188 std::unique_ptr<IOBuf> doUncompress(
190 uint64_t uncompressedLength) override;
193 std::unique_ptr<Codec> NoCompressionCodec::create(int level, CodecType type) {
194 return make_unique<NoCompressionCodec>(level, type);
197 NoCompressionCodec::NoCompressionCodec(int level, CodecType type)
199 DCHECK(type == CodecType::NO_COMPRESSION);
201 case COMPRESSION_LEVEL_DEFAULT:
202 case COMPRESSION_LEVEL_FASTEST:
203 case COMPRESSION_LEVEL_BEST:
207 throw std::invalid_argument(to<std::string>(
208 "NoCompressionCodec: invalid level ", level));
212 std::unique_ptr<IOBuf> NoCompressionCodec::doCompress(
214 return data->clone();
217 std::unique_ptr<IOBuf> NoCompressionCodec::doUncompress(
219 uint64_t uncompressedLength) {
220 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
221 data->computeChainDataLength() != uncompressedLength) {
222 throw std::runtime_error(to<std::string>(
223 "NoCompressionCodec: invalid uncompressed length"));
225 return data->clone();
228 #if (FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA)
232 void encodeVarintToIOBuf(uint64_t val, folly::IOBuf* out) {
233 DCHECK_GE(out->tailroom(), kMaxVarintLength64);
234 out->append(encodeVarint(val, out->writableTail()));
237 inline uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) {
240 for (int shift = 0; shift <= 63; shift += 7) {
241 b = cursor.read<int8_t>();
242 val |= static_cast<uint64_t>(b & 0x7f) << shift;
248 throw std::invalid_argument("Invalid varint value. Too big.");
255 #endif // FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA
259 * Reads sizeof(T) bytes, and returns false if not enough bytes are available.
260 * Returns true if the first n bytes are equal to prefix when interpreted as
263 template <typename T>
264 typename std::enable_if<std::is_unsigned<T>::value, bool>::type
265 dataStartsWithLE(const IOBuf* data, T prefix, uint64_t n = sizeof(T)) {
267 DCHECK_LE(n, sizeof(T));
270 if (!cursor.tryReadLE(value)) {
273 const T mask = n == sizeof(T) ? T(-1) : (T(1) << (8 * n)) - 1;
274 return prefix == (value & mask);
277 template <typename T>
278 typename std::enable_if<std::is_arithmetic<T>::value, std::string>::type
279 prefixToStringLE(T prefix, uint64_t n = sizeof(T)) {
281 DCHECK_LE(n, sizeof(T));
282 prefix = Endian::little(prefix);
285 memcpy(&result[0], &prefix, n);
290 #if FOLLY_HAVE_LIBLZ4
295 class LZ4Codec final : public Codec {
297 static std::unique_ptr<Codec> create(int level, CodecType type);
298 explicit LZ4Codec(int level, CodecType type);
301 bool doNeedsUncompressedLength() const override;
302 uint64_t doMaxUncompressedLength() const override;
304 bool encodeSize() const { return type() == CodecType::LZ4_VARINT_SIZE; }
306 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
307 std::unique_ptr<IOBuf> doUncompress(
309 uint64_t uncompressedLength) override;
311 bool highCompression_;
314 std::unique_ptr<Codec> LZ4Codec::create(int level, CodecType type) {
315 return make_unique<LZ4Codec>(level, type);
318 LZ4Codec::LZ4Codec(int level, CodecType type) : Codec(type) {
319 DCHECK(type == CodecType::LZ4 || type == CodecType::LZ4_VARINT_SIZE);
322 case COMPRESSION_LEVEL_FASTEST:
323 case COMPRESSION_LEVEL_DEFAULT:
326 case COMPRESSION_LEVEL_BEST:
330 if (level < 1 || level > 2) {
331 throw std::invalid_argument(to<std::string>(
332 "LZ4Codec: invalid level: ", level));
334 highCompression_ = (level > 1);
337 bool LZ4Codec::doNeedsUncompressedLength() const {
338 return !encodeSize();
// The value comes from lz4.h in lz4-r117, but older versions of lz4 don't
// define LZ4_MAX_INPUT_SIZE (even though the max size is the same), so do it
// here.
#ifndef LZ4_MAX_INPUT_SIZE
# define LZ4_MAX_INPUT_SIZE 0x7E000000
#endif
348 uint64_t LZ4Codec::doMaxUncompressedLength() const {
349 return LZ4_MAX_INPUT_SIZE;
352 std::unique_ptr<IOBuf> LZ4Codec::doCompress(const IOBuf* data) {
354 if (data->isChained()) {
355 // LZ4 doesn't support streaming, so we have to coalesce
356 clone = data->cloneCoalescedAsValue();
360 uint32_t extraSize = encodeSize() ? kMaxVarintLength64 : 0;
361 auto out = IOBuf::create(extraSize + LZ4_compressBound(data->length()));
363 encodeVarintToIOBuf(data->length(), out.get());
367 auto input = reinterpret_cast<const char*>(data->data());
368 auto output = reinterpret_cast<char*>(out->writableTail());
369 const auto inputLength = data->length();
370 #if LZ4_VERSION_NUMBER >= 10700
371 if (highCompression_) {
372 n = LZ4_compress_HC(input, output, inputLength, out->tailroom(), 0);
374 n = LZ4_compress_default(input, output, inputLength, out->tailroom());
377 if (highCompression_) {
378 n = LZ4_compressHC(input, output, inputLength);
380 n = LZ4_compress(input, output, inputLength);
385 CHECK_LE(n, out->capacity());
391 std::unique_ptr<IOBuf> LZ4Codec::doUncompress(
393 uint64_t uncompressedLength) {
395 if (data->isChained()) {
396 // LZ4 doesn't support streaming, so we have to coalesce
397 clone = data->cloneCoalescedAsValue();
401 folly::io::Cursor cursor(data);
402 uint64_t actualUncompressedLength;
404 actualUncompressedLength = decodeVarintFromCursor(cursor);
405 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
406 uncompressedLength != actualUncompressedLength) {
407 throw std::runtime_error("LZ4Codec: invalid uncompressed length");
410 actualUncompressedLength = uncompressedLength;
411 if (actualUncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH ||
412 actualUncompressedLength > maxUncompressedLength()) {
413 throw std::runtime_error("LZ4Codec: invalid uncompressed length");
417 auto sp = StringPiece{cursor.peekBytes()};
418 auto out = IOBuf::create(actualUncompressedLength);
419 int n = LZ4_decompress_safe(
421 reinterpret_cast<char*>(out->writableTail()),
423 actualUncompressedLength);
425 if (n < 0 || uint64_t(n) != actualUncompressedLength) {
426 throw std::runtime_error(to<std::string>(
427 "LZ4 decompression returned invalid value ", n));
429 out->append(actualUncompressedLength);
433 #if LZ4_VERSION_NUMBER >= 10301
435 class LZ4FrameCodec final : public Codec {
437 static std::unique_ptr<Codec> create(int level, CodecType type);
438 explicit LZ4FrameCodec(int level, CodecType type);
441 std::vector<std::string> validPrefixes() const override;
442 bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
446 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
447 std::unique_ptr<IOBuf> doUncompress(
449 uint64_t uncompressedLength) override;
451 // Reset the dctx_ if it is dirty or null.
455 LZ4F_decompressionContext_t dctx_{nullptr};
459 /* static */ std::unique_ptr<Codec> LZ4FrameCodec::create(
462 return make_unique<LZ4FrameCodec>(level, type);
// Magic number at the start of an LZ4 frame, read as a little-endian
// 32-bit value.
static constexpr uint32_t kLZ4FrameMagicLE{0x184D2204};
467 std::vector<std::string> LZ4FrameCodec::validPrefixes() const {
468 return {prefixToStringLE(kLZ4FrameMagicLE)};
471 bool LZ4FrameCodec::canUncompress(const IOBuf* data, uint64_t) const {
472 return dataStartsWithLE(data, kLZ4FrameMagicLE);
475 static size_t lz4FrameThrowOnError(size_t code) {
476 if (LZ4F_isError(code)) {
477 throw std::runtime_error(
478 to<std::string>("LZ4Frame error: ", LZ4F_getErrorName(code)));
483 void LZ4FrameCodec::resetDCtx() {
484 if (dctx_ && !dirty_) {
488 LZ4F_freeDecompressionContext(dctx_);
490 lz4FrameThrowOnError(LZ4F_createDecompressionContext(&dctx_, 100));
494 LZ4FrameCodec::LZ4FrameCodec(int level, CodecType type) : Codec(type) {
495 DCHECK(type == CodecType::LZ4_FRAME);
497 case COMPRESSION_LEVEL_FASTEST:
498 case COMPRESSION_LEVEL_DEFAULT:
501 case COMPRESSION_LEVEL_BEST:
510 LZ4FrameCodec::~LZ4FrameCodec() {
512 LZ4F_freeDecompressionContext(dctx_);
516 std::unique_ptr<IOBuf> LZ4FrameCodec::doCompress(const IOBuf* data) {
517 // LZ4 Frame compression doesn't support streaming so we have to coalesce
519 if (data->isChained()) {
520 clone = data->cloneCoalescedAsValue();
524 const auto uncompressedLength = data->length();
525 LZ4F_preferences_t prefs{};
526 prefs.compressionLevel = level_;
527 prefs.frameInfo.contentSize = uncompressedLength;
529 auto buf = IOBuf::create(LZ4F_compressFrameBound(uncompressedLength, &prefs));
530 const size_t written = lz4FrameThrowOnError(LZ4F_compressFrame(
536 buf->append(written);
540 std::unique_ptr<IOBuf> LZ4FrameCodec::doUncompress(
542 uint64_t uncompressedLength) {
543 // Reset the dctx if any errors have occurred
546 ByteRange in = *data->begin();
548 if (data->isChained()) {
549 clone = data->cloneCoalescedAsValue();
550 in = clone.coalesce();
553 // Select decompression options
554 LZ4F_decompressOptions_t options;
555 options.stableDst = 1;
556 // Select blockSize and growthSize for the IOBufQueue
557 IOBufQueue queue(IOBufQueue::cacheChainLength());
558 auto blockSize = uint64_t{64} << 10;
559 auto growthSize = uint64_t{4} << 20;
560 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH) {
561 // Allocate uncompressedLength in one chunk (up to 64 MB)
562 const auto allocateSize = std::min(uncompressedLength, uint64_t{64} << 20);
563 queue.preallocate(allocateSize, allocateSize);
564 blockSize = std::min(uncompressedLength, blockSize);
565 growthSize = std::min(uncompressedLength, growthSize);
567 // Reduce growthSize for small data
568 const auto guessUncompressedLen =
569 4 * std::max<uint64_t>(blockSize, in.size());
570 growthSize = std::min(guessUncompressedLen, growthSize);
572 // Once LZ4_decompress() is called, the dctx_ cannot be reused until it
575 // Decompress until the frame is over
578 // Allocate enough space to decompress at least a block
581 std::tie(out, outSize) = queue.preallocate(blockSize, growthSize);
583 size_t inSize = in.size();
584 code = lz4FrameThrowOnError(
585 LZ4F_decompress(dctx_, out, &outSize, in.data(), &inSize, &options));
586 if (in.empty() && outSize == 0 && code != 0) {
587 // We passed no input, no output was produced, and the frame isn't over
588 // No more forward progress is possible
589 throw std::runtime_error("LZ4Frame error: Incomplete frame");
591 in.uncheckedAdvance(inSize);
592 queue.postallocate(outSize);
594 // At this point the decompression context can be reused
596 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
597 queue.chainLength() != uncompressedLength) {
598 throw std::runtime_error("LZ4Frame error: Invalid uncompressedLength");
603 #endif // LZ4_VERSION_NUMBER >= 10301
604 #endif // FOLLY_HAVE_LIBLZ4
606 #if FOLLY_HAVE_LIBSNAPPY
613 * Implementation of snappy::Source that reads from a IOBuf chain.
615 class IOBufSnappySource final : public snappy::Source {
617 explicit IOBufSnappySource(const IOBuf* data);
618 size_t Available() const override;
619 const char* Peek(size_t* len) override;
620 void Skip(size_t n) override;
626 IOBufSnappySource::IOBufSnappySource(const IOBuf* data)
627 : available_(data->computeChainDataLength()),
631 size_t IOBufSnappySource::Available() const {
635 const char* IOBufSnappySource::Peek(size_t* len) {
636 auto sp = StringPiece{cursor_.peekBytes()};
641 void IOBufSnappySource::Skip(size_t n) {
642 CHECK_LE(n, available_);
647 class SnappyCodec final : public Codec {
649 static std::unique_ptr<Codec> create(int level, CodecType type);
650 explicit SnappyCodec(int level, CodecType type);
653 uint64_t doMaxUncompressedLength() const override;
654 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
655 std::unique_ptr<IOBuf> doUncompress(
657 uint64_t uncompressedLength) override;
660 std::unique_ptr<Codec> SnappyCodec::create(int level, CodecType type) {
661 return make_unique<SnappyCodec>(level, type);
664 SnappyCodec::SnappyCodec(int level, CodecType type) : Codec(type) {
665 DCHECK(type == CodecType::SNAPPY);
667 case COMPRESSION_LEVEL_FASTEST:
668 case COMPRESSION_LEVEL_DEFAULT:
669 case COMPRESSION_LEVEL_BEST:
673 throw std::invalid_argument(to<std::string>(
674 "SnappyCodec: invalid level: ", level));
678 uint64_t SnappyCodec::doMaxUncompressedLength() const {
679 // snappy.h uses uint32_t for lengths, so there's that.
680 return std::numeric_limits<uint32_t>::max();
683 std::unique_ptr<IOBuf> SnappyCodec::doCompress(const IOBuf* data) {
684 IOBufSnappySource source(data);
686 IOBuf::create(snappy::MaxCompressedLength(source.Available()));
688 snappy::UncheckedByteArraySink sink(reinterpret_cast<char*>(
689 out->writableTail()));
691 size_t n = snappy::Compress(&source, &sink);
693 CHECK_LE(n, out->capacity());
698 std::unique_ptr<IOBuf> SnappyCodec::doUncompress(const IOBuf* data,
699 uint64_t uncompressedLength) {
700 uint32_t actualUncompressedLength = 0;
703 IOBufSnappySource source(data);
704 if (!snappy::GetUncompressedLength(&source, &actualUncompressedLength)) {
705 throw std::runtime_error("snappy::GetUncompressedLength failed");
707 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
708 uncompressedLength != actualUncompressedLength) {
709 throw std::runtime_error("snappy: invalid uncompressed length");
713 auto out = IOBuf::create(actualUncompressedLength);
716 IOBufSnappySource source(data);
717 if (!snappy::RawUncompress(&source,
718 reinterpret_cast<char*>(out->writableTail()))) {
719 throw std::runtime_error("snappy::RawUncompress failed");
723 out->append(actualUncompressedLength);
727 #endif // FOLLY_HAVE_LIBSNAPPY
733 class ZlibCodec final : public Codec {
735 static std::unique_ptr<Codec> create(int level, CodecType type);
736 explicit ZlibCodec(int level, CodecType type);
738 std::vector<std::string> validPrefixes() const override;
739 bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
743 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
744 std::unique_ptr<IOBuf> doUncompress(
746 uint64_t uncompressedLength) override;
748 std::unique_ptr<IOBuf> addOutputBuffer(z_stream* stream, uint32_t length);
749 bool doInflate(z_stream* stream, IOBuf* head, uint32_t bufferLength);
// The two gzip magic bytes, read as a little-endian 16-bit value.
static constexpr uint16_t kGZIPMagicLE{0x8B1F};
756 std::vector<std::string> ZlibCodec::validPrefixes() const {
757 if (type() == CodecType::ZLIB) {
758 // Zlib streams start with a 2 byte header.
765 // We won't restrict the values of any sub-fields except as described below.
767 // The lowest 4 bits of CMF is the compression method (CM).
768 // CM == 0x8 is the deflate compression method, which is currently the only
769 // supported compression method, so any valid prefix must have CM == 0x8.
771 // The lowest 5 bits of FLG is FCHECK.
772 // FCHECK must be such that the two header bytes are a multiple of 31 when
773 // interpreted as a big endian 16-bit number.
774 std::vector<std::string> result;
775 // 16 values for the first byte, 8 values for the second byte.
776 // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK.
778 // Select all values for the CMF byte that use the deflate algorithm 0x8.
779 for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) {
780 // Select all values for the FLG, but leave FCHECK as 0 since it's fixed.
781 for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) {
782 uint16_t prefix = first | second;
784 prefix += 31 - (prefix % 31);
785 result.push_back(prefixToStringLE(Endian::big(prefix)));
786 // zlib won't produce this, but it is a valid prefix.
787 if ((prefix & 0x1F) == 31) {
789 result.push_back(prefixToStringLE(Endian::big(prefix)));
795 // The gzip frame starts with 2 magic bytes.
796 return {prefixToStringLE(kGZIPMagicLE)};
800 bool ZlibCodec::canUncompress(const IOBuf* data, uint64_t) const {
801 if (type() == CodecType::ZLIB) {
804 if (!cursor.tryReadBE(value)) {
807 // zlib compressed if using deflate and is a multiple of 31.
808 return (value & 0x0F00) == 0x0800 && value % 31 == 0;
810 return dataStartsWithLE(data, kGZIPMagicLE);
814 std::unique_ptr<Codec> ZlibCodec::create(int level, CodecType type) {
815 return make_unique<ZlibCodec>(level, type);
818 ZlibCodec::ZlibCodec(int level, CodecType type) : Codec(type) {
819 DCHECK(type == CodecType::ZLIB || type == CodecType::GZIP);
821 case COMPRESSION_LEVEL_FASTEST:
824 case COMPRESSION_LEVEL_DEFAULT:
825 level = Z_DEFAULT_COMPRESSION;
827 case COMPRESSION_LEVEL_BEST:
831 if (level != Z_DEFAULT_COMPRESSION && (level < 0 || level > 9)) {
832 throw std::invalid_argument(to<std::string>(
833 "ZlibCodec: invalid level: ", level));
838 std::unique_ptr<IOBuf> ZlibCodec::addOutputBuffer(z_stream* stream,
840 CHECK_EQ(stream->avail_out, 0);
842 auto buf = IOBuf::create(length);
843 buf->append(buf->capacity());
845 stream->next_out = buf->writableData();
846 stream->avail_out = buf->length();
851 bool ZlibCodec::doInflate(z_stream* stream,
853 uint32_t bufferLength) {
854 if (stream->avail_out == 0) {
855 head->prependChain(addOutputBuffer(stream, bufferLength));
858 int rc = inflate(stream, Z_NO_FLUSH);
869 throw std::runtime_error(to<std::string>(
870 "ZlibCodec: inflate error: ", rc, ": ", stream->msg));
872 CHECK(false) << rc << ": " << stream->msg;
878 std::unique_ptr<IOBuf> ZlibCodec::doCompress(const IOBuf* data) {
880 stream.zalloc = nullptr;
881 stream.zfree = nullptr;
882 stream.opaque = nullptr;
884 // Using deflateInit2() to support gzip. "The windowBits parameter is the
885 // base two logarithm of the maximum window size (...) The default value is
886 // 15 (...) Add 16 to windowBits to write a simple gzip header and trailer
887 // around the compressed data instead of a zlib wrapper. The gzip header
888 // will have no file name, no extra data, no comment, no modification time
889 // (set to zero), no header crc, and the operating system will be set to 255
891 int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
892 // All other parameters (method, memLevel, strategy) get default values from
894 int rc = deflateInit2(&stream,
901 throw std::runtime_error(to<std::string>(
902 "ZlibCodec: deflateInit error: ", rc, ": ", stream.msg));
905 stream.next_in = stream.next_out = nullptr;
906 stream.avail_in = stream.avail_out = 0;
907 stream.total_in = stream.total_out = 0;
909 bool success = false;
912 rc = deflateEnd(&stream);
913 // If we're here because of an exception, it's okay if some data
915 CHECK(rc == Z_OK || (!success && rc == Z_DATA_ERROR))
916 << rc << ": " << stream.msg;
919 uint64_t uncompressedLength = data->computeChainDataLength();
920 uint64_t maxCompressedLength = deflateBound(&stream, uncompressedLength);
922 // Max 64MiB in one go
923 constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
924 constexpr uint32_t defaultBufferLength = uint32_t(4) << 20; // 4MiB
926 auto out = addOutputBuffer(
928 (maxCompressedLength <= maxSingleStepLength ?
929 maxCompressedLength :
930 defaultBufferLength));
932 for (auto& range : *data) {
933 uint64_t remaining = range.size();
934 uint64_t written = 0;
936 uint32_t step = (remaining > maxSingleStepLength ?
937 maxSingleStepLength : remaining);
938 stream.next_in = const_cast<uint8_t*>(range.data() + written);
939 stream.avail_in = step;
943 while (stream.avail_in != 0) {
944 if (stream.avail_out == 0) {
945 out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
948 rc = deflate(&stream, Z_NO_FLUSH);
950 CHECK_EQ(rc, Z_OK) << stream.msg;
956 if (stream.avail_out == 0) {
957 out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
960 rc = deflate(&stream, Z_FINISH);
961 } while (rc == Z_OK);
963 CHECK_EQ(rc, Z_STREAM_END) << stream.msg;
965 out->prev()->trimEnd(stream.avail_out);
967 success = true; // we survived
// Picks the size of each output buffer used while inflating: four times the
// compressed length, but never less than 4 * 32 KiB and never more than
// 4 MiB.
static uint64_t computeBufferLength(uint64_t const compressedLength) {
  constexpr uint64_t kMaxBufferLength = uint64_t(4) << 20; // 4 MiB
  constexpr uint64_t kBlockSize = uint64_t(32) << 10; // 32 KiB
  // Clamp before multiplying: 4 * compressedLength wraps around for lengths
  // of 2^62 and above, which could otherwise produce a tiny (even zero)
  // buffer size. kMaxBufferLength is a multiple of 4, so the result is
  // unchanged for every input that did not overflow.
  const uint64_t clamped = std::min(
      std::max(kBlockSize, compressedLength), kMaxBufferLength / 4);
  return 4 * clamped;
}
979 std::unique_ptr<IOBuf> ZlibCodec::doUncompress(const IOBuf* data,
980 uint64_t uncompressedLength) {
982 stream.zalloc = nullptr;
983 stream.zfree = nullptr;
984 stream.opaque = nullptr;
986 // "The windowBits parameter is the base two logarithm of the maximum window
987 // size (...) The default value is 15 (...) add 16 to decode only the gzip
988 // format (the zlib format will return a Z_DATA_ERROR)."
989 int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
990 int rc = inflateInit2(&stream, windowBits);
992 throw std::runtime_error(to<std::string>(
993 "ZlibCodec: inflateInit error: ", rc, ": ", stream.msg));
996 stream.next_in = stream.next_out = nullptr;
997 stream.avail_in = stream.avail_out = 0;
998 stream.total_in = stream.total_out = 0;
1000 bool success = false;
1003 rc = inflateEnd(&stream);
1004 // If we're here because of an exception, it's okay if some data
1006 CHECK(rc == Z_OK || (!success && rc == Z_DATA_ERROR))
1007 << rc << ": " << stream.msg;
1010 // Max 64MiB in one go
1011 constexpr uint64_t maxSingleStepLength = uint64_t(64) << 20; // 64MiB
1012 const uint64_t defaultBufferLength =
1013 computeBufferLength(data->computeChainDataLength());
1015 auto out = addOutputBuffer(
1017 ((uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
1018 uncompressedLength <= maxSingleStepLength) ?
1019 uncompressedLength :
1020 defaultBufferLength));
1022 bool streamEnd = false;
1023 for (auto& range : *data) {
1024 if (range.empty()) {
1028 stream.next_in = const_cast<uint8_t*>(range.data());
1029 stream.avail_in = range.size();
1031 while (stream.avail_in != 0) {
1033 throw std::runtime_error(to<std::string>(
1034 "ZlibCodec: junk after end of data"));
1037 streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
1041 while (!streamEnd) {
1042 streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
1045 out->prev()->trimEnd(stream.avail_out);
1047 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
1048 uncompressedLength != stream.total_out) {
1049 throw std::runtime_error(to<std::string>(
1050 "ZlibCodec: invalid uncompressed length"));
1053 success = true; // we survived
1058 #endif // FOLLY_HAVE_LIBZ
1060 #if FOLLY_HAVE_LIBLZMA
1065 class LZMA2Codec final : public Codec {
1067 static std::unique_ptr<Codec> create(int level, CodecType type);
1068 explicit LZMA2Codec(int level, CodecType type);
1070 std::vector<std::string> validPrefixes() const override;
1071 bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
1075 bool doNeedsUncompressedLength() const override;
1076 uint64_t doMaxUncompressedLength() const override;
1078 bool encodeSize() const { return type() == CodecType::LZMA2_VARINT_SIZE; }
1080 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
1081 std::unique_ptr<IOBuf> doUncompress(
1083 uint64_t uncompressedLength) override;
1085 std::unique_ptr<IOBuf> addOutputBuffer(lzma_stream* stream, size_t length);
1086 bool doInflate(lzma_stream* stream, IOBuf* head, size_t bufferLength);
// Stream magic used to recognise LZMA2 data: a little-endian value of which
// only the low kLZMA2MagicBytes bytes are significant.
static constexpr uint64_t kLZMA2MagicLE{0x005A587A37FD};
static constexpr unsigned kLZMA2MagicBytes{6};
1094 std::vector<std::string> LZMA2Codec::validPrefixes() const {
1095 if (type() == CodecType::LZMA2_VARINT_SIZE) {
1098 return {prefixToStringLE(kLZMA2MagicLE, kLZMA2MagicBytes)};
1101 bool LZMA2Codec::canUncompress(const IOBuf* data, uint64_t) const {
1102 if (type() == CodecType::LZMA2_VARINT_SIZE) {
1105 // Returns false for all inputs less than 8 bytes.
1106 // This is okay, because no valid LZMA2 streams are less than 8 bytes.
1107 return dataStartsWithLE(data, kLZMA2MagicLE, kLZMA2MagicBytes);
1110 std::unique_ptr<Codec> LZMA2Codec::create(int level, CodecType type) {
1111 return make_unique<LZMA2Codec>(level, type);
1114 LZMA2Codec::LZMA2Codec(int level, CodecType type) : Codec(type) {
1115 DCHECK(type == CodecType::LZMA2 || type == CodecType::LZMA2_VARINT_SIZE);
1117 case COMPRESSION_LEVEL_FASTEST:
1120 case COMPRESSION_LEVEL_DEFAULT:
1121 level = LZMA_PRESET_DEFAULT;
1123 case COMPRESSION_LEVEL_BEST:
1127 if (level < 0 || level > 9) {
1128 throw std::invalid_argument(to<std::string>(
1129 "LZMA2Codec: invalid level: ", level));
1134 bool LZMA2Codec::doNeedsUncompressedLength() const {
1138 uint64_t LZMA2Codec::doMaxUncompressedLength() const {
1139 // From lzma/base.h: "Stream is roughly 8 EiB (2^63 bytes)"
1140 return uint64_t(1) << 63;
1143 std::unique_ptr<IOBuf> LZMA2Codec::addOutputBuffer(
1144 lzma_stream* stream,
1147 CHECK_EQ(stream->avail_out, 0);
1149 auto buf = IOBuf::create(length);
1150 buf->append(buf->capacity());
1152 stream->next_out = buf->writableData();
1153 stream->avail_out = buf->length();
1158 std::unique_ptr<IOBuf> LZMA2Codec::doCompress(const IOBuf* data) {
1160 lzma_stream stream = LZMA_STREAM_INIT;
1162 rc = lzma_easy_encoder(&stream, level_, LZMA_CHECK_NONE);
1163 if (rc != LZMA_OK) {
1164 throw std::runtime_error(folly::to<std::string>(
1165 "LZMA2Codec: lzma_easy_encoder error: ", rc));
1168 SCOPE_EXIT { lzma_end(&stream); };
1170 uint64_t uncompressedLength = data->computeChainDataLength();
1171 uint64_t maxCompressedLength = lzma_stream_buffer_bound(uncompressedLength);
1173 // Max 64MiB in one go
1174 constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
1175 constexpr uint32_t defaultBufferLength = uint32_t(4) << 20; // 4MiB
1177 auto out = addOutputBuffer(
1179 (maxCompressedLength <= maxSingleStepLength ?
1180 maxCompressedLength :
1181 defaultBufferLength));
1184 auto size = IOBuf::createCombined(kMaxVarintLength64);
1185 encodeVarintToIOBuf(uncompressedLength, size.get());
1186 size->appendChain(std::move(out));
1187 out = std::move(size);
1190 for (auto& range : *data) {
1191 if (range.empty()) {
1195 stream.next_in = const_cast<uint8_t*>(range.data());
1196 stream.avail_in = range.size();
1198 while (stream.avail_in != 0) {
1199 if (stream.avail_out == 0) {
1200 out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
1203 rc = lzma_code(&stream, LZMA_RUN);
1205 if (rc != LZMA_OK) {
1206 throw std::runtime_error(folly::to<std::string>(
1207 "LZMA2Codec: lzma_code error: ", rc));
1213 if (stream.avail_out == 0) {
1214 out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
1217 rc = lzma_code(&stream, LZMA_FINISH);
1218 } while (rc == LZMA_OK);
1220 if (rc != LZMA_STREAM_END) {
1221 throw std::runtime_error(folly::to<std::string>(
1222 "LZMA2Codec: lzma_code ended with error: ", rc));
1225 out->prev()->trimEnd(stream.avail_out);
1230 bool LZMA2Codec::doInflate(lzma_stream* stream,
1232 size_t bufferLength) {
1233 if (stream->avail_out == 0) {
1234 head->prependChain(addOutputBuffer(stream, bufferLength));
1237 lzma_ret rc = lzma_code(stream, LZMA_RUN);
1242 case LZMA_STREAM_END:
1245 throw std::runtime_error(to<std::string>(
1246 "LZMA2Codec: lzma_code error: ", rc));
// Decompresses the LZMA data in `data` into an IOBuf chain.
//
// `uncompressedLength` is the caller's expected output size, or
// UNKNOWN_UNCOMPRESSED_LENGTH. Throws std::runtime_error if the decoder
// cannot be initialized, if the expected size disagrees with the size read
// from the input or with the decoder's total output, if input remains after
// the stream ended, or if lzma_code reports an error.
1252 std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(const IOBuf* data,
1253 uint64_t uncompressedLength) {
1255 lzma_stream stream = LZMA_STREAM_INIT;
// Memory limit argument is the largest uint64_t value; no decoder flags.
1257 rc = lzma_auto_decoder(&stream, std::numeric_limits<uint64_t>::max(), 0);
1258 if (rc != LZMA_OK) {
1259 throw std::runtime_error(folly::to<std::string>(
1260 "LZMA2Codec: lzma_auto_decoder error: ", rc));
// Release the decoder state on every exit path, including exceptions.
1263 SCOPE_EXIT { lzma_end(&stream); };
1265 // Max 64MiB in one go
1266 constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
1267 constexpr uint32_t defaultBufferLength = uint32_t(256) << 10; // 256 KiB
1269 folly::io::Cursor cursor(data);
// A varint-encoded length is read from the front of the input and must
// agree with the caller's value when one was supplied.
// NOTE(review): presumably only done when the codec variant stores the
// size -- confirm the enclosing condition.
1271 const uint64_t actualUncompressedLength = decodeVarintFromCursor(cursor);
1272 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
1273 uncompressedLength != actualUncompressedLength) {
1274 throw std::runtime_error("LZMA2Codec: invalid uncompressed length");
1276 uncompressedLength = actualUncompressedLength;
// First output buffer: the exact output size when it is known and at most
// 64 MiB, otherwise the 256 KiB default.
1279 auto out = addOutputBuffer(
1281 ((uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
1282 uncompressedLength <= maxSingleStepLength)
1283 ? uncompressedLength
1284 : defaultBufferLength));
1286 bool streamEnd = false;
// Feed the decoder one contiguous piece of the input chain at a time.
1287 auto buf = cursor.peekBytes();
1288 while (!buf.empty()) {
1289 stream.next_in = const_cast<uint8_t*>(buf.data());
1290 stream.avail_in = buf.size();
1292 while (stream.avail_in != 0) {
// NOTE(review): presumably guarded by a check that the stream has already
// ended while input is still pending -- confirm.
1294 throw std::runtime_error(to<std::string>(
1295 "LZMA2Codec: junk after end of data"));
1298 streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
1301 cursor.skip(buf.size());
1302 buf = cursor.peekBytes();
// All input consumed: keep stepping the decoder until it reports the end
// of the stream.
1305 while (!streamEnd) {
1306 streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
// Drop the unused space at the end of the last buffer in the chain.
1309 out->prev()->trimEnd(stream.avail_out);
// Final check of the expected size against what the decoder produced.
1311 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
1312 uncompressedLength != stream.total_out) {
1313 throw std::runtime_error(
1314 to<std::string>("LZMA2Codec: invalid uncompressed length"));
1320 #endif // FOLLY_HAVE_LIBLZMA
// Use #if rather than #ifdef so this guard agrees with the other
// FOLLY_HAVE_LIBZSTD checks in this file (the include guard and the
// factory-table guard). A build that defines the macro as 0 then skips the
// ZSTD codec instead of compiling it without its header.
1322 #if FOLLY_HAVE_LIBZSTD
// Codec implementation backed by the ZSTD library.
1327 class ZSTDCodec final : public Codec {
// Factory taking (level, CodecType), plus the constructor it forwards to.
1329 static std::unique_ptr<Codec> create(int level, CodecType);
1330 explicit ZSTDCodec(int level, CodecType type);
// Prefix-based detection: the byte prefixes this codec recognizes, and a
// check of whether `data` starts with one of them.
1332 std::vector<std::string> validPrefixes() const override;
1333 bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
// Codec overrides implementing the actual compression and decompression.
1337 bool doNeedsUncompressedLength() const override;
1338 std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
1339 std::unique_ptr<IOBuf> doUncompress(
1341 uint64_t uncompressedLength) override;
// Magic number that validPrefixes() and canUncompress() look for, in
// little-endian byte order, at the start of ZSTD-compressed data.
1346 static constexpr uint32_t kZSTDMagicLE = 0xFD2FB528;
// Returns the single prefix this codec recognizes: the ZSTD magic number
// converted to a string by prefixToStringLE().
1348 std::vector<std::string> ZSTDCodec::validPrefixes() const {
1349 return {prefixToStringLE(kZSTDMagicLE)};
// Returns the result of checking that `data` starts with the ZSTD magic
// number. The uncompressed-length argument is ignored.
1352 bool ZSTDCodec::canUncompress(const IOBuf* data, uint64_t) const {
1353 return dataStartsWithLE(data, kZSTDMagicLE);
// Factory: builds a ZSTDCodec for the given level and codec type.
1356 std::unique_ptr<Codec> ZSTDCodec::create(int level, CodecType type) {
1357 return make_unique<ZSTDCodec>(level, type);
// Constructs the codec for `type` (debug-checked to be CodecType::ZSTD) at
// the given compression level. Throws std::invalid_argument when the level
// is outside [1, ZSTD_maxCLevel()].
1360 ZSTDCodec::ZSTDCodec(int level, CodecType type) : Codec(type) {
1361 DCHECK(type == CodecType::ZSTD);
// The symbolic COMPRESSION_LEVEL_* values get dedicated handling here.
// NOTE(review): presumably each maps to a concrete ZSTD level -- confirm.
1363 case COMPRESSION_LEVEL_FASTEST:
1366 case COMPRESSION_LEVEL_DEFAULT:
1369 case COMPRESSION_LEVEL_BEST:
// Reject anything outside the range the linked ZSTD library accepts.
1373 if (level < 1 || level > ZSTD_maxCLevel()) {
1374 throw std::invalid_argument(
1375 to<std::string>("ZSTD: invalid level: ", level));
// NOTE(review): presumably the hook behind Codec::needsUncompressedLength(),
// which decides whether uncompress() rejects an unknown length -- confirm.
1380 bool ZSTDCodec::doNeedsUncompressedLength() const {
// Converts a ZSTD error return code into a std::runtime_error whose message
// includes ZSTD_getErrorName(rc).
1384 void zstdThrowIfError(size_t rc) {
// Guard for return codes that ZSTD does not classify as errors.
1385 if (!ZSTD_isError(rc)) {
1388 throw std::runtime_error(
1389 to<std::string>("ZSTD returned an error: ", ZSTD_getErrorName(rc)));
// Compresses `data` with ZSTD. An unchained IOBuf goes through the one-shot
// ZSTD_compress() call; a chained one goes through the streaming API.
// ZSTD failures surface as std::runtime_error via zstdThrowIfError().
1392 std::unique_ptr<IOBuf> ZSTDCodec::doCompress(const IOBuf* data) {
1393 // Support earlier versions of the codec (working with a single IOBuf,
1394 // and using ZSTD_decompress which requires ZSTD frame to contain size,
1395 // which isn't populated by streaming API).
1396 if (!data->isChained()) {
// Size the output with ZSTD_compressBound() for the input length.
1397 auto out = IOBuf::createCombined(ZSTD_compressBound(data->length()));
1398 const auto rc = ZSTD_compress(
1399 out->writableData(),
1404 zstdThrowIfError(rc);
// Streaming path for chained input.
1409 auto zcs = ZSTD_createCStream();
// NOTE(review): presumably runs from a scope guard so the stream is freed
// on every exit path -- confirm.
1411 ZSTD_freeCStream(zcs);
1414 auto rc = ZSTD_initCStream(zcs, level_);
1415 zstdThrowIfError(rc);
1417 Cursor cursor(data);
// One output buffer sized with ZSTD_compressBound() for the whole chain.
1418 auto result = IOBuf::createCombined(ZSTD_compressBound(cursor.totalLength()));
1421 out.dst = result->writableTail();
1422 out.size = result->capacity();
// Feed each contiguous piece of the chain until the compressor has
// consumed all of it.
1425 for (auto buffer = cursor.peekBytes(); !buffer.empty();) {
1427 in.src = buffer.data();
1428 in.size = buffer.size();
1429 for (in.pos = 0; in.pos != in.size;) {
1430 rc = ZSTD_compressStream(zcs, &out, &in);
1431 zstdThrowIfError(rc);
1433 cursor.skip(in.size);
1434 buffer = cursor.peekBytes();
// Finish the stream after the last input piece.
1437 rc = ZSTD_endStream(zcs, &out);
1438 zstdThrowIfError(rc);
// Mark the bytes ZSTD wrote as valid data in the result buffer.
1441 result->append(out.pos);
// One-shot decompression with ZSTD_decompress() into a buffer created for
// exactly `uncompressedLength` bytes. Throws std::runtime_error on a ZSTD
// error or when the decompressed size differs from `uncompressedLength`.
1445 static std::unique_ptr<IOBuf> zstdUncompressBuffer(
1447 uint64_t uncompressedLength) {
1448 // Check preconditions
1449 DCHECK(!data->isChained());
1450 DCHECK(uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH);
1452 auto uncompressed = IOBuf::create(uncompressedLength);
1453 const auto decompressedSize = ZSTD_decompress(
1454 uncompressed->writableTail(),
1455 uncompressed->tailroom(),
1458 zstdThrowIfError(decompressedSize);
1459 if (decompressedSize != uncompressedLength) {
1460 throw std::runtime_error("ZSTD: invalid uncompressed length");
// Mark the decompressed bytes as valid data in the buffer.
1462 uncompressed->append(decompressedSize);
1463 return uncompressed;
// Streaming decompression with the ZSTD_DStream API, collecting output in
// an IOBufQueue. `uncompressedLength` may be
// Codec::UNKNOWN_UNCOMPRESSED_LENGTH. Throws std::runtime_error on a ZSTD
// error, incomplete input, input left after the end of the data, or an
// output size that differs from a known `uncompressedLength`.
1466 static std::unique_ptr<IOBuf> zstdUncompressStream(
1468 uint64_t uncompressedLength) {
1469 auto zds = ZSTD_createDStream();
// NOTE(review): presumably runs from a scope guard so the stream is freed
// on every exit path -- confirm.
1471 ZSTD_freeDStream(zds);
1474 auto rc = ZSTD_initDStream(zds);
1475 zstdThrowIfError(rc);
1477 ZSTD_outBuffer out{};
// Size of the first output allocation: ZSTD's suggested size, or the whole
// output size when the caller knows it.
1480 auto outputSize = ZSTD_DStreamOutSize();
1481 if (uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH) {
1482 outputSize = uncompressedLength;
1485 IOBufQueue queue(IOBufQueue::cacheChainLength());
1487 Cursor cursor(data);
// Current input piece fully consumed: fetch the next contiguous piece.
1489 if (in.pos == in.size) {
1490 auto buffer = cursor.peekBytes();
1491 in.src = buffer.data();
1492 in.size = buffer.size();
1494 cursor.skip(in.size);
// `rc` still holds the previous ZSTD call's result at this point.
1495 if (rc > 1 && in.size == 0) {
1496 throw std::runtime_error(to<std::string>("ZSTD: incomplete input"));
// Current output buffer full: commit what was written and get a new one.
1499 if (out.pos == out.size) {
1501 queue.postallocate(out.pos);
1503 auto buffer = queue.preallocate(outputSize, outputSize);
1504 out.dst = buffer.first;
1505 out.size = buffer.second;
// Later allocations go back to ZSTD's suggested size.
1507 outputSize = ZSTD_DStreamOutSize();
1509 rc = ZSTD_decompressStream(zds, &out, &in);
1510 zstdThrowIfError(rc);
// Commit the bytes written into the last output buffer.
1516 queue.postallocate(out.pos);
// Any input not consumed by the decoder is treated as trailing junk.
1518 if (in.pos != in.size || !cursor.isAtEnd()) {
1519 throw std::runtime_error("ZSTD: junk after end of data");
1521 if (uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH &&
1522 queue.chainLength() != uncompressedLength) {
1523 throw std::runtime_error("ZSTD: invalid uncompressed length");
1526 return queue.move();
// Decompresses `data`. Uses one-shot decompression when the output size is
// known and the input is a single buffer, and streaming decompression
// otherwise. Throws std::runtime_error when a size stored in the frame
// disagrees with a caller-supplied `uncompressedLength`.
1529 std::unique_ptr<IOBuf> ZSTDCodec::doUncompress(
1531 uint64_t uncompressedLength) {
1533 // Read decompressed size from frame if available in first IOBuf.
1534 const auto decompressedSize =
1535 ZSTD_getDecompressedSize(data->data(), data->length());
// A result of 0 is treated as "no size available".
1536 if (decompressedSize != 0) {
1537 if (uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH &&
1538 uncompressedLength != decompressedSize) {
1539 throw std::runtime_error("ZSTD: invalid uncompressed length");
1541 uncompressedLength = decompressedSize;
1544 // Faster to decompress using ZSTD_decompress() if we can.
1545 if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH && !data->isChained()) {
1546 return zstdUncompressBuffer(data, uncompressedLength);
1548 // Fall back to slower streaming decompression.
1549 return zstdUncompressStream(data, uncompressedLength);
1552 #endif // FOLLY_HAVE_LIBZSTD
1555 * Automatic decompression
// Decompression-only codec that hands data to the first of its wrapped
// codecs whose canUncompress() accepts it. compress() is not supported.
1557 class AutomaticCodec final : public Codec {
// Factory and constructor; both take ownership of the caller's codecs.
1559 static std::unique_ptr<Codec> create(
1560 std::vector<std::unique_ptr<Codec>> customCodecs);
1561 explicit AutomaticCodec(std::vector<std::unique_ptr<Codec>> customCodecs);
1563 std::vector<std::string> validPrefixes() const override;
1564 bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
// These two return values cached by the constructor.
1568 bool doNeedsUncompressedLength() const override;
1569 uint64_t doMaxUncompressedLength() const override;
// Compression always throws std::runtime_error.
1571 std::unique_ptr<IOBuf> doCompress(const IOBuf*) override {
1572 throw std::runtime_error("AutomaticCodec error: compress() not supported.");
1574 std::unique_ptr<IOBuf> doUncompress(
1576 uint64_t uncompressedLength) override;
// Appends the built-in codec for `type` when it is available and no codec
// of that type is already present.
1578 void addCodecIfSupported(CodecType type);
1580 // Throws iff the codecs aren't compatible (very slow)
1581 void checkCompatibleCodecs() const;
// Wrapped codecs: the caller's custom codecs first, then the built-ins the
// constructor adds.
1583 std::vector<std::unique_ptr<Codec>> codecs_;
// Cached in the constructor: true if any wrapped codec needs the
// uncompressed length, and the largest maxUncompressedLength() among them.
1584 bool needsUncompressedLength_;
1585 uint64_t maxUncompressedLength_;
// Returns the union of validPrefixes() over all wrapped codecs. Duplicates
// are removed by the set; the returned order follows the set's iteration
// order, which is unspecified.
1588 std::vector<std::string> AutomaticCodec::validPrefixes() const {
1589 std::unordered_set<std::string> prefixes;
1590 for (const auto& codec : codecs_) {
1591 const auto codecPrefixes = codec->validPrefixes();
1592 prefixes.insert(codecPrefixes.begin(), codecPrefixes.end());
1594 return std::vector<std::string>{prefixes.begin(), prefixes.end()};
// Checks `data` against the wrapped codecs' own canUncompress().
1597 bool AutomaticCodec::canUncompress(
1599 uint64_t uncompressedLength) const {
// Per-codec predicate: forwards both arguments to that codec's check.
1603 [data, uncompressedLength](std::unique_ptr<Codec> const& codec) {
1604 return codec->canUncompress(data, uncompressedLength);
// Appends the codec returned by getCodec(type) to codecs_ when hasCodec()
// reports the type as available and no codec of that type is present yet.
1608 void AutomaticCodec::addCodecIfSupported(CodecType type) {
// `present` is true when a codec matching `type` was found.
1609 const bool present = std::any_of(
1612 [&type](std::unique_ptr<Codec> const& codec) {
1613 return codec->type() == type;
1615 if (hasCodec(type) && !present) {
1616 codecs_.push_back(getCodec(type));
// Factory: builds an AutomaticCodec that takes ownership of `customCodecs`.
1620 /* static */ std::unique_ptr<Codec> AutomaticCodec::create(
1621 std::vector<std::unique_ptr<Codec>> customCodecs) {
1622 return make_unique<AutomaticCodec>(std::move(customCodecs));
// Takes ownership of `customCodecs`, appends the supported built-in codecs,
// validates that the codecs' prefixes are compatible, and caches the
// aggregate needsUncompressedLength / maxUncompressedLength values.
// checkCompatibleCodecs() may throw std::invalid_argument.
1625 AutomaticCodec::AutomaticCodec(std::vector<std::unique_ptr<Codec>> customCodecs)
1626 : Codec(CodecType::USER_DEFINED), codecs_(std::move(customCodecs)) {
1627 // Fastest -> slowest
1628 addCodecIfSupported(CodecType::LZ4_FRAME);
1629 addCodecIfSupported(CodecType::ZSTD);
1630 addCodecIfSupported(CodecType::ZLIB);
1631 addCodecIfSupported(CodecType::GZIP);
1632 addCodecIfSupported(CodecType::LZMA2);
1634 checkCompatibleCodecs();
// NOTE(review): checkCompatibleCodecs() above already calls
// validPrefixes() on every element of codecs_, so a null custom codec is
// dereferenced before this check runs -- consider moving the check first.
1636 // Check that none of the codecs are null
1637 DCHECK(std::none_of(
1638 codecs_.begin(), codecs_.end(), [](std::unique_ptr<Codec> const& codec) {
1639 return codec == nullptr;
// The length is required as soon as one wrapped codec requires it.
1642 needsUncompressedLength_ = std::any_of(
1643 codecs_.begin(), codecs_.end(), [](std::unique_ptr<Codec> const& codec) {
1644 return codec->needsUncompressedLength();
// The advertised maximum is the largest one among the wrapped codecs.
1647 const auto it = std::max_element(
1650 [](std::unique_ptr<Codec> const& lhs, std::unique_ptr<Codec> const& rhs) {
1651 return lhs->maxUncompressedLength() < rhs->maxUncompressedLength();
1653 DCHECK(it != codecs_.end());
1654 maxUncompressedLength_ = (*it)->maxUncompressedLength();
// Validates that the wrapped codecs can be told apart by their prefixes.
// Throws std::invalid_argument when a codec has no valid prefixes, when two
// prefixes collide, or when one prefix is a strict non-empty prefix of
// another.
1657 void AutomaticCodec::checkCompatibleCodecs() const {
1658 // Keep track of all the possible headers.
1659 std::unordered_set<std::string> headers;
1660 // The empty header is not allowed.
1663 // Construct a set of headers and check that none of the headers occur twice.
1664 // Eliminate edge cases.
1665 for (auto&& codec : codecs_) {
1666 const auto codecHeaders = codec->validPrefixes();
1667 // Codecs without any valid headers are not allowed.
1668 if (codecHeaders.empty()) {
1669 throw std::invalid_argument{
1670 "AutomaticCodec: validPrefixes() must not be empty."};
1672 // Insert all the headers for the current codec.
1673 const size_t beforeSize = headers.size();
1674 headers.insert(codecHeaders.begin(), codecHeaders.end());
1675 // Codecs are not compatible if any header occurred twice.
// The set drops duplicates, so it grows by fewer than codecHeaders.size()
// entries when a header was already present.
1676 if (beforeSize + codecHeaders.size() != headers.size()) {
1677 throw std::invalid_argument{
1678 "AutomaticCodec: Two valid prefixes collide."};
1682 // Check if any strict non-empty prefix of any header is a header.
1683 for (const auto& header : headers) {
// i runs over the proper prefix lengths 1 .. header.size() - 1.
1684 for (size_t i = 1; i < header.size(); ++i) {
1685 if (headers.count(header.substr(0, i))) {
1686 throw std::invalid_argument{
1687 "AutomaticCodec: One valid prefix is a prefix of another valid "
// Returns the flag cached by the constructor: true if any wrapped codec
// needs the uncompressed length.
1694 bool AutomaticCodec::doNeedsUncompressedLength() const {
1695 return needsUncompressedLength_;
// Returns the value cached by the constructor: the largest
// maxUncompressedLength() among the wrapped codecs.
1698 uint64_t AutomaticCodec::doMaxUncompressedLength() const {
1699 return maxUncompressedLength_;
// Delegates to the first wrapped codec, in codecs_ order, whose
// canUncompress() accepts `data`. Throws std::runtime_error when no codec
// accepts it.
1702 std::unique_ptr<IOBuf> AutomaticCodec::doUncompress(
1704 uint64_t uncompressedLength) {
1705 for (auto&& codec : codecs_) {
1706 if (codec->canUncompress(data, uncompressedLength)) {
1707 return codec->uncompress(data, uncompressedLength);
1710 throw std::runtime_error("AutomaticCodec error: Unknown compressed data");
// Signature shared by every codec factory: (level, type) -> codec.
1715 typedef std::unique_ptr<Codec> (*CodecFactory)(int, CodecType);
// Factory table indexed by the numeric CodecType value (see hasCodec() and
// getCodec()). A nullptr entry marks a type with no factory in this build.
// The #if blocks select entries based on which compression libraries are
// available.
1716 static constexpr CodecFactory
1717 codecFactories[static_cast<size_t>(CodecType::NUM_CODEC_TYPES)] = {
1718 nullptr, // USER_DEFINED
1719 NoCompressionCodec::create,
1721 #if FOLLY_HAVE_LIBLZ4
1727 #if FOLLY_HAVE_LIBSNAPPY
1728 SnappyCodec::create,
1739 #if FOLLY_HAVE_LIBLZ4
1745 #if FOLLY_HAVE_LIBLZMA
1753 #if FOLLY_HAVE_LIBZSTD
// The LZ4 frame codec additionally requires a minimum LZ4 library version.
1765 #if (FOLLY_HAVE_LIBLZ4 && LZ4_VERSION_NUMBER >= 10301)
1766 LZ4FrameCodec::create,
// Returns true when a factory is registered for `type` in codecFactories.
// Throws std::invalid_argument when the numeric value of `type` is not
// below CodecType::NUM_CODEC_TYPES.
1772 bool hasCodec(CodecType type) {
1773 size_t idx = static_cast<size_t>(type);
// Bounds check before indexing the factory table.
1774 if (idx >= static_cast<size_t>(CodecType::NUM_CODEC_TYPES)) {
1775 throw std::invalid_argument(
1776 to<std::string>("Compression type ", idx, " invalid"));
1778 return codecFactories[idx] != nullptr;
// Creates a codec of the requested `type` at compression `level` through
// the factory registered in codecFactories. Throws std::invalid_argument
// for an out-of-range type ("invalid") or one reported as "not supported".
1781 std::unique_ptr<Codec> getCodec(CodecType type, int level) {
1782 size_t idx = static_cast<size_t>(type);
// Bounds check before indexing the factory table.
1783 if (idx >= static_cast<size_t>(CodecType::NUM_CODEC_TYPES)) {
1784 throw std::invalid_argument(
1785 to<std::string>("Compression type ", idx, " invalid"));
1787 auto factory = codecFactories[idx];
1789 throw std::invalid_argument(to<std::string>(
1790 "Compression type ", idx, " not supported"));
1792 auto codec = (*factory)(level, type);
// Debug-only sanity check that the factory built the requested type.
1793 DCHECK_EQ(static_cast<size_t>(codec->type()), idx);
// Builds an AutomaticCodec from `customCodecs`; its constructor also adds
// the supported built-in codecs.
1797 std::unique_ptr<Codec> getAutoUncompressionCodec(
1798 std::vector<std::unique_ptr<Codec>> customCodecs) {
1799 return AutomaticCodec::create(std::move(customCodecs));