#endif
#if FOLLY_HAVE_LIBZSTD
+#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#endif
#include <algorithm>
#include <unordered_set>
-namespace folly { namespace io {
+namespace folly {
+namespace io {
Codec::Codec(CodecType type) : type_(type) { }
// Ensure consistent behavior in the nullptr case
std::unique_ptr<IOBuf> Codec::compress(const IOBuf* data) {
+ if (data == nullptr) {
+ throw std::invalid_argument("Codec: data must not be nullptr");
+ }
uint64_t len = data->computeChainDataLength();
if (len == 0) {
return IOBuf::create(0);
std::unique_ptr<IOBuf> Codec::uncompress(
const IOBuf* data,
Optional<uint64_t> uncompressedLength) {
+ if (data == nullptr) {
+ throw std::invalid_argument("Codec: data must not be nullptr");
+ }
if (!uncompressedLength) {
if (needsUncompressedLength()) {
throw std::invalid_argument("Codec: uncompressed length required");
if (output.empty()) {
buffer->prependChain(addOutputBuffer(output, kDefaultBufferLength));
}
+ size_t const inputSize = input.size();
+ size_t const outputSize = output.size();
bool const done = compressStream(input, output, flushOp);
if (done) {
DCHECK(input.empty());
DCHECK_EQ(current->next(), data);
break;
}
+ if (inputSize == input.size() && outputSize == output.size()) {
+ throw std::runtime_error("Codec: No forward progress made");
+ }
}
buffer->prev()->trimEnd(output.size());
return buffer;
if (output.empty()) {
buffer->prependChain(addOutputBuffer(output, defaultBufferLength));
}
+ size_t const inputSize = input.size();
+ size_t const outputSize = output.size();
bool const done = uncompressStream(input, output, flushOp);
if (done) {
break;
}
+ if (inputSize == input.size() && outputSize == output.size()) {
+ throw std::runtime_error("Codec: Truncated data");
+ }
}
if (!input.empty()) {
throw std::runtime_error("Codec: Junk after end of data");
return val;
}
-} // namespace
+} // namespace
#endif // FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA
/**
* Zlib codec
*/
-class ZlibCodec final : public Codec {
+class ZlibStreamCodec final : public StreamCodec {
public:
- static std::unique_ptr<Codec> create(int level, CodecType type);
- explicit ZlibCodec(int level, CodecType type);
+ static std::unique_ptr<Codec> createCodec(int level, CodecType type);
+ static std::unique_ptr<StreamCodec> createStream(int level, CodecType type);
+ explicit ZlibStreamCodec(int level, CodecType type);
+ ~ZlibStreamCodec() override;
std::vector<std::string> validPrefixes() const override;
bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
private:
uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
- std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
- std::unique_ptr<IOBuf> doUncompress(
- const IOBuf* data,
- Optional<uint64_t> uncompressedLength) override;
-
- std::unique_ptr<IOBuf> addOutputBuffer(z_stream* stream, uint32_t length);
- bool doInflate(z_stream* stream, IOBuf* head, uint32_t bufferLength);
+ void doResetStream() override;
+ bool doCompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flush) override;
+ bool doUncompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flush) override;
+
+ void resetDeflateStream();
+ void resetInflateStream();
+
+ Optional<z_stream> deflateStream_{};
+ Optional<z_stream> inflateStream_{};
int level_;
+ bool needReset_{true};
};
static constexpr uint16_t kGZIPMagicLE = 0x8B1F;
-std::vector<std::string> ZlibCodec::validPrefixes() const {
+std::vector<std::string> ZlibStreamCodec::validPrefixes() const {
if (type() == CodecType::ZLIB) {
// Zlib streams start with a 2 byte header.
//
}
}
-bool ZlibCodec::canUncompress(const IOBuf* data, Optional<uint64_t>) const {
+bool ZlibStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
+ const {
if (type() == CodecType::ZLIB) {
uint16_t value;
Cursor cursor{data};
}
}
// Returns zlib's deflateBound() estimate for `uncompressedLength` input bytes.
// No stream is supplied (nullptr), so the bound is not tied to a particular
// initialized deflate stream.
-uint64_t ZlibCodec::doMaxCompressedLength(uint64_t uncompressedLength) const {
+uint64_t ZlibStreamCodec::doMaxCompressedLength(
+ uint64_t uncompressedLength) const {
 return deflateBound(nullptr, uncompressedLength);
}
// Factory helpers. Both construct a ZlibStreamCodec from the same (level, type)
// arguments; they differ only in the static type of the returned pointer
// (Codec vs. StreamCodec).
-std::unique_ptr<Codec> ZlibCodec::create(int level, CodecType type) {
- return std::make_unique<ZlibCodec>(level, type);
+std::unique_ptr<Codec> ZlibStreamCodec::createCodec(int level, CodecType type) {
+ return std::make_unique<ZlibStreamCodec>(level, type);
+}
+
+std::unique_ptr<StreamCodec> ZlibStreamCodec::createStream(
+ int level,
+ CodecType type) {
+ return std::make_unique<ZlibStreamCodec>(level, type);
}
// Constructs a codec for CodecType::ZLIB or CodecType::GZIP (DCHECKed).
// The symbolic COMPRESSION_LEVEL_* values are mapped to zlib levels first; the
// resulting level must then be Z_DEFAULT_COMPRESSION or lie in [0, 9],
// otherwise std::invalid_argument is thrown.
-ZlibCodec::ZlibCodec(int level, CodecType type) : Codec(type) {
+ZlibStreamCodec::ZlibStreamCodec(int level, CodecType type)
+ : StreamCodec(type) {
 DCHECK(type == CodecType::ZLIB || type == CodecType::GZIP);
 switch (level) {
- case COMPRESSION_LEVEL_FASTEST:
- level = 1;
- break;
- case COMPRESSION_LEVEL_DEFAULT:
- level = Z_DEFAULT_COMPRESSION;
- break;
- case COMPRESSION_LEVEL_BEST:
- level = 9;
- break;
+ case COMPRESSION_LEVEL_FASTEST:
+ level = 1;
+ break;
+ case COMPRESSION_LEVEL_DEFAULT:
+ level = Z_DEFAULT_COMPRESSION;
+ break;
+ case COMPRESSION_LEVEL_BEST:
+ level = 9;
+ break;
 }
 // Any level that was not one of the symbolic constants is validated as-is.
 if (level != Z_DEFAULT_COMPRESSION && (level < 0 || level > 9)) {
- throw std::invalid_argument(to<std::string>(
- "ZlibCodec: invalid level: ", level));
+ throw std::invalid_argument(
+ to<std::string>("ZlibStreamCodec: invalid level: ", level));
 }
 level_ = level;
}
-std::unique_ptr<IOBuf> ZlibCodec::addOutputBuffer(z_stream* stream,
- uint32_t length) {
- CHECK_EQ(stream->avail_out, 0);
-
- auto buf = IOBuf::create(length);
- buf->append(buf->capacity());
-
- stream->next_out = buf->writableData();
- stream->avail_out = buf->length();
-
- return buf;
-}
-
-bool ZlibCodec::doInflate(z_stream* stream,
- IOBuf* head,
- uint32_t bufferLength) {
- if (stream->avail_out == 0) {
- head->prependChain(addOutputBuffer(stream, bufferLength));
// Releases whichever zlib streams were created: deflateEnd()/inflateEnd() is
// called on each engaged Optional<z_stream>, which is then cleared. The return
// codes of the *End() calls are not inspected.
+ZlibStreamCodec::~ZlibStreamCodec() {
+ if (deflateStream_) {
+ deflateEnd(deflateStream_.get_pointer());
+ deflateStream_.clear();
 }
-
- int rc = inflate(stream, Z_NO_FLUSH);
-
- switch (rc) {
- case Z_OK:
- break;
- case Z_STREAM_END:
- return true;
- case Z_BUF_ERROR:
- case Z_NEED_DICT:
- case Z_DATA_ERROR:
- case Z_MEM_ERROR:
- throw std::runtime_error(to<std::string>(
- "ZlibCodec: inflate error: ", rc, ": ", stream->msg));
- default:
- CHECK(false) << rc << ": " << stream->msg;
+ if (inflateStream_) {
+ inflateEnd(inflateStream_.get_pointer());
+ inflateStream_.clear();
 }
-
- return false;
 }
-std::unique_ptr<IOBuf> ZlibCodec::doCompress(const IOBuf* data) {
- z_stream stream;
- stream.zalloc = nullptr;
- stream.zfree = nullptr;
- stream.opaque = nullptr;
// Only records that a reset is pending. The zlib stream itself is reset (or
// created) by doCompressStream()/doUncompressStream() when they next observe
// needReset_ == true.
+void ZlibStreamCodec::doResetStream() {
+ needReset_ = true;
+}
// Prepares deflateStream_ for a new compression.
// - If a stream already exists it is reused via deflateReset().
// - Otherwise a value-initialized z_stream is stored in the Optional and
//   initialized in place with deflateInit2().
// On any zlib failure the Optional is cleared and std::runtime_error is thrown.
+void ZlibStreamCodec::resetDeflateStream() {
+ if (deflateStream_) {
+ int const rc = deflateReset(deflateStream_.get_pointer());
+ if (rc != Z_OK) {
+ deflateStream_.clear();
+ throw std::runtime_error(
+ to<std::string>("ZlibStreamCodec: deflateReset error: ", rc));
+ }
+ return;
+ }
+ deflateStream_ = z_stream{};
 // Using deflateInit2() to support gzip. "The windowBits parameter is the
 // base two logarithm of the maximum window size (...) The default value is
 // 15 (...) Add 16 to windowBits to write a simple gzip header and trailer
 // will have no file name, no extra data, no comment, no modification time
 // (set to zero), no header crc, and the operating system will be set to 255
 // (unknown)."
- int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
+ int const windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
 // All other parameters (method, memLevel, strategy) get default values from
 // the zlib manual.
- int rc = deflateInit2(&stream,
- level_,
- Z_DEFLATED,
- windowBits,
- /* memLevel */ 8,
- Z_DEFAULT_STRATEGY);
+ int const rc = deflateInit2(
+ deflateStream_.get_pointer(),
+ level_,
+ Z_DEFLATED,
+ windowBits,
+ /* memLevel */ 8,
+ Z_DEFAULT_STRATEGY);
 if (rc != Z_OK) {
- throw std::runtime_error(to<std::string>(
- "ZlibCodec: deflateInit error: ", rc, ": ", stream.msg));
+ deflateStream_.clear();
+ throw std::runtime_error(
+ to<std::string>("ZlibStreamCodec: deflateInit error: ", rc));
 }
+}
- stream.next_in = stream.next_out = nullptr;
- stream.avail_in = stream.avail_out = 0;
- stream.total_in = stream.total_out = 0;
-
- bool success = false;
-
- SCOPE_EXIT {
- rc = deflateEnd(&stream);
- // If we're here because of an exception, it's okay if some data
- // got dropped.
- CHECK(rc == Z_OK || (!success && rc == Z_DATA_ERROR))
- << rc << ": " << stream.msg;
- };
-
- uint64_t uncompressedLength = data->computeChainDataLength();
- uint64_t maxCompressedLength = deflateBound(&stream, uncompressedLength);
-
- // Max 64MiB in one go
- constexpr uint32_t maxSingleStepLength = uint32_t(64) << 20; // 64MiB
- constexpr uint32_t defaultBufferLength = uint32_t(4) << 20; // 4MiB
-
- auto out = addOutputBuffer(
- &stream,
- (maxCompressedLength <= maxSingleStepLength ?
- maxCompressedLength :
- defaultBufferLength));
-
- for (auto& range : *data) {
- uint64_t remaining = range.size();
- uint64_t written = 0;
- while (remaining) {
- uint32_t step = (remaining > maxSingleStepLength ?
- maxSingleStepLength : remaining);
- stream.next_in = const_cast<uint8_t*>(range.data() + written);
- stream.avail_in = step;
- remaining -= step;
- written += step;
-
- while (stream.avail_in != 0) {
- if (stream.avail_out == 0) {
- out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
- }
-
- rc = deflate(&stream, Z_NO_FLUSH);
-
- CHECK_EQ(rc, Z_OK) << stream.msg;
- }
// Prepares inflateStream_ for a new decompression.
// - If a stream already exists it is reused via inflateReset().
// - Otherwise a value-initialized z_stream is stored in the Optional and
//   initialized in place with inflateInit2().
// On any zlib failure the Optional is cleared and std::runtime_error is thrown.
+void ZlibStreamCodec::resetInflateStream() {
+ if (inflateStream_) {
+ int const rc = inflateReset(inflateStream_.get_pointer());
+ if (rc != Z_OK) {
+ inflateStream_.clear();
+ throw std::runtime_error(
+ to<std::string>("ZlibStreamCodec: inflateReset error: ", rc));
 }
+ return;
 }
-
- do {
- if (stream.avail_out == 0) {
- out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
- }
-
- rc = deflate(&stream, Z_FINISH);
- } while (rc == Z_OK);
-
- CHECK_EQ(rc, Z_STREAM_END) << stream.msg;
-
- out->prev()->trimEnd(stream.avail_out);
-
- success = true; // we survived
-
- return out;
-}
-
-std::unique_ptr<IOBuf> ZlibCodec::doUncompress(
- const IOBuf* data,
- Optional<uint64_t> uncompressedLength) {
- z_stream stream;
- stream.zalloc = nullptr;
- stream.zfree = nullptr;
- stream.opaque = nullptr;
-
+ inflateStream_ = z_stream{};
 // "The windowBits parameter is the base two logarithm of the maximum window
 // size (...) The default value is 15 (...) add 16 to decode only the gzip
 // format (the zlib format will return a Z_DATA_ERROR)."
- int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
- int rc = inflateInit2(&stream, windowBits);
+ int const windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
+ int const rc = inflateInit2(inflateStream_.get_pointer(), windowBits);
 if (rc != Z_OK) {
- throw std::runtime_error(to<std::string>(
- "ZlibCodec: inflateInit error: ", rc, ": ", stream.msg));
+ inflateStream_.clear();
+ throw std::runtime_error(
+ to<std::string>("ZlibStreamCodec: inflateInit error: ", rc));
 }
+}
- stream.next_in = stream.next_out = nullptr;
- stream.avail_in = stream.avail_out = 0;
- stream.total_in = stream.total_out = 0;
+// Maps a StreamCodec flush operation onto the corresponding zlib flush
+// constant: NONE -> Z_NO_FLUSH, FLUSH -> Z_SYNC_FLUSH, END -> Z_FINISH.
+// Any other value throws std::invalid_argument.
+static int zlibTranslateFlush(StreamCodec::FlushOp flush) {
+ using FlushOp = StreamCodec::FlushOp;
+ if (flush == FlushOp::NONE) {
+ return Z_NO_FLUSH;
+ }
+ if (flush == FlushOp::FLUSH) {
+ return Z_SYNC_FLUSH;
+ }
+ if (flush == FlushOp::END) {
+ return Z_FINISH;
+ }
+ throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
+}
- bool success = false;
+// Passes through the zlib return codes the stream loop knows how to handle
+// (Z_OK, Z_BUF_ERROR, Z_STREAM_END) and converts every other code into a
+// std::runtime_error carrying the numeric code.
+static int zlibThrowOnError(int rc) {
+ bool const tolerated =
+ rc == Z_OK || rc == Z_BUF_ERROR || rc == Z_STREAM_END;
+ if (!tolerated) {
+ throw std::runtime_error(to<std::string>("ZlibStreamCodec: error: ", rc));
+ }
+ return rc;
+}
+// Runs a single deflate() step over `input`/`output` and advances both ranges
+// by the number of bytes zlib consumed/produced (also on exceptions, via
+// SCOPE_EXIT).
+//
+// Returns true once the requested flush operation has completed:
+//  - NONE:  never (always false);
+//  - FLUSH: all input consumed and output space left over;
+//  - END:   deflate() reported Z_STREAM_END.
+// Returns false without touching the stream if `output` has no backing buffer.
+// Throws std::invalid_argument for an unknown flush op and std::runtime_error
+// for zlib errors other than Z_BUF_ERROR.
+bool ZlibStreamCodec::doCompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flush) {
+ if (needReset_) {
+ resetDeflateStream();
+ needReset_ = false;
+ }
+ DCHECK(deflateStream_.hasValue());
+ // zlib will return Z_STREAM_ERROR if output.data() is null.
+ if (output.data() == nullptr) {
+ return false;
+ }
+ // Translate first so an invalid flush op throws before any data is consumed.
+ int const requestedFlush = zlibTranslateFlush(flush);
+ // z_stream::avail_in/avail_out are uInt, which can be narrower than size_t.
+ // Clamp each step so that oversized ranges are processed piecewise instead
+ // of being silently truncated on assignment.
+ size_t const maxStep = static_cast<uInt>(-1);
+ size_t const inputStep = std::min(input.size(), maxStep);
+ size_t const outputStep = std::min(output.size(), maxStep);
+ // While input is clamped more data follows, so we must not flush or finish.
+ bool const inputClamped = inputStep != input.size();
+ deflateStream_->next_in = const_cast<uint8_t*>(input.data());
+ deflateStream_->avail_in = static_cast<uInt>(inputStep);
+ deflateStream_->next_out = output.data();
+ deflateStream_->avail_out = static_cast<uInt>(outputStep);
 SCOPE_EXIT {
- rc = inflateEnd(&stream);
- // If we're here because of an exception, it's okay if some data
- // got dropped.
- CHECK(rc == Z_OK || (!success && rc == Z_DATA_ERROR))
- << rc << ": " << stream.msg;
+ input.uncheckedAdvance(inputStep - deflateStream_->avail_in);
+ output.uncheckedAdvance(outputStep - deflateStream_->avail_out);
 };
-
- // Max 64MiB in one go
- constexpr uint64_t maxSingleStepLength = uint64_t(64) << 20; // 64MiB
- constexpr uint64_t kBlockSize = uint64_t(32) << 10; // 32 KiB
- const uint64_t defaultBufferLength =
- computeBufferLength(data->computeChainDataLength(), kBlockSize);
-
- auto out = addOutputBuffer(
- &stream,
- ((uncompressedLength && *uncompressedLength <= maxSingleStepLength)
- ? *uncompressedLength
- : defaultBufferLength));
-
- bool streamEnd = false;
- for (auto& range : *data) {
- if (range.empty()) {
- continue;
- }
-
- stream.next_in = const_cast<uint8_t*>(range.data());
- stream.avail_in = range.size();
-
- while (stream.avail_in != 0) {
- if (streamEnd) {
- throw std::runtime_error(to<std::string>(
- "ZlibCodec: junk after end of data"));
- }
-
- streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
- }
- }
-
- while (!streamEnd) {
- streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
+ int const rc = zlibThrowOnError(deflate(
+ deflateStream_.get_pointer(),
+ inputClamped ? Z_NO_FLUSH : requestedFlush));
+ switch (flush) {
+ case StreamCodec::FlushOp::NONE:
+ return false;
+ case StreamCodec::FlushOp::FLUSH:
+ return !inputClamped && deflateStream_->avail_in == 0 &&
+ deflateStream_->avail_out != 0;
+ case StreamCodec::FlushOp::END:
+ return rc == Z_STREAM_END;
+ default:
+ throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
 }
+}
- out->prev()->trimEnd(stream.avail_out);
-
- if (uncompressedLength && *uncompressedLength != stream.total_out) {
- throw std::runtime_error(
- to<std::string>("ZlibCodec: invalid uncompressed length"));
+// Runs a single inflate() step over `input`/`output` and advances both ranges
+// by the number of bytes zlib consumed/produced (also on exceptions, via
+// SCOPE_EXIT). Returns true once inflate() reports Z_STREAM_END, false
+// otherwise (including when `output` has no backing buffer).
+// Throws std::invalid_argument for an unknown flush op and std::runtime_error
+// for zlib errors other than Z_BUF_ERROR.
+bool ZlibStreamCodec::doUncompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flush) {
+ if (needReset_) {
+ resetInflateStream();
+ needReset_ = false;
+ }
+ DCHECK(inflateStream_.hasValue());
+ // zlib will return Z_STREAM_ERROR if output.data() is null.
+ if (output.data() == nullptr) {
+ return false;
 }
-
- success = true; // we survived
-
- return out;
+ // Translate first so an invalid flush op throws before any data is consumed.
+ int const requestedFlush = zlibTranslateFlush(flush);
+ // z_stream::avail_in/avail_out are uInt, which can be narrower than size_t.
+ // Clamp each step so that oversized ranges are processed piecewise instead
+ // of being silently truncated on assignment.
+ size_t const maxStep = static_cast<uInt>(-1);
+ size_t const inputStep = std::min(input.size(), maxStep);
+ size_t const outputStep = std::min(output.size(), maxStep);
+ // While input is clamped more data follows, so do not signal the final call.
+ bool const inputClamped = inputStep != input.size();
+ inflateStream_->next_in = const_cast<uint8_t*>(input.data());
+ inflateStream_->avail_in = static_cast<uInt>(inputStep);
+ inflateStream_->next_out = output.data();
+ inflateStream_->avail_out = static_cast<uInt>(outputStep);
+ SCOPE_EXIT {
+ // uncheckedAdvance: the guard runs during unwinding and must not throw;
+ // zlib never reports more remaining bytes than it was given.
+ input.uncheckedAdvance(inputStep - inflateStream_->avail_in);
+ output.uncheckedAdvance(outputStep - inflateStream_->avail_out);
+ };
+ int const rc = zlibThrowOnError(inflate(
+ inflateStream_.get_pointer(),
+ inputClamped ? Z_NO_FLUSH : requestedFlush));
+ return rc == Z_STREAM_END;
 }
-#endif // FOLLY_HAVE_LIBZ
+#endif // FOLLY_HAVE_LIBZ
#if FOLLY_HAVE_LIBLZMA
#ifdef FOLLY_HAVE_LIBZSTD
// File-local wrappers around ZSTD_freeCStream()/ZSTD_freeDStream(). They are
// the functions named in the static_function_deleter types of
// ZSTDStreamCodec::cstream_ and ZSTDStreamCodec::dstream_; the zstd return
// values are discarded.
+namespace {
+void zstdFreeCStream(ZSTD_CStream* zcs) {
+ ZSTD_freeCStream(zcs);
+}
+
+void zstdFreeDStream(ZSTD_DStream* zds) {
+ ZSTD_freeDStream(zds);
+}
+} // namespace
+
/**
* ZSTD compression
*/
-class ZSTDCodec final : public Codec {
+class ZSTDStreamCodec final : public StreamCodec {
public:
- static std::unique_ptr<Codec> create(int level, CodecType);
- explicit ZSTDCodec(int level, CodecType type);
+ static std::unique_ptr<Codec> createCodec(int level, CodecType);
+ static std::unique_ptr<StreamCodec> createStream(int level, CodecType);
+ explicit ZSTDStreamCodec(int level, CodecType type);
std::vector<std::string> validPrefixes() const override;
bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
private:
bool doNeedsUncompressedLength() const override;
uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
- std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
- std::unique_ptr<IOBuf> doUncompress(
- const IOBuf* data,
- Optional<uint64_t> uncompressedLength) override;
+ Optional<uint64_t> doGetUncompressedLength(
+ IOBuf const* data,
+ Optional<uint64_t> uncompressedLength) const override;
+
+ void doResetStream() override;
+ bool doCompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flushOp) override;
+ bool doUncompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flushOp) override;
+
+ void resetCStream();
+ void resetDStream();
+
+ bool tryBlockCompress(ByteRange& input, MutableByteRange& output) const;
+ bool tryBlockUncompress(ByteRange& input, MutableByteRange& output) const;
int level_;
+ bool needReset_{true};
+ std::unique_ptr<
+ ZSTD_CStream,
+ folly::static_function_deleter<ZSTD_CStream, &zstdFreeCStream>>
+ cstream_{nullptr};
+ std::unique_ptr<
+ ZSTD_DStream,
+ folly::static_function_deleter<ZSTD_DStream, &zstdFreeDStream>>
+ dstream_{nullptr};
};
static constexpr uint32_t kZSTDMagicLE = 0xFD2FB528;
// Returns the single prefix this codec recognizes: kZSTDMagicLE rendered as a
// string by prefixToStringLE().
-std::vector<std::string> ZSTDCodec::validPrefixes() const {
+std::vector<std::string> ZSTDStreamCodec::validPrefixes() const {
 return {prefixToStringLE(kZSTDMagicLE)};
}
// Reports whether `data` begins with kZSTDMagicLE (checked by
// dataStartsWithLE()). The optional uncompressed length is ignored.
-bool ZSTDCodec::canUncompress(const IOBuf* data, Optional<uint64_t>) const {
+bool ZSTDStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
+ const {
 return dataStartsWithLE(data, kZSTDMagicLE);
}
-std::unique_ptr<Codec> ZSTDCodec::create(int level, CodecType type) {
- return std::make_unique<ZSTDCodec>(level, type);
+// Factory helpers. Both construct a ZSTDStreamCodec from the same
+// (level, type) arguments; they differ only in the static type of the returned
+// pointer. std::make_unique is spelled out explicitly, matching the
+// ZlibStreamCodec factories.
+std::unique_ptr<Codec> ZSTDStreamCodec::createCodec(int level, CodecType type) {
+ return std::make_unique<ZSTDStreamCodec>(level, type);
+}
+
+std::unique_ptr<StreamCodec> ZSTDStreamCodec::createStream(
+ int level,
+ CodecType type) {
+ return std::make_unique<ZSTDStreamCodec>(level, type);
}
-ZSTDCodec::ZSTDCodec(int level, CodecType type) : Codec(type) {
+ZSTDStreamCodec::ZSTDStreamCodec(int level, CodecType type)
+ : StreamCodec(type) {
DCHECK(type == CodecType::ZSTD);
switch (level) {
case COMPRESSION_LEVEL_FASTEST:
level_ = level;
}
// This codec never requires the caller to supply the uncompressed length.
-bool ZSTDCodec::doNeedsUncompressedLength() const {
+bool ZSTDStreamCodec::doNeedsUncompressedLength() const {
 return false;
}
// Returns zstd's ZSTD_compressBound() for `uncompressedLength` input bytes.
-uint64_t ZSTDCodec::doMaxCompressedLength(uint64_t uncompressedLength) const {
+uint64_t ZSTDStreamCodec::doMaxCompressedLength(
+ uint64_t uncompressedLength) const {
 return ZSTD_compressBound(uncompressedLength);
}
to<std::string>("ZSTD returned an error: ", ZSTD_getErrorName(rc)));
}
-std::unique_ptr<IOBuf> ZSTDCodec::doCompress(const IOBuf* data) {
- // Support earlier versions of the codec (working with a single IOBuf,
- // and using ZSTD_decompress which requires ZSTD frame to contain size,
- // which isn't populated by streaming API).
- if (!data->isChained()) {
- auto out = IOBuf::createCombined(ZSTD_compressBound(data->length()));
- const auto rc = ZSTD_compress(
- out->writableData(),
- out->capacity(),
- data->data(),
- data->length(),
- level_);
- zstdThrowIfError(rc);
- out->append(rc);
- return out;
- }
-
- auto zcs = ZSTD_createCStream();
- SCOPE_EXIT {
- ZSTD_freeCStream(zcs);
- };
-
- auto rc = ZSTD_initCStream(zcs, level_);
- zstdThrowIfError(rc);
-
- Cursor cursor(data);
- auto result =
- IOBuf::createCombined(maxCompressedLength(cursor.totalLength()));
-
- ZSTD_outBuffer out;
- out.dst = result->writableTail();
- out.size = result->capacity();
- out.pos = 0;
-
- for (auto buffer = cursor.peekBytes(); !buffer.empty();) {
- ZSTD_inBuffer in;
- in.src = buffer.data();
- in.size = buffer.size();
- for (in.pos = 0; in.pos != in.size;) {
- rc = ZSTD_compressStream(zcs, &out, &in);
- zstdThrowIfError(rc);
// Determines the uncompressed length for `data`.
// ZSTD_getDecompressedSize() is queried on the first IOBuf only (data->data(),
// data->length()). A non-zero result replaces the caller-supplied value; if the
// caller also supplied a length and the two differ, std::runtime_error is
// thrown. A zero result leaves the caller-supplied value untouched.
+Optional<uint64_t> ZSTDStreamCodec::doGetUncompressedLength(
+ IOBuf const* data,
+ Optional<uint64_t> uncompressedLength) const {
+ // Read decompressed size from frame if available in first IOBuf.
+ auto const decompressedSize =
+ ZSTD_getDecompressedSize(data->data(), data->length());
+ if (decompressedSize != 0) {
+ if (uncompressedLength && *uncompressedLength != decompressedSize) {
+ throw std::runtime_error("ZSTD: invalid uncompressed length");
 }
- cursor.skip(in.size);
- buffer = cursor.peekBytes();
+ uncompressedLength = decompressedSize;
 }
+ return uncompressedLength;
+}
- rc = ZSTD_endStream(zcs, &out);
- zstdThrowIfError(rc);
- CHECK_EQ(rc, 0);
// Only records that a reset is pending. The zstd stream objects are
// (re)initialized by doCompressStream()/doUncompressStream() when they next
// observe needReset_ == true.
+void ZSTDStreamCodec::doResetStream() {
+ needReset_ = true;
+}
- result->append(out.pos);
- return result;
+// One-shot fast path: compresses all of `input` into `output` with a single
+// ZSTD_compress() call. Only attempted when `output` is at least
+// ZSTD_compressBound(input.size()) bytes; otherwise returns false and leaves
+// both ranges untouched. On success both ranges are advanced and true is
+// returned. zstd errors are raised through zstdThrowIfError().
+bool ZSTDStreamCodec::tryBlockCompress(
+ ByteRange& input,
+ MutableByteRange& output) const {
+ DCHECK(needReset_);
+ size_t const srcSize = input.size();
+ size_t const dstCapacity = output.size();
+ // Block compression is only safe when the worst case is known to fit.
+ bool const worstCaseFits = dstCapacity >= ZSTD_compressBound(srcSize);
+ if (!worstCaseFits) {
+ return false;
+ }
+ size_t const written = ZSTD_compress(
+ output.data(), dstCapacity, input.data(), srcSize, level_);
+ zstdThrowIfError(written);
+ input.uncheckedAdvance(srcSize);
+ output.uncheckedAdvance(written);
+ return true;
 }
-static std::unique_ptr<IOBuf> zstdUncompressBuffer(
- const IOBuf* data,
- Optional<uint64_t> uncompressedLength) {
- // Check preconditions
- DCHECK(!data->isChained());
- DCHECK(uncompressedLength.hasValue());
-
- auto uncompressed = IOBuf::create(*uncompressedLength);
- const auto decompressedSize = ZSTD_decompress(
- uncompressed->writableTail(),
- uncompressed->tailroom(),
- data->data(),
- data->length());
- zstdThrowIfError(decompressedSize);
- if (decompressedSize != uncompressedLength) {
- throw std::runtime_error("ZSTD: invalid uncompressed length");
// Lazily creates the ZSTD_CStream (std::bad_alloc if creation fails) and then
// (re)initializes it for level_. The content-size flag is set only when an
// uncompressed length is known; that length (or 0) is also passed to zstd as
// the size hint and pledged source size.
+void ZSTDStreamCodec::resetCStream() {
+ if (!cstream_) {
+ cstream_.reset(ZSTD_createCStream());
+ if (!cstream_) {
+ throw std::bad_alloc{};
+ }
 }
- uncompressed->append(decompressedSize);
- return uncompressed;
+ // Advanced API usage works for all supported versions of zstd.
+ // Required to set contentSizeFlag.
+ auto params = ZSTD_getParams(level_, uncompressedLength().value_or(0), 0);
+ params.fParams.contentSizeFlag = uncompressedLength().hasValue();
+ zstdThrowIfError(ZSTD_initCStream_advanced(
+ cstream_.get(), nullptr, 0, params, uncompressedLength().value_or(0)));
 }
-static std::unique_ptr<IOBuf> zstdUncompressStream(
- const IOBuf* data,
- Optional<uint64_t> uncompressedLength) {
- auto zds = ZSTD_createDStream();
// Performs one streaming compression step.
// - On the first call after a reset with flushOp == END, a one-shot
//   tryBlockCompress() is attempted first; if it succeeds the call is done.
// - Otherwise input is fed through ZSTD_compressStream(), and once all input
//   of this call is consumed the requested flush/end operation is issued.
// `input`/`output` are advanced by the bytes consumed/produced (SCOPE_EXIT, so
// also when an exception propagates).
// Returns true when the flush/end call reports nothing left to write (rc == 0),
// false otherwise. Throws std::invalid_argument for an unknown FlushOp; zstd
// errors are raised through zstdThrowIfError().
+bool ZSTDStreamCodec::doCompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flushOp) {
+ if (needReset_) {
+ // If we are given all the input in one chunk try to use block compression
+ if (flushOp == StreamCodec::FlushOp::END &&
+ tryBlockCompress(input, output)) {
+ return true;
+ }
+ resetCStream();
+ needReset_ = false;
+ }
+ ZSTD_inBuffer in = {input.data(), input.size(), 0};
+ ZSTD_outBuffer out = {output.data(), output.size(), 0};
 SCOPE_EXIT {
- ZSTD_freeDStream(zds);
+ input.uncheckedAdvance(in.pos);
+ output.uncheckedAdvance(out.pos);
 };
-
- auto rc = ZSTD_initDStream(zds);
- zstdThrowIfError(rc);
-
- ZSTD_outBuffer out{};
- ZSTD_inBuffer in{};
-
- auto outputSize = uncompressedLength.value_or(ZSTD_DStreamOutSize());
-
- IOBufQueue queue(IOBufQueue::cacheChainLength());
-
- Cursor cursor(data);
- for (rc = 0;;) {
- if (in.pos == in.size) {
- auto buffer = cursor.peekBytes();
- in.src = buffer.data();
- in.size = buffer.size();
- in.pos = 0;
- cursor.skip(in.size);
- if (rc > 1 && in.size == 0) {
- throw std::runtime_error(to<std::string>("ZSTD: incomplete input"));
- }
- }
- if (out.pos == out.size) {
- if (out.pos != 0) {
- queue.postallocate(out.pos);
- }
- auto buffer = queue.preallocate(outputSize, outputSize);
- out.dst = buffer.first;
- out.size = buffer.second;
- out.pos = 0;
- outputSize = ZSTD_DStreamOutSize();
 // Skip the compress call only when flushing/ending with no input left.
+ if (flushOp == StreamCodec::FlushOp::NONE || !input.empty()) {
+ zstdThrowIfError(ZSTD_compressStream(cstream_.get(), &out, &in));
+ }
 // Flush/end is issued only after this call's input is fully consumed.
+ if (in.pos == in.size && flushOp != StreamCodec::FlushOp::NONE) {
+ size_t rc;
+ switch (flushOp) {
+ case StreamCodec::FlushOp::FLUSH:
+ rc = ZSTD_flushStream(cstream_.get(), &out);
+ break;
+ case StreamCodec::FlushOp::END:
+ rc = ZSTD_endStream(cstream_.get(), &out);
+ break;
+ default:
+ throw std::invalid_argument("ZSTD: invalid FlushOp");
 }
- rc = ZSTD_decompressStream(zds, &out, &in);
 zstdThrowIfError(rc);
 if (rc == 0) {
- break;
+ return true;
 }
 }
- if (out.pos != 0) {
- queue.postallocate(out.pos);
- }
- if (in.pos != in.size || !cursor.isAtEnd()) {
- throw std::runtime_error("ZSTD: junk after end of data");
- }
- if (uncompressedLength && queue.chainLength() != *uncompressedLength) {
- throw std::runtime_error("ZSTD: invalid uncompressed length");
- }
+ return false;
+}
- return queue.move();
// One-shot fast path for decompression.
// When built against zstd older than 1.1.4 (ZSTD_VERSION_NUMBER < 10104) this
// always returns false. Otherwise it is attempted only if the uncompressed
// length is known and `output` is at least that large: the first frame's
// compressed size is located with ZSTD_findFrameCompressedSize() and the frame
// is decoded with a single ZSTD_decompress() call.
// Returns true (advancing both ranges) on success, false if not attempted.
// Throws std::runtime_error if the decoded size differs from the expected
// uncompressed length; zstd errors are raised through zstdThrowIfError().
+bool ZSTDStreamCodec::tryBlockUncompress(
+ ByteRange& input,
+ MutableByteRange& output) const {
+ DCHECK(needReset_);
+#if ZSTD_VERSION_NUMBER < 10104
+ // We require ZSTD_findFrameCompressedSize() to perform this optimization.
+ return false;
+#else
+ // We need to know the uncompressed length and have enough output space.
+ if (!uncompressedLength() || output.size() < *uncompressedLength()) {
+ return false;
+ }
+ size_t const compressedLength =
+ ZSTD_findFrameCompressedSize(input.data(), input.size());
+ zstdThrowIfError(compressedLength);
+ size_t const length = ZSTD_decompress(
+ output.data(), *uncompressedLength(), input.data(), compressedLength);
+ zstdThrowIfError(length);
+ if (length != *uncompressedLength()) {
+ throw std::runtime_error("ZSTDStreamCodec: Incorrect uncompressed length");
+ }
+ input.uncheckedAdvance(compressedLength);
+ output.uncheckedAdvance(length);
+ return true;
+#endif
 }
-std::unique_ptr<IOBuf> ZSTDCodec::doUncompress(
- const IOBuf* data,
- Optional<uint64_t> uncompressedLength) {
- {
- // Read decompressed size from frame if available in first IOBuf.
- const auto decompressedSize =
- ZSTD_getDecompressedSize(data->data(), data->length());
- if (decompressedSize != 0) {
- if (uncompressedLength && *uncompressedLength != decompressedSize) {
- throw std::runtime_error("ZSTD: invalid uncompressed length");
- }
- uncompressedLength = decompressedSize;
// Lazily creates the ZSTD_DStream (std::bad_alloc if creation fails) and then
// (re)initializes it with ZSTD_initDStream(); zstd errors are raised through
// zstdThrowIfError().
+void ZSTDStreamCodec::resetDStream() {
+ if (!dstream_) {
+ dstream_.reset(ZSTD_createDStream());
+ if (!dstream_) {
+ throw std::bad_alloc{};
 }
 }
- // Faster to decompress using ZSTD_decompress() if we can.
- if (uncompressedLength && !data->isChained()) {
- return zstdUncompressBuffer(data, uncompressedLength);
+ zstdThrowIfError(ZSTD_initDStream(dstream_.get()));
+}
+
// Performs one streaming decompression step.
// On the first call after a reset with flushOp == END, a one-shot
// tryBlockUncompress() is attempted first; otherwise (or if that declines) the
// data goes through ZSTD_decompressStream(). `input`/`output` are advanced by
// the bytes consumed/produced (SCOPE_EXIT, so also when an exception
// propagates). Returns true when ZSTD_decompressStream() returns 0, false
// otherwise; zstd errors are raised through zstdThrowIfError().
+bool ZSTDStreamCodec::doUncompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flushOp) {
+ if (needReset_) {
+ // If we are given all the input in one chunk try to use block uncompression
+ if (flushOp == StreamCodec::FlushOp::END &&
+ tryBlockUncompress(input, output)) {
+ return true;
+ }
+ resetDStream();
+ needReset_ = false;
 }
- // Fall back to slower streaming decompression.
- return zstdUncompressStream(data, uncompressedLength);
+ ZSTD_inBuffer in = {input.data(), input.size(), 0};
+ ZSTD_outBuffer out = {output.data(), output.size(), 0};
+ SCOPE_EXIT {
+ input.uncheckedAdvance(in.pos);
+ output.uncheckedAdvance(out.pos);
+ };
+ size_t const rc = ZSTD_decompressStream(dstream_.get(), &out, &in);
+ zstdThrowIfError(rc);
+ return rc == 0;
 }
-#endif // FOLLY_HAVE_LIBZSTD
+#endif // FOLLY_HAVE_LIBZSTD
#if FOLLY_HAVE_LIBBZ2
if (stream.avail_out == 0) {
out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
}
-
+ size_t const outputSize = stream.avail_out;
rc = bzCheck(BZ2_bzDecompress(&stream));
+ if (outputSize == stream.avail_out) {
+ throw std::runtime_error("Bzip2Codec: Truncated input");
+ }
}
out->prev()->trimEnd(stream.avail_out);
#endif
#if FOLLY_HAVE_LIBZ
- {ZlibCodec::create, nullptr},
+ {ZlibStreamCodec::createCodec, ZlibStreamCodec::createStream},
#else
{},
#endif
#endif
#if FOLLY_HAVE_LIBZSTD
- {ZSTDCodec::create, nullptr},
+ {ZSTDStreamCodec::createCodec, ZSTDStreamCodec::createStream},
#else
{},
#endif
#if FOLLY_HAVE_LIBZ
- {ZlibCodec::create, nullptr},
+ {ZlibStreamCodec::createCodec, ZlibStreamCodec::createStream},
#else
{},
#endif
std::vector<std::unique_ptr<Codec>> customCodecs) {
return AutomaticCodec::create(std::move(customCodecs));
}
-}} // namespaces
+} // namespace io
+} // namespace folly