#endif
#if FOLLY_HAVE_LIBZSTD
+#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#endif
+#if FOLLY_HAVE_LIBBZ2
+#include <bzlib.h>
+#endif
+
#include <folly/Bits.h>
#include <folly/Conv.h>
#include <folly/Memory.h>
return doCompressString(data);
}
-std::unique_ptr<IOBuf> Codec::uncompress(const IOBuf* data,
- uint64_t uncompressedLength) {
- if (uncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH) {
+std::unique_ptr<IOBuf> Codec::uncompress(
+ const IOBuf* data,
+ Optional<uint64_t> uncompressedLength) {
+ if (!uncompressedLength) {
if (needsUncompressedLength()) {
throw std::invalid_argument("Codec: uncompressed length required");
}
- } else if (uncompressedLength > maxUncompressedLength()) {
+ } else if (*uncompressedLength > maxUncompressedLength()) {
throw std::runtime_error("Codec: uncompressed length too large");
}
if (data->empty()) {
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- uncompressedLength != 0) {
+ if (uncompressedLength.value_or(0) != 0) {
throw std::runtime_error("Codec: invalid uncompressed length");
}
return IOBuf::create(0);
std::string Codec::uncompress(
const StringPiece data,
- uint64_t uncompressedLength) {
- if (uncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH) {
+ Optional<uint64_t> uncompressedLength) {
+ if (!uncompressedLength) {
if (needsUncompressedLength()) {
throw std::invalid_argument("Codec: uncompressed length required");
}
- } else if (uncompressedLength > maxUncompressedLength()) {
+ } else if (*uncompressedLength > maxUncompressedLength()) {
throw std::runtime_error("Codec: uncompressed length too large");
}
if (data.empty()) {
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- uncompressedLength != 0) {
+ if (uncompressedLength.value_or(0) != 0) {
throw std::runtime_error("Codec: invalid uncompressed length");
}
return "";
return {};
}
-bool Codec::canUncompress(const IOBuf*, uint64_t) const {
+bool Codec::canUncompress(const IOBuf*, Optional<uint64_t>) const {
return false;
}
std::string Codec::doUncompressString(
const StringPiece data,
- uint64_t uncompressedLength) {
+ Optional<uint64_t> uncompressedLength) {
const IOBuf inputBuffer{IOBuf::WRAP_BUFFER, data};
auto outputBuffer = doUncompress(&inputBuffer, uncompressedLength);
std::string output;
return output;
}
+uint64_t Codec::maxCompressedLength(uint64_t uncompressedLength) const {
+ if (uncompressedLength == 0) {
+ return 0;
+ }
+ return doMaxCompressedLength(uncompressedLength);
+}
+
+Optional<uint64_t> Codec::getUncompressedLength(
+ const folly::IOBuf* data,
+ Optional<uint64_t> uncompressedLength) const {
+ auto const compressedLength = data->computeChainDataLength();
+ if (uncompressedLength == uint64_t(0) || compressedLength == 0) {
+ if (uncompressedLength.value_or(0) != 0 || compressedLength != 0) {
+ throw std::runtime_error("Invalid uncompressed length");
+ }
+ return 0;
+ }
+ return doGetUncompressedLength(data, uncompressedLength);
+}
+
+Optional<uint64_t> Codec::doGetUncompressedLength(
+ const folly::IOBuf*,
+ Optional<uint64_t> uncompressedLength) const {
+ return uncompressedLength;
+}
+
+bool StreamCodec::needsDataLength() const {
+ return doNeedsDataLength();
+}
+
+bool StreamCodec::doNeedsDataLength() const {
+ return false;
+}
+
+void StreamCodec::assertStateIs(State expected) const {
+ if (state_ != expected) {
+ throw std::logic_error(folly::to<std::string>(
+ "Codec: state is ", state_, "; expected state ", expected));
+ }
+}
+
+void StreamCodec::resetStream(Optional<uint64_t> uncompressedLength) {
+ state_ = State::RESET;
+ uncompressedLength_ = uncompressedLength;
+ doResetStream();
+}
+
+bool StreamCodec::compressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flushOp) {
+ if (state_ == State::RESET && input.empty()) {
+ if (flushOp == StreamCodec::FlushOp::NONE) {
+ return false;
+ }
+ if (flushOp == StreamCodec::FlushOp::END &&
+ uncompressedLength().value_or(0) != 0) {
+ throw std::runtime_error("Codec: invalid uncompressed length");
+ }
+ return true;
+ }
+ if (state_ == State::RESET && !input.empty() &&
+ uncompressedLength() == uint64_t(0)) {
+ throw std::runtime_error("Codec: invalid uncompressed length");
+ }
+ // Handle input state transitions
+ switch (flushOp) {
+ case StreamCodec::FlushOp::NONE:
+ if (state_ == State::RESET) {
+ state_ = State::COMPRESS;
+ }
+ assertStateIs(State::COMPRESS);
+ break;
+ case StreamCodec::FlushOp::FLUSH:
+ if (state_ == State::RESET || state_ == State::COMPRESS) {
+ state_ = State::COMPRESS_FLUSH;
+ }
+ assertStateIs(State::COMPRESS_FLUSH);
+ break;
+ case StreamCodec::FlushOp::END:
+ if (state_ == State::RESET || state_ == State::COMPRESS) {
+ state_ = State::COMPRESS_END;
+ }
+ assertStateIs(State::COMPRESS_END);
+ break;
+ }
+ bool const done = doCompressStream(input, output, flushOp);
+ // Handle output state transitions
+ if (done) {
+ if (state_ == State::COMPRESS_FLUSH) {
+ state_ = State::COMPRESS;
+ } else if (state_ == State::COMPRESS_END) {
+ state_ = State::END;
+ }
+ // Check internal invariants
+ DCHECK(input.empty());
+ DCHECK(flushOp != StreamCodec::FlushOp::NONE);
+ }
+ return done;
+}
+
+bool StreamCodec::uncompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flushOp) {
+ if (state_ == State::RESET && input.empty()) {
+ if (uncompressedLength().value_or(0) == 0) {
+ return true;
+ }
+ return false;
+ }
+ // Handle input state transitions
+ if (state_ == State::RESET) {
+ state_ = State::UNCOMPRESS;
+ }
+ assertStateIs(State::UNCOMPRESS);
+ bool const done = doUncompressStream(input, output, flushOp);
+ // Handle output state transitions
+ if (done) {
+ state_ = State::END;
+ }
+ return done;
+}
+
+static std::unique_ptr<IOBuf> addOutputBuffer(
+ MutableByteRange& output,
+ uint64_t size) {
+ DCHECK(output.empty());
+ auto buffer = IOBuf::create(size);
+ buffer->append(buffer->capacity());
+ output = {buffer->writableData(), buffer->length()};
+ return buffer;
+}
+
+std::unique_ptr<IOBuf> StreamCodec::doCompress(IOBuf const* data) {
+ uint64_t const uncompressedLength = data->computeChainDataLength();
+ resetStream(uncompressedLength);
+ uint64_t const maxCompressedLen = maxCompressedLength(uncompressedLength);
+
+ auto constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MB
+ auto constexpr kDefaultBufferLength = uint64_t(4) << 20; // 4 MB
+
+ MutableByteRange output;
+ auto buffer = addOutputBuffer(
+ output,
+ maxCompressedLen <= kMaxSingleStepLength ? maxCompressedLen
+ : kDefaultBufferLength);
+
+ // Compress the entire IOBuf chain into the IOBuf chain pointed to by buffer
+ IOBuf const* current = data;
+ ByteRange input{current->data(), current->length()};
+ StreamCodec::FlushOp flushOp = StreamCodec::FlushOp::NONE;
+ for (;;) {
+ while (input.empty() && current->next() != data) {
+ current = current->next();
+ input = {current->data(), current->length()};
+ }
+ if (current->next() == data) {
+ // This is the last input buffer so end the stream
+ flushOp = StreamCodec::FlushOp::END;
+ }
+ if (output.empty()) {
+ buffer->prependChain(addOutputBuffer(output, kDefaultBufferLength));
+ }
+ bool const done = compressStream(input, output, flushOp);
+ if (done) {
+ DCHECK(input.empty());
+ DCHECK(flushOp == StreamCodec::FlushOp::END);
+ DCHECK_EQ(current->next(), data);
+ break;
+ }
+ }
+ buffer->prev()->trimEnd(output.size());
+ return buffer;
+}
+
+static uint64_t computeBufferLength(
+ uint64_t const compressedLength,
+ uint64_t const blockSize) {
+ uint64_t constexpr kMaxBufferLength = uint64_t(4) << 20; // 4 MiB
+ uint64_t const goodBufferSize = 4 * std::max(blockSize, compressedLength);
+ return std::min(goodBufferSize, kMaxBufferLength);
+}
+
+std::unique_ptr<IOBuf> StreamCodec::doUncompress(
+ IOBuf const* data,
+ Optional<uint64_t> uncompressedLength) {
+ auto constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MB
+ auto constexpr kBlockSize = uint64_t(128) << 10;
+ auto const defaultBufferLength =
+ computeBufferLength(data->computeChainDataLength(), kBlockSize);
+
+ uncompressedLength = getUncompressedLength(data, uncompressedLength);
+ resetStream(uncompressedLength);
+
+ MutableByteRange output;
+ auto buffer = addOutputBuffer(
+ output,
+ (uncompressedLength && *uncompressedLength <= kMaxSingleStepLength
+ ? *uncompressedLength
+ : defaultBufferLength));
+
+ // Uncompress the entire IOBuf chain into the IOBuf chain pointed to by buffer
+ IOBuf const* current = data;
+ ByteRange input{current->data(), current->length()};
+ StreamCodec::FlushOp flushOp = StreamCodec::FlushOp::NONE;
+ for (;;) {
+ while (input.empty() && current->next() != data) {
+ current = current->next();
+ input = {current->data(), current->length()};
+ }
+ if (current->next() == data) {
+ // Tell the uncompressor there is no more input (it may optimize)
+ flushOp = StreamCodec::FlushOp::END;
+ }
+ if (output.empty()) {
+ buffer->prependChain(addOutputBuffer(output, defaultBufferLength));
+ }
+ bool const done = uncompressStream(input, output, flushOp);
+ if (done) {
+ break;
+ }
+ }
+ if (!input.empty()) {
+ throw std::runtime_error("Codec: Junk after end of data");
+ }
+
+ buffer->prev()->trimEnd(output.size());
+ if (uncompressedLength &&
+ *uncompressedLength != buffer->computeChainDataLength()) {
+ throw std::runtime_error("Codec: invalid uncompressed length");
+ }
+
+ return buffer;
+}
+
namespace {
/**
explicit NoCompressionCodec(int level, CodecType type);
private:
+ uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) override;
+ Optional<uint64_t> uncompressedLength) override;
};
std::unique_ptr<Codec> NoCompressionCodec::create(int level, CodecType type) {
- return make_unique<NoCompressionCodec>(level, type);
+ return std::make_unique<NoCompressionCodec>(level, type);
}
NoCompressionCodec::NoCompressionCodec(int level, CodecType type)
}
}
+uint64_t NoCompressionCodec::doMaxCompressedLength(
+ uint64_t uncompressedLength) const {
+ return uncompressedLength;
+}
+
std::unique_ptr<IOBuf> NoCompressionCodec::doCompress(
const IOBuf* data) {
return data->clone();
std::unique_ptr<IOBuf> NoCompressionCodec::doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) {
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- data->computeChainDataLength() != uncompressedLength) {
- throw std::runtime_error(to<std::string>(
- "NoCompressionCodec: invalid uncompressed length"));
+ Optional<uint64_t> uncompressedLength) {
+ if (uncompressedLength &&
+ data->computeChainDataLength() != *uncompressedLength) {
+ throw std::runtime_error(
+ to<std::string>("NoCompressionCodec: invalid uncompressed length"));
}
return data->clone();
}
private:
bool doNeedsUncompressedLength() const override;
uint64_t doMaxUncompressedLength() const override;
+ uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
bool encodeSize() const { return type() == CodecType::LZ4_VARINT_SIZE; }
std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) override;
+ Optional<uint64_t> uncompressedLength) override;
bool highCompression_;
};
std::unique_ptr<Codec> LZ4Codec::create(int level, CodecType type) {
- return make_unique<LZ4Codec>(level, type);
+ return std::make_unique<LZ4Codec>(level, type);
}
LZ4Codec::LZ4Codec(int level, CodecType type) : Codec(type) {
return LZ4_MAX_INPUT_SIZE;
}
+uint64_t LZ4Codec::doMaxCompressedLength(uint64_t uncompressedLength) const {
+ return LZ4_compressBound(uncompressedLength) +
+ (encodeSize() ? kMaxVarintLength64 : 0);
+}
+
std::unique_ptr<IOBuf> LZ4Codec::doCompress(const IOBuf* data) {
IOBuf clone;
if (data->isChained()) {
data = &clone;
}
- uint32_t extraSize = encodeSize() ? kMaxVarintLength64 : 0;
- auto out = IOBuf::create(extraSize + LZ4_compressBound(data->length()));
+ auto out = IOBuf::create(maxCompressedLength(data->length()));
if (encodeSize()) {
encodeVarintToIOBuf(data->length(), out.get());
}
std::unique_ptr<IOBuf> LZ4Codec::doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) {
+ Optional<uint64_t> uncompressedLength) {
IOBuf clone;
if (data->isChained()) {
// LZ4 doesn't support streaming, so we have to coalesce
uint64_t actualUncompressedLength;
if (encodeSize()) {
actualUncompressedLength = decodeVarintFromCursor(cursor);
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- uncompressedLength != actualUncompressedLength) {
+ if (uncompressedLength && *uncompressedLength != actualUncompressedLength) {
throw std::runtime_error("LZ4Codec: invalid uncompressed length");
}
} else {
- actualUncompressedLength = uncompressedLength;
- if (actualUncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH ||
- actualUncompressedLength > maxUncompressedLength()) {
- throw std::runtime_error("LZ4Codec: invalid uncompressed length");
- }
+ // Invariants
+ DCHECK(uncompressedLength.hasValue());
+ DCHECK(*uncompressedLength <= maxUncompressedLength());
+ actualUncompressedLength = *uncompressedLength;
}
auto sp = StringPiece{cursor.peekBytes()};
public:
static std::unique_ptr<Codec> create(int level, CodecType type);
explicit LZ4FrameCodec(int level, CodecType type);
- ~LZ4FrameCodec();
+ ~LZ4FrameCodec() override;
std::vector<std::string> validPrefixes() const override;
- bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
+ bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
const override;
private:
+ uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
+
std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) override;
+ Optional<uint64_t> uncompressedLength) override;
// Reset the dctx_ if it is dirty or null.
void resetDCtx();
/* static */ std::unique_ptr<Codec> LZ4FrameCodec::create(
int level,
CodecType type) {
- return make_unique<LZ4FrameCodec>(level, type);
+ return std::make_unique<LZ4FrameCodec>(level, type);
}
static constexpr uint32_t kLZ4FrameMagicLE = 0x184D2204;
return {prefixToStringLE(kLZ4FrameMagicLE)};
}
-bool LZ4FrameCodec::canUncompress(const IOBuf* data, uint64_t) const {
+bool LZ4FrameCodec::canUncompress(const IOBuf* data, Optional<uint64_t>) const {
return dataStartsWithLE(data, kLZ4FrameMagicLE);
}
+uint64_t LZ4FrameCodec::doMaxCompressedLength(
+ uint64_t uncompressedLength) const {
+ LZ4F_preferences_t prefs{};
+ prefs.compressionLevel = level_;
+ prefs.frameInfo.contentSize = uncompressedLength;
+ return LZ4F_compressFrameBound(uncompressedLength, &prefs);
+}
+
static size_t lz4FrameThrowOnError(size_t code) {
if (LZ4F_isError(code)) {
throw std::runtime_error(
prefs.compressionLevel = level_;
prefs.frameInfo.contentSize = uncompressedLength;
// Compress
- auto buf = IOBuf::create(LZ4F_compressFrameBound(uncompressedLength, &prefs));
+ auto buf = IOBuf::create(maxCompressedLength(uncompressedLength));
const size_t written = lz4FrameThrowOnError(LZ4F_compressFrame(
buf->writableTail(),
buf->tailroom(),
std::unique_ptr<IOBuf> LZ4FrameCodec::doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) {
+ Optional<uint64_t> uncompressedLength) {
// Reset the dctx if any errors have occurred
resetDCtx();
// Coalesce the data
IOBufQueue queue(IOBufQueue::cacheChainLength());
auto blockSize = uint64_t{64} << 10;
auto growthSize = uint64_t{4} << 20;
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH) {
+ if (uncompressedLength) {
// Allocate uncompressedLength in one chunk (up to 64 MB)
- const auto allocateSize = std::min(uncompressedLength, uint64_t{64} << 20);
+ const auto allocateSize = std::min(*uncompressedLength, uint64_t{64} << 20);
queue.preallocate(allocateSize, allocateSize);
- blockSize = std::min(uncompressedLength, blockSize);
- growthSize = std::min(uncompressedLength, growthSize);
+ blockSize = std::min(*uncompressedLength, blockSize);
+ growthSize = std::min(*uncompressedLength, growthSize);
} else {
// Reduce growthSize for small data
- const auto guessUncompressedLen = 4 * std::max(blockSize, in.size());
+ const auto guessUncompressedLen =
+ 4 * std::max<uint64_t>(blockSize, in.size());
growthSize = std::min(guessUncompressedLen, growthSize);
}
// Once LZ4_decompress() is called, the dctx_ cannot be reused until it
} while (code != 0);
// At this point the decompression context can be reused
dirty_ = false;
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- queue.chainLength() != uncompressedLength) {
+ if (uncompressedLength && queue.chainLength() != *uncompressedLength) {
throw std::runtime_error("LZ4Frame error: Invalid uncompressedLength");
}
return queue.move();
private:
uint64_t doMaxUncompressedLength() const override;
+ uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) override;
+ Optional<uint64_t> uncompressedLength) override;
};
std::unique_ptr<Codec> SnappyCodec::create(int level, CodecType type) {
- return make_unique<SnappyCodec>(level, type);
+ return std::make_unique<SnappyCodec>(level, type);
}
SnappyCodec::SnappyCodec(int level, CodecType type) : Codec(type) {
return std::numeric_limits<uint32_t>::max();
}
+uint64_t SnappyCodec::doMaxCompressedLength(uint64_t uncompressedLength) const {
+ return snappy::MaxCompressedLength(uncompressedLength);
+}
+
std::unique_ptr<IOBuf> SnappyCodec::doCompress(const IOBuf* data) {
IOBufSnappySource source(data);
- auto out =
- IOBuf::create(snappy::MaxCompressedLength(source.Available()));
+ auto out = IOBuf::create(maxCompressedLength(source.Available()));
snappy::UncheckedByteArraySink sink(reinterpret_cast<char*>(
out->writableTail()));
return out;
}
-std::unique_ptr<IOBuf> SnappyCodec::doUncompress(const IOBuf* data,
- uint64_t uncompressedLength) {
+std::unique_ptr<IOBuf> SnappyCodec::doUncompress(
+ const IOBuf* data,
+ Optional<uint64_t> uncompressedLength) {
uint32_t actualUncompressedLength = 0;
{
if (!snappy::GetUncompressedLength(&source, &actualUncompressedLength)) {
throw std::runtime_error("snappy::GetUncompressedLength failed");
}
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- uncompressedLength != actualUncompressedLength) {
+ if (uncompressedLength && *uncompressedLength != actualUncompressedLength) {
throw std::runtime_error("snappy: invalid uncompressed length");
}
}
explicit ZlibCodec(int level, CodecType type);
std::vector<std::string> validPrefixes() const override;
- bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
+ bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
const override;
private:
+ uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) override;
+ Optional<uint64_t> uncompressedLength) override;
std::unique_ptr<IOBuf> addOutputBuffer(z_stream* stream, uint32_t length);
bool doInflate(z_stream* stream, IOBuf* head, uint32_t bufferLength);
}
}
-bool ZlibCodec::canUncompress(const IOBuf* data, uint64_t) const {
+bool ZlibCodec::canUncompress(const IOBuf* data, Optional<uint64_t>) const {
if (type() == CodecType::ZLIB) {
uint16_t value;
Cursor cursor{data};
}
}
+uint64_t ZlibCodec::doMaxCompressedLength(uint64_t uncompressedLength) const {
+ return deflateBound(nullptr, uncompressedLength);
+}
+
std::unique_ptr<Codec> ZlibCodec::create(int level, CodecType type) {
- return make_unique<ZlibCodec>(level, type);
+ return std::make_unique<ZlibCodec>(level, type);
}
ZlibCodec::ZlibCodec(int level, CodecType type) : Codec(type) {
CHECK_EQ(stream->avail_out, 0);
auto buf = IOBuf::create(length);
- buf->append(length);
+ buf->append(buf->capacity());
stream->next_out = buf->writableData();
stream->avail_out = buf->length();
return out;
}
-static uint64_t computeBufferLength(uint64_t const compressedLength) {
- constexpr uint64_t kMaxBufferLength = uint64_t(4) << 20; // 4 MiB
- constexpr uint64_t kBlockSize = uint64_t(32) << 10; // 32 KiB
- const uint64_t goodBufferSize = 4 * std::max(kBlockSize, compressedLength);
- return std::min(goodBufferSize, kMaxBufferLength);
-}
-
-std::unique_ptr<IOBuf> ZlibCodec::doUncompress(const IOBuf* data,
- uint64_t uncompressedLength) {
+std::unique_ptr<IOBuf> ZlibCodec::doUncompress(
+ const IOBuf* data,
+ Optional<uint64_t> uncompressedLength) {
z_stream stream;
stream.zalloc = nullptr;
stream.zfree = nullptr;
// Max 64MiB in one go
constexpr uint64_t maxSingleStepLength = uint64_t(64) << 20; // 64MiB
+ constexpr uint64_t kBlockSize = uint64_t(32) << 10; // 32 KiB
const uint64_t defaultBufferLength =
- computeBufferLength(data->computeChainDataLength());
+ computeBufferLength(data->computeChainDataLength(), kBlockSize);
auto out = addOutputBuffer(
&stream,
- ((uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- uncompressedLength <= maxSingleStepLength) ?
- uncompressedLength :
- defaultBufferLength));
+ ((uncompressedLength && *uncompressedLength <= maxSingleStepLength)
+ ? *uncompressedLength
+ : defaultBufferLength));
bool streamEnd = false;
for (auto& range : *data) {
out->prev()->trimEnd(stream.avail_out);
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- uncompressedLength != stream.total_out) {
- throw std::runtime_error(to<std::string>(
- "ZlibCodec: invalid uncompressed length"));
+ if (uncompressedLength && *uncompressedLength != stream.total_out) {
+ throw std::runtime_error(
+ to<std::string>("ZlibCodec: invalid uncompressed length"));
}
success = true; // we survived
explicit LZMA2Codec(int level, CodecType type);
std::vector<std::string> validPrefixes() const override;
- bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
+ bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
const override;
private:
bool doNeedsUncompressedLength() const override;
uint64_t doMaxUncompressedLength() const override;
+ uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
bool encodeSize() const { return type() == CodecType::LZMA2_VARINT_SIZE; }
std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) override;
+ Optional<uint64_t> uncompressedLength) override;
std::unique_ptr<IOBuf> addOutputBuffer(lzma_stream* stream, size_t length);
bool doInflate(lzma_stream* stream, IOBuf* head, size_t bufferLength);
return {prefixToStringLE(kLZMA2MagicLE, kLZMA2MagicBytes)};
}
-bool LZMA2Codec::canUncompress(const IOBuf* data, uint64_t) const {
+bool LZMA2Codec::canUncompress(const IOBuf* data, Optional<uint64_t>) const {
if (type() == CodecType::LZMA2_VARINT_SIZE) {
return false;
}
}
std::unique_ptr<Codec> LZMA2Codec::create(int level, CodecType type) {
- return make_unique<LZMA2Codec>(level, type);
+ return std::make_unique<LZMA2Codec>(level, type);
}
LZMA2Codec::LZMA2Codec(int level, CodecType type) : Codec(type) {
return uint64_t(1) << 63;
}
+uint64_t LZMA2Codec::doMaxCompressedLength(uint64_t uncompressedLength) const {
+ return lzma_stream_buffer_bound(uncompressedLength) +
+ (encodeSize() ? kMaxVarintLength64 : 0);
+}
+
std::unique_ptr<IOBuf> LZMA2Codec::addOutputBuffer(
lzma_stream* stream,
size_t length) {
CHECK_EQ(stream->avail_out, 0);
auto buf = IOBuf::create(length);
- buf->append(length);
+ buf->append(buf->capacity());
stream->next_out = buf->writableData();
stream->avail_out = buf->length();
return false;
}
-std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(const IOBuf* data,
- uint64_t uncompressedLength) {
+std::unique_ptr<IOBuf> LZMA2Codec::doUncompress(
+ const IOBuf* data,
+ Optional<uint64_t> uncompressedLength) {
lzma_ret rc;
lzma_stream stream = LZMA_STREAM_INIT;
folly::io::Cursor cursor(data);
if (encodeSize()) {
const uint64_t actualUncompressedLength = decodeVarintFromCursor(cursor);
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- uncompressedLength != actualUncompressedLength) {
+ if (uncompressedLength && *uncompressedLength != actualUncompressedLength) {
throw std::runtime_error("LZMA2Codec: invalid uncompressed length");
}
uncompressedLength = actualUncompressedLength;
auto out = addOutputBuffer(
&stream,
- ((uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- uncompressedLength <= maxSingleStepLength)
- ? uncompressedLength
+ ((uncompressedLength && *uncompressedLength <= maxSingleStepLength)
+ ? *uncompressedLength
: defaultBufferLength));
bool streamEnd = false;
out->prev()->trimEnd(stream.avail_out);
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH &&
- uncompressedLength != stream.total_out) {
+ if (uncompressedLength && *uncompressedLength != stream.total_out) {
throw std::runtime_error(
to<std::string>("LZMA2Codec: invalid uncompressed length"));
}
#ifdef FOLLY_HAVE_LIBZSTD
+namespace {
+void zstdFreeCStream(ZSTD_CStream* zcs) {
+ ZSTD_freeCStream(zcs);
+}
+
+void zstdFreeDStream(ZSTD_DStream* zds) {
+ ZSTD_freeDStream(zds);
+}
+}
+
/**
* ZSTD compression
*/
-class ZSTDCodec final : public Codec {
+class ZSTDStreamCodec final : public StreamCodec {
public:
- static std::unique_ptr<Codec> create(int level, CodecType);
- explicit ZSTDCodec(int level, CodecType type);
+ static std::unique_ptr<Codec> createCodec(int level, CodecType);
+ static std::unique_ptr<StreamCodec> createStream(int level, CodecType);
+ explicit ZSTDStreamCodec(int level, CodecType type);
std::vector<std::string> validPrefixes() const override;
- bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
+ bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
const override;
private:
bool doNeedsUncompressedLength() const override;
- std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
- std::unique_ptr<IOBuf> doUncompress(
- const IOBuf* data,
- uint64_t uncompressedLength) override;
+ uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
+ Optional<uint64_t> doGetUncompressedLength(
+ IOBuf const* data,
+ Optional<uint64_t> uncompressedLength) const override;
+
+ void doResetStream() override;
+ bool doCompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flushOp) override;
+ bool doUncompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flushOp) override;
+
+ void resetCStream();
+ void resetDStream();
+
+ bool tryBlockCompress(ByteRange& input, MutableByteRange& output) const;
+ bool tryBlockUncompress(ByteRange& input, MutableByteRange& output) const;
int level_;
+ bool needReset_{true};
+ std::unique_ptr<
+ ZSTD_CStream,
+ folly::static_function_deleter<ZSTD_CStream, &zstdFreeCStream>>
+ cstream_{nullptr};
+ std::unique_ptr<
+ ZSTD_DStream,
+ folly::static_function_deleter<ZSTD_DStream, &zstdFreeDStream>>
+ dstream_{nullptr};
};
static constexpr uint32_t kZSTDMagicLE = 0xFD2FB528;
-std::vector<std::string> ZSTDCodec::validPrefixes() const {
+std::vector<std::string> ZSTDStreamCodec::validPrefixes() const {
return {prefixToStringLE(kZSTDMagicLE)};
}
-bool ZSTDCodec::canUncompress(const IOBuf* data, uint64_t) const {
+bool ZSTDStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
+ const {
return dataStartsWithLE(data, kZSTDMagicLE);
}
-std::unique_ptr<Codec> ZSTDCodec::create(int level, CodecType type) {
- return make_unique<ZSTDCodec>(level, type);
+std::unique_ptr<Codec> ZSTDStreamCodec::createCodec(int level, CodecType type) {
+ return make_unique<ZSTDStreamCodec>(level, type);
}
-ZSTDCodec::ZSTDCodec(int level, CodecType type) : Codec(type) {
+std::unique_ptr<StreamCodec> ZSTDStreamCodec::createStream(
+ int level,
+ CodecType type) {
+ return make_unique<ZSTDStreamCodec>(level, type);
+}
+
+ZSTDStreamCodec::ZSTDStreamCodec(int level, CodecType type)
+ : StreamCodec(type) {
DCHECK(type == CodecType::ZSTD);
switch (level) {
case COMPRESSION_LEVEL_FASTEST:
level_ = level;
}
-bool ZSTDCodec::doNeedsUncompressedLength() const {
+bool ZSTDStreamCodec::doNeedsUncompressedLength() const {
return false;
}
+uint64_t ZSTDStreamCodec::doMaxCompressedLength(
+ uint64_t uncompressedLength) const {
+ return ZSTD_compressBound(uncompressedLength);
+}
+
void zstdThrowIfError(size_t rc) {
if (!ZSTD_isError(rc)) {
return;
to<std::string>("ZSTD returned an error: ", ZSTD_getErrorName(rc)));
}
-std::unique_ptr<IOBuf> ZSTDCodec::doCompress(const IOBuf* data) {
- // Support earlier versions of the codec (working with a single IOBuf,
- // and using ZSTD_decompress which requires ZSTD frame to contain size,
- // which isn't populated by streaming API).
- if (!data->isChained()) {
- auto out = IOBuf::createCombined(ZSTD_compressBound(data->length()));
- const auto rc = ZSTD_compress(
- out->writableData(),
- out->capacity(),
- data->data(),
- data->length(),
- level_);
- zstdThrowIfError(rc);
- out->append(rc);
- return out;
+Optional<uint64_t> ZSTDStreamCodec::doGetUncompressedLength(
+ IOBuf const* data,
+ Optional<uint64_t> uncompressedLength) const {
+ // Read decompressed size from frame if available in first IOBuf.
+ auto const decompressedSize =
+ ZSTD_getDecompressedSize(data->data(), data->length());
+ if (decompressedSize != 0) {
+ if (uncompressedLength && *uncompressedLength != decompressedSize) {
+ throw std::runtime_error("ZSTD: invalid uncompressed length");
+ }
+ uncompressedLength = decompressedSize;
}
+ return uncompressedLength;
+}
- auto zcs = ZSTD_createCStream();
+void ZSTDStreamCodec::doResetStream() {
+ needReset_ = true;
+}
+
+bool ZSTDStreamCodec::tryBlockCompress(
+ ByteRange& input,
+ MutableByteRange& output) const {
+ DCHECK(needReset_);
+ // We need to know that we have enough output space to use block compression
+ if (output.size() < ZSTD_compressBound(input.size())) {
+ return false;
+ }
+ size_t const length = ZSTD_compress(
+ output.data(), output.size(), input.data(), input.size(), level_);
+ zstdThrowIfError(length);
+ input.uncheckedAdvance(input.size());
+ output.uncheckedAdvance(length);
+ return true;
+}
+
+void ZSTDStreamCodec::resetCStream() {
+ if (!cstream_) {
+ cstream_.reset(ZSTD_createCStream());
+ if (!cstream_) {
+ throw std::bad_alloc{};
+ }
+ }
+ // Advanced API usage works for all supported versions of zstd.
+ // Required to set contentSizeFlag.
+ auto params = ZSTD_getParams(level_, uncompressedLength().value_or(0), 0);
+ params.fParams.contentSizeFlag = uncompressedLength().hasValue();
+ zstdThrowIfError(ZSTD_initCStream_advanced(
+ cstream_.get(), nullptr, 0, params, uncompressedLength().value_or(0)));
+}
+
+bool ZSTDStreamCodec::doCompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flushOp) {
+ if (needReset_) {
+ // If we are given all the input in one chunk try to use block compression
+ if (flushOp == StreamCodec::FlushOp::END &&
+ tryBlockCompress(input, output)) {
+ return true;
+ }
+ resetCStream();
+ needReset_ = false;
+ }
+ ZSTD_inBuffer in = {input.data(), input.size(), 0};
+ ZSTD_outBuffer out = {output.data(), output.size(), 0};
SCOPE_EXIT {
- ZSTD_freeCStream(zcs);
+ input.uncheckedAdvance(in.pos);
+ output.uncheckedAdvance(out.pos);
};
+ if (flushOp == StreamCodec::FlushOp::NONE || !input.empty()) {
+ zstdThrowIfError(ZSTD_compressStream(cstream_.get(), &out, &in));
+ }
+ if (in.pos == in.size && flushOp != StreamCodec::FlushOp::NONE) {
+ size_t rc;
+ switch (flushOp) {
+ case StreamCodec::FlushOp::FLUSH:
+ rc = ZSTD_flushStream(cstream_.get(), &out);
+ break;
+ case StreamCodec::FlushOp::END:
+ rc = ZSTD_endStream(cstream_.get(), &out);
+ break;
+ default:
+ throw std::invalid_argument("ZSTD: invalid FlushOp");
+ }
+ zstdThrowIfError(rc);
+ if (rc == 0) {
+ return true;
+ }
+ }
+ return false;
+}
- auto rc = ZSTD_initCStream(zcs, level_);
- zstdThrowIfError(rc);
+bool ZSTDStreamCodec::tryBlockUncompress(
+ ByteRange& input,
+ MutableByteRange& output) const {
+ DCHECK(needReset_);
+#if ZSTD_VERSION_NUMBER < 10104
+ // We require ZSTD_findFrameCompressedSize() to perform this optimization.
+ return false;
+#else
+ // We need to know the uncompressed length and have enough output space.
+ if (!uncompressedLength() || output.size() < *uncompressedLength()) {
+ return false;
+ }
+ size_t const compressedLength =
+ ZSTD_findFrameCompressedSize(input.data(), input.size());
+ zstdThrowIfError(compressedLength);
+ size_t const length = ZSTD_decompress(
+ output.data(), *uncompressedLength(), input.data(), compressedLength);
+ zstdThrowIfError(length);
+ DCHECK_EQ(length, *uncompressedLength());
+ input.uncheckedAdvance(compressedLength);
+ output.uncheckedAdvance(length);
+ return true;
+#endif
+}
- Cursor cursor(data);
- auto result = IOBuf::createCombined(ZSTD_compressBound(cursor.totalLength()));
-
- ZSTD_outBuffer out;
- out.dst = result->writableTail();
- out.size = result->capacity();
- out.pos = 0;
-
- for (auto buffer = cursor.peekBytes(); !buffer.empty();) {
- ZSTD_inBuffer in;
- in.src = buffer.data();
- in.size = buffer.size();
- for (in.pos = 0; in.pos != in.size;) {
- rc = ZSTD_compressStream(zcs, &out, &in);
- zstdThrowIfError(rc);
+void ZSTDStreamCodec::resetDStream() {
+ if (!dstream_) {
+ dstream_.reset(ZSTD_createDStream());
+ if (!dstream_) {
+ throw std::bad_alloc{};
}
- cursor.skip(in.size);
- buffer = cursor.peekBytes();
}
+ zstdThrowIfError(ZSTD_initDStream(dstream_.get()));
+}
- rc = ZSTD_endStream(zcs, &out);
+bool ZSTDStreamCodec::doUncompressStream(
+ ByteRange& input,
+ MutableByteRange& output,
+ StreamCodec::FlushOp flushOp) {
+ if (needReset_) {
+ // If we are given all the input in one chunk try to use block uncompression
+ if (flushOp == StreamCodec::FlushOp::END &&
+ tryBlockUncompress(input, output)) {
+ return true;
+ }
+ resetDStream();
+ needReset_ = false;
+ }
+ ZSTD_inBuffer in = {input.data(), input.size(), 0};
+ ZSTD_outBuffer out = {output.data(), output.size(), 0};
+ SCOPE_EXIT {
+ input.uncheckedAdvance(in.pos);
+ output.uncheckedAdvance(out.pos);
+ };
+ size_t const rc = ZSTD_decompressStream(dstream_.get(), &out, &in);
zstdThrowIfError(rc);
- CHECK_EQ(rc, 0);
+ return rc == 0;
+}
- result->append(out.pos);
- return result;
+#endif // FOLLY_HAVE_LIBZSTD
+
+#if FOLLY_HAVE_LIBBZ2
+
+class Bzip2Codec final : public Codec {
+ public:
+ static std::unique_ptr<Codec> create(int level, CodecType type);
+ explicit Bzip2Codec(int level, CodecType type);
+
+ std::vector<std::string> validPrefixes() const override;
+ bool canUncompress(IOBuf const* data, Optional<uint64_t> uncompressedLength)
+ const override;
+
+ private:
+ uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
+ std::unique_ptr<IOBuf> doCompress(IOBuf const* data) override;
+ std::unique_ptr<IOBuf> doUncompress(
+ IOBuf const* data,
+ Optional<uint64_t> uncompressedLength) override;
+
+ int level_;
+};
+
+/* static */ std::unique_ptr<Codec> Bzip2Codec::create(
+ int level,
+ CodecType type) {
+ return std::make_unique<Bzip2Codec>(level, type);
}
-static std::unique_ptr<IOBuf> zstdUncompressBuffer(
- const IOBuf* data,
- uint64_t uncompressedLength) {
- // Check preconditions
- DCHECK(!data->isChained());
- DCHECK(uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH);
-
- auto uncompressed = IOBuf::create(uncompressedLength);
- const auto decompressedSize = ZSTD_decompress(
- uncompressed->writableTail(),
- uncompressed->tailroom(),
- data->data(),
- data->length());
- zstdThrowIfError(decompressedSize);
- if (decompressedSize != uncompressedLength) {
- throw std::runtime_error("ZSTD: invalid uncompressed length");
+Bzip2Codec::Bzip2Codec(int level, CodecType type) : Codec(type) {
+ DCHECK(type == CodecType::BZIP2);
+ switch (level) {
+ case COMPRESSION_LEVEL_FASTEST:
+ level = 1;
+ break;
+ case COMPRESSION_LEVEL_DEFAULT:
+ level = 9;
+ break;
+ case COMPRESSION_LEVEL_BEST:
+ level = 9;
+ break;
+ }
+ if (level < 1 || level > 9) {
+ throw std::invalid_argument(
+ to<std::string>("Bzip2: invalid level: ", level));
}
- uncompressed->append(decompressedSize);
- return uncompressed;
+ level_ = level;
}
-static std::unique_ptr<IOBuf> zstdUncompressStream(
- const IOBuf* data,
- uint64_t uncompressedLength) {
- auto zds = ZSTD_createDStream();
- SCOPE_EXIT {
- ZSTD_freeDStream(zds);
- };
+static uint32_t constexpr kBzip2MagicLE = 0x685a42;
+static uint64_t constexpr kBzip2MagicBytes = 3;
- auto rc = ZSTD_initDStream(zds);
- zstdThrowIfError(rc);
+std::vector<std::string> Bzip2Codec::validPrefixes() const {
+ return {prefixToStringLE(kBzip2MagicLE, kBzip2MagicBytes)};
+}
+
+bool Bzip2Codec::canUncompress(IOBuf const* data, Optional<uint64_t>) const {
+ return dataStartsWithLE(data, kBzip2MagicLE, kBzip2MagicBytes);
+}
- ZSTD_outBuffer out{};
- ZSTD_inBuffer in{};
+uint64_t Bzip2Codec::doMaxCompressedLength(uint64_t uncompressedLength) const {
+ // http://www.bzip.org/1.0.5/bzip2-manual-1.0.5.html#bzbufftobuffcompress
+ // To guarantee that the compressed data will fit in its buffer, allocate an
+ // output buffer of size 1% larger than the uncompressed data, plus six
+ // hundred extra bytes.
+ return uncompressedLength + uncompressedLength / 100 + 600;
+}
- auto outputSize = ZSTD_DStreamOutSize();
- if (uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH) {
- outputSize = uncompressedLength;
+static bz_stream createBzStream() {
+ bz_stream stream;
+ stream.bzalloc = nullptr;
+ stream.bzfree = nullptr;
+ stream.opaque = nullptr;
+ stream.next_in = stream.next_out = nullptr;
+ stream.avail_in = stream.avail_out = 0;
+ return stream;
+}
+
+// Throws on error condition, otherwise returns the code.
+static int bzCheck(int const rc) {
+ switch (rc) {
+ case BZ_OK:
+ case BZ_RUN_OK:
+ case BZ_FLUSH_OK:
+ case BZ_FINISH_OK:
+ case BZ_STREAM_END:
+ return rc;
+ default:
+ throw std::runtime_error(to<std::string>("Bzip2 error: ", rc));
}
+}
- IOBufQueue queue(IOBufQueue::cacheChainLength());
+static std::unique_ptr<IOBuf> addOutputBuffer(
+ bz_stream* stream,
+ uint64_t const bufferLength) {
+ DCHECK_LE(bufferLength, std::numeric_limits<unsigned>::max());
+ DCHECK_EQ(stream->avail_out, 0);
- Cursor cursor(data);
- for (rc = 0;;) {
- if (in.pos == in.size) {
- auto buffer = cursor.peekBytes();
- in.src = buffer.data();
- in.size = buffer.size();
- in.pos = 0;
- cursor.skip(in.size);
- if (rc > 1 && in.size == 0) {
- throw std::runtime_error(to<std::string>("ZSTD: incomplete input"));
- }
- }
- if (out.pos == out.size) {
- if (out.pos != 0) {
- queue.postallocate(out.pos);
+ auto buf = IOBuf::create(bufferLength);
+ buf->append(buf->capacity());
+
+ stream->next_out = reinterpret_cast<char*>(buf->writableData());
+ stream->avail_out = buf->length();
+
+ return buf;
+}
+
+std::unique_ptr<IOBuf> Bzip2Codec::doCompress(IOBuf const* data) {
+ bz_stream stream = createBzStream();
+ bzCheck(BZ2_bzCompressInit(&stream, level_, 0, 0));
+ SCOPE_EXIT {
+ bzCheck(BZ2_bzCompressEnd(&stream));
+ };
+
+ uint64_t const uncompressedLength = data->computeChainDataLength();
+ uint64_t const maxCompressedLen = maxCompressedLength(uncompressedLength);
+ uint64_t constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MiB
+ uint64_t constexpr kDefaultBufferLength = uint64_t(4) << 20;
+
+ auto out = addOutputBuffer(
+ &stream,
+ maxCompressedLen <= kMaxSingleStepLength ? maxCompressedLen
+ : kDefaultBufferLength);
+
+ for (auto range : *data) {
+ while (!range.empty()) {
+ auto const inSize = std::min<size_t>(range.size(), kMaxSingleStepLength);
+ stream.next_in =
+ const_cast<char*>(reinterpret_cast<char const*>(range.data()));
+ stream.avail_in = inSize;
+
+ if (stream.avail_out == 0) {
+ out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
}
- auto buffer = queue.preallocate(outputSize, outputSize);
- out.dst = buffer.first;
- out.size = buffer.second;
- out.pos = 0;
- outputSize = ZSTD_DStreamOutSize();
- }
- rc = ZSTD_decompressStream(zds, &out, &in);
- zstdThrowIfError(rc);
- if (rc == 0) {
- break;
+
+ bzCheck(BZ2_bzCompress(&stream, BZ_RUN));
+ range.uncheckedAdvance(inSize - stream.avail_in);
}
}
- if (out.pos != 0) {
- queue.postallocate(out.pos);
- }
- if (in.pos != in.size || !cursor.isAtEnd()) {
- throw std::runtime_error("ZSTD: junk after end of data");
- }
- if (uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH &&
- queue.chainLength() != uncompressedLength) {
- throw std::runtime_error("ZSTD: invalid uncompressed length");
- }
+ do {
+ if (stream.avail_out == 0) {
+ out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
+ }
+ } while (bzCheck(BZ2_bzCompress(&stream, BZ_FINISH)) != BZ_STREAM_END);
- return queue.move();
+ out->prev()->trimEnd(stream.avail_out);
+
+ return out;
}
-std::unique_ptr<IOBuf> ZSTDCodec::doUncompress(
+std::unique_ptr<IOBuf> Bzip2Codec::doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) {
- {
- // Read decompressed size from frame if available in first IOBuf.
- const auto decompressedSize =
- ZSTD_getDecompressedSize(data->data(), data->length());
- if (decompressedSize != 0) {
- if (uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH &&
- uncompressedLength != decompressedSize) {
- throw std::runtime_error("ZSTD: invalid uncompressed length");
+ Optional<uint64_t> uncompressedLength) {
+ bz_stream stream = createBzStream();
+ bzCheck(BZ2_bzDecompressInit(&stream, 0, 0));
+ SCOPE_EXIT {
+ bzCheck(BZ2_bzDecompressEnd(&stream));
+ };
+
+ uint64_t constexpr kMaxSingleStepLength = uint64_t(64) << 20; // 64 MiB
+ uint64_t const kBlockSize = uint64_t(100) << 10; // 100 KiB
+ uint64_t const kDefaultBufferLength =
+ computeBufferLength(data->computeChainDataLength(), kBlockSize);
+
+ auto out = addOutputBuffer(
+ &stream,
+ ((uncompressedLength && *uncompressedLength <= kMaxSingleStepLength)
+ ? *uncompressedLength
+ : kDefaultBufferLength));
+
+ int rc = BZ_OK;
+ for (auto range : *data) {
+ while (!range.empty()) {
+ auto const inSize = std::min<size_t>(range.size(), kMaxSingleStepLength);
+ stream.next_in =
+ const_cast<char*>(reinterpret_cast<char const*>(range.data()));
+ stream.avail_in = inSize;
+
+ if (stream.avail_out == 0) {
+ out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
}
- uncompressedLength = decompressedSize;
+
+ rc = bzCheck(BZ2_bzDecompress(&stream));
+ range.uncheckedAdvance(inSize - stream.avail_in);
}
}
- // Faster to decompress using ZSTD_decompress() if we can.
- if (uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH && !data->isChained()) {
- return zstdUncompressBuffer(data, uncompressedLength);
+ while (rc != BZ_STREAM_END) {
+ if (stream.avail_out == 0) {
+ out->prependChain(addOutputBuffer(&stream, kDefaultBufferLength));
+ }
+
+ rc = bzCheck(BZ2_bzDecompress(&stream));
}
- // Fall back to slower streaming decompression.
- return zstdUncompressStream(data, uncompressedLength);
+
+ out->prev()->trimEnd(stream.avail_out);
+
+ uint64_t const totalOut =
+ (uint64_t(stream.total_out_hi32) << 32) + stream.total_out_lo32;
+ if (uncompressedLength && uncompressedLength != totalOut) {
+ throw std::runtime_error("Bzip2 error: Invalid uncompressed length");
+ }
+
+ return out;
}
-#endif // FOLLY_HAVE_LIBZSTD
+#endif // FOLLY_HAVE_LIBBZ2
/**
* Automatic decompression
explicit AutomaticCodec(std::vector<std::unique_ptr<Codec>> customCodecs);
std::vector<std::string> validPrefixes() const override;
- bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
+ bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
const override;
private:
bool doNeedsUncompressedLength() const override;
uint64_t doMaxUncompressedLength() const override;
+ uint64_t doMaxCompressedLength(uint64_t) const override {
+ throw std::runtime_error(
+ "AutomaticCodec error: maxCompressedLength() not supported.");
+ }
std::unique_ptr<IOBuf> doCompress(const IOBuf*) override {
throw std::runtime_error("AutomaticCodec error: compress() not supported.");
}
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) override;
+ Optional<uint64_t> uncompressedLength) override;
void addCodecIfSupported(CodecType type);
bool AutomaticCodec::canUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) const {
+ Optional<uint64_t> uncompressedLength) const {
return std::any_of(
codecs_.begin(),
codecs_.end(),
/* static */ std::unique_ptr<Codec> AutomaticCodec::create(
std::vector<std::unique_ptr<Codec>> customCodecs) {
- return make_unique<AutomaticCodec>(std::move(customCodecs));
+ return std::make_unique<AutomaticCodec>(std::move(customCodecs));
}
AutomaticCodec::AutomaticCodec(std::vector<std::unique_ptr<Codec>> customCodecs)
addCodecIfSupported(CodecType::ZLIB);
addCodecIfSupported(CodecType::GZIP);
addCodecIfSupported(CodecType::LZMA2);
+ addCodecIfSupported(CodecType::BZIP2);
if (kIsDebug) {
checkCompatibleCodecs();
}
std::unique_ptr<IOBuf> AutomaticCodec::doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) {
+ Optional<uint64_t> uncompressedLength) {
for (auto&& codec : codecs_) {
if (codec->canUncompress(data, uncompressedLength)) {
return codec->uncompress(data, uncompressedLength);
throw std::runtime_error("AutomaticCodec error: Unknown compressed data");
}
-} // namespace
+using CodecFactory = std::unique_ptr<Codec> (*)(int, CodecType);
+using StreamCodecFactory = std::unique_ptr<StreamCodec> (*)(int, CodecType);
+struct Factory {
+ CodecFactory codec;
+ StreamCodecFactory stream;
+};
-typedef std::unique_ptr<Codec> (*CodecFactory)(int, CodecType);
-static constexpr CodecFactory
+constexpr Factory
codecFactories[static_cast<size_t>(CodecType::NUM_CODEC_TYPES)] = {
- nullptr, // USER_DEFINED
- NoCompressionCodec::create,
+ {}, // USER_DEFINED
+ {NoCompressionCodec::create, nullptr},
#if FOLLY_HAVE_LIBLZ4
- LZ4Codec::create,
+ {LZ4Codec::create, nullptr},
#else
- nullptr,
+ {},
#endif
#if FOLLY_HAVE_LIBSNAPPY
- SnappyCodec::create,
+ {SnappyCodec::create, nullptr},
#else
- nullptr,
+ {},
#endif
#if FOLLY_HAVE_LIBZ
- ZlibCodec::create,
+ {ZlibCodec::create, nullptr},
#else
- nullptr,
+ {},
#endif
#if FOLLY_HAVE_LIBLZ4
- LZ4Codec::create,
+ {LZ4Codec::create, nullptr},
#else
- nullptr,
+ {},
#endif
#if FOLLY_HAVE_LIBLZMA
- LZMA2Codec::create,
- LZMA2Codec::create,
+ {LZMA2Codec::create, nullptr},
+ {LZMA2Codec::create, nullptr},
#else
- nullptr,
- nullptr,
+ {},
+ {},
#endif
#if FOLLY_HAVE_LIBZSTD
- ZSTDCodec::create,
+ {ZSTDStreamCodec::createCodec, ZSTDStreamCodec::createStream},
#else
- nullptr,
+ {},
#endif
#if FOLLY_HAVE_LIBZ
- ZlibCodec::create,
+ {ZlibCodec::create, nullptr},
#else
- nullptr,
+ {},
#endif
#if (FOLLY_HAVE_LIBLZ4 && LZ4_VERSION_NUMBER >= 10301)
- LZ4FrameCodec::create,
+ {LZ4FrameCodec::create, nullptr},
+#else
+ {},
+#endif
+
+#if FOLLY_HAVE_LIBBZ2
+ {Bzip2Codec::create, nullptr},
#else
- nullptr,
+ {},
#endif
};
-bool hasCodec(CodecType type) {
- size_t idx = static_cast<size_t>(type);
+Factory const& getFactory(CodecType type) {
+ size_t const idx = static_cast<size_t>(type);
if (idx >= static_cast<size_t>(CodecType::NUM_CODEC_TYPES)) {
throw std::invalid_argument(
to<std::string>("Compression type ", idx, " invalid"));
}
- return codecFactories[idx] != nullptr;
+ return codecFactories[idx];
+}
+} // namespace
+
+bool hasCodec(CodecType type) {
+ return getFactory(type).codec != nullptr;
}
std::unique_ptr<Codec> getCodec(CodecType type, int level) {
- size_t idx = static_cast<size_t>(type);
- if (idx >= static_cast<size_t>(CodecType::NUM_CODEC_TYPES)) {
+ auto const factory = getFactory(type).codec;
+ if (!factory) {
throw std::invalid_argument(
- to<std::string>("Compression type ", idx, " invalid"));
+ to<std::string>("Compression type ", type, " not supported"));
}
- auto factory = codecFactories[idx];
+ auto codec = (*factory)(level, type);
+ DCHECK(codec->type() == type);
+ return codec;
+}
+
+bool hasStreamCodec(CodecType type) {
+ return getFactory(type).stream != nullptr;
+}
+
+std::unique_ptr<StreamCodec> getStreamCodec(CodecType type, int level) {
+ auto const factory = getFactory(type).stream;
if (!factory) {
- throw std::invalid_argument(to<std::string>(
- "Compression type ", idx, " not supported"));
+ throw std::invalid_argument(
+ to<std::string>("Compression type ", type, " not supported"));
}
auto codec = (*factory)(level, type);
- DCHECK_EQ(static_cast<size_t>(codec->type()), idx);
+ DCHECK(codec->type() == type);
return codec;
}