/*
- * Copyright 2014 Facebook, Inc.
+ * Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <lzma.h>
#endif
+#if FOLLY_HAVE_LIBZSTD
+#include <zstd.h>
+#endif
+
#include <folly/Conv.h>
#include <folly/Memory.h>
#include <folly/Portability.h>
// Ensure consistent behavior in the nullptr case
std::unique_ptr<IOBuf> Codec::compress(const IOBuf* data) {
- return !data->empty() ? doCompress(data) : IOBuf::create(0);
+ // Size the input with computeChainDataLength() so the empty-input and
+ // size-limit checks below are made against that total before the
+ // codec-specific hook runs.
+ uint64_t len = data->computeChainDataLength();
+ if (len == 0) {
+ // Empty input never reaches doCompress(); the result comes from
+ // IOBuf::create(0).
+ return IOBuf::create(0);
+ } else if (len > maxUncompressedLength()) {
+ // Input larger than the codec's advertised maximum is rejected up front.
+ throw std::runtime_error("Codec: uncompressed length too large");
+ }
+
+ return doCompress(data);
}
std::unique_ptr<IOBuf> Codec::uncompress(const IOBuf* data,
}
uint64_t Codec::doMaxUncompressedLength() const {
- return std::numeric_limits<uint64_t>::max() - 1;
+ // Default for codecs that do not override this hook: return the named
+ // UNLIMITED_UNCOMPRESSED_LENGTH constant instead of a hand-computed limit.
+ return UNLIMITED_UNCOMPRESSED_LENGTH;
}
namespace {
/**
* No compression
*/
-class NoCompressionCodec FOLLY_FINAL : public Codec {
+// Codec subclass for the "no compression" case. It overrides only the
+// doCompress()/doUncompress() hooks; every other Codec hook keeps the
+// base-class default.
+class NoCompressionCodec final : public Codec {
public:
+ // Factory with the (level, type) signature shared by the codecs in this file.
static std::unique_ptr<Codec> create(int level, CodecType type);
explicit NoCompressionCodec(int level, CodecType type);
private:
- std::unique_ptr<IOBuf> doCompress(const IOBuf* data) FOLLY_OVERRIDE;
+ std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) FOLLY_OVERRIDE;
+ uint64_t uncompressedLength) override;
};
std::unique_ptr<Codec> NoCompressionCodec::create(int level, CodecType type) {
return data->clone();
}
+#if (FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA)
+
namespace {
+// Encodes `val` as a varint at the writable tail of `out`, then extends the
+// buffer's length by the byte count encodeVarint() returns.
+// NOTE(review): no tailroom check is visible here; presumably the caller
+// guarantees enough space for the encoded value -- confirm.
void encodeVarintToIOBuf(uint64_t val, folly::IOBuf* out) {
out->append(encodeVarint(val, out->writableTail()));
}
-uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) {
- // Must have enough room in *this* buffer.
- auto p = cursor.peek();
- folly::ByteRange range(p.first, p.second);
- uint64_t val = decodeVarint(range);
- cursor.skip(range.data() - p.first);
+// Decodes a varint by reading one byte at a time from `cursor`: the low 7 bits
+// of each byte are payload (least-significant group first) and a set high bit
+// means another byte follows. Throws std::invalid_argument if the byte read in
+// the final iteration (shift 63) still has its high bit set.
+// NOTE(review): reading through cursor.read<>() presumably lets the value
+// straddle buffers in a chain, which the removed peek()-based version could
+// not ("Must have enough room in *this* buffer") -- confirm.
+inline uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) {
+ uint64_t val = 0;
+ int8_t b = 0;
+ for (int shift = 0; shift <= 63; shift += 7) {
+ b = cursor.read<int8_t>();
+ val |= static_cast<uint64_t>(b & 0x7f) << shift;
+ // b is signed, so a non-negative value means the high (continuation) bit
+ // is clear and this was the last byte.
+ if (b >= 0) {
+ break;
+ }
+ }
+ // Still negative here means the loop ran out of shifts without seeing a
+ // terminating byte.
+ if (b < 0) {
+ throw std::invalid_argument("Invalid varint value. Too big.");
+ }
return val;
}
} // namespace
+#endif // FOLLY_HAVE_LIBLZ4 || FOLLY_HAVE_LIBLZMA
+
#if FOLLY_HAVE_LIBLZ4
/**
* LZ4 compression
*/
-class LZ4Codec FOLLY_FINAL : public Codec {
+// Codec subclass backing the LZ4 codec types. Besides the compress/uncompress
+// hooks it overrides the "needs uncompressed length" and "max uncompressed
+// length" hooks.
+class LZ4Codec final : public Codec {
public:
static std::unique_ptr<Codec> create(int level, CodecType type);
explicit LZ4Codec(int level, CodecType type);
private:
- bool doNeedsUncompressedLength() const FOLLY_OVERRIDE;
- uint64_t doMaxUncompressedLength() const FOLLY_OVERRIDE;
+ bool doNeedsUncompressedLength() const override;
+ uint64_t doMaxUncompressedLength() const override;
+ // True only for the LZ4_VARINT_SIZE codec type.
+ // NOTE(review): presumably that variant stores the uncompressed size as a
+ // varint prefix in the compressed stream -- confirm against doCompress().
bool encodeSize() const { return type() == CodecType::LZ4_VARINT_SIZE; }
- std::unique_ptr<IOBuf> doCompress(const IOBuf* data) FOLLY_OVERRIDE;
+ std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) FOLLY_OVERRIDE;
+ uint64_t uncompressedLength) override;
+ // NOTE(review): presumably selects LZ4's high-compression mode; where it is
+ // set is not visible in this hunk.
bool highCompression_;
};
return !encodeSize();
}
+// The value comes from lz4.h in lz4-r117, but older versions of lz4 don't
+// define LZ4_MAX_INPUT_SIZE (even though the max size is the same), so do it
+// here.
+#ifndef LZ4_MAX_INPUT_SIZE
+# define LZ4_MAX_INPUT_SIZE 0x7E000000
+#endif
+
uint64_t LZ4Codec::doMaxUncompressedLength() const {
- // From lz4.h: "Max supported value is ~1.9GB"; I wish we had something
- // more accurate.
- return 1.8 * (uint64_t(1) << 30);
+ // Use the exact LZ4_MAX_INPUT_SIZE limit (from lz4.h, or the fallback
+ // #define just above for older lz4 releases) in place of the former
+ // floating-point approximation.
+ return LZ4_MAX_INPUT_SIZE;
}
std::unique_ptr<IOBuf> LZ4Codec::doCompress(const IOBuf* data) {
}
} else {
actualUncompressedLength = uncompressedLength;
- DCHECK_NE(actualUncompressedLength, UNKNOWN_UNCOMPRESSED_LENGTH);
+ if (actualUncompressedLength == UNKNOWN_UNCOMPRESSED_LENGTH ||
+ actualUncompressedLength > maxUncompressedLength()) {
+ throw std::runtime_error("LZ4Codec: invalid uncompressed length");
+ }
}
+ auto sp = StringPiece{cursor.peekBytes()};
auto out = IOBuf::create(actualUncompressedLength);
- auto p = cursor.peek();
- int n = LZ4_uncompress(reinterpret_cast<const char*>(p.first),
- reinterpret_cast<char*>(out->writableTail()),
- actualUncompressedLength);
- if (n != p.second) {
+ int n = LZ4_decompress_safe(
+ sp.data(),
+ reinterpret_cast<char*>(out->writableTail()),
+ sp.size(),
+ actualUncompressedLength);
+
+ if (n < 0 || uint64_t(n) != actualUncompressedLength) {
throw std::runtime_error(to<std::string>(
"LZ4 decompression returned invalid value ", n));
}
/**
* Implementation of snappy::Source that reads from a IOBuf chain.
*/
-class IOBufSnappySource FOLLY_FINAL : public snappy::Source {
+class IOBufSnappySource final : public snappy::Source {
public:
explicit IOBufSnappySource(const IOBuf* data);
- size_t Available() const FOLLY_OVERRIDE;
- const char* Peek(size_t* len) FOLLY_OVERRIDE;
- void Skip(size_t n) FOLLY_OVERRIDE;
+ size_t Available() const override;
+ const char* Peek(size_t* len) override;
+ void Skip(size_t n) override;
private:
size_t available_;
io::Cursor cursor_;
}
const char* IOBufSnappySource::Peek(size_t* len) {
- auto p = cursor_.peek();
- *len = p.second;
- return reinterpret_cast<const char*>(p.first);
+ // Return a pointer to the bytes peekBytes() reports at the cursor's current
+ // position and store their count in *len. The cursor is not advanced here.
+ // Viewing the range as a StringPiece yields a const char* directly, so the
+ // reinterpret_cast of the old version is no longer needed.
+ auto sp = StringPiece{cursor_.peekBytes()};
+ *len = sp.size();
+ return sp.data();
}
void IOBufSnappySource::Skip(size_t n) {
available_ -= n;
}
-class SnappyCodec FOLLY_FINAL : public Codec {
+// Codec subclass for Snappy. It overrides the max-uncompressed-length hook and
+// the compress/uncompress hooks; the needs-uncompressed-length hook is not
+// overridden here.
+class SnappyCodec final : public Codec {
public:
static std::unique_ptr<Codec> create(int level, CodecType type);
explicit SnappyCodec(int level, CodecType type);
private:
- uint64_t doMaxUncompressedLength() const FOLLY_OVERRIDE;
- std::unique_ptr<IOBuf> doCompress(const IOBuf* data) FOLLY_OVERRIDE;
+ uint64_t doMaxUncompressedLength() const override;
+ std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) FOLLY_OVERRIDE;
+ uint64_t uncompressedLength) override;
};
std::unique_ptr<Codec> SnappyCodec::create(int level, CodecType type) {
/**
* Zlib codec
*/
-class ZlibCodec FOLLY_FINAL : public Codec {
+class ZlibCodec final : public Codec {
public:
static std::unique_ptr<Codec> create(int level, CodecType type);
explicit ZlibCodec(int level, CodecType type);
private:
- std::unique_ptr<IOBuf> doCompress(const IOBuf* data) FOLLY_OVERRIDE;
+ std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) FOLLY_OVERRIDE;
+ uint64_t uncompressedLength) override;
std::unique_ptr<IOBuf> addOutputBuffer(z_stream* stream, uint32_t length);
bool doInflate(z_stream* stream, IOBuf* head, uint32_t bufferLength);
}
ZlibCodec::ZlibCodec(int level, CodecType type) : Codec(type) {
- DCHECK(type == CodecType::ZLIB);
+ DCHECK(type == CodecType::ZLIB || type == CodecType::GZIP);
switch (level) {
case COMPRESSION_LEVEL_FASTEST:
level = 1;
stream.zfree = nullptr;
stream.opaque = nullptr;
- int rc = deflateInit(&stream, level_);
+ // Using deflateInit2() to support gzip. "The windowBits parameter is the
+ // base two logarithm of the maximum window size (...) The default value is
+ // 15 (...) Add 16 to windowBits to write a simple gzip header and trailer
+ // around the compressed data instead of a zlib wrapper. The gzip header
+ // will have no file name, no extra data, no comment, no modification time
+ // (set to zero), no header crc, and the operating system will be set to 255
+ // (unknown)."
+ int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
+ // All other parameters (method, memLevel, strategy) get default values from
+ // the zlib manual.
+ int rc = deflateInit2(&stream,
+ level_,
+ Z_DEFLATED,
+ windowBits,
+ /* memLevel */ 8,
+ Z_DEFAULT_STRATEGY);
if (rc != Z_OK) {
throw std::runtime_error(to<std::string>(
"ZlibCodec: deflateInit error: ", rc, ": ", stream.msg));
defaultBufferLength));
for (auto& range : *data) {
- if (range.empty()) {
- continue;
- }
-
- stream.next_in = const_cast<uint8_t*>(range.data());
- stream.avail_in = range.size();
-
- while (stream.avail_in != 0) {
- if (stream.avail_out == 0) {
- out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
+ uint64_t remaining = range.size();
+ uint64_t written = 0;
+ while (remaining) {
+ uint32_t step = (remaining > maxSingleStepLength ?
+ maxSingleStepLength : remaining);
+ stream.next_in = const_cast<uint8_t*>(range.data() + written);
+ stream.avail_in = step;
+ remaining -= step;
+ written += step;
+
+ while (stream.avail_in != 0) {
+ if (stream.avail_out == 0) {
+ out->prependChain(addOutputBuffer(&stream, defaultBufferLength));
+ }
+
+ rc = deflate(&stream, Z_NO_FLUSH);
+
+ CHECK_EQ(rc, Z_OK) << stream.msg;
}
-
- rc = deflate(&stream, Z_NO_FLUSH);
-
- CHECK_EQ(rc, Z_OK) << stream.msg;
}
}
stream.zfree = nullptr;
stream.opaque = nullptr;
- int rc = inflateInit(&stream);
+ // "The windowBits parameter is the base two logarithm of the maximum window
+ // size (...) The default value is 15 (...) add 16 to decode only the gzip
+ // format (the zlib format will return a Z_DATA_ERROR)."
+ int windowBits = 15 + (type() == CodecType::GZIP ? 16 : 0);
+ int rc = inflateInit2(&stream, windowBits);
if (rc != Z_OK) {
throw std::runtime_error(to<std::string>(
"ZlibCodec: inflateInit error: ", rc, ": ", stream.msg));
/**
* LZMA2 compression
*/
-class LZMA2Codec FOLLY_FINAL : public Codec {
+class LZMA2Codec final : public Codec {
public:
static std::unique_ptr<Codec> create(int level, CodecType type);
explicit LZMA2Codec(int level, CodecType type);
private:
- bool doNeedsUncompressedLength() const FOLLY_OVERRIDE;
- uint64_t doMaxUncompressedLength() const FOLLY_OVERRIDE;
+ bool doNeedsUncompressedLength() const override;
+ uint64_t doMaxUncompressedLength() const override;
bool encodeSize() const { return type() == CodecType::LZMA2_VARINT_SIZE; }
- std::unique_ptr<IOBuf> doCompress(const IOBuf* data) FOLLY_OVERRIDE;
+ std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
std::unique_ptr<IOBuf> doUncompress(
const IOBuf* data,
- uint64_t uncompressedLength) FOLLY_OVERRIDE;
+ uint64_t uncompressedLength) override;
std::unique_ptr<IOBuf> addOutputBuffer(lzma_stream* stream, size_t length);
bool doInflate(lzma_stream* stream, IOBuf* head, size_t bufferLength);
defaultBufferLength));
bool streamEnd = false;
- auto buf = cursor.peek();
- while (buf.second != 0) {
- stream.next_in = const_cast<uint8_t*>(buf.first);
- stream.avail_in = buf.second;
+ auto buf = cursor.peekBytes();
+ while (!buf.empty()) {
+ stream.next_in = const_cast<uint8_t*>(buf.data());
+ stream.avail_in = buf.size();
while (stream.avail_in != 0) {
if (streamEnd) {
streamEnd = doInflate(&stream, out.get(), defaultBufferLength);
}
- cursor.skip(buf.second);
- buf = cursor.peek();
+ cursor.skip(buf.size());
+ buf = cursor.peekBytes();
}
while (!streamEnd) {
#endif // FOLLY_HAVE_LIBLZMA
-typedef std::unique_ptr<Codec> (*CodecFactory)(int, CodecType);
+// Use #if (not #ifdef) so this guard agrees with the <zstd.h> include guard
+// and the factory-table guard: a configuration that defines
+// FOLLY_HAVE_LIBZSTD as 0 must not compile this section.
+#if FOLLY_HAVE_LIBZSTD
+
+/**
+ * ZSTD_BETA compression
+ *
+ * Codec subclass backed by zstd's one-shot ZSTD_compress() /
+ * ZSTD_decompress() calls. It overrides the needs-uncompressed-length hook
+ * and the compress/uncompress hooks.
+ */
+class ZSTDCodec final : public Codec {
+ public:
+  // Factory with the (level, type) signature expected by the codec table in
+  // getCodec().
+  static std::unique_ptr<Codec> create(int level, CodecType type);
+  explicit ZSTDCodec(int level, CodecType type);
+
+ private:
+  bool doNeedsUncompressedLength() const override;
+  std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
+  std::unique_ptr<IOBuf> doUncompress(
+      const IOBuf* data,
+      uint64_t uncompressedLength) override;
+
+  // zstd compression level handed to ZSTD_compress(); 1 unless the
+  // constructor selects something else.
+  int level_{1};
+};
+
+// Factory entry referenced from the codec table in getCodec(): builds a
+// ZSTDCodec and hands it back through the Codec base-class pointer.
+std::unique_ptr<Codec> ZSTDCodec::create(int level, CodecType type) {
+  std::unique_ptr<Codec> codec = make_unique<ZSTDCodec>(level, type);
+  return codec;
+}
+
+/**
+ * Translates the generic `level` argument into a zstd compression level.
+ *
+ * The symbolic levels map to fixed zstd levels (FASTEST and DEFAULT -> 1,
+ * BEST -> 19). Any other positive value is treated as an explicit zstd level
+ * and used as given; previously such values were silently ignored and level 1
+ * was always used. Unrecognised non-positive values keep the default of 1, so
+ * no input that used to construct successfully now fails.
+ */
+ZSTDCodec::ZSTDCodec(int level, CodecType type) : Codec(type) {
+  DCHECK(type == CodecType::ZSTD_BETA);
+  switch (level) {
+    case COMPRESSION_LEVEL_FASTEST:
+    case COMPRESSION_LEVEL_DEFAULT:
+      level_ = 1;
+      break;
+    case COMPRESSION_LEVEL_BEST:
+      level_ = 19;
+      break;
+    default:
+      // Explicit numeric level requested by the caller.
+      // NOTE(review): values above zstd's maximum are passed through
+      // unchanged; presumably the linked zstd clamps them -- confirm.
+      if (level > 0) {
+        level_ = level;
+      }
+      break;
+  }
+}
+
+// doUncompress() sizes its output buffer from the caller-supplied length, so
+// this codec always reports that the uncompressed length is required.
+bool ZSTDCodec::doNeedsUncompressedLength() const {
+  const bool lengthRequired = true;
+  return lengthRequired;
+}
+
+/**
+ * Compresses `data` with ZSTD_compress() into one newly allocated IOBuf.
+ *
+ * ZSTD_compress() takes a single contiguous input, so a chained IOBuf is
+ * first flattened into a private coalesced clone. Previously only the head
+ * buffer of a chain was compressed and the rest was silently dropped.
+ *
+ * @param data  input buffer (possibly chained); not modified.
+ * @return      buffer holding the compressed bytes.
+ * @throws std::runtime_error if zstd reports an error.
+ */
+std::unique_ptr<IOBuf> ZSTDCodec::doCompress(const IOBuf* data) {
+  // Keeps the flattened copy alive for the duration of the call; stays empty
+  // (no copy made) when the input is already a single buffer.
+  std::unique_ptr<IOBuf> coalesced;
+  if (data->isChained()) {
+    coalesced = data->clone();
+    coalesced->coalesce();
+    data = coalesced.get();
+  }
+
+  size_t rc;
+  size_t maxCompressedLength = ZSTD_compressBound(data->length());
+  auto out = IOBuf::createCombined(maxCompressedLength);
+
+  CHECK_EQ(out->length(), 0);
+
+  rc = ZSTD_compress(out->writableTail(),
+                     out->capacity(),
+                     data->data(),
+                     data->length(),
+                     level_);
+
+  if (ZSTD_isError(rc)) {
+    throw std::runtime_error(to<std::string>(
+          "ZSTD compression returned an error: ",
+          ZSTD_getErrorName(rc)));
+  }
+
+  // rc is the number of compressed bytes written into the tail of `out`.
+  out->append(rc);
+  CHECK_EQ(out->length(), rc);
+
+  return out;
+}
+
+/**
+ * Decompresses `data` with ZSTD_decompress() into one newly allocated IOBuf
+ * whose capacity is at least `uncompressedLength`.
+ *
+ * ZSTD_decompress() takes a single contiguous input, so a chained IOBuf is
+ * first flattened into a private coalesced clone. Previously only the head
+ * buffer of a chain was handed to zstd.
+ *
+ * @param data                compressed input (possibly chained); not modified.
+ * @param uncompressedLength  caller-supplied size used to allocate the output.
+ * @return                    buffer holding the decompressed bytes.
+ * @throws std::runtime_error if zstd reports an error.
+ */
+std::unique_ptr<IOBuf> ZSTDCodec::doUncompress(const IOBuf* data,
+                                               uint64_t uncompressedLength) {
+  // Keeps the flattened copy alive for the duration of the call; stays empty
+  // (no copy made) when the input is already a single buffer.
+  std::unique_ptr<IOBuf> coalesced;
+  if (data->isChained()) {
+    coalesced = data->clone();
+    coalesced->coalesce();
+    data = coalesced.get();
+  }
+
+  size_t rc;
+  auto out = IOBuf::createCombined(uncompressedLength);
+
+  CHECK_GE(out->capacity(), uncompressedLength);
+  CHECK_EQ(out->length(), 0);
+
+  rc = ZSTD_decompress(
+      out->writableTail(), out->capacity(), data->data(), data->length());
+
+  if (ZSTD_isError(rc)) {
+    throw std::runtime_error(to<std::string>(
+          "ZSTD decompression returned an error: ",
+          ZSTD_getErrorName(rc)));
+  }
+
+  // rc is the number of decompressed bytes written into the tail of `out`.
+  out->append(rc);
+  CHECK_EQ(out->length(), rc);
+
+  return out;
+}
+
+#endif // FOLLY_HAVE_LIBZSTD
-CodecFactory gCodecFactories[
+} // namespace
+
+std::unique_ptr<Codec> getCodec(CodecType type, int level) {
+ typedef std::unique_ptr<Codec> (*CodecFactory)(int, CodecType);
+
+ static CodecFactory codecFactories[
static_cast<size_t>(CodecType::NUM_CODEC_TYPES)] = {
- nullptr, // USER_DEFINED
- NoCompressionCodec::create,
+ nullptr, // USER_DEFINED
+ NoCompressionCodec::create,
#if FOLLY_HAVE_LIBLZ4
- LZ4Codec::create,
+ LZ4Codec::create,
#else
- nullptr,
+ nullptr,
#endif
#if FOLLY_HAVE_LIBSNAPPY
- SnappyCodec::create,
+ SnappyCodec::create,
#else
- nullptr,
+ nullptr,
#endif
#if FOLLY_HAVE_LIBZ
- ZlibCodec::create,
+ ZlibCodec::create,
#else
- nullptr,
+ nullptr,
#endif
#if FOLLY_HAVE_LIBLZ4
- LZ4Codec::create,
+ LZ4Codec::create,
#else
- nullptr,
+ nullptr,
#endif
#if FOLLY_HAVE_LIBLZMA
- LZMA2Codec::create,
- LZMA2Codec::create,
+ LZMA2Codec::create,
+ LZMA2Codec::create,
#else
- nullptr,
- nullptr,
+ nullptr,
+ nullptr,
#endif
-};
-} // namespace
+#if FOLLY_HAVE_LIBZSTD
+ ZSTDCodec::create,
+#else
+ nullptr,
+#endif
+
+#if FOLLY_HAVE_LIBZ
+ ZlibCodec::create,
+#else
+ nullptr,
+#endif
+ };
-std::unique_ptr<Codec> getCodec(CodecType type, int level) {
size_t idx = static_cast<size_t>(type);
if (idx >= static_cast<size_t>(CodecType::NUM_CODEC_TYPES)) {
throw std::invalid_argument(to<std::string>(
"Compression type ", idx, " not supported"));
}
- auto factory = gCodecFactories[idx];
+ auto factory = codecFactories[idx];
if (!factory) {
throw std::invalid_argument(to<std::string>(
"Compression type ", idx, " not supported"));
}
}} // namespaces
-