+ std::vector<std::string> validPrefixes() const override;
+ bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
+ const override;
+
+ private:
+ bool doNeedsUncompressedLength() const override;
+ std::unique_ptr<IOBuf> doCompress(const IOBuf* data) override;
+ std::unique_ptr<IOBuf> doUncompress(
+ const IOBuf* data,
+ uint64_t uncompressedLength) override;
+
+ int level_;
+};
+
+ static constexpr uint32_t kZSTDMagicLE = 0xFD2FB528; // ZSTD magic number; handed to prefixToStringLE()/dataStartsWithLE() to recognise ZSTD data.
+
+ // Lists the prefixes that identify data this codec can decode: the single
+ // string produced by prefixToStringLE() for the ZSTD magic number.
+ std::vector<std::string> ZSTDCodec::validPrefixes() const {
+   std::vector<std::string> prefixes;
+   prefixes.push_back(prefixToStringLE(kZSTDMagicLE));
+   return prefixes;
+ }
+
+ // Reports whether `data` starts with the ZSTD magic number, as decided by
+ // dataStartsWithLE(). The uncompressed-length argument is not consulted.
+ bool ZSTDCodec::canUncompress(
+     const IOBuf* data,
+     uint64_t /* uncompressedLength */) const {
+   const bool hasMagic = dataStartsWithLE(data, kZSTDMagicLE);
+   return hasMagic;
+ }
+
+ // Factory: constructs a ZSTDCodec for the given level and codec type and
+ // returns it as an owning Codec pointer.
+ std::unique_ptr<Codec> ZSTDCodec::create(int level, CodecType type) {
+   std::unique_ptr<Codec> codec = make_unique<ZSTDCodec>(level, type);
+   return codec;
+ }
+
+ // Builds a ZSTD codec.
+ //
+ // The symbolic levels COMPRESSION_LEVEL_FASTEST and COMPRESSION_LEVEL_DEFAULT
+ // both map to ZSTD level 1, and COMPRESSION_LEVEL_BEST maps to 19; any other
+ // value is used as given. Throws std::invalid_argument when the resulting
+ // level is outside [1, ZSTD_maxCLevel()].
+ ZSTDCodec::ZSTDCodec(int level, CodecType type) : Codec(type) {
+   DCHECK(type == CodecType::ZSTD);
+
+   // Translate the symbolic levels; explicit numeric levels pass through.
+   if (level == COMPRESSION_LEVEL_FASTEST ||
+       level == COMPRESSION_LEVEL_DEFAULT) {
+     level = 1;
+   } else if (level == COMPRESSION_LEVEL_BEST) {
+     level = 19;
+   }
+
+   const bool inRange = level >= 1 && level <= ZSTD_maxCLevel();
+   if (!inRange) {
+     throw std::invalid_argument(
+         to<std::string>("ZSTD: invalid level: ", level));
+   }
+   level_ = level;
+ }
+
+ // Always reports false: this codec does not ask callers to supply the
+ // uncompressed length up front.
+ bool ZSTDCodec::doNeedsUncompressedLength() const {
+   return false;
+ }
+
+ // Turns a ZSTD status code into an exception: throws std::runtime_error
+ // carrying ZSTD_getErrorName(rc) when ZSTD_isError(rc) flags an error, and
+ // returns normally otherwise.
+ void zstdThrowIfError(size_t rc) {
+   if (ZSTD_isError(rc)) {
+     throw std::runtime_error(
+         to<std::string>("ZSTD returned an error: ", ZSTD_getErrorName(rc)));
+   }
+ }
+
+ // Compresses `data` into one contiguous IOBuf.
+ //
+ // An unchained input is compressed in a single ZSTD_compress() call. A chained
+ // input is fed buffer by buffer through the ZSTD streaming API into an output
+ // buffer sized by ZSTD_compressBound() of the total input length.
+ //
+ // Throws std::runtime_error when ZSTD reports an error, when the compression
+ // stream cannot be created, or when the stream stops making progress.
+ std::unique_ptr<IOBuf> ZSTDCodec::doCompress(const IOBuf* data) {
+   // Support earlier versions of the codec (working with a single IOBuf,
+   // and using ZSTD_decompress which requires ZSTD frame to contain size,
+   // which isn't populated by streaming API).
+   if (!data->isChained()) {
+     auto out = IOBuf::createCombined(ZSTD_compressBound(data->length()));
+     const auto rc = ZSTD_compress(
+         out->writableData(),
+         out->capacity(),
+         data->data(),
+         data->length(),
+         level_);
+     zstdThrowIfError(rc);
+     out->append(rc);
+     return out;
+   }
+
+   auto zcs = ZSTD_createCStream();
+   // A null stream must not be handed to ZSTD_initCStream(); report the
+   // failure instead.
+   if (zcs == nullptr) {
+     throw std::runtime_error("ZSTD: failed to create compression stream");
+   }
+   SCOPE_EXIT {
+     ZSTD_freeCStream(zcs);
+   };
+
+   auto rc = ZSTD_initCStream(zcs, level_);
+   zstdThrowIfError(rc);
+
+   Cursor cursor(data);
+   auto result = IOBuf::createCombined(ZSTD_compressBound(cursor.totalLength()));
+
+   // The whole compressed stream is written into this single output window.
+   ZSTD_outBuffer out;
+   out.dst = result->writableTail();
+   out.size = result->capacity();
+   out.pos = 0;
+
+   for (auto buffer = cursor.peekBytes(); !buffer.empty();) {
+     ZSTD_inBuffer in;
+     in.src = buffer.data();
+     in.size = buffer.size();
+     for (in.pos = 0; in.pos != in.size;) {
+       const auto inBefore = in.pos;
+       const auto outBefore = out.pos;
+       rc = ZSTD_compressStream(zcs, &out, &in);
+       zstdThrowIfError(rc);
+       // If a call neither consumed input nor produced output, repeating it
+       // cannot help (e.g. the output window is full); bail out rather than
+       // spin forever.
+       if (in.pos == inBefore && out.pos == outBefore) {
+         throw std::runtime_error("ZSTD: compression made no progress");
+       }
+     }
+     cursor.skip(in.size);
+     buffer = cursor.peekBytes();
+   }
+
+   rc = ZSTD_endStream(zcs, &out);
+   zstdThrowIfError(rc);
+   // Anything other than 0 means the frame was not fully flushed into `out`.
+   CHECK_EQ(rc, 0);
+
+   result->append(out.pos);
+   return result;
+ }
+
+ // One-shot decompression of a single, unchained buffer whose uncompressed
+ // size is already known.
+ //
+ // Throws std::runtime_error when ZSTD reports an error or when the number of
+ // bytes produced differs from `uncompressedLength`.
+ static std::unique_ptr<IOBuf> zstdUncompressBuffer(
+     const IOBuf* data,
+     uint64_t uncompressedLength) {
+   // Callers must pass an unchained buffer and a known length.
+   DCHECK(!data->isChained());
+   DCHECK(uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH);
+
+   auto output = IOBuf::create(uncompressedLength);
+   auto* const dst = output->writableTail();
+   const auto dstCapacity = output->tailroom();
+   const auto written =
+       ZSTD_decompress(dst, dstCapacity, data->data(), data->length());
+   zstdThrowIfError(written);
+
+   if (written == uncompressedLength) {
+     output->append(written);
+     return output;
+   }
+   throw std::runtime_error("ZSTD: invalid uncompressed length");
+ }
+
+ // Streaming decompression of a (possibly chained) input into an IOBuf chain.
+ //
+ // `uncompressedLength` may be Codec::UNKNOWN_UNCOMPRESSED_LENGTH. When it is
+ // known and non-zero it sizes the first output allocation, and when it is
+ // known it is verified against the number of bytes actually produced.
+ //
+ // Throws std::runtime_error when the decompression stream cannot be created,
+ // when ZSTD reports an error, when the input ends before the frame does, when
+ // bytes remain after the frame, or when the produced length does not match.
+ static std::unique_ptr<IOBuf> zstdUncompressStream(
+     const IOBuf* data,
+     uint64_t uncompressedLength) {
+   auto zds = ZSTD_createDStream();
+   // A null stream must not be handed to ZSTD_initDStream(); report the
+   // failure instead.
+   if (zds == nullptr) {
+     throw std::runtime_error("ZSTD: failed to create decompression stream");
+   }
+   SCOPE_EXIT {
+     ZSTD_freeDStream(zds);
+   };
+
+   auto rc = ZSTD_initDStream(zds);
+   zstdThrowIfError(rc);
+
+   ZSTD_outBuffer out{};
+   ZSTD_inBuffer in{};
+
+   // Use the known length for the first allocation so the common case needs a
+   // single buffer. A known length of zero falls back to the default so we
+   // never request a zero-byte output window.
+   auto outputSize = ZSTD_DStreamOutSize();
+   if (uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH &&
+       uncompressedLength != 0) {
+     outputSize = uncompressedLength;
+   }
+
+   IOBufQueue queue(IOBufQueue::cacheChainLength());
+
+   Cursor cursor(data);
+   for (rc = 0;;) {
+     // Refill the input window once the previous one is fully consumed.
+     if (in.pos == in.size) {
+       auto buffer = cursor.peekBytes();
+       in.src = buffer.data();
+       in.size = buffer.size();
+       in.pos = 0;
+       cursor.skip(in.size);
+       // NOTE(review): rc == 1 is tolerated here, presumably because it can
+       // mean "only flushing remains" rather than "more input needed" --
+       // confirm against the ZSTD_decompressStream documentation.
+       if (rc > 1 && in.size == 0) {
+         throw std::runtime_error(to<std::string>("ZSTD: incomplete input"));
+       }
+     }
+     // Commit the filled output window and obtain a fresh one.
+     if (out.pos == out.size) {
+       if (out.pos != 0) {
+         queue.postallocate(out.pos);
+       }
+       auto buffer = queue.preallocate(outputSize, outputSize);
+       out.dst = buffer.first;
+       out.size = buffer.second;
+       out.pos = 0;
+       // Every allocation after the first uses the default stream size.
+       outputSize = ZSTD_DStreamOutSize();
+     }
+     rc = ZSTD_decompressStream(zds, &out, &in);
+     zstdThrowIfError(rc);
+     if (rc == 0) {
+       break;
+     }
+   }
+   // Commit whatever was written into the last, partially filled window.
+   if (out.pos != 0) {
+     queue.postallocate(out.pos);
+   }
+   if (in.pos != in.size || !cursor.isAtEnd()) {
+     throw std::runtime_error("ZSTD: junk after end of data");
+   }
+   if (uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH &&
+       queue.chainLength() != uncompressedLength) {
+     throw std::runtime_error("ZSTD: invalid uncompressed length");
+   }
+
+   return queue.move();
+ }
+
+ // Decompresses `data`, choosing between one-shot and streaming decompression.
+ //
+ // A non-zero size reported by ZSTD_getDecompressedSize() for the first IOBuf
+ // replaces `uncompressedLength`; if the caller supplied a different known
+ // length, std::runtime_error is thrown. The one-shot path is taken only when
+ // the length is known and the input is not chained.
+ std::unique_ptr<IOBuf> ZSTDCodec::doUncompress(
+     const IOBuf* data,
+     uint64_t uncompressedLength) {
+   // Read decompressed size from frame if available in first IOBuf.
+   const auto frameSize = ZSTD_getDecompressedSize(data->data(), data->length());
+   if (frameSize != 0) {
+     const bool callerKnowsLength =
+         uncompressedLength != Codec::UNKNOWN_UNCOMPRESSED_LENGTH;
+     if (callerKnowsLength && uncompressedLength != frameSize) {
+       throw std::runtime_error("ZSTD: invalid uncompressed length");
+     }
+     uncompressedLength = frameSize;
+   }
+
+   // ZSTD_decompress() is faster, but needs a known length and one buffer;
+   // everything else goes through the slower streaming path.
+   const bool oneShot =
+       uncompressedLength != UNKNOWN_UNCOMPRESSED_LENGTH && !data->isChained();
+   if (oneShot) {
+     return zstdUncompressBuffer(data, uncompressedLength);
+   }
+   return zstdUncompressStream(data, uncompressedLength);
+ }
+
+#endif // FOLLY_HAVE_LIBZSTD
+
+/**
+ * Automatic decompression
+ */
+class AutomaticCodec final : public Codec {
+ public:
+ static std::unique_ptr<Codec> create(
+ std::vector<std::unique_ptr<Codec>> customCodecs);
+ explicit AutomaticCodec(std::vector<std::unique_ptr<Codec>> customCodecs);
+
+ std::vector<std::string> validPrefixes() const override;
+ bool canUncompress(const IOBuf* data, uint64_t uncompressedLength)
+ const override;
+
+ private:
+ bool doNeedsUncompressedLength() const override;
+ uint64_t doMaxUncompressedLength() const override;
+
+ std::unique_ptr<IOBuf> doCompress(const IOBuf*) override {
+ throw std::runtime_error("AutomaticCodec error: compress() not supported.");
+ }
+ std::unique_ptr<IOBuf> doUncompress(
+ const IOBuf* data,
+ uint64_t uncompressedLength) override;
+
+ void addCodecIfSupported(CodecType type);
+
+ // Throws iff the codecs aren't compatible (very slow)
+ void checkCompatibleCodecs() const;
+
+ std::vector<std::unique_ptr<Codec>> codecs_;
+ bool needsUncompressedLength_;
+ uint64_t maxUncompressedLength_;
+};
+
+ // Collects the valid prefixes of every held codec, de-duplicated through an
+ // unordered set, and returns them as a vector (in the set's iteration order).
+ std::vector<std::string> AutomaticCodec::validPrefixes() const {
+   std::unordered_set<std::string> unique;
+   for (const auto& member : codecs_) {
+     const auto memberPrefixes = member->validPrefixes();
+     unique.insert(memberPrefixes.begin(), memberPrefixes.end());
+   }
+   std::vector<std::string> result(unique.begin(), unique.end());
+   return result;
+ }
+
+ // Returns true as soon as one held codec reports that it can uncompress
+ // `data` with the given length; false when none does.
+ bool AutomaticCodec::canUncompress(
+     const IOBuf* data,
+     uint64_t uncompressedLength) const {
+   for (const auto& candidate : codecs_) {
+     if (candidate->canUncompress(data, uncompressedLength)) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ // Appends getCodec(type) to the held codecs, but only when hasCodec(type) is
+ // true and no held codec already reports that type.
+ void AutomaticCodec::addCodecIfSupported(CodecType type) {
+   bool alreadyHeld = false;
+   for (const auto& existing : codecs_) {
+     if (existing->type() == type) {
+       alreadyHeld = true;
+       break;
+     }
+   }
+   if (!hasCodec(type) || alreadyHeld) {
+     return;
+   }
+   codecs_.push_back(getCodec(type));
+ }
+
+ // Factory: wraps the given custom codecs in a new AutomaticCodec and returns
+ // it as an owning Codec pointer.
+ /* static */ std::unique_ptr<Codec> AutomaticCodec::create(
+     std::vector<std::unique_ptr<Codec>> customCodecs) {
+   std::unique_ptr<Codec> codec =
+       make_unique<AutomaticCodec>(std::move(customCodecs));
+   return codec;
+ }
+
+ // Takes ownership of `customCodecs`, appends the built-in codecs that are
+ // available and not already represented, and caches the aggregate
+ // needsUncompressedLength / maxUncompressedLength values.
+ //
+ // In debug builds the codec set is also checked for prefix compatibility,
+ // which throws std::invalid_argument on a conflict.
+ AutomaticCodec::AutomaticCodec(std::vector<std::unique_ptr<Codec>> customCodecs)
+     : Codec(CodecType::USER_DEFINED), codecs_(std::move(customCodecs)) {
+   // Validate the caller-supplied codecs before anything dereferences them:
+   // addCodecIfSupported() and checkCompatibleCodecs() both call through every
+   // element.
+   DCHECK(std::none_of(
+       codecs_.begin(), codecs_.end(), [](std::unique_ptr<Codec> const& codec) {
+         return codec == nullptr;
+       }));
+
+   // Fastest -> slowest
+   addCodecIfSupported(CodecType::LZ4_FRAME);
+   addCodecIfSupported(CodecType::ZSTD);
+   addCodecIfSupported(CodecType::ZLIB);
+   addCodecIfSupported(CodecType::GZIP);
+   addCodecIfSupported(CodecType::LZMA2);
+   if (kIsDebug) {
+     checkCompatibleCodecs();
+   }
+   // Check that none of the codecs are null, now including the built-in ones.
+   DCHECK(std::none_of(
+       codecs_.begin(), codecs_.end(), [](std::unique_ptr<Codec> const& codec) {
+         return codec == nullptr;
+       }));
+
+   // The length is needed if any single codec needs it.
+   needsUncompressedLength_ = std::any_of(
+       codecs_.begin(), codecs_.end(), [](std::unique_ptr<Codec> const& codec) {
+         return codec->needsUncompressedLength();
+       });
+
+   // Advertise the largest limit among the held codecs.
+   const auto it = std::max_element(
+       codecs_.begin(),
+       codecs_.end(),
+       [](std::unique_ptr<Codec> const& lhs, std::unique_ptr<Codec> const& rhs) {
+         return lhs->maxUncompressedLength() < rhs->maxUncompressedLength();
+       });
+   DCHECK(it != codecs_.end());
+   maxUncompressedLength_ = (*it)->maxUncompressedLength();
+ }
+
+ // Verifies that the held codecs can be told apart by their prefixes.
+ //
+ // Throws std::invalid_argument when a codec has no prefixes, when a prefix is
+ // empty or appears twice, or when one prefix is a strict prefix of another.
+ void AutomaticCodec::checkCompatibleCodecs() const {
+   // Every prefix seen so far. Seeded with the empty string so that an empty
+   // prefix is reported as a collision.
+   std::unordered_set<std::string> seen;
+   seen.insert("");
+
+   // Pass 1: gather all prefixes, rejecting codecs without any and rejecting
+   // duplicates.
+   for (const auto& codec : codecs_) {
+     const auto prefixes = codec->validPrefixes();
+     if (prefixes.empty()) {
+       throw std::invalid_argument{
+           "AutomaticCodec: validPrefixes() must not be empty."};
+     }
+     for (const auto& prefix : prefixes) {
+       const bool inserted = seen.insert(prefix).second;
+       if (!inserted) {
+         throw std::invalid_argument{
+             "AutomaticCodec: Two valid prefixes collide."};
+       }
+     }
+   }
+
+   // Pass 2: no strict, non-empty leading part of a prefix may itself be a
+   // registered prefix.
+   for (const auto& candidate : seen) {
+     for (size_t len = 1; len < candidate.size(); ++len) {
+       if (seen.find(candidate.substr(0, len)) != seen.end()) {
+         throw std::invalid_argument{
+             "AutomaticCodec: One valid prefix is a prefix of another valid "
+             "prefix."};
+       }
+     }
+   }
+ }
+
+ // Returns the flag cached in needsUncompressedLength_.
+ bool AutomaticCodec::doNeedsUncompressedLength() const {
+   return this->needsUncompressedLength_;
+ }
+
+ // Returns the limit cached in maxUncompressedLength_.
+ uint64_t AutomaticCodec::doMaxUncompressedLength() const {
+   return this->maxUncompressedLength_;
+ }
+
+ // Delegates to the first held codec whose canUncompress() accepts the data.
+ // Throws std::runtime_error when no held codec accepts it.
+ std::unique_ptr<IOBuf> AutomaticCodec::doUncompress(
+     const IOBuf* data,
+     uint64_t uncompressedLength) {
+   const auto match = std::find_if(
+       codecs_.begin(),
+       codecs_.end(),
+       [data, uncompressedLength](std::unique_ptr<Codec> const& codec) {
+         return codec->canUncompress(data, uncompressedLength);
+       });
+   if (match == codecs_.end()) {
+     throw std::runtime_error("AutomaticCodec error: Unknown compressed data");
+   }
+   return (*match)->uncompress(data, uncompressedLength);
+ }
+
+} // namespace
+
+ // Pointer to a codec factory function taking (int, CodecType) and returning
+ // an owning Codec pointer.
+ using CodecFactory = std::unique_ptr<Codec> (*)(int, CodecType);
+static constexpr CodecFactory
+ codecFactories[static_cast<size_t>(CodecType::NUM_CODEC_TYPES)] = {
+ nullptr, // USER_DEFINED
+ NoCompressionCodec::create,