2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
23 #include <folly/Range.h>
24 #include <folly/io/IOBuf.h>
27 * Compression / decompression over IOBufs
30 namespace folly { namespace io {
32 enum class CodecType {
34 * This codec type is not defined; getCodec() will throw an exception
35 * if used. Useful if deriving your own classes from Codec without
36 * going through the getCodec() interface.
47 * Use LZ4 compression.
48 * Levels supported: 1 = fast, 2 = best; default = 1
53 * Use Snappy compression.
59 * Use zlib compression.
60 * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
65 * Use LZ4 compression, prefixed with size (as Varint).
70 * Use LZMA2 compression.
71 * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
74 LZMA2_VARINT_SIZE = 7,
77 * Use ZSTD compression.
82 * Use gzip compression. This is the same compression algorithm as ZLIB but
83 * gzip-compressed files tend to be easier to work with from the command line.
84 * Levels supported: 0 = no compression, 1 = fast, ..., 9 = best; default = 6
89 * Use LZ4 frame compression.
90 * Levels supported: 0 = fast, 16 = best; default = 0
102 * Return the maximum length of data that may be compressed with this codec.
103 * NO_COMPRESSION and ZLIB support arbitrary lengths;
104 * LZ4 supports up to 1.9GiB; SNAPPY supports up to 4GiB.
105 * May return UNLIMITED_UNCOMPRESSED_LENGTH if unlimited.
// Public, const query returning the limit as a uint64_t. A virtual
// doMaxUncompressedLength() is also declared in this file; presumably this
// call forwards to it -- TODO confirm against the implementation.
107 uint64_t maxUncompressedLength() const;
110 * Return the codec's type.
// Inline accessor: hands back the type_ member by value and, being const,
// does not modify the codec.
112 CodecType type() const { return type_; }
115 * Does this codec need the exact uncompressed length on decompression?
// Returns true when uncompress() must be given the exact uncompressed length
// (the uncompress() notes in this file name LZ4 as such a codec). A virtual
// doNeedsUncompressedLength() is also declared here; presumably this call
// forwards to it -- TODO confirm against the implementation.
117 bool needsUncompressedLength() const;
120 * Compress data, returning an IOBuf (which may share storage with data).
121 * Throws std::invalid_argument if data is larger than
122 * maxUncompressedLength().
124 * Regardless of the behavior of the underlying compressor, compressing
125 * an empty IOBuf chain will return an empty IOBuf chain.
// data:    input IOBuf chain, taken by pointer-to-const.
// returns: owning pointer to the compressed IOBuf chain.
// NOTE(review): behavior for a null `data` is not stated in this header --
// confirm before relying on it.
127 std::unique_ptr<IOBuf> compress(const folly::IOBuf* data);
130 * Compresses data. May involve additional copies compared to the overload
131 * that takes and returns IOBufs. Has the same error semantics as the IOBuf
// data:    view of the bytes to compress (a StringPiece, taken by value).
// returns: the compressed bytes as a std::string.
134 std::string compress(StringPiece data);
137 * Uncompress data. Throws std::runtime_error on decompression error.
139 * Some codecs (LZ4) require the exact uncompressed length; this is indicated
140 * by needsUncompressedLength().
142 * For other codecs (zlib), knowing the exact uncompressed length ahead of
143 * time might be faster.
145 * Regardless of the behavior of the underlying compressor, uncompressing
146 * an empty IOBuf chain will return an empty IOBuf chain.
// Sentinel lengths. uint64_t(-1) and uint64_t(-2) wrap to the two largest
// values a uint64_t can hold, so they sit at the very top of the range.
// UNKNOWN_UNCOMPRESSED_LENGTH is the default for the uncompressedLength
// argument of uncompress(); UNLIMITED_UNCOMPRESSED_LENGTH is the value
// maxUncompressedLength() may return when a codec has no limit.
148 static constexpr uint64_t UNKNOWN_UNCOMPRESSED_LENGTH = uint64_t(-1);
149 static constexpr uint64_t UNLIMITED_UNCOMPRESSED_LENGTH = uint64_t(-2);
151 std::unique_ptr<IOBuf> uncompress(
153 uint64_t uncompressedLength = UNKNOWN_UNCOMPRESSED_LENGTH);
156 * Uncompresses data. May involve additional copies compared to the overload
157 * that takes and returns IOBufs. Has the same error semantics as the IOBuf
160 std::string uncompress(
162 uint64_t uncompressedLength = UNKNOWN_UNCOMPRESSED_LENGTH);
// Constructs a codec tagged with `type`. `explicit` prevents a CodecType from
// converting to a Codec implicitly. Presumably `type` is what type() later
// returns via type_ -- TODO confirm against the implementation.
165 explicit Codec(CodecType type);
168 // default: no limits (save for special value UNKNOWN_UNCOMPRESSED_LENGTH)
// Overridable hook for the codec's length limit. Virtual but not pure, so a
// subclass only overrides it when it needs to report a different limit than
// the base-class default.
169 virtual uint64_t doMaxUncompressedLength() const;
170 // default: doesn't need uncompressed length
// Overridable hook reporting whether decompression requires the exact
// uncompressed length. Virtual but not pure, so a subclass only overrides it
// when the base-class default does not fit.
171 virtual bool doNeedsUncompressedLength() const;
// Pure virtual: every concrete codec must supply the IOBuf-to-IOBuf
// compression routine. Takes the input chain by pointer-to-const and returns
// an owning pointer to the result.
172 virtual std::unique_ptr<IOBuf> doCompress(const folly::IOBuf* data) = 0;
// Pure virtual: every concrete codec must supply the IOBuf-to-IOBuf
// decompression routine. uncompressedLength has no default at this level;
// the public uncompress() declarations default it to
// UNKNOWN_UNCOMPRESSED_LENGTH.
173 virtual std::unique_ptr<IOBuf> doUncompress(const folly::IOBuf* data,
174 uint64_t uncompressedLength) = 0;
175 // default: an implementation is provided by default to wrap the strings into
176 // IOBufs and delegate to the IOBuf methods. This incurs a copy of the output
177 // from IOBuf to string. Implementers, at their discretion, can override
178 // these methods to avoid the copy.
// String-based compression hook. Virtual but not pure: a subclass may
// override it to avoid the IOBuf-to-string copy made by the default
// implementation.
179 virtual std::string doCompressString(StringPiece data);
180 virtual std::string doUncompressString(
182 uint64_t uncompressedLength);
// Symbolic compression levels accepted by getCodec(). Codec-specific levels
// are documented as non-negative, so these negative values cannot be mistaken
// for one of them.
187 constexpr int COMPRESSION_LEVEL_FASTEST = -1;
188 constexpr int COMPRESSION_LEVEL_DEFAULT = -2;
189 constexpr int COMPRESSION_LEVEL_BEST = -3;
192 * Return a codec for the given type. Throws on error. The level
193 * is a non-negative codec-dependent integer indicating the level of
194 * compression desired, or one of the following constants:
196 * COMPRESSION_LEVEL_FASTEST is fastest (uses least CPU / memory,
198 * COMPRESSION_LEVEL_DEFAULT is the default (likely a tradeoff between
200 * COMPRESSION_LEVEL_BEST is the best compression (uses most CPU / memory,
203 * When decompressing, the compression level is ignored. All codecs will
204 * decompress all data compressed with a codec of the same type, regardless
205 * of compression level.
// type:    which codec to create.
// level:   codec-specific non-negative level, or one of the
//          COMPRESSION_LEVEL_* constants; defaults to
//          COMPRESSION_LEVEL_DEFAULT.
// returns: owning pointer to the new Codec.
// NOTE(review): the exception type thrown on error is not stated in this
// header -- confirm against the implementation.
207 std::unique_ptr<Codec> getCodec(CodecType type,
208 int level = COMPRESSION_LEVEL_DEFAULT);
211 * Check if a specified codec is supported.
// Returns true if `type` is a supported codec, false otherwise.
// NOTE(review): presumably lets callers probe availability without hitting
// getCodec()'s throw -- confirm against the implementation.
213 bool hasCodec(CodecType type);