From 630673beeb35e790ed27c38ea5abe1f5436cb08e Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 21 Mar 2017 11:13:36 -0700 Subject: [PATCH] Add IOBuf::cloneCoalesced() Summary: `LZ4Codec::doCompress()` and `doUncompress()` need to coalesce a `const IOBuf*`, so they `clone()` and then `coalesce()`. It is more efficient to do this in one step, as shown by the benchmarks. Reviewed By: yfeldblum, simpkins Differential Revision: D4727802 fbshipit-source-id: f8d5f6187f2ee804301232b3c75b848330169fc5 --- folly/io/Compression.cpp | 14 ++++----- folly/io/IOBuf.cpp | 34 +++++++++++++++++++++ folly/io/IOBuf.h | 20 +++++++++++++ folly/io/test/IOBufBenchmark.cpp | 21 ++++++++++++- folly/io/test/IOBufTest.cpp | 51 ++++++++++++++++++++++++++++++++ 5 files changed, 131 insertions(+), 9 deletions(-) diff --git a/folly/io/Compression.cpp b/folly/io/Compression.cpp index 27fedd81..c626be74 100644 --- a/folly/io/Compression.cpp +++ b/folly/io/Compression.cpp @@ -303,12 +303,11 @@ uint64_t LZ4Codec::doMaxUncompressedLength() const { } std::unique_ptr LZ4Codec::doCompress(const IOBuf* data) { - std::unique_ptr clone; + IOBuf clone; if (data->isChained()) { // LZ4 doesn't support streaming, so we have to coalesce - clone = data->clone(); - clone->coalesce(); - data = clone.get(); + clone = data->cloneCoalescedAsValue(); + data = &clone; } uint32_t extraSize = encodeSize() ? kMaxVarintLength64 : 0; @@ -345,12 +344,11 @@ std::unique_ptr LZ4Codec::doCompress(const IOBuf* data) { std::unique_ptr LZ4Codec::doUncompress( const IOBuf* data, uint64_t uncompressedLength) { - std::unique_ptr clone; + IOBuf clone; if (data->isChained()) { // LZ4 doesn't support streaming, so we have to coalesce - clone = data->clone(); - clone->coalesce(); - data = clone.get(); + clone = data->cloneCoalescedAsValue(); + data = &clone; } folly::io::Cursor cursor(data); diff --git a/folly/io/IOBuf.cpp b/folly/io/IOBuf.cpp index 2a295aa8..571e772d 100644 --- a/folly/io/IOBuf.cpp +++ b/folly/io/IOBuf.cpp @@ -513,6 +513,10 @@ unique_ptr IOBuf::cloneOne() const { return make_unique(cloneOneAsValue()); } +unique_ptr IOBuf::cloneCoalesced() const { + return make_unique(cloneCoalescedAsValue()); +} + IOBuf IOBuf::cloneAsValue() const { auto tmp = cloneOneAsValue(); @@ -537,6 +541,36 @@ IOBuf IOBuf::cloneOneAsValue() const { length_); } +IOBuf IOBuf::cloneCoalescedAsValue() const { + if (!isChained()) { + return cloneOneAsValue(); + } + // Coalesce into newBuf + const uint64_t newLength = computeChainDataLength(); + const uint64_t newHeadroom = headroom(); + const uint64_t newTailroom = prev()->tailroom(); + const uint64_t newCapacity = newLength + newHeadroom + newTailroom; + IOBuf newBuf{CREATE, newCapacity}; + newBuf.advance(newHeadroom); + + auto current = this; + do { + if (current->length() > 0) { + DCHECK_NOTNULL(current->data()); + DCHECK_LE(current->length(), newBuf.tailroom()); + memcpy(newBuf.writableTail(), current->data(), current->length()); + newBuf.append(current->length()); + } + current = current->next(); + } while (current != this); + + DCHECK_EQ(newLength, newBuf.length()); + DCHECK_EQ(newHeadroom, newBuf.headroom()); + DCHECK_LE(newTailroom, newBuf.tailroom()); + + return newBuf; +} + void IOBuf::unshareOneSlow() { // Allocate a new buffer for the data uint8_t* buf; diff --git a/folly/io/IOBuf.h b/folly/io/IOBuf.h index 87a3ec9e..15f8839c 100644 --- a/folly/io/IOBuf.h +++ b/folly/io/IOBuf.h @@ -1123,6 +1123,26 @@ class IOBuf { */ IOBuf cloneOneAsValue() const; + /** + * Return a new unchained IOBuf that may share the same data as this chain. + * + * If the IOBuf chain is not chained then the new IOBuf will point to the same + * underlying data buffer as the original chain. Otherwise, it will clone and + * coalesce the IOBuf chain. + * + * The new IOBuf will have at least as much headroom as the first IOBuf in the + * chain, and at least as much tailroom as the last IOBuf in the chain. + * + * Throws std::bad_alloc on error. + */ + std::unique_ptr cloneCoalesced() const; + + /** + * Similar to cloneCoalesced(). But returns IOBuf by value rather than + * heap-allocating it. + */ + IOBuf cloneCoalescedAsValue() const; + /** * Similar to Clone(). But use other as the head node. Other nodes in the * chain (if any) will be allocted on heap. diff --git a/folly/io/test/IOBufBenchmark.cpp b/folly/io/test/IOBufBenchmark.cpp index 2298ddbb..1fa52b18 100644 --- a/folly/io/test/IOBufBenchmark.cpp +++ b/folly/io/test/IOBufBenchmark.cpp @@ -70,6 +70,23 @@ BENCHMARK(copyBenchmark, iters) { } } +BENCHMARK(cloneCoalescedBaseline, iters) { + std::unique_ptr buf = IOBuf::createChain(100, 10); + while (iters--) { + auto clone = buf->cloneAsValue(); + clone.coalesce(); + folly::doNotOptimizeAway(clone.capacity()); + } +} + +BENCHMARK_RELATIVE(cloneCoalescedBenchmark, iters) { + std::unique_ptr buf = IOBuf::createChain(100, 10); + while (iters--) { + auto copy = buf->cloneCoalescedAsValue(); + folly::doNotOptimizeAway(copy.capacity()); + } +} + /** * ============================================================================ * folly/io/test/IOBufBenchmark.cpp relative time/iter iters/s @@ -80,8 +97,10 @@ BENCHMARK(copyBenchmark, iters) { * cloneIntoBenchmark 30.03ns 33.30M * moveBenchmark 15.35ns 65.14M * copyBenchmark 33.63ns 29.73M + * cloneCoalescedBaseline 344.33ns 2.90M + * cloneCoalescedBenchmark 605.62% 56.86ns 17.59M * ============================================================================ -*/ + */ int main(int argc, char** argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); diff --git a/folly/io/test/IOBufTest.cpp b/folly/io/test/IOBufTest.cpp index 6e0cf441..e33287bb 100644 --- a/folly/io/test/IOBufTest.cpp +++ b/folly/io/test/IOBufTest.cpp @@ -531,6 +531,15 @@ TEST(IOBuf, Chaining) { gen.seed(fillSeed); checkChain(chainClone.get(), gen); + // cloneCoalesced + { + auto chainCloneCoalesced = chainClone->cloneCoalesced(); + EXPECT_EQ(1, chainCloneCoalesced->countChainElements()); + EXPECT_EQ(fullLength, chainCloneCoalesced->computeChainDataLength()); + gen.seed(fillSeed); + checkChain(chainCloneCoalesced.get(), gen); + } + // Coalesce the entire chain chainClone->coalesce(); EXPECT_EQ(1, chainClone->countChainElements()); @@ -1343,3 +1352,45 @@ TEST(IOBuf, CoalesceEmptyBuffers) { EXPECT_TRUE(ByteRange(StringPiece("hello")) == br); } + +TEST(IOBuf, CloneCoalescedChain) { + auto b = IOBuf::createChain(1000, 100); + b->advance(10); + const uint32_t fillSeed = 0x12345678; + boost::mt19937 gen(fillSeed); + { + auto c = b.get(); + uint64_t length = c->tailroom(); + do { + length = std::min(length, c->tailroom()); + c->append(length--); + fillBuf(c, gen); + c = c->next(); + } while (c != b.get()); + } + auto c = b->cloneCoalescedAsValue(); + EXPECT_FALSE(c.isChained()); // Not chained + EXPECT_FALSE(c.isSharedOne()); // Not shared + EXPECT_EQ(b->headroom(), c.headroom()); // Preserves headroom + EXPECT_LE(b->prev()->tailroom(), c.tailroom()); // Preserves minimum tailroom + EXPECT_EQ(b->computeChainDataLength(), c.length()); // Same length + gen.seed(fillSeed); + checkBuf(&c, gen); // Same contents +} + +TEST(IOBuf, CloneCoalescedSingle) { + auto b = IOBuf::create(1000); + b->advance(10); + b->append(900); + const uint32_t fillSeed = 0x12345678; + boost::mt19937 gen(fillSeed); + fillBuf(b.get(), gen); + + auto c = b->cloneCoalesced(); + EXPECT_FALSE(c->isChained()); // Not chained + EXPECT_TRUE(c->isSharedOne()); // Shared + EXPECT_EQ(b->buffer(), c->buffer()); + EXPECT_EQ(b->capacity(), c->capacity()); + EXPECT_EQ(b->data(), c->data()); + EXPECT_EQ(b->length(), c->length()); +} -- 2.34.1