From 792e823ba3636730a6a0afad367c16c2faea8ca2 Mon Sep 17 00:00:00 2001 From: Adam Simpkins Date: Fri, 27 May 2016 17:48:14 -0700 Subject: [PATCH] add Cursor::readWhile() and skipWhile() Summary: Add generic functions for reading or skipping until a predicate check fails. This will allow us to simplify a few different call sites that have their own logic similar to this. Also change readTerminatedString() to use readWhile(). Reviewed By: alandau Differential Revision: D3337581 fbshipit-source-id: 9f50914c83adfc882219046862972661bed0e72a --- folly/Makefile.am | 1 + folly/io/Cursor-inl.h | 110 ++++++++++++++++++++++++++++++ folly/io/Cursor.h | 56 ++++++++------- folly/io/test/IOBufCursorTest.cpp | 93 +++++++++++++++++++++++++ 4 files changed, 234 insertions(+), 26 deletions(-) create mode 100644 folly/io/Cursor-inl.h diff --git a/folly/Makefile.am b/folly/Makefile.am index 275d2477..60bd725d 100644 --- a/folly/Makefile.am +++ b/folly/Makefile.am @@ -202,6 +202,7 @@ nobase_follyinclude_HEADERS = \ IntrusiveList.h \ io/Compression.h \ io/Cursor.h \ + io/Cursor-inl.h \ io/IOBuf.h \ io/IOBufQueue.h \ io/RecordIO.h \ diff --git a/folly/io/Cursor-inl.h b/folly/io/Cursor-inl.h new file mode 100644 index 00000000..ec5038e5 --- /dev/null +++ b/folly/io/Cursor-inl.h @@ -0,0 +1,110 @@ +/* + * Copyright 2016 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// Copyright 2004-present Facebook. All Rights Reserved. 
+#pragma once + +namespace folly { +namespace io { +namespace detail { + +/* + * Helper classes for use with CursorBase::readWhile() + */ +class CursorStringAppender { + public: + void append(ByteRange bytes) { + str_.append(reinterpret_cast(bytes.data()), bytes.size()); + } + std::string extractString() { + return std::move(str_); + } + + private: + std::string str_; +}; + +class CursorNoopAppender { + public: + void append(ByteRange) {} +}; + +template +std::string CursorBase::readTerminatedString( + char termChar, + size_t maxLength) { + size_t bytesRead{0}; + auto keepReading = [&bytesRead, termChar, maxLength](uint8_t byte) { + if (byte == termChar) { + return false; + } + ++bytesRead; + if (bytesRead >= maxLength) { + throw std::length_error("string overflow"); + } + return true; + }; + + auto result = readWhile(keepReading); + // skip over the terminator character + if (isAtEnd()) { + throw std::out_of_range("terminator not found"); + } + skip(1); + + return result; +} + +template +template +std::string CursorBase::readWhile( + const Predicate& predicate) { + CursorStringAppender s; + readWhile(predicate, s); + return s.extractString(); +} + +template +template +void CursorBase::readWhile( + const Predicate& predicate, + Output& out) { + while (true) { + auto peeked = peekBytes(); + if (peeked.empty()) { + return; + } + for (size_t idx = 0; idx < peeked.size(); ++idx) { + if (!predicate(peeked[idx])) { + peeked.reset(peeked.data(), idx); + out.append(peeked); + skip(idx); + return; + } + } + out.append(peeked); + skip(peeked.size()); + } +} + +template +template +void CursorBase::skipWhile(const Predicate& predicate) { + CursorNoopAppender appender; + readWhile(predicate, appender); +} +} +} +} // folly::io::detail diff --git a/folly/io/Cursor.h b/folly/io/Cursor.h index 5e1ce527..f33a0cea 100644 --- a/folly/io/Cursor.h +++ b/folly/io/Cursor.h @@ -229,34 +229,36 @@ class CursorBase { */ std::string readTerminatedString( char termChar = '\0', - size_t 
maxLength = std::numeric_limits::max()) { - std::string str; - - while (!isAtEnd()) { - const uint8_t* buf = data(); - size_t buflen = length(); - - size_t i = 0; - while (i < buflen && buf[i] != termChar) { - ++i; + size_t maxLength = std::numeric_limits::max()); - // Do this check after incrementing 'i', as even though we start at the - // 0 byte, it still represents a single character - if (str.length() + i >= maxLength) { - throw std::length_error("string overflow"); - } - } + /* + * Read all bytes until the specified predicate returns false. + * + * The predicate will be called on each byte in turn, until it returns false + * or until the end of the IOBuf chain is reached. + * + * Returns the result as a string. + */ + template + std::string readWhile(const Predicate& predicate); - str.append(reinterpret_cast(buf), i); - if (i < buflen) { - skip(i + 1); - return str; - } + /* + * Read all bytes until the specified predicate returns false. + * + * A more generic version of readWhile() that takes an arbitrary Output + * object, and calls Output::append() with each chunk of matching data. + */ + template + void readWhile(const Predicate& predicate, Output& out); - skip(i); - } - throw std::out_of_range("terminator not found"); - } + /* + * Skip all bytes until the specified predicate returns false. + * + * The predicate will be called on each byte in turn, until it returns false + * or until the end of the IOBuf chain is reached. 
+ */ + template + void skipWhile(const Predicate& predicate); size_t skipAtMost(size_t len) { if (LIKELY(length() >= len)) { @@ -419,7 +421,7 @@ class CursorBase { size_t operator-(const BufType* buf) const { size_t len = 0; - BufType *curBuf = buf; + const BufType* curBuf = buf; while (curBuf != crtBuf_) { len += curBuf->length(); curBuf = curBuf->next(); @@ -934,3 +936,5 @@ class QueueAppender : public detail::Writable { }; }} // folly::io + +#include diff --git a/folly/io/test/IOBufCursorTest.cpp b/folly/io/test/IOBufCursorTest.cpp index a5bc4eb9..08cb23d4 100644 --- a/folly/io/test/IOBufCursorTest.cpp +++ b/folly/io/test/IOBufCursorTest.cpp @@ -770,3 +770,96 @@ TEST(IOBuf, StringOperations) { EXPECT_STREQ("hello", curs.readFixedString(5).c_str()); } } + +TEST(IOBuf, ReadWhileTrue) { + auto isAlpha = [](uint8_t ch) { + return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); + }; + auto isDigit = [](uint8_t ch) { return (ch >= '0' && ch <= '9'); }; + + // Test reading alternating alphabetic and numeric strings + { + std::unique_ptr chain(IOBuf::create(32)); + Appender app(chain.get(), 0); + app.push(StringPiece("hello123world456")); + + Cursor curs(chain.get()); + EXPECT_STREQ("hello", curs.readWhile(isAlpha).c_str()); + EXPECT_STREQ("123", curs.readWhile(isDigit).c_str()); + EXPECT_STREQ("world", curs.readWhile(isAlpha).c_str()); + EXPECT_STREQ("456", curs.readWhile(isDigit).c_str()); + EXPECT_TRUE(curs.isAtEnd()); + } + + // The same, but also use skipWhile() + { + std::unique_ptr chain(IOBuf::create(16)); + Appender app(chain.get(), 0); + app.push(StringPiece("hello123world456")); + + Cursor curs(chain.get()); + EXPECT_STREQ("hello", curs.readWhile(isAlpha).c_str()); + curs.skipWhile(isDigit); + curs.skipWhile(isAlpha); + EXPECT_STREQ("456", curs.readWhile(isDigit).c_str()); + EXPECT_TRUE(curs.isAtEnd()); + } + + // Test readWhile() using data split across multiple buffers, + // including some empty buffers in the middle of the chain. 
+ { + std::unique_ptr chain; + + // First element in the chain has "he" + auto buf = IOBuf::create(40); + Appender app(buf.get(), 0); + app.push(StringPiece("he")); + chain = std::move(buf); + + // The second element has "ll", after 10 bytes of headroom + buf = IOBuf::create(40); + buf->advance(10); + app = Appender{buf.get(), 0}; + app.push(StringPiece("ll")); + chain->prependChain(std::move(buf)); + + // The third element is empty + buf = IOBuf::create(40); + buf->advance(15); + chain->prependChain(std::move(buf)); + + // The fourth element has "o12" + buf = IOBuf::create(40); + buf->advance(37); + app = Appender{buf.get(), 0}; + app.push(StringPiece("o12")); + chain->prependChain(std::move(buf)); + + // The fifth element has "3" + buf = IOBuf::create(40); + app = Appender{buf.get(), 0}; + app.push(StringPiece("3")); + chain->prependChain(std::move(buf)); + + // The sixth element is empty + buf = IOBuf::create(40); + chain->prependChain(std::move(buf)); + + // The seventh element has "world456" + buf = IOBuf::create(40); + app = Appender{buf.get(), 0}; + app.push(StringPiece("world456")); + chain->prependChain(std::move(buf)); + + // The eighth element is empty + buf = IOBuf::create(40); + chain->prependChain(std::move(buf)); + + Cursor curs(chain.get()); + EXPECT_STREQ("hello", curs.readWhile(isAlpha).c_str()); + EXPECT_STREQ("123", curs.readWhile(isDigit).c_str()); + EXPECT_STREQ("world", curs.readWhile(isAlpha).c_str()); + EXPECT_STREQ("456", curs.readWhile(isDigit).c_str()); + EXPECT_TRUE(curs.isAtEnd()); + } +} -- 2.34.1