fix varint decoding in case the first buffer is smaller than encoded varint
authorMainak Mandal <mmandal@fb.com>
Wed, 26 Nov 2014 02:27:13 +0000 (18:27 -0800)
committerDave Watson <davejwatson@fb.com>
Thu, 11 Dec 2014 15:59:03 +0000 (07:59 -0800)
Summary: This is not a super clean solution, given that I have just copied the code over form Varint.h. Another way would be to add a peek(IOBuf&, size_t) method to CursorBase and use that to peek deeper into the chain.

Test Plan: unit tests

Reviewed By: tulloch@fb.com

Subscribers: trunkagent, njormrod, dcapel, benr, folly-diffs@

FB internal diff: D1689872

Signature: t1:1689872:1416964989:b7f12a2686233f161401288ffcb8c51926b01fdf

folly/Varint.h
folly/io/Compression.cpp
folly/io/test/CompressionTest.cpp

index 2f8739fe138d71ecac06932d502224adc0f913b2..a58866fd10392b8f6dd2394db056977a5db61dc3 100644 (file)
@@ -108,7 +108,7 @@ inline uint64_t decodeVarint(ByteRange& data) {
       b = *p++; val |= (b & 0x7f) << 49; if (b >= 0) break;
       b = *p++; val |= (b & 0x7f) << 56; if (b >= 0) break;
       b = *p++; val |= (b & 0x7f) << 63; if (b >= 0) break;
-      throw std::invalid_argument("Invalid varint value");  // too big
+      throw std::invalid_argument("Invalid varint value. Too big.");
     } while (false);
   } else {
     int shift = 0;
@@ -116,7 +116,10 @@ inline uint64_t decodeVarint(ByteRange& data) {
       val |= static_cast<uint64_t>(*p++ & 0x7f) << shift;
       shift += 7;
     }
-    if (p == end) throw std::invalid_argument("Invalid varint value");
+    if (p == end) {
+      throw std::invalid_argument("Invalid varint value. Too small: " +
+                                  std::to_string(end - begin) + " bytes");
+    }
     val |= static_cast<uint64_t>(*p++) << shift;
   }
 
index d7a0544dc3f094d371774f249607e04a9adc2717..1a820d42d7339d9351413652e6c84d7aee0d11aa 100644 (file)
@@ -155,12 +155,19 @@ void encodeVarintToIOBuf(uint64_t val, folly::IOBuf* out) {
   out->append(encodeVarint(val, out->writableTail()));
 }
 
-uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) {
-  // Must have enough room in *this* buffer.
-  auto p = cursor.peek();
-  folly::ByteRange range(p.first, p.second);
-  uint64_t val = decodeVarint(range);
-  cursor.skip(range.data() - p.first);
+inline uint64_t decodeVarintFromCursor(folly::io::Cursor& cursor) {
+  uint64_t val = 0;
+  int8_t b = 0;
+  for (int shift = 0; shift <= 63; shift += 7) {
+    b = cursor.pullByte();
+    val |= static_cast<uint64_t>(b & 0x7f) << shift;
+    if (b >= 0) {
+      break;
+    }
+  }
+  if (b < 0) {
+    throw std::invalid_argument("Invalid varint value. Too big.");
+  }
   return val;
 }
 
index 51faa4e0cc403466b5b6b6161b0b6adba22793f1..d2c542da9c18029c62bdff6d44ef5384252877c7 100644 (file)
@@ -27,6 +27,7 @@
 #include <folly/Benchmark.h>
 #include <folly/Hash.h>
 #include <folly/Random.h>
+#include <folly/Varint.h>
 #include <folly/io/IOBufQueue.h>
 
 namespace folly { namespace io { namespace test {
@@ -129,8 +130,8 @@ TEST(CompressionTestNeedsUncompressedLength, Simple) {
     ->needsUncompressedLength());
 }
 
-class CompressionTest : public testing::TestWithParam<
-    std::tr1::tuple<int, CodecType>> {
+class CompressionTest
+    : public testing::TestWithParam<std::tr1::tuple<int, CodecType>> {
   protected:
    void SetUp() {
      auto tup = GetParam();
@@ -149,6 +150,7 @@ void CompressionTest::runSimpleTest(const DataHolder& dh) {
   auto compressed = codec_->compress(original.get());
   if (!codec_->needsUncompressedLength()) {
     auto uncompressed = codec_->uncompress(compressed.get());
+
     EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
     EXPECT_EQ(dh.hash(uncompressedLength_), hashIOBuf(uncompressed.get()));
   }
@@ -171,15 +173,67 @@ TEST_P(CompressionTest, ConstantData) {
 INSTANTIATE_TEST_CASE_P(
     CompressionTest,
     CompressionTest,
-    testing::Combine(
-        testing::Values(0, 1, 12, 22, 25, 27),
-        testing::Values(CodecType::NO_COMPRESSION,
-                        CodecType::LZ4,
-                        CodecType::SNAPPY,
-                        CodecType::ZLIB,
-                        CodecType::LZ4_VARINT_SIZE,
-                        CodecType::LZMA2,
-                        CodecType::LZMA2_VARINT_SIZE)));
+    testing::Combine(testing::Values(0, 1, 12, 22, 25, 27),
+                     testing::Values(CodecType::NO_COMPRESSION,
+                                     CodecType::LZ4,
+                                     CodecType::SNAPPY,
+                                     CodecType::ZLIB,
+                                     CodecType::LZ4_VARINT_SIZE,
+                                     CodecType::LZMA2,
+                                     CodecType::LZMA2_VARINT_SIZE)));
+
+class CompressionVarintTest
+    : public testing::TestWithParam<std::tr1::tuple<int, CodecType>> {
+ protected:
+  void SetUp() {
+    auto tup = GetParam();
+    uncompressedLength_ = uint64_t(1) << std::tr1::get<0>(tup);
+    codec_ = getCodec(std::tr1::get<1>(tup));
+  }
+
+  void runSimpleTest(const DataHolder& dh);
+
+  uint64_t uncompressedLength_;
+  std::unique_ptr<Codec> codec_;
+};
+
+inline uint64_t oneBasedMsbPos(uint64_t number) {
+  uint64_t pos = 0;
+  for (; number > 0; ++pos, number >>= 1) {
+  }
+  return pos;
+}
+
+void CompressionVarintTest::runSimpleTest(const DataHolder& dh) {
+  auto original = IOBuf::wrapBuffer(dh.data(uncompressedLength_));
+  auto compressed = codec_->compress(original.get());
+  auto breakPoint =
+      1UL +
+      Random::rand64(std::max(9UL, oneBasedMsbPos(uncompressedLength_)) / 9UL);
+  auto tinyBuf = IOBuf::copyBuffer(compressed->data(),
+                                   std::min(compressed->length(), breakPoint));
+  compressed->trimStart(breakPoint);
+  tinyBuf->prependChain(std::move(compressed));
+  compressed = std::move(tinyBuf);
+
+  auto uncompressed = codec_->uncompress(compressed.get());
+
+  EXPECT_EQ(uncompressedLength_, uncompressed->computeChainDataLength());
+  EXPECT_EQ(dh.hash(uncompressedLength_), hashIOBuf(uncompressed.get()));
+}
+
+TEST_P(CompressionVarintTest, RandomData) { runSimpleTest(randomDataHolder); }
+
+TEST_P(CompressionVarintTest, ConstantData) {
+  runSimpleTest(constantDataHolder);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    CompressionVarintTest,
+    CompressionVarintTest,
+    testing::Combine(testing::Values(0, 1, 12, 22, 25, 27),
+                     testing::Values(CodecType::LZ4_VARINT_SIZE,
+                                     CodecType::LZMA2_VARINT_SIZE)));
 
 class CompressionCorruptionTest : public testing::TestWithParam<CodecType> {
  protected: