From ebcef6dcbcd0c0c7a532d8a477e2af14627832d2 Mon Sep 17 00:00:00 2001 From: Wez Furlong Date: Tue, 2 Feb 2016 14:54:56 -0800 Subject: [PATCH] folly: add bser encode/decode for dynamic Summary: To support consuming Watchman from within fbcode and hhvm in particular, these functions add a BSER serialization for the folly::dynamic data type. Reviewed By: bhamiltoncx Differential Revision: D2876539 fb-gh-sync-id: bc49d6bc453cc66cebda7185a5907a6f70970b24 --- folly/Makefile.am | 3 + folly/experimental/bser/Bser.h | 103 +++++++++ folly/experimental/bser/Dump.cpp | 246 ++++++++++++++++++++++ folly/experimental/bser/Load.cpp | 225 ++++++++++++++++++++ folly/experimental/bser/test/BserTest.cpp | 116 ++++++++++ 5 files changed, 693 insertions(+) create mode 100644 folly/experimental/bser/Bser.h create mode 100644 folly/experimental/bser/Dump.cpp create mode 100644 folly/experimental/bser/Load.cpp create mode 100644 folly/experimental/bser/test/BserTest.cpp diff --git a/folly/Makefile.am b/folly/Makefile.am index c10e1cf1..d37966b3 100644 --- a/folly/Makefile.am +++ b/folly/Makefile.am @@ -89,6 +89,7 @@ nobase_follyinclude_HEADERS = \ experimental/EliasFanoCoding.h \ experimental/EventCount.h \ experimental/Instructions.h \ + experimental/bser/Bser.h \ experimental/fibers/AddTasks.h \ experimental/fibers/AddTasks-inl.h \ experimental/fibers/Baton.h \ @@ -395,6 +396,8 @@ libfolly_la_SOURCES = \ TimeoutQueue.cpp \ Uri.cpp \ Version.cpp \ + experimental/bser/Dump.cpp \ + experimental/bser/Load.cpp \ experimental/fibers/Baton.cpp \ experimental/fibers/Fiber.cpp \ experimental/fibers/FiberManager.cpp \ diff --git a/folly/experimental/bser/Bser.h b/folly/experimental/bser/Bser.h new file mode 100644 index 00000000..a8df110c --- /dev/null +++ b/folly/experimental/bser/Bser.h @@ -0,0 +1,103 @@ +/* + * Copyright 2016 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once +#include +#include +#include +#include +#include + +/* This is an implementation of the BSER binary serialization scheme. + * BSER was created as a binary, local-system-only representation of + * JSON values. It is more space efficient in its output text than JSON, + * and cheaper to decode. + * It has no requirement that string values be UTF-8. + * BSER was created for use with Watchman. + * https://facebook.github.io/watchman/docs/bser.html + */ + +namespace folly { +namespace bser { + +class BserDecodeError : public std::runtime_error { + public: + using std::runtime_error::runtime_error; +}; + +enum class BserType : int8_t { + Array = 0, + Object, + String, + Int8, + Int16, + Int32, + Int64, + Real, + True, + False, + Null, + Template, + Skip, +}; +extern const uint8_t kMagic[2]; + +struct serialization_opts { + serialization_opts(); + + // Whether to sort keys of object values before serializing them. + // Note that this is potentially slow and that it does not apply + // to templated arrays defined via defineTemplate; its keys are always + // emitted in the order defined by the template. + bool sort_keys; + + // incremental growth size for the underlying Appender when allocating + // storage for the encoded output + size_t growth_increment; + + // BSER allows generating a more space efficient representation of a list of + // object values. These are stored as an "object template" listing the keys + // of the objects ahead of the objects themselves. The objects are then + // serialized without repeating the key string for each element. + // + // You may use the templates field to associate a template with an + // array. You should construct this map after all mutations have been + // performed on the dynamic instance that you intend to serialize as bser, + // as it captures the address of the dynamic to match at encoding time. + // https://facebook.github.io/watchman/docs/bser.html#array-of-templated-objects + using TemplateMap = std::unordered_map; + folly::Optional templates; +}; + +// parse a BSER value from a variety of sources. +// The complete BSER data must be present to succeed. +folly::dynamic parseBser(folly::StringPiece); +folly::dynamic parseBser(folly::ByteRange); +folly::dynamic parseBser(const folly::IOBuf*); + +// When reading incrementally, it is useful to know how much data to +// read to fully decode a BSER pdu. +// Throws std::out_of_range if more data needs to be read to decode +// the header, or throws a runtime_error if the header is invalid +size_t decodePduLength(const folly::IOBuf*); + +folly::fbstring toBser(folly::dynamic const&, const serialization_opts&); +std::unique_ptr toBserIOBuf(folly::dynamic const&, + const serialization_opts&); +} +} + +/* vim:ts=2:sw=2:et: + */ diff --git a/folly/experimental/bser/Dump.cpp b/folly/experimental/bser/Dump.cpp new file mode 100644 index 00000000..ae820af2 --- /dev/null +++ b/folly/experimental/bser/Dump.cpp @@ -0,0 +1,246 @@ +/* + * Copyright 2016 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "Bser.h" +#include + +using namespace folly; +using folly::io::QueueAppender; +using folly::bser::serialization_opts; + +namespace folly { +namespace bser { + +const uint8_t kMagic[2] = {0, 1}; + +static void bserEncode(dynamic const& dyn, + QueueAppender& appender, + const serialization_opts& opts); + +serialization_opts::serialization_opts() + : sort_keys(false), growth_increment(8192) {} + +static const dynamic* getTemplate(const serialization_opts& opts, + dynamic const& dynArray) { + if (!opts.templates.hasValue()) { + return nullptr; + } + const auto& templates = opts.templates.value(); + const auto it = templates.find(&dynArray); + if (it == templates.end()) { + return nullptr; + } + return &it->second; +} + +static void bserEncodeInt(int64_t ival, QueueAppender& appender) { + /* Return the smallest size int that can store the value */ + auto size = + ((ival == ((int8_t)ival)) ? 1 : (ival == ((int16_t)ival)) + ? 2 + : (ival == ((int32_t)ival)) ? 4 : 8); + + switch (size) { + case 1: + appender.write((int8_t)BserType::Int8); + appender.write(int8_t(ival)); + return; + case 2: + appender.write((int8_t)BserType::Int16); + appender.write(int16_t(ival)); + return; + case 4: + appender.write((int8_t)BserType::Int32); + appender.write(int32_t(ival)); + return; + case 8: + appender.write((int8_t)BserType::Int64); + appender.write(ival); + return; + default: + throw std::runtime_error("impossible integer size"); + } +} + +static void bserEncodeString(folly::StringPiece str, QueueAppender& appender) { + appender.write((int8_t)BserType::String); + bserEncodeInt(str.size(), appender); + appender.push((uint8_t*)str.data(), str.size()); +} + +static void bserEncodeArraySimple(dynamic const& dyn, + QueueAppender& appender, + const serialization_opts& opts) { + appender.write((int8_t)BserType::Array); + bserEncodeInt(dyn.size(), appender); + for (const auto& ele : dyn) { + bserEncode(ele, appender, opts); + } +} + +static void bserEncodeArray(dynamic const& dyn, + QueueAppender& appender, + const serialization_opts& opts) { + + auto templ = getTemplate(opts, dyn); + if (UNLIKELY(templ != nullptr)) { + appender.write((int8_t)BserType::Template); + + // Emit the list of property names + bserEncodeArraySimple(*templ, appender, opts); + + // The number of objects in the array + bserEncodeInt(dyn.size(), appender); + + // For each object in the array + for (const auto& ele : dyn) { + // For each key in the template + for (const auto& name : *templ) { + if (auto found = ele.get_ptr(name)) { + if (found->isNull()) { + // Prefer to Skip rather than encode a null value for + // compatibility with the other bser implementations + appender.write((int8_t)BserType::Skip); + } else { + bserEncode(*found, appender, opts); + } + } else { + appender.write((int8_t)BserType::Skip); + } + } + } + return; + } + + bserEncodeArraySimple(dyn, appender, opts); +} + +static void bserEncodeObject(dynamic const& dyn, + QueueAppender& appender, + const serialization_opts& opts) { + appender.write((int8_t)BserType::Object); + bserEncodeInt(dyn.size(), appender); + + if (opts.sort_keys) { + std::vector> sorted(dyn.items().begin(), + dyn.items().end()); + std::sort(sorted.begin(), sorted.end()); + for (const auto& item : sorted) { + bserEncode(item.first, appender, opts); + bserEncode(item.second, appender, opts); + } + } else { + for (const auto& item : dyn.items()) { + bserEncode(item.first, appender, opts); + bserEncode(item.second, appender, opts); + } + } +} + +static void bserEncode(dynamic const& dyn, + QueueAppender& appender, + const serialization_opts& opts) { + switch (dyn.type()) { + case dynamic::Type::NULLT: + appender.write((int8_t)BserType::Null); + return; + case dynamic::Type::BOOL: + appender.write( + (int8_t)(dyn.getBool() ? BserType::True : BserType::False)); + return; + case dynamic::Type::DOUBLE: { + double dval = dyn.getDouble(); + appender.write((int8_t)BserType::Real); + appender.write(dval); + return; + } + case dynamic::Type::INT64: + bserEncodeInt(dyn.getInt(), appender); + return; + case dynamic::Type::OBJECT: + bserEncodeObject(dyn, appender, opts); + return; + case dynamic::Type::ARRAY: + bserEncodeArray(dyn, appender, opts); + return; + case dynamic::Type::STRING: + bserEncodeString(dyn.getString(), appender); + return; + } +} + +std::unique_ptr toBserIOBuf(folly::dynamic const& dyn, + const serialization_opts& opts) { + IOBufQueue q(IOBufQueue::cacheChainLength()); + uint8_t hdrbuf[sizeof(kMagic) + 1 + sizeof(int64_t)]; + + // Reserve some headroom for the overall PDU size; we'll fill this in + // after we've serialized the data and know the length + auto firstbuf = IOBuf::create(opts.growth_increment); + firstbuf->advance(sizeof(hdrbuf)); + q.append(std::move(firstbuf)); + + // encode the value + QueueAppender appender(&q, opts.growth_increment); + bserEncode(dyn, appender, opts); + + // compute the length + auto len = q.chainLength(); + if (len > std::numeric_limits::max()) { + throw std::range_error(folly::to( + "serialized data size ", len, " is too large to represent as BSER")); + } + + // This is a bit verbose, but it computes a header that is appropriate + // to the size of the serialized data + + memcpy(hdrbuf, kMagic, sizeof(kMagic)); + size_t hdrlen = sizeof(kMagic) + 1; + auto magicptr = hdrbuf + sizeof(kMagic); + auto lenptr = hdrbuf + hdrlen; + + if (len > std::numeric_limits::max()) { + *magicptr = (int8_t)BserType::Int64; + *(int64_t*)lenptr = (int64_t)len; + hdrlen += sizeof(int64_t); + } else if (len > std::numeric_limits::max()) { + *magicptr = (int8_t)BserType::Int32; + *(int32_t*)lenptr = (int32_t)len; + hdrlen += sizeof(int32_t); + } else if (len > std::numeric_limits::max()) { + *magicptr = (int8_t)BserType::Int16; + *(int16_t*)lenptr = (int16_t)len; + hdrlen += sizeof(int16_t); + } else { + *magicptr = (int8_t)BserType::Int8; + *(int8_t*)lenptr = (int8_t)len; + hdrlen += sizeof(int8_t); + } + + // and place the data in the headroom + q.prepend(hdrbuf, hdrlen); + + return q.move(); +} + +fbstring toBser(dynamic const& dyn, const serialization_opts& opts) { + auto buf = toBserIOBuf(dyn, opts); + return buf->moveToFbString(); +} +} +} + +/* vim:ts=2:sw=2:et: + */ diff --git a/folly/experimental/bser/Load.cpp b/folly/experimental/bser/Load.cpp new file mode 100644 index 00000000..874037b4 --- /dev/null +++ b/folly/experimental/bser/Load.cpp @@ -0,0 +1,225 @@ +/* + * Copyright 2016 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "Bser.h" +#include +#include + +using namespace folly; +using folly::io::Cursor; + +namespace folly { +namespace bser { +static dynamic parseBser(Cursor& curs); + +template +static FOLLY_NORETURN void throwDecodeError(Cursor& curs, ARGS&&... args) { + throw BserDecodeError(folly::to(std::forward(args)..., + " with ", + curs.length(), + " bytes remaining in cursor")); +} + +static int64_t decodeInt(Cursor& curs) { + auto enc = (BserType)curs.read(); + switch (enc) { + case BserType::Int8: + return curs.read(); + case BserType::Int16: + return curs.read(); + case BserType::Int32: + return curs.read(); + case BserType::Int64: + return curs.read(); + default: + throwDecodeError( + curs, "invalid integer encoding detected (", (int8_t)enc, ")"); + } +} + +static fbstring decodeString(Cursor& curs) { + auto len = decodeInt(curs); + folly::fbstring str; + + if (len < 0) { + throw std::range_error("string length must not be negative"); + } + str.reserve(len); + + size_t available = curs.length(); + while (available < (size_t)len) { + if (available == 0) { + // Saw this case when we decodeHeader was returning the incorrect length + // and we were splitting off too few bytes from the IOBufQueue + throwDecodeError(curs, + "no data available while decoding a string, header was " + "not decoded properly"); + } + str.append(reinterpret_cast(curs.data()), available); + curs.skipAtMost(available); + len -= available; + available = curs.length(); + } + + str.append(reinterpret_cast(curs.data()), len); + curs.skipAtMost(len); + return str; +} + +static dynamic decodeArray(Cursor& curs) { + dynamic arr{}; + auto size = decodeInt(curs); + while (size-- > 0) { + arr.push_back(parseBser(curs)); + } + return arr; +} + +static dynamic decodeObject(Cursor& curs) { + dynamic obj = dynamic::object; + auto size = decodeInt(curs); + while (size-- > 0) { + if ((BserType)curs.read() != BserType::String) { + throwDecodeError(curs, "expected String"); + } + auto key = decodeString(curs); + obj[key] = parseBser(curs); + } + return obj; +} + +static dynamic decodeTemplate(Cursor& curs) { + std::vector arr; + + // List of property names + if ((BserType)curs.read() != BserType::Array) { + throw std::runtime_error("Expected array encoding for property names"); + } + auto names = decodeArray(curs); + + auto size = decodeInt(curs); + arr.reserve(size); + + while (size-- > 0) { + dynamic obj = dynamic::object; + + for (auto& name : names) { + auto pair = curs.peek(); + if ((BserType)pair.first[0] == BserType::Skip) { + obj[name.getString()] = nullptr; + curs.skipAtMost(1); + continue; + } + + obj[name.getString()] = parseBser(curs); + } + + arr.emplace_back(std::move(obj)); + } + + return dynamic(std::move(arr)); +} + +static dynamic parseBser(Cursor& curs) { + switch ((BserType)curs.read()) { + case BserType::Int8: + return curs.read(); + case BserType::Int16: + return curs.read(); + case BserType::Int32: + return curs.read(); + case BserType::Int64: + return curs.read(); + case BserType::Real: { + double dval; + curs.pull((void*)&dval, sizeof(dval)); + return dval; + } + case BserType::Null: + return nullptr; + case BserType::True: + return (bool)true; + case BserType::False: + return (bool)false; + case BserType::String: + return decodeString(curs); + case BserType::Array: + return decodeArray(curs); + case BserType::Object: + return decodeObject(curs); + case BserType::Template: + return decodeTemplate(curs); + case BserType::Skip: + throw std::runtime_error( + "Skip not valid at this location in the bser stream"); + default: + throw std::runtime_error("invalid bser encoding"); + } +} + +static size_t decodeHeader(Cursor& curs) { + char header[sizeof(kMagic)]; + curs.pull(header, sizeof(header)); + if (memcmp(header, kMagic, sizeof(kMagic))) { + throw std::runtime_error("invalid BSER magic header"); + } + + auto enc = (BserType)curs.peek().first[0]; + size_t int_size; + switch (enc) { + case BserType::Int8: + int_size = 1; + break; + case BserType::Int16: + int_size = 2; + break; + case BserType::Int32: + int_size = 4; + break; + case BserType::Int64: + int_size = 8; + break; + default: + int_size = 0; + } + + return int_size + 3 /* magic + int type */ + decodeInt(curs); +} + +size_t decodePduLength(const folly::IOBuf* buf) { + Cursor curs(buf); + return decodeHeader(curs); +} + +folly::dynamic parseBser(const IOBuf* buf) { + Cursor curs(buf); + + decodeHeader(curs); + return parseBser(curs); +} + +folly::dynamic parseBser(ByteRange str) { + auto buf = IOBuf::wrapBuffer(str.data(), str.size()); + return parseBser(&*buf); +} + +folly::dynamic parseBser(StringPiece str) { + return parseBser(ByteRange((uint8_t*)str.data(), str.size())); +} +} +} + +/* vim:ts=2:sw=2:et: + */ diff --git a/folly/experimental/bser/test/BserTest.cpp b/folly/experimental/bser/test/BserTest.cpp new file mode 100644 index 00000000..e2672cbe --- /dev/null +++ b/folly/experimental/bser/test/BserTest.cpp @@ -0,0 +1,116 @@ +/* + * Copyright 2016 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include + +using folly::dynamic; + +static const dynamic roundtrips[] = { + 1, + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::min(), + std::numeric_limits::min(), + std::numeric_limits::min(), + std::numeric_limits::min(), + bool(true), + bool(false), + nullptr, + 1.5, + "hello", + {1, 2, 3}, + dynamic::object("key", "value")("otherkey", "otherval"), +}; + +// Here's a blob from the watchman test suite +const uint8_t template_blob[] = + "\x00\x01\x03\x28" + "\x0b\x00\x03\x02\x02\x03\x04\x6e\x61\x6d\x65\x02" + "\x03\x03\x61\x67\x65\x03\x03\x02\x03\x04\x66\x72" + "\x65\x64\x03\x14\x02\x03\x04\x70\x65\x74\x65\x03" + "\x1e\x0c\x03\x19"; + +// and here's what it represents +static const dynamic template_dynamic = { + dynamic::object("name", "fred")("age", 20), + dynamic::object("name", "pete")("age", 30), + dynamic::object("name", nullptr)("age", 25), +}; + +TEST(Bser, RoundTrip) { + dynamic decoded(nullptr); + folly::fbstring str; + + for (const auto& dyn : roundtrips) { + try { + str = folly::bser::toBser(dyn, folly::bser::serialization_opts()); + decoded = folly::bser::parseBser(str); + + EXPECT_EQ(decoded, dyn); + } catch (const std::exception& err) { + LOG(ERROR) << err.what() << "\nInput: " << dyn.typeName() << ": " << dyn + << " decoded back as " << decoded.typeName() << ": " << decoded + << "\n" << folly::hexDump(str.data(), str.size()); + throw; + } + } +} + +TEST(Bser, Template) { + dynamic decoded(nullptr); + folly::fbstring str; + // Decode the template value provided from elsewhere + decoded = folly::bser::parseBser( + folly::ByteRange(template_blob, sizeof(template_blob) - 1)); + EXPECT_EQ(decoded, template_dynamic) + << "Didn't load template value." + "\nInput: " << template_dynamic.typeName() << ": " << template_dynamic + << " decoded back as " << decoded.typeName() << ": " << decoded << "\n" + << folly::hexDump(template_blob, sizeof(template_blob) - 1); + + // Now check that we can generate this same data representation + folly::bser::serialization_opts opts; + folly::bser::serialization_opts::TemplateMap templates = { + std::make_pair(&decoded, folly::dynamic{"name", "age"})}; + opts.templates = templates; + + str = folly::bser::toBser(decoded, opts); + EXPECT_EQ(folly::ByteRange((const uint8_t*)str.data(), str.size()), + folly::ByteRange(template_blob, sizeof(template_blob) - 1)) + << "Expected:\n" + << folly::hexDump(template_blob, sizeof(template_blob) - 1) << "\nGot:\n" + << folly::hexDump(str.data(), str.size()); +} + +TEST(Bser, PduLength) { + EXPECT_THROW([] { + // Try to decode PDU for a short buffer that doesn't even have the + // complete length available + auto buf = folly::IOBuf::wrapBuffer(template_blob, 3); + auto len = folly::bser::decodePduLength(&*buf); + LOG(ERROR) << "managed to return a length, but only had 3 bytes"; + }(), std::out_of_range); + + auto buf = folly::IOBuf::wrapBuffer(template_blob, sizeof(template_blob)); + auto len = folly::bser::decodePduLength(&*buf); + EXPECT_EQ(len, 44) << "PduLength should be 44, got " << len; +} + +/* vim:ts=2:sw=2:et: + */ -- 2.34.1