Expose more functions/constructors on BucketedTimeSeries
author Xuli Liu <xulil@fb.com>
Fri, 3 Feb 2017 22:53:09 +0000 (14:53 -0800)
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Fri, 3 Feb 2017 23:03:00 +0000 (15:03 -0800)
Summary: BucketedTimeSeries currently does not expose enough functions to allow accessing its data, so there is no way to serialize or deserialize it. This change adds the functions and constructor needed to support that.

Reviewed By: simpkins

Differential Revision: D4500075

fbshipit-source-id: 656ac8a208547d8d3fadf9ea150606b6e74775c9

folly/stats/BucketedTimeSeries-defs.h
folly/stats/BucketedTimeSeries.h
folly/test/TimeseriesTest.cpp

index a24aff86e395537e0ed26e9fe03dcb78156ca620..5b3b63917adea20f5f37b685215f6d401d52abe2 100644 (file)
 
 #pragma once
 
-#include <algorithm>
-#include <glog/logging.h>
 #include <folly/Likely.h>
 #include <folly/stats/BucketedTimeSeries.h>
+#include <glog/logging.h>
+#include <algorithm>
+#include <stdexcept>
 
 namespace folly {
 
@@ -43,6 +44,41 @@ BucketedTimeSeries<VT, CT>::BucketedTimeSeries(
   }
 }
 
+/*
+ * Reconstruct a BucketedTimeSeries from previously saved state.
+ *
+ * theFirstTime, theLatestTime, maxDuration and bucketsList are copied into
+ * firstTime_, latestTime_, duration_ and buckets_ respectively.  total_ is
+ * not supplied by the caller: it is recomputed by accumulating the sum and
+ * count of every bucket in bucketsList.
+ *
+ * Throws std::invalid_argument if the supplied state is inconsistent:
+ *   - theFirstTime > theLatestTime (no data points have ever been added)
+ *     but the buckets add up to a non-zero sum or count, or
+ *   - theFirstTime <= theLatestTime but theLatestTime - theFirstTime is
+ *     greater than maxDuration.
+ */
+template <typename VT, typename CT>
+BucketedTimeSeries<VT, CT>::BucketedTimeSeries(
+    TimePoint theFirstTime,
+    TimePoint theLatestTime,
+    Duration maxDuration,
+    const std::vector<Bucket>& bucketsList)
+    : firstTime_(theFirstTime),
+      latestTime_(theLatestTime),
+      duration_(maxDuration),
+      buckets_(bucketsList) {
+  // Come up with the total_ from buckets_ being passed in
+  for (auto const& bucket : buckets_) {
+    total_.add(bucket.sum, bucket.count);
+  }
+
+  // Verify the integrity of the data
+  if (firstTime_ > latestTime_) {
+    // firstTime being greater than latestTime means that no data points have
+    // ever been added to the time series, so the total must be 0.
+    if (total_.sum != 0 || total_.count != 0) {
+      throw std::invalid_argument(
+          "The total should have been 0 "
+          "if firstTime is greater than latestTime");
+    }
+  } else if (latestTime_ - firstTime_ > duration_) {
+    // With firstTime <= latestTime, the span they cover must fit within the
+    // duration.
+    throw std::invalid_argument(
+        "The difference between firstTime and latestTime "
+        "should be less than or equal to the duration");
+  }
+}
+
+
 template <typename VT, typename CT>
 bool BucketedTimeSeries<VT, CT>::addValue(TimePoint now, const ValueType& val) {
   return addValueAggregated(now, val, 1);
index 14468d6175babcaf6f241d2c6ffff6ae90470ebe..a31904daf6652819edb949b1c562e9a91cba3b15 100644 (file)
@@ -81,6 +81,18 @@ class BucketedTimeSeries {
    */
   BucketedTimeSeries(size_t numBuckets, Duration duration);
 
+  /*
+   * Create a new BucketedTimeSeries.
+   *
+   * This constructor is used to reconstruct a timeseries using
+   * previously saved data
+   *
+   * The arguments correspond to the values returned by firstTime(),
+   * latestTime(), duration() and buckets() of the timeseries being
+   * reconstructed.  The overall sum and count are not passed in; they are
+   * recomputed from bucketsList.
+   *
+   * Throws std::invalid_argument if the data is inconsistent: either
+   * theFirstTime > theLatestTime while the buckets contain a non-zero sum or
+   * count, or theFirstTime <= theLatestTime while
+   * theLatestTime - theFirstTime is greater than maxDuration.
+   */
+  BucketedTimeSeries(
+      TimePoint theFirstTime,
+      TimePoint theLatestTime,
+      Duration maxDuration,
+      const std::vector<Bucket>& bucketsList);
+
   /*
    * Adds the value 'val' at time 'now'
    *
@@ -192,6 +204,29 @@ class BucketedTimeSeries {
     return firstTime_ > latestTime_;
   }
 
+  /*
+   * Returns time of first update() since clear()/constructor.
+   * Note that the returned value is only meaningful when empty() is false.
+   *
+   * The stored firstTime_ is returned by value, unmodified.  Together with
+   * latestTime(), duration() and buckets() it is the state that can be
+   * handed to the reconstructing constructor to rebuild this timeseries.
+   */
+  TimePoint firstTime() const {
+    return firstTime_;
+  }
+
+  /*
+   * Returns time of last update().
+   * Note that the returned value is only meaningful when empty() is false.
+   *
+   * The stored latestTime_ is returned by value, unmodified.  Together with
+   * firstTime(), duration() and buckets() it is the state that can be
+   * handed to the reconstructing constructor to rebuild this timeseries.
+   */
+  TimePoint latestTime() const {
+    return latestTime_;
+  }
+
+  /*
+   * Returns actual buckets of values
+   *
+   * The result is a const reference to the internal buckets_ vector, so no
+   * copy is made by this call; copy the vector if an independent snapshot is
+   * needed.  The reference can be passed directly to the reconstructing
+   * constructor, which takes its bucket list by const reference.
+   */
+  const std::vector<Bucket>& buckets() const {
+    return buckets_;
+  }
+
   /*
    * Get the amount of time tracked by this timeseries.
    *
index 66f4a1cf507f07b0287c9e6f91e6393f994ea354..27e73117f52d144c61553a5776f984b42783a057 100644 (file)
  * limitations under the License.
  */
 
-#include <folly/stats/BucketedTimeSeries.h>
+#include <folly/detail/Stats.h>
 #include <folly/stats/BucketedTimeSeries-defs.h>
-#include <folly/stats/MultiLevelTimeSeries.h>
+#include <folly/stats/BucketedTimeSeries.h>
 #include <folly/stats/MultiLevelTimeSeries-defs.h>
+#include <folly/stats/MultiLevelTimeSeries.h>
 
 #include <array>
 
@@ -31,8 +32,10 @@ using std::string;
 using std::vector;
 using folly::BucketedTimeSeries;
 
+using Bucket = folly::detail::Bucket<int64_t>;
 using StatsClock = folly::LegacyStatsClock<std::chrono::seconds>;
 using TimePoint = StatsClock::time_point;
+using Duration = StatsClock::duration;
 
 /*
  * Helper functions to allow us to directly log time points and duration
@@ -795,6 +798,97 @@ TEST(BucketedTimeSeries, addHistorical) {
   EXPECT_EQ(10, b.count());
 }
 
+// Reconstructing a timeseries that never received data must yield another
+// empty timeseries.
+TEST(BucketedTimeSeries, reConstructEmptyTimeSeries) {
+  // Checks that a timeseries reports itself as empty and holds no data.
+  // The argument is taken by const reference so that verifying a timeseries
+  // does not copy it.
+  auto verify = [](const auto& series) {
+    EXPECT_TRUE(series.empty());
+    EXPECT_EQ(0, series.sum());
+    EXPECT_EQ(0, series.count());
+  };
+
+  // Create a 100 second timeseries with 10 buckets
+  BucketedTimeSeries<int64_t> ts(10, seconds(100));
+  verify(ts);
+
+  // Reconstruct the timeseries directly from the state exposed by the
+  // accessors; buckets() is passed by reference, so no intermediate copy of
+  // the bucket vector is made here.
+  BucketedTimeSeries<int64_t> newTs(
+      ts.firstTime(), ts.latestTime(), ts.duration(), ts.buckets());
+  verify(newTs);
+}
+
+// Reconstructing a populated timeseries must reproduce its time range,
+// duration, totals and every individual bucket.
+TEST(BucketedTimeSeries, reConstructWithValidData) {
+  // Create a 100 second timeseries with 10 buckets
+  BucketedTimeSeries<int64_t> ts(10, seconds(100));
+
+  // Add 1 value to each bucket
+  for (int n = 5; n <= 95; n += 10) {
+    ts.addValue(seconds(n), 6);
+  }
+  EXPECT_EQ(10, ts.count());
+  EXPECT_EQ(60, ts.sum());
+  EXPECT_EQ(6, ts.avg());
+
+  // Reconstruct the timeseries from the state exposed by the accessors
+  BucketedTimeSeries<int64_t> newTs(
+      ts.firstTime(), ts.latestTime(), ts.duration(), ts.buckets());
+
+  EXPECT_EQ(ts.firstTime(), newTs.firstTime());
+  EXPECT_EQ(ts.latestTime(), newTs.latestTime());
+  EXPECT_EQ(ts.duration(), newTs.duration());
+  EXPECT_EQ(ts.sum(), newTs.sum());
+  EXPECT_EQ(ts.count(), newTs.count());
+
+  // This must be a fatal assertion: the loop below indexes both bucket
+  // vectors in lock step, so continuing after a size mismatch would read
+  // past the end of the shorter one.
+  ASSERT_EQ(ts.buckets().size(), newTs.buckets().size());
+  for (size_t idx = 0; idx < ts.buckets().size(); ++idx) {
+    EXPECT_EQ(ts.buckets()[idx].sum, newTs.buckets()[idx].sum);
+    EXPECT_EQ(ts.buckets()[idx].count, newTs.buckets()[idx].count);
+  }
+}
+
+// The reconstructing constructor must reject inconsistent saved state with
+// std::invalid_argument.
+TEST(BucketedTimeSeries, reConstructWithCorruptedData) {
+  // firstTime > latestTime, yet one bucket is populated, so the total
+  // recomputed from the buckets is not 0.
+  const auto buildEmptySeriesWithData = [] {
+    std::vector<Bucket> buckets(10);
+    buckets[0].sum = 1;
+    buckets[0].count = 1;
+
+    BucketedTimeSeries<int64_t> ts(
+        mkTimePoint(1), mkTimePoint(0), Duration(10), buckets);
+  };
+  EXPECT_THROW(buildEmptySeriesWithData(), std::invalid_argument);
+
+  // latestTime - firstTime is larger than the duration passed in.
+  const auto buildSeriesLongerThanDuration = [] {
+    BucketedTimeSeries<int64_t>(
+        mkTimePoint(1),
+        mkTimePoint(100),
+        Duration(10),
+        std::vector<Bucket>(10));
+  };
+  EXPECT_THROW(buildSeriesLongerThanDuration(), std::invalid_argument);
+}
+
 namespace IntMHTS {
   enum Levels {
     MINUTE,