From: Adam Simpkins Date: Wed, 17 Apr 2013 01:32:23 +0000 (-0700) Subject: Make BucketedTimeSeries::rate() more accurate X-Git-Tag: v0.22.0~997 X-Git-Url: http://plrg.eecs.uci.edu/git/?p=folly.git;a=commitdiff_plain;h=e9bf39f0e3d57672e03d01a788f097c76405362e;ds=sidebyside Make BucketedTimeSeries::rate() more accurate Summary: Make rate() and countRate() more accurate when queried for a specific time range. Previously these functions divided the estimated sum/count by the entire time range specified. This underestimated the rate if we don't actually have data for the entire time period. (Since the sum computed only takes into account the time range for which we have data.) For example, if the timeseries duration was 60 seconds, but only 30 seconds of data had been entered so far, rate(now - 60, now) would underestimate the rate by half, since there was only 30 seconds worth of data available. The no-argument version of rate() did work correctly in that case. Test Plan: Added a new unit test for this behavior. Also fixed the existing rate test code, which had the same bug and expected the underestimated rate. Reviewed By: delong.j@fb.com FB internal diff: D778114 --- diff --git a/folly/stats/BucketedTimeSeries-defs.h b/folly/stats/BucketedTimeSeries-defs.h index a4d7c438..e5892e62 100644 --- a/folly/stats/BucketedTimeSeries-defs.h +++ b/folly/stats/BucketedTimeSeries-defs.h @@ -161,13 +161,12 @@ void BucketedTimeSeries::clear() { template -TT BucketedTimeSeries::elapsed() const { +TT BucketedTimeSeries::getEarliestTime() const { if (empty()) { return TimeType(0); } - if (isAllTime()) { - return latestTime_ - firstTime_ + TimeType(1); + return firstTime_; } size_t currentBucket; @@ -183,7 +182,28 @@ TT BucketedTimeSeries::elapsed() const { // We're never tracking data before firstTime_ earliestTime = std::max(earliestTime, firstTime_); - return latestTime_ - earliestTime + TimeType(1); + return earliestTime; +} + +template +TT BucketedTimeSeries::elapsed() const { + if (empty()) { + return TimeType(0); + } + + // Add 1 since [latestTime_, earliestTime] is an inclusive interval. + return latestTime_ - getEarliestTime() + TimeType(1); +} + +template +TT BucketedTimeSeries::elapsed(TimeType start, TimeType end) const { + if (empty()) { + return TimeType(0); + } + start = std::max(start, getEarliestTime()); + end = std::min(end, latestTime_ + TimeType(1)); + end = std::max(start, end); + return end - start; } template diff --git a/folly/stats/BucketedTimeSeries.h b/folly/stats/BucketedTimeSeries.h index ef852b53..1c8a4ab0 100644 --- a/folly/stats/BucketedTimeSeries.h +++ b/folly/stats/BucketedTimeSeries.h @@ -104,11 +104,29 @@ class BucketedTimeSeries { /* * Get the latest time that has ever been passed to update() or addValue(). + * + * If no data has ever been added to this timeseries, 0 will be returned. */ TimeType getLatestTime() const { return latestTime_; } + /* + * Get the time of the earliest data point stored in this timeseries. + * + * If no data has ever been added to this timeseries, 0 will be returned. + * + * If isAllTime() is true, this is simply the time when the first data point + * was recorded. + * + * For non-all-time data, the timestamp reflects the first data point still + * remembered. As new data points are added, old data will be expired. + * getEarliestTime() returns the timestamp of the oldest bucket still present + * in the timeseries. This will never be older than (getLatestTime() - + * duration()). + */ + TimeType getEarliestTime() const; + /* * Return the number of buckets. */ @@ -170,6 +188,16 @@ class BucketedTimeSeries { */ TimeType elapsed() const; + /* + * Get the amount of time tracked by this timeseries, between the specified + * start and end times. + * + * If the timeseries contains data for the entire time range specified, this + * simply returns (end - start). However, if start is earlier than + * getEarliestTime(), this returns (end - getEarliestTime()). + */ + TimeType elapsed(TimeType start, TimeType end) const; + /* * Return the sum of all the data points currently tracked by this * BucketedTimeSeries. @@ -243,8 +271,8 @@ class BucketedTimeSeries { * * Note that data outside of the timeseries duration will no longer be * available for use in the estimation. Specifying a start time earlier than - * (getLatestTime() - elapsed()) will not have much effect, since only data - * points after that point in time will be counted. + * getEarliestTime() will not have much effect, since only data points after + * that point in time will be counted. * * Note that the value returned is an estimate, and may not be precise. */ @@ -277,7 +305,8 @@ class BucketedTimeSeries { template ReturnType rate(TimeType start, TimeType end) const { ValueType intervalSum = sum(start, end); - return rateHelper(intervalSum, end - start); + TimeType interval = elapsed(start, end); + return rateHelper(intervalSum, interval); } /* @@ -290,7 +319,8 @@ class BucketedTimeSeries { template ReturnType countRate(TimeType start, TimeType end) const { uint64_t intervalCount = count(start, end); - return rateHelper(intervalCount, end - start); + TimeType interval = elapsed(start, end); + return rateHelper(intervalCount, interval); } /* diff --git a/folly/test/TimeseriesTest.cpp b/folly/test/TimeseriesTest.cpp index 7972eda7..89347a11 100644 --- a/folly/test/TimeseriesTest.cpp +++ b/folly/test/TimeseriesTest.cpp @@ -600,6 +600,7 @@ TEST(BucketedTimeSeries, queryByInterval) { for (unsigned int i = kDuration; i < kDuration + 3; ++i) { b.addValue(seconds(i), i); } + EXPECT_EQ(seconds(4), b.getEarliestTime()); // Current bucket state: // 0: time=[6, 8): values=(6, 7), sum=13, count=2 @@ -644,10 +645,69 @@ TEST(BucketedTimeSeries, queryByInterval) { "i=" << i << ", j=" << j << ", interval=[" << start.count() << ", " << end.count() << ")"; - double expectedRate = j ? expectedSum / j : 0; + seconds dataStart = std::max(start, b.getEarliestTime()); + seconds dataEnd = std::max(end, dataStart); + seconds expectedInterval = dataEnd - dataStart; + EXPECT_EQ(expectedInterval, b.elapsed(start, end)) << + "i=" << i << ", j=" << j << + ", interval=[" << start.count() << ", " << end.count() << ")"; + + double expectedRate = expectedInterval.count() ? + expectedSum / expectedInterval.count() : 0; EXPECT_EQ(expectedRate, b.rate(start, end)) << "i=" << i << ", j=" << j << ", interval=[" << start.count() << ", " << end.count() << ")"; } } } + +TEST(BucketedTimeSeries, rateByInterval) { + const int kNumBuckets = 5; + const seconds kDuration(10); + BucketedTimeSeries b(kNumBuckets, kDuration); + + // Add data points at a constant rate of 10 per second. + // Start adding data points at kDuration, and fill half of the buckets for + // now. + seconds start = kDuration; + seconds end = kDuration + (kDuration / 2); + const double kFixedRate = 10.0; + for (seconds i = start; i < end; ++i) { + b.addValue(i, kFixedRate); + } + + // Querying the rate should yield kFixedRate. + EXPECT_EQ(kFixedRate, b.rate()); + EXPECT_EQ(kFixedRate, b.rate(start, end)); + EXPECT_EQ(kFixedRate, b.rate(start, start + kDuration)); + EXPECT_EQ(kFixedRate, b.rate(end - kDuration, end)); + EXPECT_EQ(kFixedRate, b.rate(end - seconds(1), end)); + // We have been adding 1 data point per second, so countRate() + // should be 1. + EXPECT_EQ(1.0, b.countRate()); + EXPECT_EQ(1.0, b.countRate(start, end)); + EXPECT_EQ(1.0, b.countRate(start, start + kDuration)); + EXPECT_EQ(1.0, b.countRate(end - kDuration, end)); + EXPECT_EQ(1.0, b.countRate(end - seconds(1), end)); + + // We haven't added anything before time kDuration. + // Querying data earlier than this should result in a rate of 0. + EXPECT_EQ(0.0, b.rate(seconds(0), seconds(1))); + EXPECT_EQ(0.0, b.countRate(seconds(0), seconds(1))); + + // Fill the remainder of the timeseries from kDuration to kDuration*2 + start = end; + end = kDuration * 2; + for (seconds i = start; i < end; ++i) { + b.addValue(i, kFixedRate); + } + + EXPECT_EQ(kFixedRate, b.rate()); + EXPECT_EQ(kFixedRate, b.rate(kDuration, kDuration * 2)); + EXPECT_EQ(kFixedRate, b.rate(seconds(0), kDuration * 2)); + EXPECT_EQ(kFixedRate, b.rate(seconds(0), kDuration * 10)); + EXPECT_EQ(1.0, b.countRate()); + EXPECT_EQ(1.0, b.countRate(kDuration, kDuration * 2)); + EXPECT_EQ(1.0, b.countRate(seconds(0), kDuration * 2)); + EXPECT_EQ(1.0, b.countRate(seconds(0), kDuration * 10)); +}