From 8b030a19052a279d33c162a59a205dd62a724c67 Mon Sep 17 00:00:00 2001 From: Adam Simpkins Date: Thu, 15 Sep 2016 11:02:47 -0700 Subject: [PATCH] a simple first step towards using clocks properly in the stats code Summary: Update the timeseries and histogram classes to accept a clock parameter as a template parameter, instead of a time duration type. This is a first step towards transitioning the code to correctly distinguishing between time_point and duration types. This defines TimePoint and Duration type aliases, but does not start using them yet. In upcoming diffs I will start converting more APIs to correctly use TimePoint instead of just Duration. For now the default clock type is folly::LegacyStatsClock, which still uses std::chrono::seconds as the default duration. At the moment the stats code is optimized for second granularity--the addValue() code has a fast path when called in the same second as the last update. When using finer granularity durations this fast path can't be used as often. I will also send out subsequent diffs to make the code optimized for updates within the same bucket, rather than just updates with the exact same time value. Reviewed By: yfeldblum Differential Revision: D3807715 fbshipit-source-id: 77696c4f44a8d85e4d6ff84d7656fe7a9709797c --- folly/stats/BucketedTimeSeries-defs.h | 114 ++++++++++++----------- folly/stats/BucketedTimeSeries.h | 32 ++++++- folly/stats/MultiLevelTimeSeries-defs.h | 74 +++++++-------- folly/stats/MultiLevelTimeSeries.h | 14 ++- folly/stats/TimeseriesHistogram-defs.h | 116 ++++++++++++------------ folly/stats/TimeseriesHistogram.h | 24 +++-- 6 files changed, 212 insertions(+), 162 deletions(-) diff --git a/folly/stats/BucketedTimeSeries-defs.h b/folly/stats/BucketedTimeSeries-defs.h index a726f2ee..8941503c 100644 --- a/folly/stats/BucketedTimeSeries-defs.h +++ b/folly/stats/BucketedTimeSeries-defs.h @@ -22,12 +22,11 @@ namespace folly { -template -BucketedTimeSeries::BucketedTimeSeries(size_t nBuckets, - TimeType maxDuration) - : firstTime_(1), - latestTime_(0), - duration_(maxDuration) { +template +BucketedTimeSeries::BucketedTimeSeries( + size_t nBuckets, + TimeType maxDuration) + : firstTime_(1), latestTime_(0), duration_(maxDuration) { // For tracking all-time data we only use total_, and don't need to bother // with buckets_ if (!isAllTime()) { @@ -43,22 +42,24 @@ BucketedTimeSeries::BucketedTimeSeries(size_t nBuckets, } } -template -bool BucketedTimeSeries::addValue(TimeType now, const ValueType& val) { +template +bool BucketedTimeSeries::addValue(TimeType now, const ValueType& val) { return addValueAggregated(now, val, 1); } -template -bool BucketedTimeSeries::addValue(TimeType now, - const ValueType& val, - int64_t times) { +template +bool BucketedTimeSeries::addValue( + TimeType now, + const ValueType& val, + int64_t times) { return addValueAggregated(now, val * times, times); } -template -bool BucketedTimeSeries::addValueAggregated(TimeType now, - const ValueType& total, - int64_t nsamples) { +template +bool BucketedTimeSeries::addValueAggregated( + TimeType now, + const ValueType& total, + int64_t nsamples) { if (isAllTime()) { if (UNLIKELY(empty())) { firstTime_ = now; @@ -98,8 +99,8 @@ bool BucketedTimeSeries::addValueAggregated(TimeType now, return true; } -template -size_t BucketedTimeSeries::update(TimeType now) { +template +size_t BucketedTimeSeries::update(TimeType now) { if (empty()) { // This is the first data point. firstTime_ = now; @@ -121,8 +122,8 @@ size_t BucketedTimeSeries::update(TimeType now) { return updateBuckets(now); } -template -size_t BucketedTimeSeries::updateBuckets(TimeType now) { +template +size_t BucketedTimeSeries::updateBuckets(TimeType now) { // We could cache nextBucketStart as a member variable, so we don't have to // recompute it each time update() is called with a new timestamp value. // This makes things faster when update() (or addValue()) is called once @@ -172,8 +173,8 @@ size_t BucketedTimeSeries::updateBuckets(TimeType now) { } } -template -void BucketedTimeSeries::clear() { +template +void BucketedTimeSeries::clear() { for (Bucket& bucket : buckets_) { bucket.clear(); } @@ -184,9 +185,8 @@ void BucketedTimeSeries::clear() { latestTime_ = TimeType(0); } - -template -TT BucketedTimeSeries::getEarliestTime() const { +template +typename CT::duration BucketedTimeSeries::getEarliestTime() const { if (empty()) { return TimeType(0); } @@ -203,8 +203,9 @@ TT BucketedTimeSeries::getEarliestTime() const { return earliestTime; } -template -TT BucketedTimeSeries::getEarliestTimeNonEmpty() const { +template +typename CT::duration BucketedTimeSeries::getEarliestTimeNonEmpty() + const { size_t currentBucket; TimeType currentBucketStart; TimeType nextBucketStart; @@ -216,8 +217,8 @@ TT BucketedTimeSeries::getEarliestTimeNonEmpty() const { return nextBucketStart - duration_; } -template -TT BucketedTimeSeries::elapsed() const { +template +typename CT::duration BucketedTimeSeries::elapsed() const { if (empty()) { return TimeType(0); } @@ -226,8 +227,10 @@ TT BucketedTimeSeries::elapsed() const { return latestTime_ - getEarliestTime() + TimeType(1); } -template -TT BucketedTimeSeries::elapsed(TimeType start, TimeType end) const { +template +typename CT::duration BucketedTimeSeries::elapsed( + TimeType start, + TimeType end) const { if (empty()) { return TimeType(0); } @@ -237,8 +240,8 @@ TT BucketedTimeSeries::elapsed(TimeType start, TimeType end) const { return end - start; } -template -VT BucketedTimeSeries::sum(TimeType start, TimeType end) const { +template +VT BucketedTimeSeries::sum(TimeType start, TimeType end) const { ValueType total = ValueType(); forEachBucket(start, end, [&](const Bucket& bucket, TimeType bucketStart, @@ -251,8 +254,8 @@ VT BucketedTimeSeries::sum(TimeType start, TimeType end) const { return total; } -template -uint64_t BucketedTimeSeries::count(TimeType start, TimeType end) const { +template +uint64_t BucketedTimeSeries::count(TimeType start, TimeType end) const { uint64_t sample_count = 0; forEachBucket(start, end, [&](const Bucket& bucket, TimeType bucketStart, @@ -265,9 +268,9 @@ uint64_t BucketedTimeSeries::count(TimeType start, TimeType end) const { return sample_count; } -template +template template -ReturnType BucketedTimeSeries::avg(TimeType start, TimeType end) const { +ReturnType BucketedTimeSeries::avg(TimeType start, TimeType end) const { ValueType total = ValueType(); uint64_t sample_count = 0; forEachBucket(start, end, [&](const Bucket& bucket, @@ -304,8 +307,8 @@ ReturnType BucketedTimeSeries::avg(TimeType start, TimeType end) const { * into, we then divide by duration_. */ -template -size_t BucketedTimeSeries::getBucketIdx(TimeType time) const { +template +size_t BucketedTimeSeries::getBucketIdx(TimeType time) const { // For all-time data we don't use buckets_. Everything is tracked in total_. DCHECK(!isAllTime()); @@ -317,10 +320,12 @@ size_t BucketedTimeSeries::getBucketIdx(TimeType time) const { * Compute the bucket index for the specified time, as well as the earliest * time that falls into this bucket. */ -template -void BucketedTimeSeries::getBucketInfo( - TimeType time, size_t *bucketIdx, - TimeType* bucketStart, TimeType* nextBucketStart) const { +template +void BucketedTimeSeries::getBucketInfo( + TimeType time, + size_t* bucketIdx, + TimeType* bucketStart, + TimeType* nextBucketStart) const { typedef typename TimeType::rep TimeInt; DCHECK(!isAllTime()); @@ -349,9 +354,9 @@ void BucketedTimeSeries::getBucketInfo( *nextBucketStart = nextBucketStartMod + durationStart; } -template +template template -void BucketedTimeSeries::forEachBucket(Function fn) const { +void BucketedTimeSeries::forEachBucket(Function fn) const { if (isAllTime()) { fn(total_, firstTime_, latestTime_ + TimeType(1)); return; @@ -417,10 +422,13 @@ void BucketedTimeSeries::forEachBucket(Function fn) const { * For example, if the bucket spans time [10, 20), but we only care about the * range [10, 16), this will return 60% of the input value. */ -template -VT BucketedTimeSeries::rangeAdjust( - TimeType bucketStart, TimeType nextBucketStart, - TimeType start, TimeType end, ValueType input) const { +template +VT BucketedTimeSeries::rangeAdjust( + TimeType bucketStart, + TimeType nextBucketStart, + TimeType start, + TimeType end, + ValueType input) const { // If nextBucketStart is greater than latestTime_, treat nextBucketStart as // if it were latestTime_. This makes us more accurate when someone is // querying for all of the data up to latestTime_. Even though latestTime_ @@ -442,10 +450,12 @@ VT BucketedTimeSeries::rangeAdjust( (nextBucketStart - bucketStart); } -template +template template -void BucketedTimeSeries::forEachBucket(TimeType start, TimeType end, - Function fn) const { +void BucketedTimeSeries::forEachBucket( + TimeType start, + TimeType end, + Function fn) const { forEachBucket([&start, &end, &fn] (const Bucket& bucket, TimeType bucketStart, TimeType nextBucketStart) -> bool { if (start >= nextBucketStart) { diff --git a/folly/stats/BucketedTimeSeries.h b/folly/stats/BucketedTimeSeries.h index c3c2c1e4..c64053a8 100644 --- a/folly/stats/BucketedTimeSeries.h +++ b/folly/stats/BucketedTimeSeries.h @@ -23,6 +23,22 @@ namespace folly { +/* + * A helper clock type to helper older code using BucketedTimeSeries with + * std::chrono::seconds transition to properly using clock types and time_point + * objects. + */ +template +class LegacyStatsClock { + public: + using duration = TT; + using time_point = std::chrono::time_point; + + // This clock does not actually implement now(), since the older API + // did not really specify what clock should be used. (In practice most + // callers unfortuantely used wall clock time rather than a monotonic clock.) +}; + /* * This class represents a bucketed time series which keeps track of values * added in the recent past, and merges these values together into a fixed @@ -44,12 +60,18 @@ namespace folly { * * This class is not thread-safe -- use your own synchronization! */ -template +template > class BucketedTimeSeries { public: - typedef VT ValueType; - typedef TT TimeType; - typedef detail::Bucket Bucket; + using ValueType = VT; + using Clock = CT; + using Duration = typename Clock::duration; + using TimePoint = typename Clock::time_point; + // The legacy TimeType. The older code used this instead of Duration and + // TimePoint. This will eventually be removed as the code is transitioned to + // Duration and TimePoint. + using TimeType = typename Clock::duration; + using Bucket = detail::Bucket; /* * Create a new BucketedTimeSeries. @@ -61,7 +83,7 @@ class BucketedTimeSeries { * and does not need the rolling buckets. The numBuckets parameter is * ignored when duration is 0. */ - BucketedTimeSeries(size_t numBuckets, TimeType duration); + BucketedTimeSeries(size_t numBuckets, Duration duration); /* * Adds the value 'val' at time 'now' diff --git a/folly/stats/MultiLevelTimeSeries-defs.h b/folly/stats/MultiLevelTimeSeries-defs.h index 763aaebe..c3ddba89 100644 --- a/folly/stats/MultiLevelTimeSeries-defs.h +++ b/folly/stats/MultiLevelTimeSeries-defs.h @@ -21,30 +21,28 @@ namespace folly { -template -MultiLevelTimeSeries::MultiLevelTimeSeries( - size_t nBuckets, - size_t nLevels, - const TimeType levelDurations[]) - : cachedTime_(0), - cachedSum_(0), - cachedCount_(0) { - CHECK_GT(nLevels, 0); - CHECK(levelDurations); +template +MultiLevelTimeSeries::MultiLevelTimeSeries( + size_t nBuckets, + size_t nLevels, + const TimeType levelDurations[]) + : cachedTime_(0), cachedSum_(0), cachedCount_(0) { + CHECK_GT(nLevels, 0); + CHECK(levelDurations); - levels_.reserve(nLevels); - for (size_t i = 0; i < nLevels; ++i) { - if (levelDurations[i] == TT(0)) { - CHECK_EQ(i, nLevels - 1); - } else if (i > 0) { - CHECK(levelDurations[i-1] < levelDurations[i]); - } - levels_.emplace_back(nBuckets, levelDurations[i]); + levels_.reserve(nLevels); + for (size_t i = 0; i < nLevels; ++i) { + if (levelDurations[i] == Duration(0)) { + CHECK_EQ(i, nLevels - 1); + } else if (i > 0) { + CHECK(levelDurations[i - 1] < levelDurations[i]); } + levels_.emplace_back(nBuckets, levelDurations[i]); + } } -template -MultiLevelTimeSeries::MultiLevelTimeSeries( +template +MultiLevelTimeSeries::MultiLevelTimeSeries( size_t nBuckets, std::initializer_list durations) : cachedTime_(0), cachedSum_(0), cachedCount_(0) { @@ -54,7 +52,7 @@ MultiLevelTimeSeries::MultiLevelTimeSeries( int i = 0; TimeType prev; for (auto dur : durations) { - if (dur == TT(0)) { + if (dur == Duration(0)) { CHECK_EQ(i, durations.size() - 1); } else if (i > 0) { CHECK(prev < dur); @@ -65,24 +63,26 @@ MultiLevelTimeSeries::MultiLevelTimeSeries( } } -template -void MultiLevelTimeSeries::addValue( +template +void MultiLevelTimeSeries::addValue( TimeType now, const ValueType& val) { addValueAggregated(now, val, 1); } -template -void MultiLevelTimeSeries::addValue(TimeType now, - const ValueType& val, - int64_t times) { +template +void MultiLevelTimeSeries::addValue( + TimeType now, + const ValueType& val, + int64_t times) { addValueAggregated(now, val * times, times); } -template -void MultiLevelTimeSeries::addValueAggregated(TimeType now, - const ValueType& total, - int64_t nsamples) { +template +void MultiLevelTimeSeries::addValueAggregated( + TimeType now, + const ValueType& total, + int64_t nsamples) { if (cachedTime_ != now) { flush(); cachedTime_ = now; @@ -91,16 +91,16 @@ void MultiLevelTimeSeries::addValueAggregated(TimeType now, cachedCount_ += nsamples; } -template -void MultiLevelTimeSeries::update(TimeType now) { +template +void MultiLevelTimeSeries::update(TimeType now) { flush(); for (size_t i = 0; i < levels_.size(); ++i) { levels_[i].update(now); } } -template -void MultiLevelTimeSeries::flush() { +template +void MultiLevelTimeSeries::flush() { // update all the underlying levels if (cachedCount_ > 0) { for (size_t i = 0; i < levels_.size(); ++i) { @@ -111,8 +111,8 @@ void MultiLevelTimeSeries::flush() { } } -template -void MultiLevelTimeSeries::clear() { +template +void MultiLevelTimeSeries::clear() { for (auto & level : levels_) { level.clear(); } diff --git a/folly/stats/MultiLevelTimeSeries.h b/folly/stats/MultiLevelTimeSeries.h index f5a9d5a6..7a43f175 100644 --- a/folly/stats/MultiLevelTimeSeries.h +++ b/folly/stats/MultiLevelTimeSeries.h @@ -49,12 +49,18 @@ namespace folly { * * The class is not thread-safe -- use your own synchronization! */ -template +template > class MultiLevelTimeSeries { public: - typedef VT ValueType; - typedef TT TimeType; - typedef folly::BucketedTimeSeries Level; + using ValueType = VT; + using Clock = CT; + using Duration = typename Clock::duration; + using TimePoint = typename Clock::time_point; + // The legacy TimeType. The older code used this instead of Duration and + // TimePoint. This will eventually be removed as the code is transitioned to + // Duration and TimePoint. + using TimeType = typename Clock::duration; + using Level = folly::BucketedTimeSeries; /* * Create a new MultiLevelTimeSeries. diff --git a/folly/stats/TimeseriesHistogram-defs.h b/folly/stats/TimeseriesHistogram-defs.h index f41b749e..3594bec4 100644 --- a/folly/stats/TimeseriesHistogram-defs.h +++ b/folly/stats/TimeseriesHistogram-defs.h @@ -23,34 +23,37 @@ namespace folly { -template -TimeseriesHistogram::TimeseriesHistogram(ValueType bucketSize, - ValueType min, - ValueType max, - const ContainerType& copyMe) - : buckets_(bucketSize, min, max, copyMe), - haveNotSeenValue_(true), - singleUniqueValue_(false) { -} - -template -void TimeseriesHistogram::addValue(TimeType now, - const ValueType& value) { +template +TimeseriesHistogram::TimeseriesHistogram( + ValueType bucketSize, + ValueType min, + ValueType max, + const ContainerType& copyMe) + : buckets_(bucketSize, min, max, copyMe), + haveNotSeenValue_(true), + singleUniqueValue_(false) {} + +template +void TimeseriesHistogram::addValue( + TimeType now, + const ValueType& value) { buckets_.getByValue(value).addValue(now, value); maybeHandleSingleUniqueValue(value); } -template -void TimeseriesHistogram::addValue(TimeType now, - const ValueType& value, - int64_t times) { +template +void TimeseriesHistogram::addValue( + TimeType now, + const ValueType& value, + int64_t times) { buckets_.getByValue(value).addValue(now, value, times); maybeHandleSingleUniqueValue(value); } -template -void TimeseriesHistogram::addValues( - TimeType now, const folly::Histogram& hist) { +template +void TimeseriesHistogram::addValues( + TimeType now, + const folly::Histogram& hist) { CHECK_EQ(hist.getMin(), getMin()); CHECK_EQ(hist.getMax(), getMax()); CHECK_EQ(hist.getBucketSize(), getBucketSize()); @@ -68,9 +71,9 @@ void TimeseriesHistogram::addValues( singleUniqueValue_ = false; } -template -void TimeseriesHistogram::maybeHandleSingleUniqueValue( - const ValueType& value) { +template +void TimeseriesHistogram::maybeHandleSingleUniqueValue( + const ValueType& value) { if (haveNotSeenValue_) { firstValue_ = value; singleUniqueValue_ = true; @@ -82,9 +85,9 @@ void TimeseriesHistogram::maybeHandleSingleUniqueValue( } } -template -T TimeseriesHistogram::getPercentileEstimate(double pct, - int level) const { +template +T TimeseriesHistogram::getPercentileEstimate(double pct, int level) + const { if (singleUniqueValue_) { return firstValue_; } @@ -93,10 +96,11 @@ T TimeseriesHistogram::getPercentileEstimate(double pct, AvgFromLevel(level)); } -template -T TimeseriesHistogram::getPercentileEstimate(double pct, - TimeType start, - TimeType end) const { +template +T TimeseriesHistogram::getPercentileEstimate( + double pct, + TimeType start, + TimeType end) const { if (singleUniqueValue_) { return firstValue_; } @@ -106,38 +110,37 @@ T TimeseriesHistogram::getPercentileEstimate(double pct, AvgFromInterval(start, end)); } -template -int TimeseriesHistogram::getPercentileBucketIdx( - double pct, - int level -) const { +template +int TimeseriesHistogram::getPercentileBucketIdx(double pct, int level) + const { return buckets_.getPercentileBucketIdx(pct / 100.0, CountFromLevel(level)); } -template -int TimeseriesHistogram::getPercentileBucketIdx(double pct, - TimeType start, - TimeType end) const { +template +int TimeseriesHistogram::getPercentileBucketIdx( + double pct, + TimeType start, + TimeType end) const { return buckets_.getPercentileBucketIdx(pct / 100.0, CountFromInterval(start, end)); } -template -void TimeseriesHistogram::clear() { +template +void TimeseriesHistogram::clear() { for (size_t i = 0; i < buckets_.getNumBuckets(); i++) { buckets_.getByIndex(i).clear(); } } -template -void TimeseriesHistogram::update(TimeType now) { +template +void TimeseriesHistogram::update(TimeType now) { for (size_t i = 0; i < buckets_.getNumBuckets(); i++) { buckets_.getByIndex(i).update(now); } } -template -std::string TimeseriesHistogram::getString(int level) const { +template +std::string TimeseriesHistogram::getString(int level) const { std::string result; for (size_t i = 0; i < buckets_.getNumBuckets(); i++) { @@ -153,9 +156,10 @@ std::string TimeseriesHistogram::getString(int level) const { return result; } -template -std::string TimeseriesHistogram::getString(TimeType start, - TimeType end) const { +template +std::string TimeseriesHistogram::getString( + TimeType start, + TimeType end) const { std::string result; for (size_t i = 0; i < buckets_.getNumBuckets(); i++) { @@ -171,8 +175,8 @@ std::string TimeseriesHistogram::getString(TimeType start, return result; } -template -void TimeseriesHistogram::computeAvgData( +template +void TimeseriesHistogram::computeAvgData( ValueType* total, int64_t* nsamples, int level) const { @@ -183,8 +187,8 @@ void TimeseriesHistogram::computeAvgData( } } -template -void TimeseriesHistogram::computeAvgData( +template +void TimeseriesHistogram::computeAvgData( ValueType* total, int64_t* nsamples, TimeType start, @@ -196,8 +200,8 @@ void TimeseriesHistogram::computeAvgData( } } -template -void TimeseriesHistogram::computeRateData( +template +void TimeseriesHistogram::computeRateData( ValueType* total, TimeType* elapsed, int level) const { @@ -208,8 +212,8 @@ void TimeseriesHistogram::computeRateData( } } -template -void TimeseriesHistogram::computeRateData( +template +void TimeseriesHistogram::computeRateData( ValueType* total, TimeType* elapsed, TimeType start, diff --git a/folly/stats/TimeseriesHistogram.h b/folly/stats/TimeseriesHistogram.h index d5ce089d..71471887 100644 --- a/folly/stats/TimeseriesHistogram.h +++ b/folly/stats/TimeseriesHistogram.h @@ -47,20 +47,28 @@ namespace folly { * The memory usage for a typical histogram is roughly 3k * (# of buckets). All * insertion operations are amortized O(1), and all queries are O(# of buckets). */ -template > +template < + class T, + class CT = LegacyStatsClock, + class C = folly::MultiLevelTimeSeries> class TimeseriesHistogram { private: // NOTE: T must be equivalent to _signed_ numeric type for our math. static_assert(std::numeric_limits::is_signed, ""); public: - // values to be inserted into container - typedef T ValueType; - // the container type we use internally for each bucket - typedef C ContainerType; - // The time type. - typedef TT TimeType; + // Values to be inserted into container + using ValueType = T; + // The container type we use internally for each bucket + using ContainerType = C; + // Clock, duration, and time point types + using Clock = CT; + using Duration = typename Clock::duration; + using TimePoint = typename Clock::time_point; + // The legacy TimeType. The older code used this instead of Duration and + // TimePoint. This will eventually be removed as the code is transitioned to + // Duration and TimePoint. + using TimeType = typename Clock::duration; /* * Create a TimeSeries histogram and initialize the bucketing and levels. -- 2.34.1