/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2012-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef FOLLY_STATS_BUCKETEDTIMESERIES_H_
-#define FOLLY_STATS_BUCKETEDTIMESERIES_H_
+#pragma once
#include <chrono>
#include <vector>
-#include "folly/detail/Stats.h"
+#include <folly/stats/detail/Bucket.h>
namespace folly {
+/*
+ * A helper clock type to help older code using BucketedTimeSeries with
+ * std::chrono::seconds transition to properly using clock types and time_point
+ * objects.
+ */
+template <typename TT = std::chrono::seconds>
+class LegacyStatsClock {
+ public:
+ using duration = TT;
+ using time_point = std::chrono::time_point<LegacyStatsClock, TT>;
+
+ // This clock does not actually implement now(), since the older API
+ // did not really specify what clock should be used. (In practice most
+ // callers unfortunately used wall clock time rather than a monotonic clock.)
+};
+
/*
* This class represents a bucketed time series which keeps track of values
* added in the recent past, and merges these values together into a fixed
* be discarded and new data will go into the newly opened bucket. Internally,
* it uses a circular array of buckets that it reuses as time advances.
*
- * The class assumes that time advances forward -- you can't retroactively add
- * values for events in the past -- the 'now' argument is provided for better
- * efficiency and ease of unittesting.
- *
+ * This class assumes that time advances forwards. The window of time tracked
+ * by the timeseries will advance forwards whenever a more recent timestamp is
+ * passed to addValue(). While it is possible to pass old time values to
+ * addValue(), this will never move the time window backwards. If the old time
+ * value falls outside the tracked window of time, the data point will be
+ * ignored.
*
* This class is not thread-safe -- use your own synchronization!
*/
-template <typename VT, typename TT=std::chrono::seconds>
+template <typename VT, typename CT = LegacyStatsClock<std::chrono::seconds>>
class BucketedTimeSeries {
public:
- typedef VT ValueType;
- typedef TT TimeType;
- typedef detail::Bucket<ValueType> Bucket;
+ using ValueType = VT;
+ using Clock = CT;
+ using Duration = typename Clock::duration;
+ using TimePoint = typename Clock::time_point;
+ using Bucket = detail::Bucket<ValueType>;
/*
* Create a new BucketedTimeSeries.
* and does not need the rolling buckets. The numBuckets parameter is
* ignored when duration is 0.
*/
- BucketedTimeSeries(size_t numBuckets, TimeType duration);
+ BucketedTimeSeries(size_t numBuckets, Duration duration);
+
+ /*
+ * Create a new BucketedTimeSeries.
+ *
+ * This constructor is used to reconstruct a timeseries using
+ * previously saved data
+ */
+ BucketedTimeSeries(
+ TimePoint theFirstTime,
+ TimePoint theLatestTime,
+ Duration maxDuration,
+ const std::vector<Bucket>& bucketsList);
/*
* Adds the value 'val' at time 'now'
*
- * This function expects time to always move forwards: it cannot be used to
- * add historical data points that have occurred in the past. If now is
- * older than the another timestamp that has already been passed to
- * addValue() or update(), now will be ignored and the latest timestamp will
- * be used.
+ * This function expects time to generally move forwards. The window of time
+ * tracked by this time series will move forwards with time. If 'now' is
+ * more recent than any time previously seen, addValue() will automatically
+ * call update(now) to advance the time window tracked by this data
+ * structure.
+ *
+ * Values in the recent past may be added to the data structure by passing in
+ * a slightly older value of 'now', as long as this time point still falls
+ * within the tracked duration. If 'now' is older than the tracked duration
+ * of time, the data point value will be ignored, and addValue() will return
+ * false without doing anything else.
+ *
+ * Returns true on success, or false if now was older than the tracked time
+ * window.
*/
- void addValue(TimeType now, const ValueType& val);
+ bool addValue(TimePoint now, const ValueType& val);
/*
* Adds the value 'val' the given number of 'times' at time 'now'
*/
- void addValue(TimeType now, const ValueType& val, int64_t times);
+ bool addValue(TimePoint now, const ValueType& val, uint64_t times);
/*
- * Adds the value 'sum' as the sum of 'nsamples' samples
+ * Adds the value 'total' as the sum of 'nsamples' samples
*/
- void addValueAggregated(TimeType now, const ValueType& sum, int64_t nsamples);
+ bool
+ addValueAggregated(TimePoint now, const ValueType& total, uint64_t nsamples);
/*
* Updates the container to the specified time, doing all the necessary
*
* Returns the current bucket index after the update.
*/
- size_t update(TimeType now);
+ size_t update(TimePoint now);
/*
* Reset the timeseries to an empty state,
/*
* Get the latest time that has ever been passed to update() or addValue().
+ *
+ * If no data has ever been added to this timeseries, 0 will be returned.
*/
- TimeType getLatestTime() const {
+ TimePoint getLatestTime() const {
return latestTime_;
}
+ /*
+ * Get the time of the earliest data point stored in this timeseries.
+ *
+ * If no data has ever been added to this timeseries, 0 will be returned.
+ *
+ * If isAllTime() is true, this is simply the time when the first data point
+ * was recorded.
+ *
+ * For non-all-time data, the timestamp reflects the first data point still
+ * remembered. As new data points are added, old data will be expired.
+ * getEarliestTime() returns the timestamp of the oldest bucket still present
+ * in the timeseries. This will never be older than (getLatestTime() -
+ * duration()).
+ */
+ TimePoint getEarliestTime() const;
+
/*
* Return the number of buckets.
*/
* Return the maximum duration of data that can be tracked by this
* BucketedTimeSeries.
*/
- TimeType duration() const {
+ Duration duration() const {
return duration_;
}
* ever rolling over into new buckets.
*/
bool isAllTime() const {
- return (duration_ == TimeType(0));
+ return (duration_ == Duration(0));
}
/*
return firstTime_ > latestTime_;
}
+ /*
+ * Returns time of first update() since clear()/constructor.
+ * Note that the returned value is only meaningful when empty() is false.
+ */
+ TimePoint firstTime() const {
+ return firstTime_;
+ }
+
+ /*
+ * Returns time of last update().
+ * Note that the returned value is only meaningful when empty() is false.
+ */
+ TimePoint latestTime() const {
+ return latestTime_;
+ }
+
+ /*
+ * Returns actual buckets of values
+ */
+ const std::vector<Bucket>& buckets() const {
+ return buckets_;
+ }
+
/*
* Get the amount of time tracked by this timeseries.
*
* Note that you generally should call update() before calling elapsed(), to
* make sure you are not reading stale data.
*/
- TimeType elapsed() const;
+ Duration elapsed() const;
+
+ /*
+ * Get the amount of time tracked by this timeseries, between the specified
+ * start and end times.
+ *
+ * If the timeseries contains data for the entire time range specified, this
+ * simply returns (end - start). However, if start is earlier than
+ * getEarliestTime(), this returns (end - getEarliestTime()).
+ */
+ Duration elapsed(TimePoint start, TimePoint end) const;
/*
* Return the sum of all the data points currently tracked by this
* Note that you generally should call update() before calling avg(), to
* make sure you are not reading stale data.
*/
- template <typename ReturnType=double>
+ template <typename ReturnType = double>
ReturnType avg() const {
- return total_.avg<ReturnType>();
+ return total_.template avg<ReturnType>();
}
/*
* Note that you generally should call update() before calling rate(), to
* make sure you are not reading stale data.
*/
- template <typename ReturnType=double, typename Interval=TimeType>
+ template <typename ReturnType = double, typename Interval = Duration>
ReturnType rate() const {
- return rateHelper<ReturnType, Interval>(total_.sum, elapsed());
+ return rateHelper<ReturnType, Interval>(ReturnType(total_.sum), elapsed());
}
/*
* Note that you generally should call update() before calling countRate(),
* to make sure you are not reading stale data.
*/
- template <typename ReturnType=double, typename Interval=TimeType>
+ template <typename ReturnType = double, typename Interval = Duration>
ReturnType countRate() const {
- return rateHelper<ReturnType, Interval>(total_.count, elapsed());
+ return rateHelper<ReturnType, Interval>(
+ ReturnType(total_.count), elapsed());
}
/*
*
* Note that data outside of the timeseries duration will no longer be
* available for use in the estimation. Specifying a start time earlier than
- * (getLatestTime() - elapsed()) will not have much effect, since only data
- * points after that point in time will be counted.
+ * getEarliestTime() will not have much effect, since only data points after
+ * that point in time will be counted.
*
* Note that the value returned is an estimate, and may not be precise.
*/
- ValueType sum(TimeType start, TimeType end) const;
+ ValueType sum(TimePoint start, TimePoint end) const;
/*
* Estimate the number of data points that occurred in the specified time
* period.
*
- * The same caveats documented in the sum(TimeType start, TimeType end)
+ * The same caveats documented in the sum(TimePoint start, TimePoint end)
* comments apply here as well.
*/
- uint64_t count(TimeType start, TimeType end) const;
+ uint64_t count(TimePoint start, TimePoint end) const;
/*
* Estimate the average value during the specified time period.
*
- * The same caveats documented in the sum(TimeType start, TimeType end)
+ * The same caveats documented in the sum(TimePoint start, TimePoint end)
* comments apply here as well.
*/
- template <typename ReturnType=double>
- ReturnType avg(TimeType start, TimeType end) const;
+ template <typename ReturnType = double>
+ ReturnType avg(TimePoint start, TimePoint end) const;
/*
* Estimate the rate during the specified time period.
*
- * The same caveats documented in the sum(TimeType start, TimeType end)
+ * The same caveats documented in the sum(TimePoint start, TimePoint end)
* comments apply here as well.
*/
- template <typename ReturnType=double, typename Interval=TimeType>
- ReturnType rate(TimeType start, TimeType end) const {
+ template <typename ReturnType = double, typename Interval = Duration>
+ ReturnType rate(TimePoint start, TimePoint end) const {
ValueType intervalSum = sum(start, end);
- return rateHelper<ReturnType, Interval>(intervalSum, end - start);
+ Duration interval = elapsed(start, end);
+ return rateHelper<ReturnType, Interval>(intervalSum, interval);
}
/*
* Estimate the rate of data points being added during the specified time
* period.
*
- * The same caveats documented in the sum(TimeType start, TimeType end)
+ * The same caveats documented in the sum(TimePoint start, TimePoint end)
* comments apply here as well.
*/
- template <typename ReturnType=double, typename Interval=TimeType>
- ReturnType countRate(TimeType start, TimeType end) const {
+ template <typename ReturnType = double, typename Interval = Duration>
+ ReturnType countRate(TimePoint start, TimePoint end) const {
uint64_t intervalCount = count(start, end);
- return rateHelper<ReturnType, Interval>(intervalCount, end - start);
+ Duration interval = elapsed(start, end);
+ return rateHelper<ReturnType, Interval>(
+ ReturnType(intervalCount), interval);
}
/*
* to break out of the loop and stop, without calling the function on any
* more buckets.
*
- * bool function(const Bucket& bucket, TimeType bucketStart,
- * TimeType nextBucketStart)
+ * bool function(const Bucket& bucket, TimePoint bucketStart,
+ * TimePoint nextBucketStart)
*/
template <typename Function>
void forEachBucket(Function fn) const;
*
* This method may not be called for all-time data.
*/
- size_t getBucketIdx(TimeType time) const;
+ size_t getBucketIdx(TimePoint time) const;
/*
* Get the bucket at the specified index.
*
* This method may not be called for all-time data.
*/
- void getBucketInfo(TimeType time, size_t* bucketIdx,
- TimeType* bucketStart, TimeType* nextBucketStart) const;
+ void getBucketInfo(
+ TimePoint time,
+ size_t* bucketIdx,
+ TimePoint* bucketStart,
+ TimePoint* nextBucketStart) const;
+
+ /*
+ * Legacy APIs that accept Duration parameters rather than TimePoint.
+ *
+ * These treat the Duration as relative to the clock epoch.
+ * Prefer using the correct TimePoint-based APIs instead. These APIs will
+ * eventually be deprecated and removed.
+ */
+ bool addValue(Duration now, const ValueType& val) {
+ return addValueAggregated(TimePoint(now), val, 1);
+ }
+ bool addValue(Duration now, const ValueType& val, uint64_t times) {
+ return addValueAggregated(TimePoint(now), val * ValueType(times), times);
+ }
+ bool
+ addValueAggregated(Duration now, const ValueType& total, uint64_t nsamples) {
+ return addValueAggregated(TimePoint(now), total, nsamples);
+ }
+ size_t update(Duration now) {
+ return update(TimePoint(now));
+ }
private:
- template <typename ReturnType=double, typename Interval=TimeType>
- ReturnType rateHelper(ReturnType numerator, TimeType elapsed) const {
- if (elapsed == TimeType(0)) {
- return 0;
- }
-
- // Use std::chrono::duration_cast to convert between the native
- // duration and the desired interval. However, convert the rates,
- // rather than just converting the elapsed duration. Converting the
- // elapsed time first may collapse it down to 0 if the elapsed interval
- // is less than the desired interval, which will incorrectly result in
- // an infinite rate.
- typedef std::chrono::duration<
- ReturnType, std::ratio<TimeType::period::den,
- TimeType::period::num>> NativeRate;
- typedef std::chrono::duration<
- ReturnType, std::ratio<Interval::period::den,
- Interval::period::num>> DesiredRate;
-
- NativeRate native(numerator / elapsed.count());
- DesiredRate desired = std::chrono::duration_cast<DesiredRate>(native);
- return desired.count();
+ template <typename ReturnType = double, typename Interval = Duration>
+ ReturnType rateHelper(ReturnType numerator, Duration elapsedTime) const {
+ return detail::rateHelper<ReturnType, Duration, Interval>(
+ numerator, elapsedTime);
}
- ValueType rangeAdjust(TimeType bucketStart, TimeType nextBucketStart,
- TimeType start, TimeType end,
- ValueType input) const;
+ TimePoint getEarliestTimeNonEmpty() const;
+ size_t updateBuckets(TimePoint now);
+
+ ValueType rangeAdjust(
+ TimePoint bucketStart,
+ TimePoint nextBucketStart,
+ TimePoint start,
+ TimePoint end,
+ ValueType input) const;
template <typename Function>
- void forEachBucket(TimeType start, TimeType end, Function fn) const;
+ void forEachBucket(TimePoint start, TimePoint end, Function fn) const;
- TimeType firstTime_; // time of first update() since clear()/constructor
- TimeType latestTime_; // time of last update()
- TimeType duration_; // total duration ("window length") of the time series
+ TimePoint firstTime_; // time of first update() since clear()/constructor
+ TimePoint latestTime_; // time of last update()
+ Duration duration_; // total duration ("window length") of the time series
- Bucket total_; // sum and count of everything in time series
- std::vector<Bucket> buckets_; // actual buckets of values
+ Bucket total_; // sum and count of everything in time series
+ std::vector<Bucket> buckets_; // actual buckets of values
};
-} // folly
-
-#endif // FOLLY_STATS_BUCKETEDTIMESERIES_H_
+} // namespace folly