2 * Copyright 2015 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_STATS_BUCKETEDTIMESERIES_H_
18 #define FOLLY_STATS_BUCKETEDTIMESERIES_H_
23 #include <folly/detail/Stats.h>
28 * This class represents a bucketed time series which keeps track of values
29 * added in the recent past, and merges these values together into a fixed
30 * number of buckets to keep a lid on memory use if the number of values
31 * added is very large.
33 * For example, a BucketedTimeSeries() with duration == 60s and 10 buckets
34 * will keep track of 10 6-second buckets, and discard all data added more
35 * than 1 minute ago. As time ticks by, a 6-second bucket at a time will
36 * be discarded and new data will go into the newly opened bucket. Internally,
37 * it uses a circular array of buckets that it reuses as time advances.
39 * This class assumes that time advances forwards. The window of time tracked
40 * by the timeseries will advance forwards whenever a more recent timestamp is
41 * passed to addValue(). While it is possible to pass old time values to
42 * addValue(), this will never move the time window backwards. If the old time
43 * value falls outside the tracked window of time, the data point will be
46 * This class is not thread-safe -- use your own synchronization!
48 template <typename VT, typename TT=std::chrono::seconds>
49 class BucketedTimeSeries {
53 typedef detail::Bucket<ValueType> Bucket;
56 * Create a new BucketedTimeSeries.
58 * This creates a new BucketedTimeSeries with the specified number of
59 * buckets, storing data for the specified amount of time.
61 * If the duration is 0, the BucketedTimeSeries will track data forever,
62 * and does not need the rolling buckets. The numBuckets parameter is
63 * ignored when duration is 0.
65 BucketedTimeSeries(size_t numBuckets, TimeType duration);
68 * Adds the value 'val' at time 'now'
70 * This function expects time to generally move forwards. The window of time
71 * tracked by this time series will move forwards with time. If 'now' is
72 * more recent than any time previously seen, addValue() will automatically
73 * call update(now) to advance the time window tracked by this data
76 * Values in the recent past may be added to the data structure by passing in
77 * a slightly older value of 'now', as long as this time point still falls
78 * within the tracked duration. If 'now' is older than the tracked duration
79 * of time, the data point value will be ignored, and addValue() will return
80 * false without doing anything else.
82 * Returns true on success, or false if now was older than the tracked time
85 bool addValue(TimeType now, const ValueType& val);
88 * Adds the value 'val' the given number of 'times' at time 'now'
90 bool addValue(TimeType now, const ValueType& val, int64_t times);
93 * Adds the value 'sum' as the sum of 'nsamples' samples
95 bool addValueAggregated(TimeType now, const ValueType& sum, int64_t nsamples);
98 * Updates the container to the specified time, doing all the necessary
99 * work to rotate the buckets and remove any stale data points.
101 * The addValue() methods automatically call update() when adding new data
102 * points. However, when reading data from the timeseries, you should make
103 * sure to manually call update() before accessing the data. Otherwise you
104 * may be reading stale data if update() has not been called recently.
106 * Returns the current bucket index after the update.
108 size_t update(TimeType now);
111 * Reset the timeseries to an empty state,
112 * as if no data points have ever been added to it.
117 * Get the latest time that has ever been passed to update() or addValue().
119 * If no data has ever been added to this timeseries, 0 will be returned.
121 TimeType getLatestTime() const {
126 * Get the time of the earliest data point stored in this timeseries.
128 * If no data has ever been added to this timeseries, 0 will be returned.
130 * If isAllTime() is true, this is simply the time when the first data point
133 * For non-all-time data, the timestamp reflects the first data point still
134 * remembered. As new data points are added, old data will be expired.
135 * getEarliestTime() returns the timestamp of the oldest bucket still present
136 * in the timeseries. This will never be older than (getLatestTime() -
139 TimeType getEarliestTime() const;
142 * Return the number of buckets.
144 size_t numBuckets() const {
145 return buckets_.size();
149 * Return the maximum duration of data that can be tracked by this
150 * BucketedTimeSeries.
152 TimeType duration() const {
157 * Returns true if this BucketedTimeSeries stores data for all-time, without
158 * ever rolling over into new buckets.
160 bool isAllTime() const {
161 return (duration_ == TimeType(0));
165 * Returns true if no calls to update() have been made since the last call to
169 // We set firstTime_ greater than latestTime_ in the constructor and in
170 // clear, so we use this to distinguish if the timeseries is empty.
172 // Once a data point has been added, latestTime_ will always be greater
173 // than or equal to firstTime_.
174 return firstTime_ > latestTime_;
178 * Get the amount of time tracked by this timeseries.
180 * For an all-time timeseries, this returns the length of time since the
181 * first data point was added to the time series.
183 * Otherwise, this never returns a value greater than the overall timeseries
184 * duration. If the first data point was recorded less than a full duration
185 * ago, the time since the first data point is returned. If a full duration
186 * has elapsed, and we have already thrown away some data, the time since the
187 * oldest bucket is returned.
189 * For example, say we are tracking 600 seconds worth of data, in 60 buckets.
190 * - If less than 600 seconds have elapsed since the first data point,
191 * elapsed() returns the total elapsed time so far.
192 * - If more than 600 seconds have elapsed, we have already thrown away some
193 * data. However, we throw away a full bucket (10 seconds worth) at once,
194 * so at any point in time we have from 590 to 600 seconds worth of data.
195 * elapsed() will therefore return a value between 590 and 600.
197 * Note that you generally should call update() before calling elapsed(), to
198 * make sure you are not reading stale data.
200 TimeType elapsed() const;
203 * Get the amount of time tracked by this timeseries, between the specified
204 * start and end times.
206 * If the timeseries contains data for the entire time range specified, this
207 * simply returns (end - start). However, if start is earlier than
208 * getEarliestTime(), this returns (end - getEarliestTime()).
210 TimeType elapsed(TimeType start, TimeType end) const;
213 * Return the sum of all the data points currently tracked by this
214 * BucketedTimeSeries.
216 * Note that you generally should call update() before calling sum(), to
217 * make sure you are not reading stale data.
219 const ValueType& sum() const {
224 * Return the number of data points currently tracked by this
225 * BucketedTimeSeries.
227 * Note that you generally should call update() before calling count(), to
228 * make sure you are not reading stale data.
230 uint64_t count() const {
235 * Return the average value (sum / count).
237 * The return type may be specified to control whether floating-point or
238 * integer division should be performed.
240 * Note that you generally should call update() before calling avg(), to
241 * make sure you are not reading stale data.
243 template <typename ReturnType=double>
244 ReturnType avg() const {
245 return total_.template avg<ReturnType>();
249 * Return the sum divided by the elapsed time.
251 * Note that you generally should call update() before calling rate(), to
252 * make sure you are not reading stale data.
254 template <typename ReturnType=double, typename Interval=TimeType>
255 ReturnType rate() const {
256 return rateHelper<ReturnType, Interval>(total_.sum, elapsed());
260 * Return the count divided by the elapsed time.
262 * The Interval template parameter causes the elapsed time to be converted to
263 * the Interval type before using it. For example, if Interval is
264 * std::chrono::seconds, the return value will be the count per second.
265 * If Interval is std::chrono::hours, the return value will be the count per
268 * Note that you generally should call update() before calling countRate(),
269 * to make sure you are not reading stale data.
271 template <typename ReturnType=double, typename Interval=TimeType>
272 ReturnType countRate() const {
273 return rateHelper<ReturnType, Interval>(total_.count, elapsed());
277 * Estimate the sum of the data points that occurred in the specified time
280 * The range queried is [start, end).
281 * That is, start is inclusive, and end is exclusive.
283 * Note that data outside of the timeseries duration will no longer be
284 * available for use in the estimation. Specifying a start time earlier than
285 * getEarliestTime() will not have much effect, since only data points after
286 * that point in time will be counted.
288 * Note that the value returned is an estimate, and may not be precise.
290 ValueType sum(TimeType start, TimeType end) const;
293 * Estimate the number of data points that occurred in the specified time
296 * The same caveats documented in the sum(TimeType start, TimeType end)
297 * comments apply here as well.
299 uint64_t count(TimeType start, TimeType end) const;
302 * Estimate the average value during the specified time period.
304 * The same caveats documented in the sum(TimeType start, TimeType end)
305 * comments apply here as well.
307 template <typename ReturnType=double>
308 ReturnType avg(TimeType start, TimeType end) const;
311 * Estimate the rate during the specified time period.
313 * The same caveats documented in the sum(TimeType start, TimeType end)
314 * comments apply here as well.
316 template <typename ReturnType=double, typename Interval=TimeType>
317 ReturnType rate(TimeType start, TimeType end) const {
318 ValueType intervalSum = sum(start, end);
319 TimeType interval = elapsed(start, end);
320 return rateHelper<ReturnType, Interval>(intervalSum, interval);
324 * Estimate the rate of data points being added during the specified time
327 * The same caveats documented in the sum(TimeType start, TimeType end)
328 * comments apply here as well.
330 template <typename ReturnType=double, typename Interval=TimeType>
331 ReturnType countRate(TimeType start, TimeType end) const {
332 uint64_t intervalCount = count(start, end);
333 TimeType interval = elapsed(start, end);
334 return rateHelper<ReturnType, Interval>(intervalCount, interval);
338 * Invoke a function for each bucket.
340 * The function will take as arguments the bucket index,
341 * the bucket start time, and the start time of the subsequent bucket.
343 * It should return true to continue iterating through the buckets, and false
344 * to break out of the loop and stop, without calling the function on any
347 * bool function(const Bucket& bucket, TimeType bucketStart,
348 * TimeType nextBucketStart)
350 template <typename Function>
351 void forEachBucket(Function fn) const;
354 * Get the index for the bucket containing the specified time.
356 * Note that the index is only valid if this time actually falls within one
357 * of the current buckets. If you pass in a value more recent than
358 * getLatestTime() or older than (getLatestTime() - elapsed()), the index
359 * returned will not be valid.
361 * This method may not be called for all-time data.
363 size_t getBucketIdx(TimeType time) const;
366 * Get the bucket at the specified index.
368 * This method may not be called for all-time data.
370 const Bucket& getBucketByIndex(size_t idx) const {
371 return buckets_[idx];
375 * Compute the bucket index that the specified time falls into,
376 * as well as the bucket start time and the next bucket's start time.
378 * This method may not be called for all-time data.
380 void getBucketInfo(TimeType time, size_t* bucketIdx,
381 TimeType* bucketStart, TimeType* nextBucketStart) const;
384 template <typename ReturnType=double, typename Interval=TimeType>
385 ReturnType rateHelper(ReturnType numerator, TimeType elapsedTime) const {
386 return detail::rateHelper<ReturnType, TimeType, Interval>(numerator,
390 TimeType getEarliestTimeNonEmpty() const;
391 size_t updateBuckets(TimeType now);
393 ValueType rangeAdjust(TimeType bucketStart, TimeType nextBucketStart,
394 TimeType start, TimeType end,
395 ValueType input) const;
397 template <typename Function>
398 void forEachBucket(TimeType start, TimeType end, Function fn) const;
400 TimeType firstTime_; // time of first update() since clear()/constructor
401 TimeType latestTime_; // time of last update()
402 TimeType duration_; // total duration ("window length") of the time series
404 Bucket total_; // sum and count of everything in time series
405 std::vector<Bucket> buckets_; // actual buckets of values
410 #endif // FOLLY_STATS_BUCKETEDTIMESERIES_H_