2 * Copyright 2013 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_STATS_BUCKETEDTIMESERIES_H_
18 #define FOLLY_STATS_BUCKETEDTIMESERIES_H_
23 #include "folly/detail/Stats.h"
28 * This class represents a bucketed time series which keeps track of values
29 * added in the recent past, and merges these values together into a fixed
30 * number of buckets to keep a lid on memory use if the number of values
31 * added is very large.
33 * For example, a BucketedTimeSeries() with duration == 60s and 10 buckets
34 * will keep track of 10 6-second buckets, and discard all data added more
35 * than 1 minute ago. As time ticks by, a 6-second bucket at a time will
36 * be discarded and new data will go into the newly opened bucket. Internally,
37 * it uses a circular array of buckets that it reuses as time advances.
39 * The class assumes that time advances forward -- you can't retroactively add
40 * values for events in the past -- the 'now' argument is provided for better
41 * efficiency and ease of unittesting.
44 * This class is not thread-safe -- use your own synchronization!
46 template <typename VT, typename TT=std::chrono::seconds>
47 class BucketedTimeSeries {
51 typedef detail::Bucket<ValueType> Bucket;
54 * Create a new BucketedTimeSeries.
56 * This creates a new BucketedTimeSeries with the specified number of
57 * buckets, storing data for the specified amount of time.
59 * If the duration is 0, the BucketedTimeSeries will track data forever,
60 * and does not need the rolling buckets. The numBuckets parameter is
61 * ignored when duration is 0.
63 BucketedTimeSeries(size_t numBuckets, TimeType duration);
66 * Adds the value 'val' at time 'now'
68 * This function expects time to always move forwards: it cannot be used to
69 * add historical data points that have occurred in the past. If now is
70 * older than the another timestamp that has already been passed to
71 * addValue() or update(), now will be ignored and the latest timestamp will
74 void addValue(TimeType now, const ValueType& val);
77 * Adds the value 'val' the given number of 'times' at time 'now'
79 void addValue(TimeType now, const ValueType& val, int64_t times);
82 * Adds the value 'sum' as the sum of 'nsamples' samples
84 void addValueAggregated(TimeType now, const ValueType& sum, int64_t nsamples);
87 * Updates the container to the specified time, doing all the necessary
88 * work to rotate the buckets and remove any stale data points.
90 * The addValue() methods automatically call update() when adding new data
91 * points. However, when reading data from the timeseries, you should make
92 * sure to manually call update() before accessing the data. Otherwise you
93 * may be reading stale data if update() has not been called recently.
95 * Returns the current bucket index after the update.
97 size_t update(TimeType now);
100 * Reset the timeseries to an empty state,
101 * as if no data points have ever been added to it.
106 * Get the latest time that has ever been passed to update() or addValue().
108 TimeType getLatestTime() const {
113 * Return the number of buckets.
115 size_t numBuckets() const {
116 return buckets_.size();
120 * Return the maximum duration of data that can be tracked by this
121 * BucketedTimeSeries.
123 TimeType duration() const {
128 * Returns true if this BucketedTimeSeries stores data for all-time, without
129 * ever rolling over into new buckets.
131 bool isAllTime() const {
132 return (duration_ == TimeType(0));
136 * Returns true if no calls to update() have been made since the last call to
140 // We set firstTime_ greater than latestTime_ in the constructor and in
141 // clear, so we use this to distinguish if the timeseries is empty.
143 // Once a data point has been added, latestTime_ will always be greater
144 // than or equal to firstTime_.
145 return firstTime_ > latestTime_;
149 * Get the amount of time tracked by this timeseries.
151 * For an all-time timeseries, this returns the length of time since the
152 * first data point was added to the time series.
154 * Otherwise, this never returns a value greater than the overall timeseries
155 * duration. If the first data point was recorded less than a full duration
156 * ago, the time since the first data point is returned. If a full duration
157 * has elapsed, and we have already thrown away some data, the time since the
158 * oldest bucket is returned.
160 * For example, say we are tracking 600 seconds worth of data, in 60 buckets.
161 * - If less than 600 seconds have elapsed since the first data point,
162 * elapsed() returns the total elapsed time so far.
163 * - If more than 600 seconds have elapsed, we have already thrown away some
164 * data. However, we throw away a full bucket (10 seconds worth) at once,
165 * so at any point in time we have from 590 to 600 seconds worth of data.
166 * elapsed() will therefore return a value between 590 and 600.
168 * Note that you generally should call update() before calling elapsed(), to
169 * make sure you are not reading stale data.
171 TimeType elapsed() const;
174 * Return the sum of all the data points currently tracked by this
175 * BucketedTimeSeries.
177 * Note that you generally should call update() before calling sum(), to
178 * make sure you are not reading stale data.
180 const ValueType& sum() const {
185 * Return the number of data points currently tracked by this
186 * BucketedTimeSeries.
188 * Note that you generally should call update() before calling count(), to
189 * make sure you are not reading stale data.
191 uint64_t count() const {
196 * Return the average value (sum / count).
198 * The return type may be specified to control whether floating-point or
199 * integer division should be performed.
201 * Note that you generally should call update() before calling avg(), to
202 * make sure you are not reading stale data.
204 template <typename ReturnType=double>
205 ReturnType avg() const {
206 return total_.template avg<ReturnType>();
210 * Return the sum divided by the elapsed time.
212 * Note that you generally should call update() before calling rate(), to
213 * make sure you are not reading stale data.
215 template <typename ReturnType=double, typename Interval=TimeType>
216 ReturnType rate() const {
217 return rateHelper<ReturnType, Interval>(total_.sum, elapsed());
221 * Return the count divided by the elapsed time.
223 * The Interval template parameter causes the elapsed time to be converted to
224 * the Interval type before using it. For example, if Interval is
225 * std::chrono::seconds, the return value will be the count per second.
226 * If Interval is std::chrono::hours, the return value will be the count per
229 * Note that you generally should call update() before calling countRate(),
230 * to make sure you are not reading stale data.
232 template <typename ReturnType=double, typename Interval=TimeType>
233 ReturnType countRate() const {
234 return rateHelper<ReturnType, Interval>(total_.count, elapsed());
238 * Estimate the sum of the data points that occurred in the specified time
241 * The range queried is [start, end).
242 * That is, start is inclusive, and end is exclusive.
244 * Note that data outside of the timeseries duration will no longer be
245 * available for use in the estimation. Specifying a start time earlier than
246 * (getLatestTime() - elapsed()) will not have much effect, since only data
247 * points after that point in time will be counted.
249 * Note that the value returned is an estimate, and may not be precise.
251 ValueType sum(TimeType start, TimeType end) const;
254 * Estimate the number of data points that occurred in the specified time
257 * The same caveats documented in the sum(TimeType start, TimeType end)
258 * comments apply here as well.
260 uint64_t count(TimeType start, TimeType end) const;
263 * Estimate the average value during the specified time period.
265 * The same caveats documented in the sum(TimeType start, TimeType end)
266 * comments apply here as well.
268 template <typename ReturnType=double>
269 ReturnType avg(TimeType start, TimeType end) const;
272 * Estimate the rate during the specified time period.
274 * The same caveats documented in the sum(TimeType start, TimeType end)
275 * comments apply here as well.
277 template <typename ReturnType=double, typename Interval=TimeType>
278 ReturnType rate(TimeType start, TimeType end) const {
279 ValueType intervalSum = sum(start, end);
280 return rateHelper<ReturnType, Interval>(intervalSum, end - start);
284 * Estimate the rate of data points being added during the specified time
287 * The same caveats documented in the sum(TimeType start, TimeType end)
288 * comments apply here as well.
290 template <typename ReturnType=double, typename Interval=TimeType>
291 ReturnType countRate(TimeType start, TimeType end) const {
292 uint64_t intervalCount = count(start, end);
293 return rateHelper<ReturnType, Interval>(intervalCount, end - start);
297 * Invoke a function for each bucket.
299 * The function will take as arguments the bucket index,
300 * the bucket start time, and the start time of the subsequent bucket.
302 * It should return true to continue iterating through the buckets, and false
303 * to break out of the loop and stop, without calling the function on any
306 * bool function(const Bucket& bucket, TimeType bucketStart,
307 * TimeType nextBucketStart)
309 template <typename Function>
310 void forEachBucket(Function fn) const;
313 * Get the index for the bucket containing the specified time.
315 * Note that the index is only valid if this time actually falls within one
316 * of the current buckets. If you pass in a value more recent than
317 * getLatestTime() or older than (getLatestTime() - elapsed()), the index
318 * returned will not be valid.
320 * This method may not be called for all-time data.
322 size_t getBucketIdx(TimeType time) const;
325 * Get the bucket at the specified index.
327 * This method may not be called for all-time data.
329 const Bucket& getBucketByIndex(size_t idx) const {
330 return buckets_[idx];
334 * Compute the bucket index that the specified time falls into,
335 * as well as the bucket start time and the next bucket's start time.
337 * This method may not be called for all-time data.
339 void getBucketInfo(TimeType time, size_t* bucketIdx,
340 TimeType* bucketStart, TimeType* nextBucketStart) const;
343 template <typename ReturnType=double, typename Interval=TimeType>
344 ReturnType rateHelper(ReturnType numerator, TimeType elapsed) const {
345 if (elapsed == TimeType(0)) {
349 // Use std::chrono::duration_cast to convert between the native
350 // duration and the desired interval. However, convert the rates,
351 // rather than just converting the elapsed duration. Converting the
352 // elapsed time first may collapse it down to 0 if the elapsed interval
353 // is less than the desired interval, which will incorrectly result in
355 typedef std::chrono::duration<
356 ReturnType, std::ratio<TimeType::period::den,
357 TimeType::period::num>> NativeRate;
358 typedef std::chrono::duration<
359 ReturnType, std::ratio<Interval::period::den,
360 Interval::period::num>> DesiredRate;
362 NativeRate native(numerator / elapsed.count());
363 DesiredRate desired = std::chrono::duration_cast<DesiredRate>(native);
364 return desired.count();
367 ValueType rangeAdjust(TimeType bucketStart, TimeType nextBucketStart,
368 TimeType start, TimeType end,
369 ValueType input) const;
371 template <typename Function>
372 void forEachBucket(TimeType start, TimeType end, Function fn) const;
374 TimeType firstTime_; // time of first update() since clear()/constructor
375 TimeType latestTime_; // time of last update()
376 TimeType duration_; // total duration ("window length") of the time series
378 Bucket total_; // sum and count of everything in time series
379 std::vector<Bucket> buckets_; // actual buckets of values
384 #endif // FOLLY_STATS_BUCKETEDTIMESERIES_H_