2 * Copyright 2013 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_STATS_MULTILEVELTIMESERIES_H_
18 #define FOLLY_STATS_MULTILEVELTIMESERIES_H_
24 #include "folly/stats/BucketedTimeSeries.h"
29 * This class represents a timeseries which keeps several levels of data
30 * granularity (similar in principle to the loads reported by the UNIX
31 * 'uptime' command). It uses several instances (one per level) of
32 * BucketedTimeSeries as the underlying storage.
34 * This can easily be used to track sums (and thus rates or averages) over
35 * several predetermined time periods, as well as all-time sums. For example,
36 * you would use to it to track query rate or response speed over the last
37 * 5, 15, 30, and 60 minutes.
39 * The MultiLevelTimeSeries takes a list of level durations as an input; the
40 * durations must be strictly increasing. Furthermore a special level can be
41 * provided with a duration of '0' -- this will be an "all-time" level. If
42 * an all-time level is provided, it MUST be the last level present.
44 * The class assumes that time advances forward -- you can't retroactively add
45 * values for events in the past -- the 'now' argument is provided for better
46 * efficiency and ease of unittesting.
48 * The class is not thread-safe -- use your own synchronization!
50 template <typename VT, typename TT=std::chrono::seconds>
51 class MultiLevelTimeSeries {
55 typedef folly::BucketedTimeSeries<ValueType, TimeType> Level;
58 * Create a new MultiLevelTimeSeries.
60 * This creates a new MultiLevelTimeSeries that tracks time series data at the
61 * specified time durations (level). The time series data tracked at each
62 * level is then further divided by numBuckets for memory efficiency.
64 * The durations must be strictly increasing. Furthermore a special level can
65 * be provided with a duration of '0' -- this will be an "all-time" level. If
66 * an all-time level is provided, it MUST be the last level present.
68 MultiLevelTimeSeries(size_t numBuckets,
70 const TimeType levelDurations[]);
73 * Return the number of buckets used to track time series at each level.
75 size_t numBuckets() const { return numBuckets_; }
78 * Return the number of levels tracked by MultiLevelTimeSeries.
80 size_t numLevels() const { return levels_.size(); }
83 * Get the BucketedTimeSeries backing the specified level.
85 * Note: you should generally call update() or flush() before accessing the
86 * data. Otherwise you may be reading stale data if update() or flush() has
87 * not been called recently.
89 const Level& getLevel(int level) const {
91 CHECK_LT(level, levels_.size());
92 return levels_[level];
96 * Get the highest granularity level that is still large enough to contain
97 * data going back to the specified start time.
99 * Note: you should generally call update() or flush() before accessing the
100 * data. Otherwise you may be reading stale data if update() or flush() has
101 * not been called recently.
103 const Level& getLevel(TimeType start) const {
104 for (const auto& level : levels_) {
105 if (level.isAllTime()) {
108 // Note that we use duration() here rather than elapsed().
109 // If duration is large enough to contain the start time then this level
110 // is good enough, even if elapsed() indicates that no data was recorded
111 // before the specified start time.
112 if (level.getLatestTime() - level.duration() <= start) {
116 // We should always have an all-time level, so this is never reached.
117 LOG(FATAL) << "No level of timeseries covers internval"
118 << " from " << start.count() << " to now";
119 return levels_.back();
123 * Return the sum of all the data points currently tracked at this level.
125 * Note: you should generally call update() or flush() before accessing the
126 * data. Otherwise you may be reading stale data if update() or flush() has
127 * not been called recently.
129 ValueType sum(int level) const {
130 return getLevel(level).sum();
134 * Return the average (sum / count) of all the data points currently tracked
137 * The return type may be specified to control whether floating-point or
138 * integer division should be performed.
140 * Note: you should generally call update() or flush() before accessing the
141 * data. Otherwise you may be reading stale data if update() or flush() has
142 * not been called recently.
144 template <typename ReturnType=double>
145 ReturnType avg(int level) const {
146 return getLevel(level).template avg<ReturnType>();
150 * Return the rate (sum divided by elaspsed time) of the all data points
151 * currently tracked at this level.
153 * Note: you should generally call update() or flush() before accessing the
154 * data. Otherwise you may be reading stale data if update() or flush() has
155 * not been called recently.
157 template <typename ReturnType=double, typename Interval=TimeType>
158 ValueType rate(int level) const {
159 return getLevel(level).template rate<ReturnType, Interval>();
163 * Return the number of data points currently tracked at this level.
165 * Note: you should generally call update() or flush() before accessing the
166 * data. Otherwise you may be reading stale data if update() or flush() has
167 * not been called recently.
169 int64_t count(int level) const {
170 return getLevel(level).count();
174 * Return the count divided by the elapsed time tracked at this level.
176 * Note: you should generally call update() or flush() before accessing the
177 * data. Otherwise you may be reading stale data if update() or flush() has
178 * not been called recently.
180 template <typename ReturnType=double, typename Interval=TimeType>
181 ReturnType countRate(int level) const {
182 return getLevel(level).template countRate<ReturnType, Interval>();
186 * Estimate the sum of the data points that occurred in the specified time
187 * period at this level.
189 * The range queried is [start, end).
190 * That is, start is inclusive, and end is exclusive.
192 * Note that data outside of the timeseries duration will no longer be
193 * available for use in the estimation. Specifying a start time earlier than
194 * getEarliestTime() will not have much effect, since only data points after
195 * that point in time will be counted.
197 * Note that the value returned is an estimate, and may not be precise.
199 * Note: you should generally call update() or flush() before accessing the
200 * data. Otherwise you may be reading stale data if update() or flush() has
201 * not been called recently.
203 ValueType sum(TimeType start, TimeType end) const {
204 return getLevel(start).sum(start, end);
208 * Estimate the average value during the specified time period.
210 * The same caveats documented in the sum(TimeType start, TimeType end)
211 * comments apply here as well.
213 * Note: you should generally call update() or flush() before accessing the
214 * data. Otherwise you may be reading stale data if update() or flush() has
215 * not been called recently.
217 template <typename ReturnType=double>
218 ReturnType avg(TimeType start, TimeType end) const {
219 return getLevel(start).template avg<ReturnType>(start, end);
223 * Estimate the rate during the specified time period.
225 * The same caveats documented in the sum(TimeType start, TimeType end)
226 * comments apply here as well.
228 * Note: you should generally call update() or flush() before accessing the
229 * data. Otherwise you may be reading stale data if update() or flush() has
230 * not been called recently.
232 template <typename ReturnType=double>
233 ReturnType rate(TimeType start, TimeType end) const {
234 return getLevel(start).template rate<ReturnType>(start, end);
238 * Estimate the count during the specified time period.
240 * The same caveats documented in the sum(TimeType start, TimeType end)
241 * comments apply here as well.
243 * Note: you should generally call update() or flush() before accessing the
244 * data. Otherwise you may be reading stale data if update() or flush() has
245 * not been called recently.
247 int64_t count(TimeType start, TimeType end) const {
248 return getLevel(start).count(start, end);
252 * Adds the value 'val' at time 'now' to all levels.
254 * Data points added at the same time point is cached internally here and not
255 * propagated to the underlying levels until either flush() is called or when
256 * update from a different time comes.
258 * This function expects time to always move forwards: it cannot be used to
259 * add historical data points that have occurred in the past. If now is
260 * older than the another timestamp that has already been passed to
261 * addValue() or update(), now will be ignored and the latest timestamp will
264 void addValue(TimeType now, const ValueType& val);
267 * Adds the value 'val' at time 'now' to all levels.
269 void addValue(TimeType now, const ValueType& val, int64_t times);
272 * Adds the value 'val' at time 'now' to all levels as the sum of 'nsamples'
275 void addValueAggregated(TimeType now, const ValueType& sum, int64_t nsamples);
278 * Update all the levels to the specified time, doing all the necessary
279 * work to rotate the buckets and remove any stale data points.
281 * When reading data from the timeseries, you should make sure to manually
282 * call update() before accessing the data. Otherwise you may be reading
283 * stale data if update() has not been called recently.
285 void update(TimeType now);
288 * Reset all the timeseries to an empty state as if no data points have ever
294 * Flush all cached updates.
300 std::vector<Level> levels_;
302 // Updates within the same time interval are cached
303 // They are flushed out when updates from a different time comes,
304 // or flush() is called.
305 TimeType cachedTime_;
306 ValueType cachedSum_;
312 #endif // FOLLY_STATS_MULTILEVELTIMESERIES_H_