2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
20 #include <folly/stats/Histogram.h>
21 #include <folly/stats/MultiLevelTimeSeries.h>
26 * TimeseriesHistogram tracks data distributions as they change over time.
28 * Specifically, it is a bucketed histogram with different value ranges assigned
29 * to each bucket. Within each bucket is a MultiLevelTimeSeries from
30 * 'folly/stats/MultiLevelTimeSeries.h'. This means that each bucket contains a
31 * different set of data for different historical time periods, and one can
32 * query data distributions over different trailing time windows.
34 * For example, this can answer questions: "What is the data distribution over
35 * the last minute? Over the last 10 minutes? Since I last cleared this
38 * The class can also estimate percentiles and answer questions like: "What was
39 * the 99th percentile data value over the last 10 minutes?"
41 * Note: that depending on the size of your buckets and the smoothness
42 * of your data distribution, the estimate may be way off from the actual
43 * value. In particular, if the given percentile falls outside of the bucket
44 * range (i.e. your buckets range in 0 - 100,000 but the 99th percentile is
45 * around 115,000) this estimate may be very wrong.
47 * The memory usage for a typical histogram is roughly 3k * (# of buckets). All
48 * insertion operations are amortized O(1), and all queries are O(# of buckets).
52 class CT = LegacyStatsClock<std::chrono::seconds>,
53 class C = folly::MultiLevelTimeSeries<T, CT>>
54 class TimeseriesHistogram {
56 // NOTE: T must be equivalent to _signed_ numeric type for our math.
57 static_assert(std::numeric_limits<T>::is_signed, "");
60 // Values to be inserted into container
62 // The container type we use internally for each bucket
63 using ContainerType = C;
64 // Clock, duration, and time point types
66 using Duration = typename Clock::duration;
67 using TimePoint = typename Clock::time_point;
70 * Create a TimeSeries histogram and initialize the bucketing and levels.
72 * The buckets are created by chopping the range [min, max) into pieces
73 * of size bucketSize, with the last bucket being potentially shorter. Two
74 * additional buckets are always created -- the "under" bucket for the range
75 * (-inf, min) and the "over" bucket for the range [max, +inf).
77 * @param bucketSize the width of each bucket
78 * @param min the smallest value for the bucket range.
79 * @param max the largest value for the bucket range
80 * @param defaultContainer a pre-initialized timeseries with the desired
81 * number of levels and their durations.
83 TimeseriesHistogram(ValueType bucketSize, ValueType min, ValueType max,
84 const ContainerType& defaultContainer);
86 /* Return the bucket size of each bucket in the histogram. */
87 ValueType getBucketSize() const { return buckets_.getBucketSize(); }
89 /* Return the min value at which bucketing begins. */
90 ValueType getMin() const { return buckets_.getMin(); }
92 /* Return the max value at which bucketing ends. */
93 ValueType getMax() const { return buckets_.getMax(); }
95 /* Return the number of levels of the Timeseries object in each bucket */
96 int getNumLevels() const {
97 return buckets_.getByIndex(0).numLevels();
100 /* Return the number of buckets */
101 int getNumBuckets() const { return buckets_.getNumBuckets(); }
104 * Return the threshold of the bucket for the given index in range
105 * [0..numBuckets). The bucket will have range [thresh, thresh + bucketSize)
106 * or [thresh, max), whichever is shorter.
108 ValueType getBucketMin(int bucketIdx) const {
109 return buckets_.getBucketMin(bucketIdx);
112 /* Return the actual timeseries in the given bucket (for reading only!) */
113 const ContainerType& getBucket(int bucketIdx) const {
114 return buckets_.getByIndex(bucketIdx);
117 /* Total count of values at the given timeseries level (all buckets). */
118 int64_t count(int level) const {
120 for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
121 total += buckets_.getByIndex(b).count(level);
126 /* Total count of values added during the given interval (all buckets). */
127 int64_t count(TimePoint start, TimePoint end) const {
129 for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
130 total += buckets_.getByIndex(b).count(start, end);
135 /* Total sum of values at the given timeseries level (all buckets). */
136 ValueType sum(int level) const {
137 ValueType total = ValueType();
138 for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
139 total += buckets_.getByIndex(b).sum(level);
144 /* Total sum of values added during the given interval (all buckets). */
145 ValueType sum(TimePoint start, TimePoint end) const {
146 ValueType total = ValueType();
147 for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
148 total += buckets_.getByIndex(b).sum(start, end);
153 /* Average of values at the given timeseries level (all buckets). */
154 template <typename ReturnType = double>
155 ReturnType avg(int level) const {
156 auto total = ValueType();
157 int64_t nsamples = 0;
158 computeAvgData(&total, &nsamples, level);
159 return folly::detail::avgHelper<ReturnType>(total, nsamples);
162 /* Average of values added during the given interval (all buckets). */
163 template <typename ReturnType = double>
164 ReturnType avg(TimePoint start, TimePoint end) const {
165 auto total = ValueType();
166 int64_t nsamples = 0;
167 computeAvgData(&total, &nsamples, start, end);
168 return folly::detail::avgHelper<ReturnType>(total, nsamples);
172 * Rate at the given timeseries level (all buckets).
173 * This is the sum of all values divided by the time interval (in seconds).
175 template <typename ReturnType = double>
176 ReturnType rate(int level) const {
177 auto total = ValueType();
179 computeRateData(&total, &elapsed, level);
180 return folly::detail::rateHelper<ReturnType, Duration, Duration>(
185 * Rate for the given interval (all buckets).
186 * This is the sum of all values divided by the time interval (in seconds).
188 template <typename ReturnType = double>
189 ReturnType rate(TimePoint start, TimePoint end) const {
190 auto total = ValueType();
192 computeRateData(&total, &elapsed, start, end);
193 return folly::detail::rateHelper<ReturnType, Duration, Duration>(
198 * Update every underlying timeseries object with the given timestamp. You
199 * must call this directly before querying to ensure that the data in all
200 * buckets is decayed properly.
202 void update(TimePoint now);
204 /* clear all the data from the histogram. */
207 /* Add a value into the histogram with timestamp 'now' */
208 void addValue(TimePoint now, const ValueType& value);
209 /* Add a value the given number of times with timestamp 'now' */
210 void addValue(TimePoint now, const ValueType& value, int64_t times);
213 * Add all of the values from the specified histogram.
215 * All of the values will be added to the current time-slot.
217 * One use of this is for thread-local caching of frequently updated
218 * histogram data. For example, each thread can store a thread-local
219 * Histogram that is updated frequently, and only add it to the global
220 * TimeseriesHistogram once a second.
222 void addValues(TimePoint now, const folly::Histogram<ValueType>& values);
225 * Return an estimate of the value at the given percentile in the histogram
226 * in the given timeseries level. The percentile is estimated as follows:
228 * - We retrieve a count of the values in each bucket (at the given level)
229 * - We determine via the counts which bucket the given percentile falls in.
230 * - We assume the average value in the bucket is also its median
231 * - We then linearly interpolate within the bucket, by assuming that the
232 * distribution is uniform in the two value ranges [left, median) and
233 * [median, right) where [left, right) is the bucket value range.
236 * - If the histogram is empty, this always returns ValueType(), usually 0.
237 * - For the 'under' and 'over' special buckets, their range is unbounded
238 * on one side. In order for the interpolation to work, we assume that
239 * the average value in the bucket is equidistant from the two edges of
240 * the bucket. In other words, we assume that the distance between the
241 * average and the known bound is equal to the distance between the average
242 * and the unknown bound.
244 ValueType getPercentileEstimate(double pct, int level) const;
246 * Return an estimate of the value at the given percentile in the histogram
247 * in the given historical interval. Please see the documentation for
248 * getPercentileEstimate(int pct, int level) for the explanation of the
249 * estimation algorithm.
251 ValueType getPercentileEstimate(double pct, TimePoint start, TimePoint end)
255 * Return the bucket index that the given percentile falls into (in the
256 * given timeseries level). This index can then be used to retrieve either
257 * the bucket threshold, or other data from inside the bucket.
259 int getPercentileBucketIdx(double pct, int level) const;
261 * Return the bucket index that the given percentile falls into (in the
262 * given historical interval). This index can then be used to retrieve either
263 * the bucket threshold, or other data from inside the bucket.
265 int getPercentileBucketIdx(double pct, TimePoint start, TimePoint end) const;
267 /* Get the bucket threshold for the bucket containing the given pct. */
268 int getPercentileBucketMin(double pct, int level) const {
269 return getBucketMin(getPercentileBucketIdx(pct, level));
271 /* Get the bucket threshold for the bucket containing the given pct. */
272 int getPercentileBucketMin(double pct, TimePoint start, TimePoint end) const {
273 return getBucketMin(getPercentileBucketIdx(pct, start, end));
277 * Print out serialized data from all buckets at the given level.
278 * Format is: BUCKET [',' BUCKET ...]
279 * Where: BUCKET == bucketMin ':' count ':' avg
281 std::string getString(int level) const;
284 * Print out serialized data for all buckets in the historical interval.
285 * For format, please see getString(int level).
287 std::string getString(TimePoint start, TimePoint end) const;
290 * Legacy APIs that accept a Duration parameters rather than TimePoint.
292 * These treat the Duration as relative to the clock epoch.
293 * Prefer using the correct TimePoint-based APIs instead. These APIs will
294 * eventually be deprecated and removed.
296 void update(Duration now) {
297 update(TimePoint(now));
299 void addValue(Duration now, const ValueType& value) {
300 addValue(TimePoint(now), value);
302 void addValue(Duration now, const ValueType& value, int64_t times) {
303 addValue(TimePoint(now), value, times);
305 void addValues(Duration now, const folly::Histogram<ValueType>& values) {
306 addValues(TimePoint(now), values);
310 typedef ContainerType Bucket;
311 struct CountFromLevel {
312 explicit CountFromLevel(int level) : level_(level) {}
314 uint64_t operator()(const ContainerType& bucket) const {
315 return bucket.count(level_);
321 struct CountFromInterval {
322 explicit CountFromInterval(TimePoint start, TimePoint end)
323 : start_(start), end_(end) {}
325 uint64_t operator()(const ContainerType& bucket) const {
326 return bucket.count(start_, end_);
334 struct AvgFromLevel {
335 explicit AvgFromLevel(int level) : level_(level) {}
337 ValueType operator()(const ContainerType& bucket) const {
338 return bucket.template avg<ValueType>(level_);
345 template <typename ReturnType>
346 struct AvgFromInterval {
347 explicit AvgFromInterval(TimePoint start, TimePoint end)
348 : start_(start), end_(end) {}
350 ReturnType operator()(const ContainerType& bucket) const {
351 return bucket.template avg<ReturnType>(start_, end_);
360 * Special logic for the case of only one unique value registered
361 * (this can happen when clients don't pick good bucket ranges or have
362 * other bugs). It's a lot easier for clients to track down these issues
363 * if they are getting the correct value.
365 void maybeHandleSingleUniqueValue(const ValueType& value);
367 void computeAvgData(ValueType* total, int64_t* nsamples, int level) const;
372 TimePoint end) const;
373 void computeRateData(ValueType* total, Duration* elapsed, int level) const;
374 void computeRateData(
378 TimePoint end) const;
380 folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
381 bool haveNotSeenValue_;
382 bool singleUniqueValue_;
383 ValueType firstValue_;