2 * Copyright 2015 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_HISTOGRAM_H_
18 #define FOLLY_HISTOGRAM_H_
27 #include <folly/detail/Stats.h>
34 * A helper class to manage a set of histogram buckets.
36 template <typename T, typename BucketT>
37 class HistogramBuckets {
40 typedef BucketT BucketType;
43 * Create a set of histogram buckets.
45 * One bucket will be created for each bucketSize interval of values within
46 * the specified range. Additionally, one bucket will be created to track
47 * all values that fall below the specified minimum, and one bucket will be
48 * created for all values above the specified maximum.
50 * If (max - min) is not a multiple of bucketSize, the last bucket will cover
51 * a smaller range of values than the other buckets.
53 * (max - min) must be larger than or equal to bucketSize.
55 HistogramBuckets(ValueType bucketSize, ValueType min, ValueType max,
56 const BucketType& defaultBucket);
58 /* Returns the bucket size of each bucket in the histogram. */
59 ValueType getBucketSize() const {
63 /* Returns the min value at which bucketing begins. */
64 ValueType getMin() const {
68 /* Returns the max value at which bucketing ends. */
69 ValueType getMax() const {
74 * Returns the number of buckets.
76 * This includes the total number of buckets for the [min, max) range,
77 * plus 2 extra buckets, one for handling values less than min, and one for
78 * values greater than max.
80 unsigned int getNumBuckets() const {
81 return buckets_.size();
84 /* Returns the bucket index into which the given value would fall. */
85 unsigned int getBucketIdx(ValueType value) const;
87 /* Returns the bucket for the specified value */
88 BucketType& getByValue(ValueType value) {
89 return buckets_[getBucketIdx(value)];
92 /* Returns the bucket for the specified value */
93 const BucketType& getByValue(ValueType value) const {
94 return buckets_[getBucketIdx(value)];
98 * Returns the bucket at the specified index.
100 * Note that index 0 is the bucket for all values less than the specified
101 * minimum. Index 1 is the first bucket in the specified bucket range.
103 BucketType& getByIndex(unsigned int idx) {
104 return buckets_[idx];
107 /* Returns the bucket at the specified index. */
108 const BucketType& getByIndex(unsigned int idx) const {
109 return buckets_[idx];
113 * Returns the minimum threshold for the bucket at the given index.
115 * The bucket at the specified index will store values in the range
116 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
117 * max is smaller than bucketMin + bucketSize.
119 ValueType getBucketMin(unsigned int idx) const {
121 return std::numeric_limits<ValueType>::min();
123 if (idx == buckets_.size() - 1) {
127 return min_ + ((idx - 1) * bucketSize_);
131 * Returns the maximum threshold for the bucket at the given index.
133 * The bucket at the specified index will store values in the range
134 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
135 * max is smaller than bucketMin + bucketSize.
137 ValueType getBucketMax(unsigned int idx) const {
138 if (idx == buckets_.size() - 1) {
139 return std::numeric_limits<ValueType>::max();
142 return min_ + (idx * bucketSize_);
146 * Computes the total number of values stored across all buckets.
148 * Runs in O(numBuckets)
150 * @param countFn A function that takes a const BucketType&, and returns the
151 * number of values in that bucket
152 * @return Returns the total number of values stored across all buckets
154 template <typename CountFn>
155 const uint64_t computeTotalCount(CountFn countFromBucket) const;
158 * Determine which bucket the specified percentile falls into.
160 * Looks for the bucket that contains the Nth percentile data point.
162 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
163 * @param countFn A function that takes a const BucketType&, and returns the
164 * number of values in that bucket.
165 * @param lowPct The lowest percentile stored in the selected bucket will be
166 * returned via this parameter.
167 * @param highPct The highest percentile stored in the selected bucket will
168 * be returned via this parameter.
170 * @return Returns the index of the bucket that contains the Nth percentile
173 template <typename CountFn>
174 unsigned int getPercentileBucketIdx(double pct,
175 CountFn countFromBucket,
176 double* lowPct = nullptr,
177 double* highPct = nullptr) const;
180 * Estimate the value at the specified percentile.
182 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
183 * @param countFn A function that takes a const BucketType&, and returns the
184 * number of values in that bucket.
185 * @param avgFn A function that takes a const BucketType&, and returns the
186 * average of all the values in that bucket.
188 * @return Returns an estimate for N, where N is the number where exactly pct
189 * percentage of the data points in the histogram are less than N.
191 template <typename CountFn, typename AvgFn>
192 ValueType getPercentileEstimate(double pct,
193 CountFn countFromBucket,
194 AvgFn avgFromBucket) const;
197 * Iterator access to the buckets.
199 * Note that the first bucket is for all values less than min, and the last
200 * bucket is for all values greater than max. The buckets tracking values in
201 * the [min, max) actually start at the second bucket.
203 typename std::vector<BucketType>::const_iterator begin() const {
204 return buckets_.begin();
206 typename std::vector<BucketType>::iterator begin() {
207 return buckets_.begin();
209 typename std::vector<BucketType>::const_iterator end() const {
210 return buckets_.end();
212 typename std::vector<BucketType>::iterator end() {
213 return buckets_.end();
217 ValueType bucketSize_;
220 std::vector<BucketType> buckets_;
227 * A basic histogram class.
229 * Groups data points into equally-sized buckets, and stores the overall sum of
230 * the data points in each bucket, as well as the number of data points in the
233 * The caller must specify the minimum and maximum data points to expect ahead
234 * of time, as well as the bucket width.
236 template <typename T>
240 typedef detail::Bucket<T> Bucket;
242 Histogram(ValueType bucketSize, ValueType min, ValueType max)
243 : buckets_(bucketSize, min, max, Bucket()) {}
245 /* Add a data point to the histogram */
246 void addValue(ValueType value) {
247 Bucket& bucket = buckets_.getByValue(value);
248 // TODO: It would be nice to handle overflow here.
253 /* Add multiple same data points to the histogram */
254 void addRepeatedValue(ValueType value, uint64_t nSamples) {
255 Bucket& bucket = buckets_.getByValue(value);
256 // TODO: It would be nice to handle overflow here.
257 bucket.sum += value * nSamples;
258 bucket.count += nSamples;
262 * Remove a data point to the histogram
264 * Note that this method does not actually verify that this exact data point
265 * had previously been added to the histogram; it merely subtracts the
266 * requested value from the appropriate bucket's sum.
268 void removeValue(ValueType value) {
269 Bucket& bucket = buckets_.getByValue(value);
270 // TODO: It would be nice to handle overflow here.
271 if (bucket.count > 0) {
275 bucket.sum = ValueType();
280 /* Remove multiple same data points from the histogram */
281 void removeRepeatedValue(ValueType value, uint64_t nSamples) {
282 Bucket& bucket = buckets_.getByValue(value);
283 // TODO: It would be nice to handle overflow here.
284 if (bucket.count >= nSamples) {
285 bucket.sum -= value * nSamples;
286 bucket.count -= nSamples;
288 bucket.sum = ValueType();
293 /* Remove all data points from the histogram */
295 for (unsigned int i = 0; i < buckets_.getNumBuckets(); i++) {
296 buckets_.getByIndex(i).clear();
300 /* Subtract another histogram data from the histogram */
301 void subtract(const Histogram &hist) {
302 // the two histogram bucket definitions must match to support
304 if (getBucketSize() != hist.getBucketSize() ||
305 getMin() != hist.getMin() ||
306 getMax() != hist.getMax() ||
307 getNumBuckets() != hist.getNumBuckets() ) {
308 throw std::invalid_argument("Cannot subtract input histogram.");
311 for (unsigned int i = 0; i < buckets_.getNumBuckets(); i++) {
312 buckets_.getByIndex(i) -= hist.buckets_.getByIndex(i);
316 /* Merge two histogram data together */
317 void merge(const Histogram &hist) {
318 // the two histogram bucket definitions must match to support
320 if (getBucketSize() != hist.getBucketSize() ||
321 getMin() != hist.getMin() ||
322 getMax() != hist.getMax() ||
323 getNumBuckets() != hist.getNumBuckets() ) {
324 throw std::invalid_argument("Cannot merge from input histogram.");
327 for (unsigned int i = 0; i < buckets_.getNumBuckets(); i++) {
328 buckets_.getByIndex(i) += hist.buckets_.getByIndex(i);
332 /* Copy bucket values from another histogram */
333 void copy(const Histogram &hist) {
334 // the two histogram bucket definition must match
335 if (getBucketSize() != hist.getBucketSize() ||
336 getMin() != hist.getMin() ||
337 getMax() != hist.getMax() ||
338 getNumBuckets() != hist.getNumBuckets() ) {
339 throw std::invalid_argument("Cannot copy from input histogram.");
342 for (unsigned int i = 0; i < buckets_.getNumBuckets(); i++) {
343 buckets_.getByIndex(i) = hist.buckets_.getByIndex(i);
347 /* Returns the bucket size of each bucket in the histogram. */
348 ValueType getBucketSize() const {
349 return buckets_.getBucketSize();
351 /* Returns the min value at which bucketing begins. */
352 ValueType getMin() const {
353 return buckets_.getMin();
355 /* Returns the max value at which bucketing ends. */
356 ValueType getMax() const {
357 return buckets_.getMax();
359 /* Returns the number of buckets */
360 unsigned int getNumBuckets() const {
361 return buckets_.getNumBuckets();
364 /* Returns the specified bucket (for reading only!) */
365 const Bucket& getBucketByIndex(int idx) const {
366 return buckets_.getByIndex(idx);
370 * Returns the minimum threshold for the bucket at the given index.
372 * The bucket at the specified index will store values in the range
373 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
374 * max is smaller than bucketMin + bucketSize.
376 ValueType getBucketMin(unsigned int idx) const {
377 return buckets_.getBucketMin(idx);
381 * Returns the maximum threshold for the bucket at the given index.
383 * The bucket at the specified index will store values in the range
384 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
385 * max is smaller than bucketMin + bucketSize.
387 ValueType getBucketMax(unsigned int idx) const {
388 return buckets_.getBucketMax(idx);
392 * Computes the total number of values stored across all buckets.
394 * Runs in O(numBuckets)
396 const uint64_t computeTotalCount() const {
397 CountFromBucket countFn;
398 return buckets_.computeTotalCount(countFn);
402 * Get the bucket that the specified percentile falls into
404 * The lowest and highest percentile data points in returned bucket will be
405 * returned in the lowPct and highPct arguments, if they are non-NULL.
407 unsigned int getPercentileBucketIdx(double pct,
408 double* lowPct = nullptr,
409 double* highPct = nullptr) const {
410 // We unfortunately can't use lambdas here yet;
411 // Some users of this code are still built with gcc-4.4.
412 CountFromBucket countFn;
413 return buckets_.getPercentileBucketIdx(pct, countFn, lowPct, highPct);
417 * Estimate the value at the specified percentile.
419 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
421 * @return Returns an estimate for N, where N is the number where exactly pct
422 * percentage of the data points in the histogram are less than N.
424 ValueType getPercentileEstimate(double pct) const {
425 CountFromBucket countFn;
427 return buckets_.getPercentileEstimate(pct, countFn, avgFn);
431 * Get a human-readable string describing the histogram contents
433 std::string debugString() const;
436 * Write the histogram contents in tab-separated values (TSV) format.
437 * Format is "min max count sum".
439 void toTSV(std::ostream& out, bool skipEmptyBuckets = true) const;
441 struct CountFromBucket {
442 uint64_t operator()(const Bucket& bucket) const {
446 struct AvgFromBucket {
447 ValueType operator()(const Bucket& bucket) const {
448 if (bucket.count == 0) {
451 // Cast bucket.count to a signed integer type. This ensures that we
452 // perform division properly here: If bucket.sum is a signed integer
453 // type but we divide by an unsigned number, unsigned division will be
454 // performed and bucket.sum will be converted to unsigned first.
455 // If bucket.sum is unsigned, the code will still do unsigned division
458 // The only downside is if bucket.count is large enough to be negative
459 // when treated as signed. That should be extremely unlikely, though.
460 return bucket.sum / static_cast<int64_t>(bucket.count);
465 detail::HistogramBuckets<ValueType, Bucket> buckets_;
470 #endif // FOLLY_HISTOGRAM_H_