2 * Copyright 2012 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_HISTOGRAM_H_
18 #define FOLLY_HISTOGRAM_H_
31 * A helper class to manage a set of histogram buckets.
33 template <typename T, typename BucketT>
34 class HistogramBuckets {
37 typedef BucketT BucketType;
40 * Create a set of histogram buckets.
42 * One bucket will be created for each bucketSize interval of values within
43 * the specified range. Additionally, one bucket will be created to track
44 * all values that fall below the specified minimum, and one bucket will be
45 * created for all values above the specified maximum.
47 * If (max - min) is not a multiple of bucketSize, the last bucket will cover
48 * a smaller range of values than the other buckets.
50 * (max - min) must be larger than or equal to bucketSize.
52 HistogramBuckets(ValueType bucketSize, ValueType min, ValueType max,
53 const BucketType& defaultBucket);
55 /* Returns the bucket size of each bucket in the histogram. */
56 ValueType getBucketSize() const {
60 /* Returns the min value at which bucketing begins. */
61 ValueType getMin() const {
65 /* Returns the max value at which bucketing ends. */
66 ValueType getMax() const {
71 * Returns the number of buckets.
73 * This includes the total number of buckets for the [min, max) range,
74 * plus 2 extra buckets, one for handling values less than min, and one for
75 * values greater than max.
77 unsigned int getNumBuckets() const {
78 return buckets_.size();
81 /* Returns the bucket index into which the given value would fall. */
82 unsigned int getBucketIdx(ValueType value) const;
84 /* Returns the bucket for the specified value */
85 BucketType& getByValue(ValueType value) {
86 return buckets_[getBucketIdx(value)];
89 /* Returns the bucket for the specified value */
90 const BucketType& getByValue(ValueType value) const {
91 return buckets_[getBucketIdx(value)];
95 * Returns the bucket at the specified index.
97 * Note that index 0 is the bucket for all values less than the specified
98 * minimum. Index 1 is the first bucket in the specified bucket range.
100 BucketType& getByIndex(unsigned int idx) {
101 return buckets_[idx];
104 /* Returns the bucket at the specified index. */
105 const BucketType& getByIndex(unsigned int idx) const {
106 return buckets_[idx];
110 * Returns the minimum threshold for the bucket at the given index.
112 * The bucket at the specified index will store values in the range
113 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
114 * max is smaller than bucketMin + bucketSize.
116 ValueType getBucketMin(unsigned int idx) const {
118 return std::numeric_limits<ValueType>::min();
120 if (idx == buckets_.size() - 1) {
124 return min_ + ((idx - 1) * bucketSize_);
128 * Returns the maximum threshold for the bucket at the given index.
130 * The bucket at the specified index will store values in the range
131 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
132 * max is smaller than bucketMin + bucketSize.
134 ValueType getBucketMax(unsigned int idx) const {
135 if (idx == buckets_.size() - 1) {
136 return std::numeric_limits<ValueType>::max();
139 return min_ + (idx * bucketSize_);
143 * Determine which bucket the specified percentile falls into.
145 * Looks for the bucket that contains the Nth percentile data point.
147 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
148 * @param countFn A function that takes a const BucketType&, and returns the
149 * number of values in that bucket.
150 * @param lowPct The lowest percentile stored in the selected bucket will be
151 * returned via this parameter.
152 * @param highPct The highest percentile stored in the selected bucket will
153 * be returned via this parameter.
155 * @return Returns the index of the bucket that contains the Nth percentile
158 template <typename CountFn>
159 unsigned int getPercentileBucketIdx(double pct,
160 CountFn countFromBucket,
161 double* lowPct = NULL,
162 double* highPct = NULL) const;
165 * Estimate the value at the specified percentile.
167 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
168 * @param countFn A function that takes a const BucketType&, and returns the
169 * number of values in that bucket.
170 * @param avgFn A function that takes a const BucketType&, and returns the
171 * average of all the values in that bucket.
173 * @return Returns an estimate for N, where N is the number where exactly pct
174 * percentage of the data points in the histogram are less than N.
176 template <typename CountFn, typename AvgFn>
177 ValueType getPercentileEstimate(double pct,
178 CountFn countFromBucket,
179 AvgFn avgFromBucket) const;
182 * Iterator access to the buckets.
184 * Note that the first bucket is for all values less than min, and the last
185 * bucket is for all values greater than max. The buckets tracking values in
186 * the [min, max) actually start at the second bucket.
188 typename std::vector<BucketType>::const_iterator begin() const {
189 return buckets_.begin();
191 typename std::vector<BucketType>::iterator begin() {
192 return buckets_.begin();
194 typename std::vector<BucketType>::const_iterator end() const {
195 return buckets_.end();
197 typename std::vector<BucketType>::iterator end() {
198 return buckets_.end();
202 const ValueType bucketSize_;
203 const ValueType min_;
204 const ValueType max_;
205 std::vector<BucketType> buckets_;
212 * A basic histogram class.
214 * Groups data points into equally-sized buckets, and stores the overall sum of
215 * the data points in each bucket, as well as the number of data points in the
218 * The caller must specify the minimum and maximum data points to expect ahead
219 * of time, as well as the bucket width.
221 template <typename T>
240 Histogram(ValueType bucketSize, ValueType min, ValueType max)
241 : buckets_(bucketSize, min, max, Bucket()) {}
243 /* Add a data point to the histogram */
244 void addValue(ValueType value) {
245 Bucket& bucket = buckets_.getByValue(value);
246 // TODO: It would be nice to handle overflow here.
252 * Remove a data point to the histogram
254 * Note that this method does not actually verify that this exact data point
255 * had previously been added to the histogram; it merely subtracts the
256 * requested value from the appropriate bucket's sum.
258 void removeValue(ValueType value) {
259 Bucket& bucket = buckets_.getByValue(value);
260 // TODO: It would be nice to handle overflow here.
265 /* Remove all data points from the histogram */
267 for (int i = 0; i < buckets_.getNumBuckets(); i++) {
268 buckets_.getByIndex(i).clear();
272 /* Returns the bucket size of each bucket in the histogram. */
273 ValueType getBucketSize() const {
274 return buckets_.getBucketSize();
276 /* Returns the min value at which bucketing begins. */
277 ValueType getMin() const {
278 return buckets_.getMin();
280 /* Returns the max value at which bucketing ends. */
281 ValueType getMax() const {
282 return buckets_.getMax();
284 /* Returns the number of buckets */
285 unsigned int getNumBuckets() const {
286 return buckets_.getNumBuckets();
289 /* Returns the specified bucket (for reading only!) */
290 const Bucket& getBucketByIndex(int idx) const {
291 return buckets_.getByIndex(idx);
295 * Returns the minimum threshold for the bucket at the given index.
297 * The bucket at the specified index will store values in the range
298 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
299 * max is smaller than bucketMin + bucketSize.
301 ValueType getBucketMin(unsigned int idx) const {
302 return buckets_.getBucketMin(idx);
306 * Returns the maximum threshold for the bucket at the given index.
308 * The bucket at the specified index will store values in the range
309 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
310 * max is smaller than bucketMin + bucketSize.
312 ValueType getBucketMax(unsigned int idx) const {
313 return buckets_.getBucketMax(idx);
317 * Get the bucket that the specified percentile falls into
319 * The lowest and highest percentile data points in returned bucket will be
320 * returned in the lowPct and highPct arguments, if they are non-NULL.
322 unsigned int getPercentileBucketIdx(double pct,
323 double* lowPct = NULL,
324 double* highPct = NULL) const {
325 // We unfortunately can't use lambdas here yet;
326 // Some users of this code are still built with gcc-4.4.
327 CountFromBucket countFn;
328 return buckets_.getPercentileBucketIdx(pct, countFn, lowPct, highPct);
332 * Estimate the value at the specified percentile.
334 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
336 * @return Returns an estimate for N, where N is the number where exactly pct
337 * percentage of the data points in the histogram are less than N.
339 ValueType getPercentileEstimate(double pct) const {
340 CountFromBucket countFn;
342 return buckets_.getPercentileEstimate(pct, countFn, avgFn);
346 * Get a human-readable string describing the histogram contents
348 std::string debugString() const;
351 struct CountFromBucket {
352 uint64_t operator()(const Bucket& bucket) const {
356 struct AvgFromBucket {
357 ValueType operator()(const Bucket& bucket) const {
358 if (bucket.count == 0) {
361 // Cast bucket.count to a signed integer type. This ensures that we
362 // perform division properly here: If bucket.sum is a signed integer
363 // type but we divide by an unsigned number, unsigned division will be
364 // performed and bucket.sum will be converted to unsigned first.
365 // If bucket.sum is unsigned, the code will still do unsigned division
368 // The only downside is if bucket.count is large enough to be negative
369 // when treated as signed. That should be extremely unlikely, though.
370 return bucket.sum / static_cast<int64_t>(bucket.count);
374 detail::HistogramBuckets<ValueType, Bucket> buckets_;
379 #include "folly/Histogram-inl.h"
381 #endif // FOLLY_HISTOGRAM_H_