2 * Copyright 2012 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_HISTOGRAM_H_
18 #define FOLLY_HISTOGRAM_H_
32 * A helper class to manage a set of histogram buckets.
34 template <typename T, typename BucketT>
35 class HistogramBuckets {
38 typedef BucketT BucketType;
41 * Create a set of histogram buckets.
43 * One bucket will be created for each bucketSize interval of values within
44 * the specified range. Additionally, one bucket will be created to track
45 * all values that fall below the specified minimum, and one bucket will be
46 * created for all values above the specified maximum.
48 * If (max - min) is not a multiple of bucketSize, the last bucket will cover
49 * a smaller range of values than the other buckets.
51 * (max - min) must be larger than or equal to bucketSize.
53 HistogramBuckets(ValueType bucketSize, ValueType min, ValueType max,
54 const BucketType& defaultBucket);
56 /* Returns the bucket size of each bucket in the histogram. */
57 ValueType getBucketSize() const {
61 /* Returns the min value at which bucketing begins. */
62 ValueType getMin() const {
66 /* Returns the max value at which bucketing ends. */
67 ValueType getMax() const {
72 * Returns the number of buckets.
74 * This includes the total number of buckets for the [min, max) range,
75 * plus 2 extra buckets, one for handling values less than min, and one for
76 * values greater than max.
78 unsigned int getNumBuckets() const {
79 return buckets_.size();
82 /* Returns the bucket index into which the given value would fall. */
83 unsigned int getBucketIdx(ValueType value) const;
85 /* Returns the bucket for the specified value */
86 BucketType& getByValue(ValueType value) {
87 return buckets_[getBucketIdx(value)];
90 /* Returns the bucket for the specified value */
91 const BucketType& getByValue(ValueType value) const {
92 return buckets_[getBucketIdx(value)];
96 * Returns the bucket at the specified index.
98 * Note that index 0 is the bucket for all values less than the specified
99 * minimum. Index 1 is the first bucket in the specified bucket range.
101 BucketType& getByIndex(unsigned int idx) {
102 return buckets_[idx];
105 /* Returns the bucket at the specified index. */
106 const BucketType& getByIndex(unsigned int idx) const {
107 return buckets_[idx];
111 * Returns the minimum threshold for the bucket at the given index.
113 * The bucket at the specified index will store values in the range
114 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
115 * max is smaller than bucketMin + bucketSize.
117 ValueType getBucketMin(unsigned int idx) const {
119 return std::numeric_limits<ValueType>::min();
121 if (idx == buckets_.size() - 1) {
125 return min_ + ((idx - 1) * bucketSize_);
129 * Returns the maximum threshold for the bucket at the given index.
131 * The bucket at the specified index will store values in the range
132 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
133 * max is smaller than bucketMin + bucketSize.
135 ValueType getBucketMax(unsigned int idx) const {
136 if (idx == buckets_.size() - 1) {
137 return std::numeric_limits<ValueType>::max();
140 return min_ + (idx * bucketSize_);
144 * Determine which bucket the specified percentile falls into.
146 * Looks for the bucket that contains the Nth percentile data point.
148 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
149 * @param countFn A function that takes a const BucketType&, and returns the
150 * number of values in that bucket.
151 * @param lowPct The lowest percentile stored in the selected bucket will be
152 * returned via this parameter.
153 * @param highPct The highest percentile stored in the selected bucket will
154 * be returned via this parameter.
156 * @return Returns the index of the bucket that contains the Nth percentile
159 template <typename CountFn>
160 unsigned int getPercentileBucketIdx(double pct,
161 CountFn countFromBucket,
162 double* lowPct = NULL,
163 double* highPct = NULL) const;
166 * Estimate the value at the specified percentile.
168 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
169 * @param countFn A function that takes a const BucketType&, and returns the
170 * number of values in that bucket.
171 * @param avgFn A function that takes a const BucketType&, and returns the
172 * average of all the values in that bucket.
174 * @return Returns an estimate for N, where N is the number where exactly pct
175 * percentage of the data points in the histogram are less than N.
177 template <typename CountFn, typename AvgFn>
178 ValueType getPercentileEstimate(double pct,
179 CountFn countFromBucket,
180 AvgFn avgFromBucket) const;
183 * Iterator access to the buckets.
185 * Note that the first bucket is for all values less than min, and the last
186 * bucket is for all values greater than max. The buckets tracking values in
187 * the [min, max) actually start at the second bucket.
189 typename std::vector<BucketType>::const_iterator begin() const {
190 return buckets_.begin();
192 typename std::vector<BucketType>::iterator begin() {
193 return buckets_.begin();
195 typename std::vector<BucketType>::const_iterator end() const {
196 return buckets_.end();
198 typename std::vector<BucketType>::iterator end() {
199 return buckets_.end();
203 const ValueType bucketSize_;
204 const ValueType min_;
205 const ValueType max_;
206 std::vector<BucketType> buckets_;
213 * A basic histogram class.
215 * Groups data points into equally-sized buckets, and stores the overall sum of
216 * the data points in each bucket, as well as the number of data points in the
219 * The caller must specify the minimum and maximum data points to expect ahead
220 * of time, as well as the bucket width.
222 template <typename T>
237 Bucket& merge(const Bucket &bucket) {
238 if (this != &bucket) {
240 count += bucket.count;
245 Bucket& operator=(const Bucket& bucket) {
246 if (this != &bucket) {
248 count = bucket.count;
257 Histogram(ValueType bucketSize, ValueType min, ValueType max)
258 : buckets_(bucketSize, min, max, Bucket()) {}
260 /* Add a data point to the histogram */
261 void addValue(ValueType value) {
262 Bucket& bucket = buckets_.getByValue(value);
263 // TODO: It would be nice to handle overflow here.
269 * Remove a data point to the histogram
271 * Note that this method does not actually verify that this exact data point
272 * had previously been added to the histogram; it merely subtracts the
273 * requested value from the appropriate bucket's sum.
275 void removeValue(ValueType value) {
276 Bucket& bucket = buckets_.getByValue(value);
277 // TODO: It would be nice to handle overflow here.
282 /* Remove all data points from the histogram */
284 for (int i = 0; i < buckets_.getNumBuckets(); i++) {
285 buckets_.getByIndex(i).clear();
289 /* Merge two histogram data together */
290 void merge(Histogram &hist) {
291 // the two histogram bucket definitions must match to support
293 if (getBucketSize() != hist.getBucketSize() ||
294 getMin() != hist.getMin() ||
295 getMax() != hist.getMax() ||
296 getNumBuckets() != hist.getNumBuckets() ) {
297 throw std::invalid_argument("Cannot merge from input histogram.");
300 for (int i = 0; i < buckets_.getNumBuckets(); i++) {
301 buckets_.getByIndex(i).merge(hist.buckets_.getByIndex(i));
305 /* Copy bucket values from another histogram */
306 void copy(Histogram &hist) {
307 // the two histogram bucket definition must match
308 if (getBucketSize() != hist.getBucketSize() ||
309 getMin() != hist.getMin() ||
310 getMax() != hist.getMax() ||
311 getNumBuckets() != hist.getNumBuckets() ) {
312 throw std::invalid_argument("Cannot copy from input histogram.");
315 for (int i = 0; i < buckets_.getNumBuckets(); i++) {
316 buckets_.getByIndex(i) = hist.buckets_.getByIndex(i);
320 /* Returns the bucket size of each bucket in the histogram. */
321 ValueType getBucketSize() const {
322 return buckets_.getBucketSize();
324 /* Returns the min value at which bucketing begins. */
325 ValueType getMin() const {
326 return buckets_.getMin();
328 /* Returns the max value at which bucketing ends. */
329 ValueType getMax() const {
330 return buckets_.getMax();
332 /* Returns the number of buckets */
333 unsigned int getNumBuckets() const {
334 return buckets_.getNumBuckets();
337 /* Returns the specified bucket (for reading only!) */
338 const Bucket& getBucketByIndex(int idx) const {
339 return buckets_.getByIndex(idx);
343 * Returns the minimum threshold for the bucket at the given index.
345 * The bucket at the specified index will store values in the range
346 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
347 * max is smaller than bucketMin + bucketSize.
349 ValueType getBucketMin(unsigned int idx) const {
350 return buckets_.getBucketMin(idx);
354 * Returns the maximum threshold for the bucket at the given index.
356 * The bucket at the specified index will store values in the range
357 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
358 * max is smaller than bucketMin + bucketSize.
360 ValueType getBucketMax(unsigned int idx) const {
361 return buckets_.getBucketMax(idx);
365 * Get the bucket that the specified percentile falls into
367 * The lowest and highest percentile data points in returned bucket will be
368 * returned in the lowPct and highPct arguments, if they are non-NULL.
370 unsigned int getPercentileBucketIdx(double pct,
371 double* lowPct = NULL,
372 double* highPct = NULL) const {
373 // We unfortunately can't use lambdas here yet;
374 // Some users of this code are still built with gcc-4.4.
375 CountFromBucket countFn;
376 return buckets_.getPercentileBucketIdx(pct, countFn, lowPct, highPct);
380 * Estimate the value at the specified percentile.
382 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
384 * @return Returns an estimate for N, where N is the number where exactly pct
385 * percentage of the data points in the histogram are less than N.
387 ValueType getPercentileEstimate(double pct) const {
388 CountFromBucket countFn;
390 return buckets_.getPercentileEstimate(pct, countFn, avgFn);
394 * Get a human-readable string describing the histogram contents
396 std::string debugString() const;
399 struct CountFromBucket {
400 uint64_t operator()(const Bucket& bucket) const {
404 struct AvgFromBucket {
405 ValueType operator()(const Bucket& bucket) const {
406 if (bucket.count == 0) {
409 // Cast bucket.count to a signed integer type. This ensures that we
410 // perform division properly here: If bucket.sum is a signed integer
411 // type but we divide by an unsigned number, unsigned division will be
412 // performed and bucket.sum will be converted to unsigned first.
413 // If bucket.sum is unsigned, the code will still do unsigned division
416 // The only downside is if bucket.count is large enough to be negative
417 // when treated as signed. That should be extremely unlikely, though.
418 return bucket.sum / static_cast<int64_t>(bucket.count);
422 detail::HistogramBuckets<ValueType, Bucket> buckets_;
427 #include "folly/Histogram-inl.h"
429 #endif // FOLLY_HISTOGRAM_H_