2 * Copyright 2012 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_HISTOGRAM_H_
18 #define FOLLY_HISTOGRAM_H_
26 #include "folly/detail/Stats.h"
33 * A helper class to manage a set of histogram buckets.
35 template <typename T, typename BucketT>
36 class HistogramBuckets {
39 typedef BucketT BucketType;
42 * Create a set of histogram buckets.
44 * One bucket will be created for each bucketSize interval of values within
45 * the specified range. Additionally, one bucket will be created to track
46 * all values that fall below the specified minimum, and one bucket will be
47 * created for all values above the specified maximum.
49 * If (max - min) is not a multiple of bucketSize, the last bucket will cover
50 * a smaller range of values than the other buckets.
52 * (max - min) must be larger than or equal to bucketSize.
54 HistogramBuckets(ValueType bucketSize, ValueType min, ValueType max,
55 const BucketType& defaultBucket);
57 /* Returns the bucket size of each bucket in the histogram. */
58 ValueType getBucketSize() const {
62 /* Returns the min value at which bucketing begins. */
63 ValueType getMin() const {
67 /* Returns the max value at which bucketing ends. */
68 ValueType getMax() const {
73 * Returns the number of buckets.
75 * This includes the total number of buckets for the [min, max) range,
76 * plus 2 extra buckets, one for handling values less than min, and one for
77 * values greater than max.
79 unsigned int getNumBuckets() const {
80 return buckets_.size();
83 /* Returns the bucket index into which the given value would fall. */
84 unsigned int getBucketIdx(ValueType value) const;
86 /* Returns the bucket for the specified value */
87 BucketType& getByValue(ValueType value) {
88 return buckets_[getBucketIdx(value)];
91 /* Returns the bucket for the specified value */
92 const BucketType& getByValue(ValueType value) const {
93 return buckets_[getBucketIdx(value)];
97 * Returns the bucket at the specified index.
99 * Note that index 0 is the bucket for all values less than the specified
100 * minimum. Index 1 is the first bucket in the specified bucket range.
102 BucketType& getByIndex(unsigned int idx) {
103 return buckets_[idx];
106 /* Returns the bucket at the specified index. */
107 const BucketType& getByIndex(unsigned int idx) const {
108 return buckets_[idx];
112 * Returns the minimum threshold for the bucket at the given index.
114 * The bucket at the specified index will store values in the range
115 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
116 * max is smaller than bucketMin + bucketSize.
118 ValueType getBucketMin(unsigned int idx) const {
120 return std::numeric_limits<ValueType>::min();
122 if (idx == buckets_.size() - 1) {
126 return min_ + ((idx - 1) * bucketSize_);
130 * Returns the maximum threshold for the bucket at the given index.
132 * The bucket at the specified index will store values in the range
133 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
134 * max is smaller than bucketMin + bucketSize.
136 ValueType getBucketMax(unsigned int idx) const {
137 if (idx == buckets_.size() - 1) {
138 return std::numeric_limits<ValueType>::max();
141 return min_ + (idx * bucketSize_);
145 * Determine which bucket the specified percentile falls into.
147 * Looks for the bucket that contains the Nth percentile data point.
149 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
150 * @param countFn A function that takes a const BucketType&, and returns the
151 * number of values in that bucket.
152 * @param lowPct The lowest percentile stored in the selected bucket will be
153 * returned via this parameter.
154 * @param highPct The highest percentile stored in the selected bucket will
155 * be returned via this parameter.
157 * @return Returns the index of the bucket that contains the Nth percentile
160 template <typename CountFn>
161 unsigned int getPercentileBucketIdx(double pct,
162 CountFn countFromBucket,
163 double* lowPct = NULL,
164 double* highPct = NULL) const;
167 * Estimate the value at the specified percentile.
169 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
170 * @param countFn A function that takes a const BucketType&, and returns the
171 * number of values in that bucket.
172 * @param avgFn A function that takes a const BucketType&, and returns the
173 * average of all the values in that bucket.
175 * @return Returns an estimate for N, where N is the number where exactly pct
176 * percentage of the data points in the histogram are less than N.
178 template <typename CountFn, typename AvgFn>
179 ValueType getPercentileEstimate(double pct,
180 CountFn countFromBucket,
181 AvgFn avgFromBucket) const;
184 * Iterator access to the buckets.
186 * Note that the first bucket is for all values less than min, and the last
187 * bucket is for all values greater than max. The buckets tracking values in
188 * the [min, max) actually start at the second bucket.
190 typename std::vector<BucketType>::const_iterator begin() const {
191 return buckets_.begin();
193 typename std::vector<BucketType>::iterator begin() {
194 return buckets_.begin();
196 typename std::vector<BucketType>::const_iterator end() const {
197 return buckets_.end();
199 typename std::vector<BucketType>::iterator end() {
200 return buckets_.end();
204 const ValueType bucketSize_;
205 const ValueType min_;
206 const ValueType max_;
207 std::vector<BucketType> buckets_;
214 * A basic histogram class.
216 * Groups data points into equally-sized buckets, and stores the overall sum of
217 * the data points in each bucket, as well as the number of data points in the
220 * The caller must specify the minimum and maximum data points to expect ahead
221 * of time, as well as the bucket width.
223 template <typename T>
227 typedef detail::Bucket<T> Bucket;
229 Histogram(ValueType bucketSize, ValueType min, ValueType max)
230 : buckets_(bucketSize, min, max, Bucket()) {}
232 /* Add a data point to the histogram */
233 void addValue(ValueType value) {
234 Bucket& bucket = buckets_.getByValue(value);
235 // TODO: It would be nice to handle overflow here.
241 * Remove a data point to the histogram
243 * Note that this method does not actually verify that this exact data point
244 * had previously been added to the histogram; it merely subtracts the
245 * requested value from the appropriate bucket's sum.
247 void removeValue(ValueType value) {
248 Bucket& bucket = buckets_.getByValue(value);
249 // TODO: It would be nice to handle overflow here.
254 /* Remove all data points from the histogram */
256 for (int i = 0; i < buckets_.getNumBuckets(); i++) {
257 buckets_.getByIndex(i).clear();
261 /* Merge two histogram data together */
262 void merge(Histogram &hist) {
263 // the two histogram bucket definitions must match to support
265 if (getBucketSize() != hist.getBucketSize() ||
266 getMin() != hist.getMin() ||
267 getMax() != hist.getMax() ||
268 getNumBuckets() != hist.getNumBuckets() ) {
269 throw std::invalid_argument("Cannot merge from input histogram.");
272 for (int i = 0; i < buckets_.getNumBuckets(); i++) {
273 buckets_.getByIndex(i) += hist.buckets_.getByIndex(i);
277 /* Copy bucket values from another histogram */
278 void copy(Histogram &hist) {
279 // the two histogram bucket definition must match
280 if (getBucketSize() != hist.getBucketSize() ||
281 getMin() != hist.getMin() ||
282 getMax() != hist.getMax() ||
283 getNumBuckets() != hist.getNumBuckets() ) {
284 throw std::invalid_argument("Cannot copy from input histogram.");
287 for (int i = 0; i < buckets_.getNumBuckets(); i++) {
288 buckets_.getByIndex(i) = hist.buckets_.getByIndex(i);
292 /* Returns the bucket size of each bucket in the histogram. */
293 ValueType getBucketSize() const {
294 return buckets_.getBucketSize();
296 /* Returns the min value at which bucketing begins. */
297 ValueType getMin() const {
298 return buckets_.getMin();
300 /* Returns the max value at which bucketing ends. */
301 ValueType getMax() const {
302 return buckets_.getMax();
304 /* Returns the number of buckets */
305 unsigned int getNumBuckets() const {
306 return buckets_.getNumBuckets();
309 /* Returns the specified bucket (for reading only!) */
310 const Bucket& getBucketByIndex(int idx) const {
311 return buckets_.getByIndex(idx);
315 * Returns the minimum threshold for the bucket at the given index.
317 * The bucket at the specified index will store values in the range
318 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
319 * max is smaller than bucketMin + bucketSize.
321 ValueType getBucketMin(unsigned int idx) const {
322 return buckets_.getBucketMin(idx);
326 * Returns the maximum threshold for the bucket at the given index.
328 * The bucket at the specified index will store values in the range
329 * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
330 * max is smaller than bucketMin + bucketSize.
332 ValueType getBucketMax(unsigned int idx) const {
333 return buckets_.getBucketMax(idx);
337 * Get the bucket that the specified percentile falls into
339 * The lowest and highest percentile data points in returned bucket will be
340 * returned in the lowPct and highPct arguments, if they are non-NULL.
342 unsigned int getPercentileBucketIdx(double pct,
343 double* lowPct = NULL,
344 double* highPct = NULL) const {
345 // We unfortunately can't use lambdas here yet;
346 // Some users of this code are still built with gcc-4.4.
347 CountFromBucket countFn;
348 return buckets_.getPercentileBucketIdx(pct, countFn, lowPct, highPct);
352 * Estimate the value at the specified percentile.
354 * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
356 * @return Returns an estimate for N, where N is the number where exactly pct
357 * percentage of the data points in the histogram are less than N.
359 ValueType getPercentileEstimate(double pct) const {
360 CountFromBucket countFn;
362 return buckets_.getPercentileEstimate(pct, countFn, avgFn);
366 * Get a human-readable string describing the histogram contents
368 std::string debugString() const;
371 struct CountFromBucket {
372 uint64_t operator()(const Bucket& bucket) const {
376 struct AvgFromBucket {
377 ValueType operator()(const Bucket& bucket) const {
378 if (bucket.count == 0) {
381 // Cast bucket.count to a signed integer type. This ensures that we
382 // perform division properly here: If bucket.sum is a signed integer
383 // type but we divide by an unsigned number, unsigned division will be
384 // performed and bucket.sum will be converted to unsigned first.
385 // If bucket.sum is unsigned, the code will still do unsigned division
388 // The only downside is if bucket.count is large enough to be negative
389 // when treated as signed. That should be extremely unlikely, though.
390 return bucket.sum / static_cast<int64_t>(bucket.count);
394 detail::HistogramBuckets<ValueType, Bucket> buckets_;
399 #include "folly/Histogram-inl.h"
401 #endif // FOLLY_HISTOGRAM_H_