+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_HISTOGRAM_H_
+#define FOLLY_HISTOGRAM_H_
+
+#include <cstddef>
+#include <limits>
+#include <ostream>
+#include <string>
+#include <vector>
+#include <stdexcept>
+
+#include "folly/detail/Stats.h"
+
+namespace folly {
+
+namespace detail {
+
+/*
+ * A helper class to manage a set of histogram buckets.
+ */
+template <typename T, typename BucketT>
+class HistogramBuckets {
+ public:
+ typedef T ValueType;
+ typedef BucketT BucketType;
+
+ /*
+ * Create a set of histogram buckets.
+ *
+ * One bucket will be created for each bucketSize interval of values within
+ * the specified range. Additionally, one bucket will be created to track
+ * all values that fall below the specified minimum, and one bucket will be
+ * created for all values above the specified maximum.
+ *
+ * If (max - min) is not a multiple of bucketSize, the last bucket will cover
+ * a smaller range of values than the other buckets.
+ *
+ * (max - min) must be larger than or equal to bucketSize.
+ */
+ HistogramBuckets(ValueType bucketSize, ValueType min, ValueType max,
+ const BucketType& defaultBucket);
+
+ /* Returns the bucket size of each bucket in the histogram. */
+ ValueType getBucketSize() const {
+ return bucketSize_;
+ }
+
+ /* Returns the min value at which bucketing begins. */
+ ValueType getMin() const {
+ return min_;
+ }
+
+ /* Returns the max value at which bucketing ends. */
+ ValueType getMax() const {
+ return max_;
+ }
+
+ /*
+ * Returns the number of buckets.
+ *
+ * This includes the total number of buckets for the [min, max) range,
+ * plus 2 extra buckets, one for handling values less than min, and one for
+ * values greater than max.
+ */
+ unsigned int getNumBuckets() const {
+ return buckets_.size();
+ }
+
+ /* Returns the bucket index into which the given value would fall. */
+ unsigned int getBucketIdx(ValueType value) const;
+
+ /* Returns the bucket for the specified value */
+ BucketType& getByValue(ValueType value) {
+ return buckets_[getBucketIdx(value)];
+ }
+
+ /* Returns the bucket for the specified value */
+ const BucketType& getByValue(ValueType value) const {
+ return buckets_[getBucketIdx(value)];
+ }
+
+ /*
+ * Returns the bucket at the specified index.
+ *
+ * Note that index 0 is the bucket for all values less than the specified
+ * minimum. Index 1 is the first bucket in the specified bucket range.
+ */
+ BucketType& getByIndex(unsigned int idx) {
+ return buckets_[idx];
+ }
+
+ /* Returns the bucket at the specified index. */
+ const BucketType& getByIndex(unsigned int idx) const {
+ return buckets_[idx];
+ }
+
+ /*
+ * Returns the minimum threshold for the bucket at the given index.
+ *
+ * The bucket at the specified index will store values in the range
+ * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
+ * max is smaller than bucketMin + bucketSize.
+ */
+ ValueType getBucketMin(unsigned int idx) const {
+ if (idx == 0) {
+ return std::numeric_limits<ValueType>::min();
+ }
+ if (idx == buckets_.size() - 1) {
+ return max_;
+ }
+
+ return min_ + ((idx - 1) * bucketSize_);
+ }
+
+ /*
+ * Returns the maximum threshold for the bucket at the given index.
+ *
+ * The bucket at the specified index will store values in the range
+ * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
+ * max is smaller than bucketMin + bucketSize.
+ */
+ ValueType getBucketMax(unsigned int idx) const {
+ if (idx == buckets_.size() - 1) {
+ return std::numeric_limits<ValueType>::max();
+ }
+
+ return min_ + (idx * bucketSize_);
+ }
+
+ /**
+ * Determine which bucket the specified percentile falls into.
+ *
+ * Looks for the bucket that contains the Nth percentile data point.
+ *
+ * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
+ * @param countFn A function that takes a const BucketType&, and returns the
+ * number of values in that bucket.
+ * @param lowPct The lowest percentile stored in the selected bucket will be
+ * returned via this parameter.
+ * @param highPct The highest percentile stored in the selected bucket will
+ * be returned via this parameter.
+ *
+ * @return Returns the index of the bucket that contains the Nth percentile
+ * data point.
+ */
+ template <typename CountFn>
+ unsigned int getPercentileBucketIdx(double pct,
+ CountFn countFromBucket,
+ double* lowPct = NULL,
+ double* highPct = NULL) const;
+
+ /**
+ * Estimate the value at the specified percentile.
+ *
+ * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
+ * @param countFn A function that takes a const BucketType&, and returns the
+ * number of values in that bucket.
+ * @param avgFn A function that takes a const BucketType&, and returns the
+ * average of all the values in that bucket.
+ *
+ * @return Returns an estimate for N, where N is the number where exactly pct
+ * percentage of the data points in the histogram are less than N.
+ */
+ template <typename CountFn, typename AvgFn>
+ ValueType getPercentileEstimate(double pct,
+ CountFn countFromBucket,
+ AvgFn avgFromBucket) const;
+
+ /*
+ * Iterator access to the buckets.
+ *
+ * Note that the first bucket is for all values less than min, and the last
+ * bucket is for all values greater than max. The buckets tracking values in
+ * the [min, max) actually start at the second bucket.
+ */
+ typename std::vector<BucketType>::const_iterator begin() const {
+ return buckets_.begin();
+ }
+ typename std::vector<BucketType>::iterator begin() {
+ return buckets_.begin();
+ }
+ typename std::vector<BucketType>::const_iterator end() const {
+ return buckets_.end();
+ }
+ typename std::vector<BucketType>::iterator end() {
+ return buckets_.end();
+ }
+
+ private:
+ const ValueType bucketSize_;
+ const ValueType min_;
+ const ValueType max_;
+ std::vector<BucketType> buckets_;
+};
+
+} // detail
+
+
+/*
+ * A basic histogram class.
+ *
+ * Groups data points into equally-sized buckets, and stores the overall sum of
+ * the data points in each bucket, as well as the number of data points in the
+ * bucket.
+ *
+ * The caller must specify the minimum and maximum data points to expect ahead
+ * of time, as well as the bucket width.
+ */
+template <typename T>
+class Histogram {
+ public:
+ typedef T ValueType;
+ typedef detail::Bucket<T> Bucket;
+
+ Histogram(ValueType bucketSize, ValueType min, ValueType max)
+ : buckets_(bucketSize, min, max, Bucket()) {}
+
+ /* Add a data point to the histogram */
+ void addValue(ValueType value) {
+ Bucket& bucket = buckets_.getByValue(value);
+ // TODO: It would be nice to handle overflow here.
+ bucket.sum += value;
+ bucket.count += 1;
+ }
+
+ /*
+ * Remove a data point to the histogram
+ *
+ * Note that this method does not actually verify that this exact data point
+ * had previously been added to the histogram; it merely subtracts the
+ * requested value from the appropriate bucket's sum.
+ */
+ void removeValue(ValueType value) {
+ Bucket& bucket = buckets_.getByValue(value);
+ // TODO: It would be nice to handle overflow here.
+ bucket.sum -= value;
+ bucket.count -= 1;
+ }
+
+ /* Remove all data points from the histogram */
+ void clear() {
+ for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+ buckets_.getByIndex(i).clear();
+ }
+ }
+
+ /* Merge two histogram data together */
+ void merge(Histogram &hist) {
+ // the two histogram bucket definitions must match to support
+ // a merge.
+ if (getBucketSize() != hist.getBucketSize() ||
+ getMin() != hist.getMin() ||
+ getMax() != hist.getMax() ||
+ getNumBuckets() != hist.getNumBuckets() ) {
+ throw std::invalid_argument("Cannot merge from input histogram.");
+ }
+
+ for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+ buckets_.getByIndex(i) += hist.buckets_.getByIndex(i);
+ }
+ }
+
+ /* Copy bucket values from another histogram */
+ void copy(Histogram &hist) {
+ // the two histogram bucket definition must match
+ if (getBucketSize() != hist.getBucketSize() ||
+ getMin() != hist.getMin() ||
+ getMax() != hist.getMax() ||
+ getNumBuckets() != hist.getNumBuckets() ) {
+ throw std::invalid_argument("Cannot copy from input histogram.");
+ }
+
+ for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+ buckets_.getByIndex(i) = hist.buckets_.getByIndex(i);
+ }
+ }
+
+ /* Returns the bucket size of each bucket in the histogram. */
+ ValueType getBucketSize() const {
+ return buckets_.getBucketSize();
+ }
+ /* Returns the min value at which bucketing begins. */
+ ValueType getMin() const {
+ return buckets_.getMin();
+ }
+ /* Returns the max value at which bucketing ends. */
+ ValueType getMax() const {
+ return buckets_.getMax();
+ }
+ /* Returns the number of buckets */
+ unsigned int getNumBuckets() const {
+ return buckets_.getNumBuckets();
+ }
+
+ /* Returns the specified bucket (for reading only!) */
+ const Bucket& getBucketByIndex(int idx) const {
+ return buckets_.getByIndex(idx);
+ }
+
+ /*
+ * Returns the minimum threshold for the bucket at the given index.
+ *
+ * The bucket at the specified index will store values in the range
+ * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
+ * max is smaller than bucketMin + bucketSize.
+ */
+ ValueType getBucketMin(unsigned int idx) const {
+ return buckets_.getBucketMin(idx);
+ }
+
+ /*
+ * Returns the maximum threshold for the bucket at the given index.
+ *
+ * The bucket at the specified index will store values in the range
+ * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
+ * max is smaller than bucketMin + bucketSize.
+ */
+ ValueType getBucketMax(unsigned int idx) const {
+ return buckets_.getBucketMax(idx);
+ }
+
+ /*
+ * Get the bucket that the specified percentile falls into
+ *
+ * The lowest and highest percentile data points in returned bucket will be
+ * returned in the lowPct and highPct arguments, if they are non-NULL.
+ */
+ unsigned int getPercentileBucketIdx(double pct,
+ double* lowPct = NULL,
+ double* highPct = NULL) const {
+ // We unfortunately can't use lambdas here yet;
+ // Some users of this code are still built with gcc-4.4.
+ CountFromBucket countFn;
+ return buckets_.getPercentileBucketIdx(pct, countFn, lowPct, highPct);
+ }
+
+ /**
+ * Estimate the value at the specified percentile.
+ *
+ * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
+ *
+ * @return Returns an estimate for N, where N is the number where exactly pct
+ * percentage of the data points in the histogram are less than N.
+ */
+ ValueType getPercentileEstimate(double pct) const {
+ CountFromBucket countFn;
+ AvgFromBucket avgFn;
+ return buckets_.getPercentileEstimate(pct, countFn, avgFn);
+ }
+
+ /*
+ * Get a human-readable string describing the histogram contents
+ */
+ std::string debugString() const;
+
+ /*
+ * Write the histogram contents in tab-separated values (TSV) format.
+ * Format is "min max count sum".
+ */
+ void toTSV(std::ostream& out, bool skipEmptyBuckets = true) const;
+
+ private:
+ struct CountFromBucket {
+ uint64_t operator()(const Bucket& bucket) const {
+ return bucket.count;
+ }
+ };
+ struct AvgFromBucket {
+ ValueType operator()(const Bucket& bucket) const {
+ if (bucket.count == 0) {
+ return ValueType(0);
+ }
+ // Cast bucket.count to a signed integer type. This ensures that we
+ // perform division properly here: If bucket.sum is a signed integer
+ // type but we divide by an unsigned number, unsigned division will be
+ // performed and bucket.sum will be converted to unsigned first.
+ // If bucket.sum is unsigned, the code will still do unsigned division
+ // correctly.
+ //
+ // The only downside is if bucket.count is large enough to be negative
+ // when treated as signed. That should be extremely unlikely, though.
+ return bucket.sum / static_cast<int64_t>(bucket.count);
+ }
+ };
+
+ detail::HistogramBuckets<ValueType, Bucket> buckets_;
+};
+
+} // folly
+
+#endif // FOLLY_HISTOGRAM_H_