/*
- * Copyright 2014 Facebook, Inc.
+ * Copyright 2017 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* limitations under the License.
*/
-#ifndef FOLLY_HISTOGRAM_H_
-#define FOLLY_HISTOGRAM_H_
+#pragma once
#include <cstddef>
+#include <cstdint>
#include <limits>
#include <ostream>
+#include <stdexcept>
#include <string>
#include <vector>
-#include <stdexcept>
-#include "folly/detail/Stats.h"
+#include <folly/CPortability.h>
+#include <folly/stats/detail/Bucket.h>
namespace folly {
*
* (max - min) must be larger than or equal to bucketSize.
*/
- HistogramBuckets(ValueType bucketSize, ValueType min, ValueType max,
- const BucketType& defaultBucket);
+ HistogramBuckets(
+ ValueType bucketSize,
+ ValueType min,
+ ValueType max,
+ const BucketType& defaultBucket);
/* Returns the bucket size of each bucket in the histogram. */
ValueType getBucketSize() const {
* plus 2 extra buckets, one for handling values less than min, and one for
* values greater than max.
*/
- unsigned int getNumBuckets() const {
+ size_t getNumBuckets() const {
return buckets_.size();
}
/* Returns the bucket index into which the given value would fall. */
- unsigned int getBucketIdx(ValueType value) const;
+ size_t getBucketIdx(ValueType value) const;
/* Returns the bucket for the specified value */
BucketType& getByValue(ValueType value) {
* Note that index 0 is the bucket for all values less than the specified
* minimum. Index 1 is the first bucket in the specified bucket range.
*/
- BucketType& getByIndex(unsigned int idx) {
+ BucketType& getByIndex(size_t idx) {
return buckets_[idx];
}
/* Returns the bucket at the specified index. */
- const BucketType& getByIndex(unsigned int idx) const {
+ const BucketType& getByIndex(size_t idx) const {
return buckets_[idx];
}
* [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
* max is smaller than bucketMin + bucketSize.
*/
- ValueType getBucketMin(unsigned int idx) const {
+ ValueType getBucketMin(size_t idx) const {
if (idx == 0) {
return std::numeric_limits<ValueType>::min();
}
return max_;
}
- return min_ + ((idx - 1) * bucketSize_);
+ return ValueType(min_ + ((idx - 1) * bucketSize_));
}
/*
* [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
* max is smaller than bucketMin + bucketSize.
*/
- ValueType getBucketMax(unsigned int idx) const {
+ ValueType getBucketMax(size_t idx) const {
if (idx == buckets_.size() - 1) {
return std::numeric_limits<ValueType>::max();
}
- return min_ + (idx * bucketSize_);
+ return ValueType(min_ + (idx * bucketSize_));
}
+ /**
+ * Computes the total number of values stored across all buckets.
+ *
+ * Runs in O(numBuckets)
+ *
+ * @param countFn A function that takes a const BucketType&, and returns the
+ * number of values in that bucket
+ * @return Returns the total number of values stored across all buckets
+ */
+ template <typename CountFn>
+ uint64_t computeTotalCount(CountFn countFromBucket) const;
+
/**
* Determine which bucket the specified percentile falls into.
*
* data point.
*/
template <typename CountFn>
- unsigned int getPercentileBucketIdx(double pct,
- CountFn countFromBucket,
- double* lowPct = NULL,
- double* highPct = NULL) const;
+ size_t getPercentileBucketIdx(
+ double pct,
+ CountFn countFromBucket,
+ double* lowPct = nullptr,
+ double* highPct = nullptr) const;
/**
* Estimate the value at the specified percentile.
* percentage of the data points in the histogram are less than N.
*/
template <typename CountFn, typename AvgFn>
- ValueType getPercentileEstimate(double pct,
- CountFn countFromBucket,
- AvgFn avgFromBucket) const;
+ ValueType getPercentileEstimate(
+ double pct,
+ CountFn countFromBucket,
+ AvgFn avgFromBucket) const;
/*
* Iterator access to the buckets.
}
private:
- const ValueType bucketSize_;
- const ValueType min_;
- const ValueType max_;
+ ValueType bucketSize_;
+ ValueType min_;
+ ValueType max_;
std::vector<BucketType> buckets_;
};
-} // detail
-
+} // namespace detail
/*
* A basic histogram class.
typedef detail::Bucket<T> Bucket;
Histogram(ValueType bucketSize, ValueType min, ValueType max)
- : buckets_(bucketSize, min, max, Bucket()) {}
+ : buckets_(bucketSize, min, max, Bucket()) {}
/* Add a data point to the histogram */
- void addValue(ValueType value) {
+ void addValue(ValueType value) FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(
+ "signed-integer-overflow",
+ "unsigned-integer-overflow") {
Bucket& bucket = buckets_.getByValue(value);
- // TODO: It would be nice to handle overflow here.
+ // NOTE: Overflow is handled elsewhere and tests check this
+ // behavior (see HistogramTest.cpp TestOverflow* tests).
+ // TODO: It would be nice to handle overflow here and redesign this class.
bucket.sum += value;
bucket.count += 1;
}
+ /* Add multiple same data points to the histogram */
+ void addRepeatedValue(ValueType value, uint64_t nSamples)
+ FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(
+ "signed-integer-overflow",
+ "unsigned-integer-overflow") {
+ Bucket& bucket = buckets_.getByValue(value);
+ // NOTE: Overflow is handled elsewhere and tests check this
+ // behavior (see HistogramTest.cpp TestOverflow* tests).
+ // TODO: It would be nice to handle overflow here and redesign this class.
+ bucket.sum += value * nSamples;
+ bucket.count += nSamples;
+ }
+
/*
* Remove a data point to the histogram
*
* had previously been added to the histogram; it merely subtracts the
* requested value from the appropriate bucket's sum.
*/
- void removeValue(ValueType value) {
+ void removeValue(ValueType value) FOLLY_DISABLE_UNDEFINED_BEHAVIOR_SANITIZER(
+ "signed-integer-overflow",
+ "unsigned-integer-overflow") {
+ Bucket& bucket = buckets_.getByValue(value);
+ // NOTE: Overflow is handled elsewhere and tests check this
+ // behavior (see HistogramTest.cpp TestOverflow* tests).
+ // TODO: It would be nice to handle overflow here and redesign this class.
+ if (bucket.count > 0) {
+ bucket.sum -= value;
+ bucket.count -= 1;
+ } else {
+ bucket.sum = ValueType();
+ bucket.count = 0;
+ }
+ }
+
+ /* Remove multiple same data points from the histogram */
+ void removeRepeatedValue(ValueType value, uint64_t nSamples) {
Bucket& bucket = buckets_.getByValue(value);
// TODO: It would be nice to handle overflow here.
- bucket.sum -= value;
- bucket.count -= 1;
+ if (bucket.count >= nSamples) {
+ bucket.sum -= value * nSamples;
+ bucket.count -= nSamples;
+ } else {
+ bucket.sum = ValueType();
+ bucket.count = 0;
+ }
}
/* Remove all data points from the histogram */
void clear() {
- for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+ for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
buckets_.getByIndex(i).clear();
}
}
+ /* Subtract another histogram data from the histogram */
+ void subtract(const Histogram& hist) {
+ // the two histogram bucket definitions must match to support
+ // subtract.
+ if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
+ getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
+ throw std::invalid_argument("Cannot subtract input histogram.");
+ }
+
+ for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
+ buckets_.getByIndex(i) -= hist.buckets_.getByIndex(i);
+ }
+ }
+
/* Merge two histogram data together */
- void merge(Histogram &hist) {
+ void merge(const Histogram& hist) {
// the two histogram bucket definitions must match to support
// a merge.
- if (getBucketSize() != hist.getBucketSize() ||
- getMin() != hist.getMin() ||
- getMax() != hist.getMax() ||
- getNumBuckets() != hist.getNumBuckets() ) {
+ if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
+ getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
throw std::invalid_argument("Cannot merge from input histogram.");
}
- for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+ for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
buckets_.getByIndex(i) += hist.buckets_.getByIndex(i);
}
}
/* Copy bucket values from another histogram */
- void copy(Histogram &hist) {
+ void copy(const Histogram& hist) {
// the two histogram bucket definition must match
- if (getBucketSize() != hist.getBucketSize() ||
- getMin() != hist.getMin() ||
- getMax() != hist.getMax() ||
- getNumBuckets() != hist.getNumBuckets() ) {
+ if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
+ getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
throw std::invalid_argument("Cannot copy from input histogram.");
}
- for (int i = 0; i < buckets_.getNumBuckets(); i++) {
+ for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
buckets_.getByIndex(i) = hist.buckets_.getByIndex(i);
}
}
return buckets_.getMax();
}
/* Returns the number of buckets */
- unsigned int getNumBuckets() const {
+ size_t getNumBuckets() const {
return buckets_.getNumBuckets();
}
/* Returns the specified bucket (for reading only!) */
- const Bucket& getBucketByIndex(int idx) const {
+ const Bucket& getBucketByIndex(size_t idx) const {
return buckets_.getByIndex(idx);
}
* [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
* max is smaller than bucketMin + bucketSize.
*/
- ValueType getBucketMin(unsigned int idx) const {
+ ValueType getBucketMin(size_t idx) const {
return buckets_.getBucketMin(idx);
}
* [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
* max is smaller than bucketMin + bucketSize.
*/
- ValueType getBucketMax(unsigned int idx) const {
+ ValueType getBucketMax(size_t idx) const {
return buckets_.getBucketMax(idx);
}
+ /**
+ * Computes the total number of values stored across all buckets.
+ *
+ * Runs in O(numBuckets)
+ */
+ uint64_t computeTotalCount() const {
+ CountFromBucket countFn;
+ return buckets_.computeTotalCount(countFn);
+ }
+
/*
* Get the bucket that the specified percentile falls into
*
* The lowest and highest percentile data points in returned bucket will be
- * returned in the lowPct and highPct arguments, if they are non-NULL.
+ * returned in the lowPct and highPct arguments, if they are not nullptr.
*/
- unsigned int getPercentileBucketIdx(double pct,
- double* lowPct = NULL,
- double* highPct = NULL) const {
+ size_t getPercentileBucketIdx(
+ double pct,
+ double* lowPct = nullptr,
+ double* highPct = nullptr) const {
// We unfortunately can't use lambdas here yet;
// Some users of this code are still built with gcc-4.4.
CountFromBucket countFn;
*/
void toTSV(std::ostream& out, bool skipEmptyBuckets = true) const;
- private:
struct CountFromBucket {
uint64_t operator()(const Bucket& bucket) const {
return bucket.count;
}
};
+ private:
detail::HistogramBuckets<ValueType, Bucket> buckets_;
};
-} // folly
-
-#endif // FOLLY_HISTOGRAM_H_
+} // namespace folly