--- /dev/null
+/*
+ * Copyright 2013 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FOLLY_HISTOGRAM_DEFS_H_
+#define FOLLY_HISTOGRAM_DEFS_H_
+
+#include "folly/Conv.h"
+
+#include <glog/logging.h>
+
+namespace folly {
+
+namespace detail {
+
+template <typename T, typename BucketT>
+HistogramBuckets<T, BucketT>::HistogramBuckets(ValueType bucketSize,
+ ValueType min,
+ ValueType max,
+ const BucketType& defaultBucket)
+ : bucketSize_(bucketSize),
+ min_(min),
+ max_(max) {
+ CHECK_GT(bucketSize_, ValueType(0));
+ CHECK_LT(min_, max_);
+
+ unsigned int numBuckets = (max - min) / bucketSize;
+ // Round up if the bucket size does not fit evenly
+ if (numBuckets * bucketSize < max - min) {
+ ++numBuckets;
+ }
+ // Add 2 for the extra 'below min' and 'above max' buckets
+ numBuckets += 2;
+ buckets_.assign(numBuckets, defaultBucket);
+}
+
+template <typename T, typename BucketType>
+unsigned int HistogramBuckets<T, BucketType>::getBucketIdx(
+ ValueType value) const {
+ if (value < min_) {
+ return 0;
+ } else if (value >= max_) {
+ return buckets_.size() - 1;
+ } else {
+ // the 1 is the below_min bucket
+ return ((value - min_) / bucketSize_) + 1;
+ }
+}
+
+template <typename T, typename BucketType>
+template <typename CountFn>
+unsigned int HistogramBuckets<T, BucketType>::getPercentileBucketIdx(
+ double pct,
+ CountFn countFromBucket,
+ double* lowPct, double* highPct) const {
+ CHECK_GE(pct, 0.0);
+ CHECK_LE(pct, 1.0);
+
+ unsigned int numBuckets = buckets_.size();
+
+ // Compute the counts in each bucket
+ std::vector<uint64_t> counts(numBuckets);
+ uint64_t totalCount = 0;
+ for (unsigned int n = 0; n < numBuckets; ++n) {
+ uint64_t bucketCount =
+ countFromBucket(const_cast<const BucketType&>(buckets_[n]));
+ counts[n] = bucketCount;
+ totalCount += bucketCount;
+ }
+
+ // If there are no elements, just return the lowest bucket.
+ // Note that we return bucket 1, which is the first bucket in the
+ // histogram range; bucket 0 is for all values below min_.
+ if (totalCount == 0) {
+ // Set lowPct and highPct both to 0.
+ // getPercentileEstimate() will recognize this to mean that the histogram
+ // is empty.
+ if (lowPct) {
+ *lowPct = 0.0;
+ }
+ if (highPct) {
+ *highPct = 0.0;
+ }
+ return 1;
+ }
+
+ // Loop through all the buckets, keeping track of each bucket's
+ // percentile range: [0,10], [10,17], [17,45], etc. When we find a range
+ // that includes our desired percentile, we return that bucket index.
+ double prevPct = 0.0;
+ double curPct = 0.0;
+ uint64_t curCount = 0;
+ unsigned int idx;
+ for (idx = 0; idx < numBuckets; ++idx) {
+ if (counts[idx] == 0) {
+ // skip empty buckets
+ continue;
+ }
+
+ prevPct = curPct;
+ curCount += counts[idx];
+ curPct = static_cast<double>(curCount) / totalCount;
+ if (pct <= curPct) {
+ // This is the desired bucket
+ break;
+ }
+ }
+
+ if (lowPct) {
+ *lowPct = prevPct;
+ }
+ if (highPct) {
+ *highPct = curPct;
+ }
+ return idx;
+}
+
+template <typename T, typename BucketType>
+template <typename CountFn, typename AvgFn>
+T HistogramBuckets<T, BucketType>::getPercentileEstimate(
+ double pct,
+ CountFn countFromBucket,
+ AvgFn avgFromBucket) const {
+
+ // Find the bucket where this percentile falls
+ double lowPct;
+ double highPct;
+ unsigned int bucketIdx = getPercentileBucketIdx(pct, countFromBucket,
+ &lowPct, &highPct);
+ if (lowPct == 0.0 && highPct == 0.0) {
+ // Invalid range -- the buckets must all be empty
+ // Return the default value for ValueType.
+ return ValueType();
+ }
+ if (lowPct == highPct) {
+ // Unlikely to have exact equality,
+ // but just return the bucket average in this case.
+ // We handle this here to avoid division by 0 below.
+ return avgFromBucket(buckets_[bucketIdx]);
+ }
+
+ CHECK_GE(pct, lowPct);
+ CHECK_LE(pct, highPct);
+ CHECK_LT(lowPct, highPct);
+
+ // Compute information about this bucket
+ ValueType avg = avgFromBucket(buckets_[bucketIdx]);
+ ValueType low;
+ ValueType high;
+ if (bucketIdx == 0) {
+ if (avg > min_) {
+ // This normally shouldn't happen. This bucket is only supposed to track
+ // values less than min_. Most likely this means that integer overflow
+ // occurred, and the code in avgFromBucket() returned a huge value
+ // instead of a small one. Just return the minimum possible value for
+ // now.
+ //
+ // (Note that if the counter keeps being decremented, eventually it will
+ // wrap and become small enough that we won't detect this any more, and
+ // we will return bogus information.)
+ LOG(ERROR) << "invalid average value in histogram minimum bucket: " <<
+ avg << " > " << min_ << ": possible integer overflow?";
+ return getBucketMin(bucketIdx);
+ }
+ // For the below-min bucket, just assume the lowest value ever seen is
+ // twice as far away from min_ as avg.
+ high = min_;
+ low = high - (2 * (high - avg));
+ // Adjust low in case it wrapped
+ if (low > avg) {
+ low = std::numeric_limits<ValueType>::min();
+ }
+ } else if (bucketIdx == buckets_.size() - 1) {
+ if (avg < max_) {
+ // Most likely this means integer overflow occurred. See the comments
+ // above in the minimum case.
+ LOG(ERROR) << "invalid average value in histogram maximum bucket: " <<
+ avg << " < " << max_ << ": possible integer overflow?";
+ return getBucketMax(bucketIdx);
+ }
+ // Similarly for the above-max bucket, assume the highest value ever seen
+ // is twice as far away from max_ as avg.
+ low = max_;
+ high = low + (2 * (avg - low));
+ // Adjust high in case it wrapped
+ if (high < avg) {
+ high = std::numeric_limits<ValueType>::max();
+ }
+ } else {
+ low = getBucketMin(bucketIdx);
+ high = getBucketMax(bucketIdx);
+ if (avg < low || avg > high) {
+ // Most likely this means an integer overflow occurred.
+ // See the comments above. Return the midpoint between low and high
+ // as a best guess, since avg is meaningless.
+ LOG(ERROR) << "invalid average value in histogram bucket: " <<
+ avg << " not in range [" << low << ", " << high <<
+ "]: possible integer overflow?";
+ return (low + high) / 2;
+ }
+ }
+
+ // Since we know the average value in this bucket, we can do slightly better
+ // than just assuming the data points in this bucket are uniformly
+ // distributed between low and high.
+ //
+ // Assume that the median value in this bucket is the same as the average
+ // value.
+ double medianPct = (lowPct + highPct) / 2.0;
+ if (pct < medianPct) {
+ // Assume that the data points lower than the median of this bucket
+ // are uniformly distributed between low and avg
+ double pctThroughSection = (pct - lowPct) / (medianPct - lowPct);
+ return low + ((avg - low) * pctThroughSection);
+ } else {
+ // Assume that the data points greater than the median of this bucket
+ // are uniformly distributed between avg and high
+ double pctThroughSection = (pct - medianPct) / (highPct - medianPct);
+ return avg + ((high - avg) * pctThroughSection);
+ }
+}
+
+} // detail
+
+
+template <typename T>
+std::string Histogram<T>::debugString() const {
+ std::string ret = folly::to<std::string>(
+ "num buckets: ", buckets_.getNumBuckets(),
+ ", bucketSize: ", buckets_.getBucketSize(),
+ ", min: ", buckets_.getMin(), ", max: ", buckets_.getMax(), "\n");
+
+ for (unsigned int i = 0; i < buckets_.getNumBuckets(); ++i) {
+ folly::toAppend(" ", buckets_.getBucketMin(i), ": ",
+ buckets_.getByIndex(i).count, "\n",
+ &ret);
+ }
+
+ return ret;
+}
+
+template <typename T>
+void Histogram<T>::toTSV(std::ostream& out, bool skipEmptyBuckets) const {
+ for (unsigned int i = 0; i < buckets_.getNumBuckets(); ++i) {
+ // Do not output empty buckets in order to reduce data file size.
+ if (skipEmptyBuckets && getBucketByIndex(i).count == 0) {
+ continue;
+ }
+ const auto& bucket = getBucketByIndex(i);
+ out << getBucketMin(i) << '\t' << getBucketMax(i) << '\t'
+ << bucket.count << '\t' << bucket.sum << '\n';
+ }
+}
+
+} // folly
+
+#endif // FOLLY_HISTOGRAM_DEFS_H_