folly/stats/TimeseriesHistogram.h

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
#include <chrono>
#include <cstdint>
#include <limits>
#include <string>

#include <folly/stats/Histogram.h>
#include <folly/stats/MultiLevelTimeSeries.h>
  22
  23 namespace folly {
  24
  25 /*
  26  * TimeseriesHistogram tracks data distributions as they change over time.
  27  *
  28  * Specifically, it is a bucketed histogram with different value ranges assigned
  29  * to each bucket.  Within each bucket is a MultiLevelTimeSeries from
  30  * 'folly/stats/MultiLevelTimeSeries.h'. This means that each bucket contains a
  31  * different set of data for different historical time periods, and one can
  32  * query data distributions over different trailing time windows.
  33  *
  34  * For example, this can answer questions: "What is the data distribution over
  35  * the last minute? Over the last 10 minutes?  Since I last cleared this
  36  * histogram?"
  37  *
  38  * The class can also estimate percentiles and answer questions like: "What was
  39  * the 99th percentile data value over the last 10 minutes?"
  40  *
   41  * Note that, depending on the size of your buckets and the smoothness
   42  * of your data distribution, the estimate may be way off from the actual
   43  * value.  In particular, if the given percentile falls outside of the bucket
   44  * range (e.g. your buckets range in 0 - 100,000 but the 99th percentile is
   45  * around 115,000) this estimate may be very wrong.
  46  *
  47  * The memory usage for a typical histogram is roughly 3k * (# of buckets).  All
  48  * insertion operations are amortized O(1), and all queries are O(# of buckets).
  49  */
  50 template <class T, class TT=std::chrono::seconds,
  51           class C=folly::MultiLevelTimeSeries<T, TT>>
  52 class TimeseriesHistogram {
  53  private:
  54    // NOTE: T must be equivalent to _signed_ numeric type for our math.
  55    static_assert(std::numeric_limits<T>::is_signed, "");
  56
  57  public:
  58   // values to be inserted into container
  59   typedef T ValueType;
  60   // the container type we use internally for each bucket
  61   typedef C ContainerType;
  62   // The time type.
  63   typedef TT TimeType;
  64
  65   /*
  66    * Create a TimeSeries histogram and initialize the bucketing and levels.
  67    *
  68    * The buckets are created by chopping the range [min, max) into pieces
  69    * of size bucketSize, with the last bucket being potentially shorter.  Two
  70    * additional buckets are always created -- the "under" bucket for the range
  71    * (-inf, min) and the "over" bucket for the range [max, +inf).
  72    *
  73    * @param bucketSize the width of each bucket
  74    * @param min the smallest value for the bucket range.
  75    * @param max the largest value for the bucket range
  76    * @param defaultContainer a pre-initialized timeseries with the desired
  77    *                         number of levels and their durations.
  78    */
  79   TimeseriesHistogram(ValueType bucketSize, ValueType min, ValueType max,
  80                       const ContainerType& defaultContainer);
  81
  82   /* Return the bucket size of each bucket in the histogram. */
  83   ValueType getBucketSize() const { return buckets_.getBucketSize(); }
  84
  85   /* Return the min value at which bucketing begins. */
  86   ValueType getMin() const { return buckets_.getMin(); }
  87
  88   /* Return the max value at which bucketing ends. */
  89   ValueType getMax() const { return buckets_.getMax(); }
  90
  91   /* Return the number of levels of the Timeseries object in each bucket */
  92   int getNumLevels() const {
  93     return buckets_.getByIndex(0).numLevels();
  94   }
  95
  96   /* Return the number of buckets */
  97   int getNumBuckets() const { return buckets_.getNumBuckets(); }
  98
  99   /*
 100    * Return the threshold of the bucket for the given index in range
 101    * [0..numBuckets).  The bucket will have range [thresh, thresh + bucketSize)
 102    * or [thresh, max), whichever is shorter.
 103    */
 104   ValueType getBucketMin(int bucketIdx) const {
 105     return buckets_.getBucketMin(bucketIdx);
 106   }
 107
 108   /* Return the actual timeseries in the given bucket (for reading only!) */
 109   const ContainerType& getBucket(int bucketIdx) const {
 110     return buckets_.getByIndex(bucketIdx);
 111   }
 112
 113   /* Total count of values at the given timeseries level (all buckets). */
 114   int64_t count(int level) const {
 115     int64_t total = 0;
 116     for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
 117       total += buckets_.getByIndex(b).count(level);
 118     }
 119     return total;
 120   }
 121
 122   /* Total count of values added during the given interval (all buckets). */
 123   int64_t count(TimeType start, TimeType end) const {
 124     int64_t total = 0;
 125     for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
 126       total += buckets_.getByIndex(b).count(start, end);
 127     }
 128     return total;
 129   }
 130
 131   /* Total sum of values at the given timeseries level (all buckets). */
 132   ValueType sum(int level) const {
 133     ValueType total = ValueType();
 134     for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
 135       total += buckets_.getByIndex(b).sum(level);
 136     }
 137     return total;
 138   }
 139
 140   /* Total sum of values added during the given interval (all buckets). */
 141   ValueType sum(TimeType start, TimeType end) const {
 142     ValueType total = ValueType();
 143     for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
 144       total += buckets_.getByIndex(b).sum(start, end);
 145     }
 146     return total;
 147   }
 148
 149   /* Average of values at the given timeseries level (all buckets). */
 150   template <typename ReturnType=double>
 151   ReturnType avg(int level) const;
 152
 153   /* Average of values added during the given interval (all buckets). */
 154   template <typename ReturnType=double>
 155   ReturnType avg(TimeType start, TimeType end) const;
 156
 157   /*
 158    * Rate at the given timeseries level (all buckets).
 159    * This is the sum of all values divided by the time interval (in seconds).
 160    */
 161   ValueType rate(int level) const;
 162
 163   /*
 164    * Rate for the given interval (all buckets).
 165    * This is the sum of all values divided by the time interval (in seconds).
 166    */
 167   template <typename ReturnType=double>
 168   ReturnType rate(TimeType start, TimeType end) const;
 169
 170   /*
 171    * Update every underlying timeseries object with the given timestamp. You
 172    * must call this directly before querying to ensure that the data in all
 173    * buckets is decayed properly.
 174    */
 175   void update(TimeType now);
 176
 177   /* clear all the data from the histogram. */
 178   void clear();
 179
 180   /* Add a value into the histogram with timestamp 'now' */
 181   void addValue(TimeType now, const ValueType& value);
 182   /* Add a value the given number of times with timestamp 'now' */
 183   void addValue(TimeType now, const ValueType& value, int64_t times);
 184
 185   /*
 186    * Add all of the values from the specified histogram.
 187    *
 188    * All of the values will be added to the current time-slot.
 189    *
 190    * One use of this is for thread-local caching of frequently updated
 191    * histogram data.  For example, each thread can store a thread-local
 192    * Histogram that is updated frequently, and only add it to the global
 193    * TimeseriesHistogram once a second.
 194    */
 195   void addValues(TimeType now, const folly::Histogram<ValueType>& values);
 196
 197   /*
 198    * Return an estimate of the value at the given percentile in the histogram
 199    * in the given timeseries level.  The percentile is estimated as follows:
 200    *
 201    * - We retrieve a count of the values in each bucket (at the given level)
 202    * - We determine via the counts which bucket the given percentile falls in.
 203    * - We assume the average value in the bucket is also its median
 204    * - We then linearly interpolate within the bucket, by assuming that the
 205    *   distribution is uniform in the two value ranges [left, median) and
 206    *   [median, right) where [left, right) is the bucket value range.
 207    *
 208    * Caveats:
 209    * - If the histogram is empty, this always returns ValueType(), usually 0.
 210    * - For the 'under' and 'over' special buckets, their range is unbounded
 211    *   on one side.  In order for the interpolation to work, we assume that
 212    *   the average value in the bucket is equidistant from the two edges of
 213    *   the bucket.  In other words, we assume that the distance between the
 214    *   average and the known bound is equal to the distance between the average
 215    *   and the unknown bound.
 216    */
 217   ValueType getPercentileEstimate(double pct, int level) const;
 218   /*
 219    * Return an estimate of the value at the given percentile in the histogram
 220    * in the given historical interval.  Please see the documentation for
 221    * getPercentileEstimate(int pct, int level) for the explanation of the
 222    * estimation algorithm.
 223    */
 224   ValueType getPercentileEstimate(double pct, TimeType start, TimeType end)
 225     const;
 226
 227   /*
 228    * Return the bucket index that the given percentile falls into (in the
 229    * given timeseries level).  This index can then be used to retrieve either
 230    * the bucket threshold, or other data from inside the bucket.
 231    */
 232   int getPercentileBucketIdx(double pct, int level) const;
 233   /*
 234    * Return the bucket index that the given percentile falls into (in the
 235    * given historical interval).  This index can then be used to retrieve either
 236    * the bucket threshold, or other data from inside the bucket.
 237    */
 238   int getPercentileBucketIdx(double pct, TimeType start, TimeType end) const;
 239
 240   /* Get the bucket threshold for the bucket containing the given pct. */
 241   int getPercentileBucketMin(double pct, int level) const {
 242     return getBucketMin(getPercentileBucketIdx(pct, level));
 243   }
 244   /* Get the bucket threshold for the bucket containing the given pct. */
 245   int getPercentileBucketMin(double pct, TimeType start, TimeType end) const {
 246     return getBucketMin(getPercentileBucketIdx(pct, start, end));
 247   }
 248
 249   /*
 250    * Print out serialized data from all buckets at the given level.
 251    * Format is: BUCKET [',' BUCKET ...]
 252    * Where: BUCKET == bucketMin ':' count ':' avg
 253    */
 254   std::string getString(int level) const;
 255
 256   /*
 257    * Print out serialized data for all buckets in the historical interval.
 258    * For format, please see getString(int level).
 259    */
 260   std::string getString(TimeType start, TimeType end) const;
 261
 262  private:
 263   typedef ContainerType Bucket;
 264   struct CountFromLevel {
 265     explicit CountFromLevel(int level) : level_(level) {}
 266
 267     uint64_t operator()(const ContainerType& bucket) const {
 268       return bucket.count(level_);
 269     }
 270
 271    private:
 272     int level_;
 273   };
 274   struct CountFromInterval {
 275     explicit CountFromInterval(TimeType start, TimeType end)
 276       : start_(start),
 277         end_(end) {}
 278
 279     uint64_t operator()(const ContainerType& bucket) const {
 280       return bucket.count(start_, end_);
 281     }
 282
 283    private:
 284     TimeType start_;
 285     TimeType end_;
 286   };
 287
 288   struct AvgFromLevel {
 289     explicit AvgFromLevel(int level) : level_(level) {}
 290
 291     ValueType operator()(const ContainerType& bucket) const {
 292       return bucket.template avg<ValueType>(level_);
 293     }
 294
 295    private:
 296     int level_;
 297   };
 298
 299   template <typename ReturnType>
 300   struct AvgFromInterval {
 301     explicit AvgFromInterval(TimeType start, TimeType end)
 302       : start_(start),
 303         end_(end) {}
 304
 305     ReturnType operator()(const ContainerType& bucket) const {
 306       return bucket.template avg<ReturnType>(start_, end_);
 307     }
 308
 309    private:
 310     TimeType start_;
 311     TimeType end_;
 312   };
 313
 314   /*
 315    * Special logic for the case of only one unique value registered
 316    * (this can happen when clients don't pick good bucket ranges or have
 317    * other bugs).  It's a lot easier for clients to track down these issues
 318    * if they are getting the correct value.
 319    */
 320   void maybeHandleSingleUniqueValue(const ValueType& value);
 321
 322   folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
 323   bool haveNotSeenValue_;
 324   bool singleUniqueValue_;
 325   ValueType firstValue_;
 326 };
 327
 328 }  // folly