folly/stats/TimeseriesHistogram.h

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <string>
  20 #include <folly/stats/Histogram.h>
  21 #include <folly/stats/MultiLevelTimeSeries.h>
  22
  23 namespace folly {
  24
  25 /*
  26  * TimeseriesHistogram tracks data distributions as they change over time.
  27  *
  28  * Specifically, it is a bucketed histogram with different value ranges assigned
  29  * to each bucket.  Within each bucket is a MultiLevelTimeSeries from
  30  * 'folly/stats/MultiLevelTimeSeries.h'. This means that each bucket contains a
  31  * different set of data for different historical time periods, and one can
  32  * query data distributions over different trailing time windows.
  33  *
  34  * For example, this can answer questions: "What is the data distribution over
  35  * the last minute? Over the last 10 minutes?  Since I last cleared this
  36  * histogram?"
  37  *
  38  * The class can also estimate percentiles and answer questions like: "What was
  39  * the 99th percentile data value over the last 10 minutes?"
  40  *
  41  * Note: that depending on the size of your buckets and the smoothness
  42  * of your data distribution, the estimate may be way off from the actual
  43  * value.  In particular, if the given percentile falls outside of the bucket
  44  * range (i.e. your buckets range in 0 - 100,000 but the 99th percentile is
  45  * around 115,000) this estimate may be very wrong.
  46  *
  47  * The memory usage for a typical histogram is roughly 3k * (# of buckets).  All
  48  * insertion operations are amortized O(1), and all queries are O(# of buckets).
  49  */
  50 template <
  51     class T,
  52     class CT = LegacyStatsClock<std::chrono::seconds>,
  53     class C = folly::MultiLevelTimeSeries<T, CT>>
  54 class TimeseriesHistogram {
  55  private:
  56    // NOTE: T must be equivalent to _signed_ numeric type for our math.
  57    static_assert(std::numeric_limits<T>::is_signed, "");
  58
  59  public:
  60   // Values to be inserted into container
  61   using ValueType = T;
  62   // The container type we use internally for each bucket
  63   using ContainerType = C;
  64   // Clock, duration, and time point types
  65   using Clock = CT;
  66   using Duration = typename Clock::duration;
  67   using TimePoint = typename Clock::time_point;
  68   // The legacy TimeType.  The older code used this instead of Duration and
  69   // TimePoint.  This will eventually be removed as the code is transitioned to
  70   // Duration and TimePoint.
  71   using TimeType = typename Clock::duration;
  72
  73   /*
  74    * Create a TimeSeries histogram and initialize the bucketing and levels.
  75    *
  76    * The buckets are created by chopping the range [min, max) into pieces
  77    * of size bucketSize, with the last bucket being potentially shorter.  Two
  78    * additional buckets are always created -- the "under" bucket for the range
  79    * (-inf, min) and the "over" bucket for the range [max, +inf).
  80    *
  81    * @param bucketSize the width of each bucket
  82    * @param min the smallest value for the bucket range.
  83    * @param max the largest value for the bucket range
  84    * @param defaultContainer a pre-initialized timeseries with the desired
  85    *                         number of levels and their durations.
  86    */
  87   TimeseriesHistogram(ValueType bucketSize, ValueType min, ValueType max,
  88                       const ContainerType& defaultContainer);
  89
  90   /* Return the bucket size of each bucket in the histogram. */
  91   ValueType getBucketSize() const { return buckets_.getBucketSize(); }
  92
  93   /* Return the min value at which bucketing begins. */
  94   ValueType getMin() const { return buckets_.getMin(); }
  95
  96   /* Return the max value at which bucketing ends. */
  97   ValueType getMax() const { return buckets_.getMax(); }
  98
  99   /* Return the number of levels of the Timeseries object in each bucket */
 100   int getNumLevels() const {
 101     return buckets_.getByIndex(0).numLevels();
 102   }
 103
 104   /* Return the number of buckets */
 105   int getNumBuckets() const { return buckets_.getNumBuckets(); }
 106
 107   /*
 108    * Return the threshold of the bucket for the given index in range
 109    * [0..numBuckets).  The bucket will have range [thresh, thresh + bucketSize)
 110    * or [thresh, max), whichever is shorter.
 111    */
 112   ValueType getBucketMin(int bucketIdx) const {
 113     return buckets_.getBucketMin(bucketIdx);
 114   }
 115
 116   /* Return the actual timeseries in the given bucket (for reading only!) */
 117   const ContainerType& getBucket(int bucketIdx) const {
 118     return buckets_.getByIndex(bucketIdx);
 119   }
 120
 121   /* Total count of values at the given timeseries level (all buckets). */
 122   int64_t count(int level) const {
 123     int64_t total = 0;
 124     for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
 125       total += buckets_.getByIndex(b).count(level);
 126     }
 127     return total;
 128   }
 129
 130   /* Total count of values added during the given interval (all buckets). */
 131   int64_t count(TimeType start, TimeType end) const {
 132     int64_t total = 0;
 133     for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
 134       total += buckets_.getByIndex(b).count(start, end);
 135     }
 136     return total;
 137   }
 138
 139   /* Total sum of values at the given timeseries level (all buckets). */
 140   ValueType sum(int level) const {
 141     ValueType total = ValueType();
 142     for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
 143       total += buckets_.getByIndex(b).sum(level);
 144     }
 145     return total;
 146   }
 147
 148   /* Total sum of values added during the given interval (all buckets). */
 149   ValueType sum(TimeType start, TimeType end) const {
 150     ValueType total = ValueType();
 151     for (unsigned int b = 0; b < buckets_.getNumBuckets(); ++b) {
 152       total += buckets_.getByIndex(b).sum(start, end);
 153     }
 154     return total;
 155   }
 156
 157   /* Average of values at the given timeseries level (all buckets). */
 158   template <typename ReturnType = double>
 159   ReturnType avg(int level) const {
 160     auto total = ValueType();
 161     int64_t nsamples = 0;
 162     computeAvgData(&total, &nsamples, level);
 163     return folly::detail::avgHelper<ReturnType>(total, nsamples);
 164   }
 165
 166   /* Average of values added during the given interval (all buckets). */
 167   template <typename ReturnType = double>
 168   ReturnType avg(TimeType start, TimeType end) const {
 169     auto total = ValueType();
 170     int64_t nsamples = 0;
 171     computeAvgData(&total, &nsamples, start, end);
 172     return folly::detail::avgHelper<ReturnType>(total, nsamples);
 173   }
 174
 175   /*
 176    * Rate at the given timeseries level (all buckets).
 177    * This is the sum of all values divided by the time interval (in seconds).
 178    */
 179   template <typename ReturnType = double>
 180   ReturnType rate(int level) const {
 181     auto total = ValueType();
 182     TimeType elapsed(0);
 183     computeRateData(&total, &elapsed, level);
 184     return folly::detail::rateHelper<ReturnType, TimeType, TimeType>(
 185         total, elapsed);
 186   }
 187
 188   /*
 189    * Rate for the given interval (all buckets).
 190    * This is the sum of all values divided by the time interval (in seconds).
 191    */
 192   template <typename ReturnType = double>
 193   ReturnType rate(TimeType start, TimeType end) const {
 194     auto total = ValueType();
 195     TimeType elapsed(0);
 196     computeRateData(&total, &elapsed, start, end);
 197     return folly::detail::rateHelper<ReturnType, TimeType, TimeType>(
 198         total, elapsed);
 199   }
 200
 201   /*
 202    * Update every underlying timeseries object with the given timestamp. You
 203    * must call this directly before querying to ensure that the data in all
 204    * buckets is decayed properly.
 205    */
 206   void update(TimeType now);
 207
 208   /* clear all the data from the histogram. */
 209   void clear();
 210
 211   /* Add a value into the histogram with timestamp 'now' */
 212   void addValue(TimeType now, const ValueType& value);
 213   /* Add a value the given number of times with timestamp 'now' */
 214   void addValue(TimeType now, const ValueType& value, int64_t times);
 215
 216   /*
 217    * Add all of the values from the specified histogram.
 218    *
 219    * All of the values will be added to the current time-slot.
 220    *
 221    * One use of this is for thread-local caching of frequently updated
 222    * histogram data.  For example, each thread can store a thread-local
 223    * Histogram that is updated frequently, and only add it to the global
 224    * TimeseriesHistogram once a second.
 225    */
 226   void addValues(TimeType now, const folly::Histogram<ValueType>& values);
 227
 228   /*
 229    * Return an estimate of the value at the given percentile in the histogram
 230    * in the given timeseries level.  The percentile is estimated as follows:
 231    *
 232    * - We retrieve a count of the values in each bucket (at the given level)
 233    * - We determine via the counts which bucket the given percentile falls in.
 234    * - We assume the average value in the bucket is also its median
 235    * - We then linearly interpolate within the bucket, by assuming that the
 236    *   distribution is uniform in the two value ranges [left, median) and
 237    *   [median, right) where [left, right) is the bucket value range.
 238    *
 239    * Caveats:
 240    * - If the histogram is empty, this always returns ValueType(), usually 0.
 241    * - For the 'under' and 'over' special buckets, their range is unbounded
 242    *   on one side.  In order for the interpolation to work, we assume that
 243    *   the average value in the bucket is equidistant from the two edges of
 244    *   the bucket.  In other words, we assume that the distance between the
 245    *   average and the known bound is equal to the distance between the average
 246    *   and the unknown bound.
 247    */
 248   ValueType getPercentileEstimate(double pct, int level) const;
 249   /*
 250    * Return an estimate of the value at the given percentile in the histogram
 251    * in the given historical interval.  Please see the documentation for
 252    * getPercentileEstimate(int pct, int level) for the explanation of the
 253    * estimation algorithm.
 254    */
 255   ValueType getPercentileEstimate(double pct, TimeType start, TimeType end)
 256     const;
 257
 258   /*
 259    * Return the bucket index that the given percentile falls into (in the
 260    * given timeseries level).  This index can then be used to retrieve either
 261    * the bucket threshold, or other data from inside the bucket.
 262    */
 263   int getPercentileBucketIdx(double pct, int level) const;
 264   /*
 265    * Return the bucket index that the given percentile falls into (in the
 266    * given historical interval).  This index can then be used to retrieve either
 267    * the bucket threshold, or other data from inside the bucket.
 268    */
 269   int getPercentileBucketIdx(double pct, TimeType start, TimeType end) const;
 270
 271   /* Get the bucket threshold for the bucket containing the given pct. */
 272   int getPercentileBucketMin(double pct, int level) const {
 273     return getBucketMin(getPercentileBucketIdx(pct, level));
 274   }
 275   /* Get the bucket threshold for the bucket containing the given pct. */
 276   int getPercentileBucketMin(double pct, TimeType start, TimeType end) const {
 277     return getBucketMin(getPercentileBucketIdx(pct, start, end));
 278   }
 279
 280   /*
 281    * Print out serialized data from all buckets at the given level.
 282    * Format is: BUCKET [',' BUCKET ...]
 283    * Where: BUCKET == bucketMin ':' count ':' avg
 284    */
 285   std::string getString(int level) const;
 286
 287   /*
 288    * Print out serialized data for all buckets in the historical interval.
 289    * For format, please see getString(int level).
 290    */
 291   std::string getString(TimeType start, TimeType end) const;
 292
 293  private:
 294   typedef ContainerType Bucket;
 295   struct CountFromLevel {
 296     explicit CountFromLevel(int level) : level_(level) {}
 297
 298     uint64_t operator()(const ContainerType& bucket) const {
 299       return bucket.count(level_);
 300     }
 301
 302    private:
 303     int level_;
 304   };
 305   struct CountFromInterval {
 306     explicit CountFromInterval(TimeType start, TimeType end)
 307       : start_(start),
 308         end_(end) {}
 309
 310     uint64_t operator()(const ContainerType& bucket) const {
 311       return bucket.count(start_, end_);
 312     }
 313
 314    private:
 315     TimeType start_;
 316     TimeType end_;
 317   };
 318
 319   struct AvgFromLevel {
 320     explicit AvgFromLevel(int level) : level_(level) {}
 321
 322     ValueType operator()(const ContainerType& bucket) const {
 323       return bucket.template avg<ValueType>(level_);
 324     }
 325
 326    private:
 327     int level_;
 328   };
 329
 330   template <typename ReturnType>
 331   struct AvgFromInterval {
 332     explicit AvgFromInterval(TimeType start, TimeType end)
 333       : start_(start),
 334         end_(end) {}
 335
 336     ReturnType operator()(const ContainerType& bucket) const {
 337       return bucket.template avg<ReturnType>(start_, end_);
 338     }
 339
 340    private:
 341     TimeType start_;
 342     TimeType end_;
 343   };
 344
 345   /*
 346    * Special logic for the case of only one unique value registered
 347    * (this can happen when clients don't pick good bucket ranges or have
 348    * other bugs).  It's a lot easier for clients to track down these issues
 349    * if they are getting the correct value.
 350    */
 351   void maybeHandleSingleUniqueValue(const ValueType& value);
 352
 353   void computeAvgData(ValueType* total, int64_t* nsamples, int level) const;
 354   void computeAvgData(
 355       ValueType* total,
 356       int64_t* nsamples,
 357       TimeType start,
 358       TimeType end) const;
 359   void computeRateData(ValueType* total, TimeType* elapsed, int level) const;
 360   void computeRateData(
 361       ValueType* total,
 362       TimeType* elapsed,
 363       TimeType start,
 364       TimeType end) const;
 365
 366   folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
 367   bool haveNotSeenValue_;
 368   bool singleUniqueValue_;
 369   ValueType firstValue_;
 370 };
 371 }  // folly