folly/stats/TimeseriesHistogram.h

   1 /*
   2  * Copyright 2017 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <folly/stats/Histogram.h>
  20 #include <folly/stats/MultiLevelTimeSeries.h>
  21 #include <string>
  22
  23 namespace folly {
  24
  25 /*
  26  * TimeseriesHistogram tracks data distributions as they change over time.
  27  *
  28  * Specifically, it is a bucketed histogram with different value ranges assigned
  29  * to each bucket.  Within each bucket is a MultiLevelTimeSeries from
  30  * 'folly/stats/MultiLevelTimeSeries.h'. This means that each bucket contains a
  31  * different set of data for different historical time periods, and one can
  32  * query data distributions over different trailing time windows.
  33  *
  34  * For example, this can answer questions: "What is the data distribution over
  35  * the last minute? Over the last 10 minutes?  Since I last cleared this
  36  * histogram?"
  37  *
  38  * The class can also estimate percentiles and answer questions like: "What was
  39  * the 99th percentile data value over the last 10 minutes?"
  40  *
  41  * Note: that depending on the size of your buckets and the smoothness
  42  * of your data distribution, the estimate may be way off from the actual
  43  * value.  In particular, if the given percentile falls outside of the bucket
  44  * range (i.e. your buckets range in 0 - 100,000 but the 99th percentile is
  45  * around 115,000) this estimate may be very wrong.
  46  *
  47  * The memory usage for a typical histogram is roughly 3k * (# of buckets).  All
  48  * insertion operations are amortized O(1), and all queries are O(# of buckets).
  49  */
  50 template <
  51     class T,
  52     class CT = LegacyStatsClock<std::chrono::seconds>,
  53     class C = folly::MultiLevelTimeSeries<T, CT>>
  54 class TimeseriesHistogram {
  55  private:
  56   // NOTE: T must be equivalent to _signed_ numeric type for our math.
  57   static_assert(std::numeric_limits<T>::is_signed, "");
  58
  59  public:
  60   // Values to be inserted into container
  61   using ValueType = T;
  62   // The container type we use internally for each bucket
  63   using ContainerType = C;
  64   // Clock, duration, and time point types
  65   using Clock = CT;
  66   using Duration = typename Clock::duration;
  67   using TimePoint = typename Clock::time_point;
  68
  69   /*
  70    * Create a TimeSeries histogram and initialize the bucketing and levels.
  71    *
  72    * The buckets are created by chopping the range [min, max) into pieces
  73    * of size bucketSize, with the last bucket being potentially shorter.  Two
  74    * additional buckets are always created -- the "under" bucket for the range
  75    * (-inf, min) and the "over" bucket for the range [max, +inf).
  76    *
  77    * @param bucketSize the width of each bucket
  78    * @param min the smallest value for the bucket range.
  79    * @param max the largest value for the bucket range
  80    * @param defaultContainer a pre-initialized timeseries with the desired
  81    *                         number of levels and their durations.
  82    */
  83   TimeseriesHistogram(
  84       ValueType bucketSize,
  85       ValueType min,
  86       ValueType max,
  87       const ContainerType& defaultContainer);
  88
  89   /* Return the bucket size of each bucket in the histogram. */
  90   ValueType getBucketSize() const {
  91     return buckets_.getBucketSize();
  92   }
  93
  94   /* Return the min value at which bucketing begins. */
  95   ValueType getMin() const {
  96     return buckets_.getMin();
  97   }
  98
  99   /* Return the max value at which bucketing ends. */
 100   ValueType getMax() const {
 101     return buckets_.getMax();
 102   }
 103
 104   /* Return the number of levels of the Timeseries object in each bucket */
 105   size_t getNumLevels() const {
 106     return buckets_.getByIndex(0).numLevels();
 107   }
 108
 109   /* Return the number of buckets */
 110   size_t getNumBuckets() const {
 111     return buckets_.getNumBuckets();
 112   }
 113
 114   /*
 115    * Return the threshold of the bucket for the given index in range
 116    * [0..numBuckets).  The bucket will have range [thresh, thresh + bucketSize)
 117    * or [thresh, max), whichever is shorter.
 118    */
 119   ValueType getBucketMin(size_t bucketIdx) const {
 120     return buckets_.getBucketMin(bucketIdx);
 121   }
 122
 123   /* Return the actual timeseries in the given bucket (for reading only!) */
 124   const ContainerType& getBucket(size_t bucketIdx) const {
 125     return buckets_.getByIndex(bucketIdx);
 126   }
 127
 128   /* Total count of values at the given timeseries level (all buckets). */
 129   uint64_t count(size_t level) const {
 130     uint64_t total = 0;
 131     for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
 132       total += buckets_.getByIndex(b).count(level);
 133     }
 134     return total;
 135   }
 136
 137   /* Total count of values added during the given interval (all buckets). */
 138   uint64_t count(TimePoint start, TimePoint end) const {
 139     uint64_t total = 0;
 140     for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
 141       total += buckets_.getByIndex(b).count(start, end);
 142     }
 143     return total;
 144   }
 145
 146   /* Total sum of values at the given timeseries level (all buckets). */
 147   ValueType sum(size_t level) const {
 148     ValueType total = ValueType();
 149     for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
 150       total += buckets_.getByIndex(b).sum(level);
 151     }
 152     return total;
 153   }
 154
 155   /* Total sum of values added during the given interval (all buckets). */
 156   ValueType sum(TimePoint start, TimePoint end) const {
 157     ValueType total = ValueType();
 158     for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
 159       total += buckets_.getByIndex(b).sum(start, end);
 160     }
 161     return total;
 162   }
 163
 164   /* Average of values at the given timeseries level (all buckets). */
 165   template <typename ReturnType = double>
 166   ReturnType avg(size_t level) const {
 167     auto total = ValueType();
 168     uint64_t nsamples = 0;
 169     computeAvgData(&total, &nsamples, level);
 170     return folly::detail::avgHelper<ReturnType>(total, nsamples);
 171   }
 172
 173   /* Average of values added during the given interval (all buckets). */
 174   template <typename ReturnType = double>
 175   ReturnType avg(TimePoint start, TimePoint end) const {
 176     auto total = ValueType();
 177     uint64_t nsamples = 0;
 178     computeAvgData(&total, &nsamples, start, end);
 179     return folly::detail::avgHelper<ReturnType>(total, nsamples);
 180   }
 181
 182   /*
 183    * Rate at the given timeseries level (all buckets).
 184    * This is the sum of all values divided by the time interval (in seconds).
 185    */
 186   template <typename ReturnType = double>
 187   ReturnType rate(size_t level) const {
 188     auto total = ValueType();
 189     Duration elapsed(0);
 190     computeRateData(&total, &elapsed, level);
 191     return folly::detail::rateHelper<ReturnType, Duration, Duration>(
 192         total, elapsed);
 193   }
 194
 195   /*
 196    * Rate for the given interval (all buckets).
 197    * This is the sum of all values divided by the time interval (in seconds).
 198    */
 199   template <typename ReturnType = double>
 200   ReturnType rate(TimePoint start, TimePoint end) const {
 201     auto total = ValueType();
 202     Duration elapsed(0);
 203     computeRateData(&total, &elapsed, start, end);
 204     return folly::detail::rateHelper<ReturnType, Duration, Duration>(
 205         total, elapsed);
 206   }
 207
 208   /*
 209    * Update every underlying timeseries object with the given timestamp. You
 210    * must call this directly before querying to ensure that the data in all
 211    * buckets is decayed properly.
 212    */
 213   void update(TimePoint now);
 214
 215   /* clear all the data from the histogram. */
 216   void clear();
 217
 218   /* Add a value into the histogram with timestamp 'now' */
 219   void addValue(TimePoint now, const ValueType& value);
 220   /* Add a value the given number of times with timestamp 'now' */
 221   void addValue(TimePoint now, const ValueType& value, uint64_t times);
 222
 223   /*
 224    * Add all of the values from the specified histogram.
 225    *
 226    * All of the values will be added to the current time-slot.
 227    *
 228    * One use of this is for thread-local caching of frequently updated
 229    * histogram data.  For example, each thread can store a thread-local
 230    * Histogram that is updated frequently, and only add it to the global
 231    * TimeseriesHistogram once a second.
 232    */
 233   void addValues(TimePoint now, const folly::Histogram<ValueType>& values);
 234
 235   /*
 236    * Return an estimate of the value at the given percentile in the histogram
 237    * in the given timeseries level.  The percentile is estimated as follows:
 238    *
 239    * - We retrieve a count of the values in each bucket (at the given level)
 240    * - We determine via the counts which bucket the given percentile falls in.
 241    * - We assume the average value in the bucket is also its median
 242    * - We then linearly interpolate within the bucket, by assuming that the
 243    *   distribution is uniform in the two value ranges [left, median) and
 244    *   [median, right) where [left, right) is the bucket value range.
 245    *
 246    * Caveats:
 247    * - If the histogram is empty, this always returns ValueType(), usually 0.
 248    * - For the 'under' and 'over' special buckets, their range is unbounded
 249    *   on one side.  In order for the interpolation to work, we assume that
 250    *   the average value in the bucket is equidistant from the two edges of
 251    *   the bucket.  In other words, we assume that the distance between the
 252    *   average and the known bound is equal to the distance between the average
 253    *   and the unknown bound.
 254    */
 255   ValueType getPercentileEstimate(double pct, size_t level) const;
 256   /*
 257    * Return an estimate of the value at the given percentile in the histogram
 258    * in the given historical interval.  Please see the documentation for
 259    * getPercentileEstimate(double pct, size_t level) for the explanation of the
 260    * estimation algorithm.
 261    */
 262   ValueType getPercentileEstimate(double pct, TimePoint start, TimePoint end)
 263       const;
 264
 265   /*
 266    * Return the bucket index that the given percentile falls into (in the
 267    * given timeseries level).  This index can then be used to retrieve either
 268    * the bucket threshold, or other data from inside the bucket.
 269    */
 270   size_t getPercentileBucketIdx(double pct, size_t level) const;
 271   /*
 272    * Return the bucket index that the given percentile falls into (in the
 273    * given historical interval).  This index can then be used to retrieve either
 274    * the bucket threshold, or other data from inside the bucket.
 275    */
 276   size_t getPercentileBucketIdx(double pct, TimePoint start, TimePoint end)
 277       const;
 278
 279   /* Get the bucket threshold for the bucket containing the given pct. */
 280   ValueType getPercentileBucketMin(double pct, size_t level) const {
 281     return getBucketMin(getPercentileBucketIdx(pct, level));
 282   }
 283   /* Get the bucket threshold for the bucket containing the given pct. */
 284   ValueType getPercentileBucketMin(double pct, TimePoint start, TimePoint end)
 285       const {
 286     return getBucketMin(getPercentileBucketIdx(pct, start, end));
 287   }
 288
 289   /*
 290    * Print out serialized data from all buckets at the given level.
 291    * Format is: BUCKET [',' BUCKET ...]
 292    * Where: BUCKET == bucketMin ':' count ':' avg
 293    */
 294   std::string getString(size_t level) const;
 295
 296   /*
 297    * Print out serialized data for all buckets in the historical interval.
 298    * For format, please see getString(size_t level).
 299    */
 300   std::string getString(TimePoint start, TimePoint end) const;
 301
 302   /*
 303    * Legacy APIs that accept a Duration parameters rather than TimePoint.
 304    *
 305    * These treat the Duration as relative to the clock epoch.
 306    * Prefer using the correct TimePoint-based APIs instead.  These APIs will
 307    * eventually be deprecated and removed.
 308    */
 309   void update(Duration now) {
 310     update(TimePoint(now));
 311   }
 312   void addValue(Duration now, const ValueType& value) {
 313     addValue(TimePoint(now), value);
 314   }
 315   void addValue(Duration now, const ValueType& value, uint64_t times) {
 316     addValue(TimePoint(now), value, times);
 317   }
 318   void addValues(Duration now, const folly::Histogram<ValueType>& values) {
 319     addValues(TimePoint(now), values);
 320   }
 321
 322  private:
 323   typedef ContainerType Bucket;
 324   struct CountFromLevel {
 325     explicit CountFromLevel(size_t level) : level_(level) {}
 326
 327     uint64_t operator()(const ContainerType& bucket) const {
 328       return bucket.count(level_);
 329     }
 330
 331    private:
 332     size_t level_;
 333   };
 334   struct CountFromInterval {
 335     explicit CountFromInterval(TimePoint start, TimePoint end)
 336         : start_(start), end_(end) {}
 337
 338     uint64_t operator()(const ContainerType& bucket) const {
 339       return bucket.count(start_, end_);
 340     }
 341
 342    private:
 343     TimePoint start_;
 344     TimePoint end_;
 345   };
 346
 347   struct AvgFromLevel {
 348     explicit AvgFromLevel(size_t level) : level_(level) {}
 349
 350     ValueType operator()(const ContainerType& bucket) const {
 351       return bucket.template avg<ValueType>(level_);
 352     }
 353
 354    private:
 355     size_t level_;
 356   };
 357
 358   template <typename ReturnType>
 359   struct AvgFromInterval {
 360     explicit AvgFromInterval(TimePoint start, TimePoint end)
 361         : start_(start), end_(end) {}
 362
 363     ReturnType operator()(const ContainerType& bucket) const {
 364       return bucket.template avg<ReturnType>(start_, end_);
 365     }
 366
 367    private:
 368     TimePoint start_;
 369     TimePoint end_;
 370   };
 371
 372   /*
 373    * Special logic for the case of only one unique value registered
 374    * (this can happen when clients don't pick good bucket ranges or have
 375    * other bugs).  It's a lot easier for clients to track down these issues
 376    * if they are getting the correct value.
 377    */
 378   void maybeHandleSingleUniqueValue(const ValueType& value);
 379
 380   void computeAvgData(ValueType* total, uint64_t* nsamples, size_t level) const;
 381   void computeAvgData(
 382       ValueType* total,
 383       uint64_t* nsamples,
 384       TimePoint start,
 385       TimePoint end) const;
 386   void computeRateData(ValueType* total, Duration* elapsed, size_t level) const;
 387   void computeRateData(
 388       ValueType* total,
 389       Duration* elapsed,
 390       TimePoint start,
 391       TimePoint end) const;
 392
 393   folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
 394   bool haveNotSeenValue_;
 395   bool singleUniqueValue_;
 396   ValueType firstValue_;
 397 };
 398 } // namespace folly