folly/stats/MultiLevelTimeSeries.h

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <chrono>
  20 #include <stdexcept>
  21 #include <string>
  22 #include <vector>
  23
  24 #include <folly/String.h>
  25 #include <folly/stats/BucketedTimeSeries.h>
  26 #include <glog/logging.h>
  27
  28 namespace folly {
  29
  30 /*
  31  * This class represents a timeseries which keeps several levels of data
  32  * granularity (similar in principle to the loads reported by the UNIX
  33  * 'uptime' command).  It uses several instances (one per level) of
  34  * BucketedTimeSeries as the underlying storage.
  35  *
  36  * This can easily be used to track sums (and thus rates or averages) over
  37  * several predetermined time periods, as well as all-time sums.  For example,
  38  * you would use to it to track query rate or response speed over the last
  39  * 5, 15, 30, and 60 minutes.
  40  *
  41  * The MultiLevelTimeSeries takes a list of level durations as an input; the
  42  * durations must be strictly increasing.  Furthermore a special level can be
  43  * provided with a duration of '0' -- this will be an "all-time" level.  If
  44  * an all-time level is provided, it MUST be the last level present.
  45  *
  46  * The class assumes that time advances forward --  you can't retroactively add
  47  * values for events in the past -- the 'now' argument is provided for better
  48  * efficiency and ease of unittesting.
  49  *
  50  * The class is not thread-safe -- use your own synchronization!
  51  */
  52 template <typename VT, typename CT = LegacyStatsClock<std::chrono::seconds>>
  53 class MultiLevelTimeSeries {
  54  public:
  55   using ValueType = VT;
  56   using Clock = CT;
  57   using Duration = typename Clock::duration;
  58   using TimePoint = typename Clock::time_point;
  59   // The legacy TimeType.  The older code used this instead of Duration and
  60   // TimePoint.  This will eventually be removed as the code is transitioned to
  61   // Duration and TimePoint.
  62   using TimeType = typename Clock::duration;
  63   using Level = folly::BucketedTimeSeries<ValueType, Clock>;
  64
  65   /*
  66    * Create a new MultiLevelTimeSeries.
  67    *
  68    * This creates a new MultiLevelTimeSeries that tracks time series data at the
  69    * specified time durations (level). The time series data tracked at each
  70    * level is then further divided by numBuckets for memory efficiency.
  71    *
  72    * The durations must be strictly increasing. Furthermore a special level can
  73    * be provided with a duration of '0' -- this will be an "all-time" level. If
  74    * an all-time level is provided, it MUST be the last level present.
  75    */
  76   MultiLevelTimeSeries(size_t numBuckets,
  77                        size_t numLevels,
  78                        const TimeType levelDurations[]);
  79
  80   MultiLevelTimeSeries(
  81       size_t numBuckets,
  82       std::initializer_list<TimeType> durations);
  83
  84   /*
  85    * Return the number of buckets used to track time series at each level.
  86    */
  87   size_t numBuckets() const {
  88     // The constructor ensures that levels_ has at least one item
  89     return levels_[0].numBuckets();
  90   }
  91
  92   /*
  93    * Return the number of levels tracked by MultiLevelTimeSeries.
  94    */
  95   size_t numLevels() const { return levels_.size(); }
  96
  97   /*
  98    * Get the BucketedTimeSeries backing the specified level.
  99    *
 100    * Note: you should generally call update() or flush() before accessing the
 101    * data. Otherwise you may be reading stale data if update() or flush() has
 102    * not been called recently.
 103    */
 104   const Level& getLevel(int level) const {
 105     CHECK(level >= 0);
 106     CHECK_LT(level, levels_.size());
 107     return levels_[level];
 108   }
 109
 110   /*
 111    * Get the highest granularity level that is still large enough to contain
 112    * data going back to the specified start time.
 113    *
 114    * Note: you should generally call update() or flush() before accessing the
 115    * data. Otherwise you may be reading stale data if update() or flush() has
 116    * not been called recently.
 117    */
 118   const Level& getLevel(TimeType start) const {
 119     for (const auto& level : levels_) {
 120       if (level.isAllTime()) {
 121         return level;
 122       }
 123       // Note that we use duration() here rather than elapsed().
 124       // If duration is large enough to contain the start time then this level
 125       // is good enough, even if elapsed() indicates that no data was recorded
 126       // before the specified start time.
 127       if (level.getLatestTime() - level.duration() <= start) {
 128         return level;
 129       }
 130     }
 131     // We should always have an all-time level, so this is never reached.
 132     LOG(FATAL) << "No level of timeseries covers internval"
 133                << " from " << start.count() << " to now";
 134     return levels_.back();
 135   }
 136
 137   /*
 138    * Get the BucketedTimeSeries backing the specified level.
 139    *
 140    * Note: you should generally call update() or flush() before accessing the
 141    * data. Otherwise you may be reading stale data if update() or flush() has
 142    * not been called recently.
 143    */
 144   const Level& getLevelByDuration(TimeType duration) const {
 145     // since the number of levels is expected to be small (less than 5 in most
 146     // cases), a simple linear scan would be efficient and is intentionally
 147     // chosen here over other alternatives for lookup.
 148     for (const auto& level : levels_) {
 149       if (level.duration() == duration) {
 150         return level;
 151       }
 152     }
 153     throw std::out_of_range(folly::to<std::string>(
 154         "No level of duration ", duration.count(), " found"));
 155   }
 156
 157   /*
 158    * Return the sum of all the data points currently tracked at this level.
 159    *
 160    * Note: you should generally call update() or flush() before accessing the
 161    * data. Otherwise you may be reading stale data if update() or flush() has
 162    * not been called recently.
 163    */
 164   ValueType sum(int level) const {
 165     return getLevel(level).sum();
 166   }
 167
 168   /*
 169    * Return the average (sum / count) of all the data points currently tracked
 170    * at this level.
 171    *
 172    * The return type may be specified to control whether floating-point or
 173    * integer division should be performed.
 174    *
 175    * Note: you should generally call update() or flush() before accessing the
 176    * data. Otherwise you may be reading stale data if update() or flush() has
 177    * not been called recently.
 178    */
 179   template <typename ReturnType=double>
 180   ReturnType avg(int level) const {
 181     return getLevel(level).template avg<ReturnType>();
 182   }
 183
 184   /*
 185    * Return the rate (sum divided by elaspsed time) of the all data points
 186    * currently tracked at this level.
 187    *
 188    * Note: you should generally call update() or flush() before accessing the
 189    * data. Otherwise you may be reading stale data if update() or flush() has
 190    * not been called recently.
 191    */
 192   template <typename ReturnType=double, typename Interval=TimeType>
 193   ReturnType rate(int level) const {
 194     return getLevel(level).template rate<ReturnType, Interval>();
 195   }
 196
 197   /*
 198    * Return the number of data points currently tracked at this level.
 199    *
 200    * Note: you should generally call update() or flush() before accessing the
 201    * data. Otherwise you may be reading stale data if update() or flush() has
 202    * not been called recently.
 203    */
 204   int64_t count(int level) const {
 205     return getLevel(level).count();
 206   }
 207
 208   /*
 209    * Return the count divided by the elapsed time tracked at this level.
 210    *
 211    * Note: you should generally call update() or flush() before accessing the
 212    * data. Otherwise you may be reading stale data if update() or flush() has
 213    * not been called recently.
 214    */
 215   template <typename ReturnType=double, typename Interval=TimeType>
 216   ReturnType countRate(int level) const {
 217     return getLevel(level).template countRate<ReturnType, Interval>();
 218   }
 219
 220   /*
 221    * Return the sum of all the data points currently tracked at this level.
 222    *
 223    * This method is identical to sum(int level) above but takes in the
 224    * duration that the user is interested in querying as the parameter.
 225    *
 226    * Note: you should generally call update() or flush() before accessing the
 227    * data. Otherwise you may be reading stale data if update() or flush() has
 228    * not been called recently.
 229    */
 230   ValueType sum(TimeType duration) const {
 231     return getLevelByDuration(duration).sum();
 232   }
 233
 234   /*
 235    * Return the average (sum / count) of all the data points currently tracked
 236    * at this level.
 237    *
 238    * This method is identical to avg(int level) above but takes in the
 239    * duration that the user is interested in querying as the parameter.
 240    *
 241    * Note: you should generally call update() or flush() before accessing the
 242    * data. Otherwise you may be reading stale data if update() or flush() has
 243    * not been called recently.
 244    */
 245   template <typename ReturnType = double>
 246   ReturnType avg(TimeType duration) const {
 247     return getLevelByDuration(duration).template avg<ReturnType>();
 248   }
 249
 250   /*
 251    * Return the rate (sum divided by elaspsed time) of the all data points
 252    * currently tracked at this level.
 253    *
 254    * This method is identical to rate(int level) above but takes in the
 255    * duration that the user is interested in querying as the parameter.
 256    *
 257    * Note: you should generally call update() or flush() before accessing the
 258    * data. Otherwise you may be reading stale data if update() or flush() has
 259    * not been called recently.
 260    */
 261   template <typename ReturnType = double, typename Interval = TimeType>
 262   ReturnType rate(TimeType duration) const {
 263     return getLevelByDuration(duration).template rate<ReturnType, Interval>();
 264   }
 265
 266   /*
 267    * Return the number of data points currently tracked at this level.
 268    *
 269    * This method is identical to count(int level) above but takes in the
 270    * duration that the user is interested in querying as the parameter.
 271    *
 272    * Note: you should generally call update() or flush() before accessing the
 273    * data. Otherwise you may be reading stale data if update() or flush() has
 274    * not been called recently.
 275    */
 276   int64_t count(TimeType duration) const {
 277     return getLevelByDuration(duration).count();
 278   }
 279
 280   /*
 281    * Return the count divided by the elapsed time tracked at this level.
 282    *
 283    * This method is identical to countRate(int level) above but takes in the
 284    * duration that the user is interested in querying as the parameter.
 285    *
 286    * Note: you should generally call update() or flush() before accessing the
 287    * data. Otherwise you may be reading stale data if update() or flush() has
 288    * not been called recently.
 289    */
 290   template <typename ReturnType = double, typename Interval = TimeType>
 291   ReturnType countRate(TimeType duration) const {
 292     return getLevelByDuration(duration)
 293         .template countRate<ReturnType, Interval>();
 294   }
 295
 296   /*
 297    * Estimate the sum of the data points that occurred in the specified time
 298    * period at this level.
 299    *
 300    * The range queried is [start, end).
 301    * That is, start is inclusive, and end is exclusive.
 302    *
 303    * Note that data outside of the timeseries duration will no longer be
 304    * available for use in the estimation.  Specifying a start time earlier than
 305    * getEarliestTime() will not have much effect, since only data points after
 306    * that point in time will be counted.
 307    *
 308    * Note that the value returned is an estimate, and may not be precise.
 309    *
 310    * Note: you should generally call update() or flush() before accessing the
 311    * data. Otherwise you may be reading stale data if update() or flush() has
 312    * not been called recently.
 313    */
 314   ValueType sum(TimeType start, TimeType end) const {
 315     return getLevel(start).sum(start, end);
 316   }
 317
 318   /*
 319    * Estimate the average value during the specified time period.
 320    *
 321    * The same caveats documented in the sum(TimeType start, TimeType end)
 322    * comments apply here as well.
 323    *
 324    * Note: you should generally call update() or flush() before accessing the
 325    * data. Otherwise you may be reading stale data if update() or flush() has
 326    * not been called recently.
 327    */
 328   template <typename ReturnType=double>
 329   ReturnType avg(TimeType start, TimeType end) const {
 330     return getLevel(start).template avg<ReturnType>(start, end);
 331   }
 332
 333   /*
 334    * Estimate the rate during the specified time period.
 335    *
 336    * The same caveats documented in the sum(TimeType start, TimeType end)
 337    * comments apply here as well.
 338    *
 339    * Note: you should generally call update() or flush() before accessing the
 340    * data. Otherwise you may be reading stale data if update() or flush() has
 341    * not been called recently.
 342    */
 343   template <typename ReturnType=double>
 344   ReturnType rate(TimeType start, TimeType end) const {
 345     return getLevel(start).template rate<ReturnType>(start, end);
 346   }
 347
 348   /*
 349    * Estimate the count during the specified time period.
 350    *
 351    * The same caveats documented in the sum(TimeType start, TimeType end)
 352    * comments apply here as well.
 353    *
 354    * Note: you should generally call update() or flush() before accessing the
 355    * data. Otherwise you may be reading stale data if update() or flush() has
 356    * not been called recently.
 357    */
 358   int64_t count(TimeType start, TimeType end) const {
 359     return getLevel(start).count(start, end);
 360   }
 361
 362   /*
 363    * Adds the value 'val' at time 'now' to all levels.
 364    *
 365    * Data points added at the same time point is cached internally here and not
 366    * propagated to the underlying levels until either flush() is called or when
 367    * update from a different time comes.
 368    *
 369    * This function expects time to always move forwards: it cannot be used to
 370    * add historical data points that have occurred in the past.  If now is
 371    * older than the another timestamp that has already been passed to
 372    * addValue() or update(), now will be ignored and the latest timestamp will
 373    * be used.
 374    */
 375   void addValue(TimeType now, const ValueType& val);
 376
 377   /*
 378    * Adds the value 'val' at time 'now' to all levels.
 379    */
 380   void addValue(TimeType now, const ValueType& val, int64_t times);
 381
 382   /*
 383    * Adds the value 'val' at time 'now' to all levels as the sum of 'nsamples'
 384    * samples.
 385    */
 386   void addValueAggregated(TimeType now, const ValueType& sum, int64_t nsamples);
 387
 388   /*
 389    * Update all the levels to the specified time, doing all the necessary
 390    * work to rotate the buckets and remove any stale data points.
 391    *
 392    * When reading data from the timeseries, you should make sure to manually
 393    * call update() before accessing the data. Otherwise you may be reading
 394    * stale data if update() has not been called recently.
 395    */
 396   void update(TimeType now);
 397
 398   /*
 399    * Reset all the timeseries to an empty state as if no data points have ever
 400    * been added to it.
 401    */
 402   void clear();
 403
 404   /*
 405    * Flush all cached updates.
 406    */
 407   void flush();
 408
 409  private:
 410   std::vector<Level> levels_;
 411
 412   // Updates within the same time interval are cached
 413   // They are flushed out when updates from a different time comes,
 414   // or flush() is called.
 415   TimeType cachedTime_;
 416   ValueType cachedSum_;
 417   int cachedCount_;
 418 };
 419
 420 } // folly