folly/stats/MultiLevelTimeSeries.h

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 #pragma once
  18
  19 #include <chrono>
  20 #include <string>
  21 #include <vector>
  22
  23 #include <glog/logging.h>
  24 #include <folly/stats/BucketedTimeSeries.h>
  25
  26 namespace folly {
  27
  28 /*
  29  * This class represents a timeseries which keeps several levels of data
  30  * granularity (similar in principle to the loads reported by the UNIX
  31  * 'uptime' command).  It uses several instances (one per level) of
  32  * BucketedTimeSeries as the underlying storage.
  33  *
  34  * This can easily be used to track sums (and thus rates or averages) over
  35  * several predetermined time periods, as well as all-time sums.  For example,
  36  * you would use it to track query rate or response speed over the last
  37  * 5, 15, 30, and 60 minutes.
  38  *
  39  * The MultiLevelTimeSeries takes a list of level durations as an input; the
  40  * durations must be strictly increasing.  Furthermore a special level can be
  41  * provided with a duration of '0' -- this will be an "all-time" level.  If
  42  * an all-time level is provided, it MUST be the last level present.
  43  *
  44  * The class assumes that time advances forward --  you can't retroactively add
  45  * values for events in the past -- the 'now' argument is provided for better
  46  * efficiency and ease of unittesting.
  47  *
  48  * The class is not thread-safe -- use your own synchronization!
  49  */
  50 template <typename VT, typename TT=std::chrono::seconds>
  51 class MultiLevelTimeSeries {
  52  public:
  53   typedef VT ValueType;
  54   typedef TT TimeType;
  55   typedef folly::BucketedTimeSeries<ValueType, TimeType> Level;
  56
  57   /*
  58    * Create a new MultiLevelTimeSeries.
  59    *
  60    * This creates a new MultiLevelTimeSeries that tracks time series data at the
  61    * specified time durations (level). The time series data tracked at each
  62    * level is then further divided by numBuckets for memory efficiency.
  63    *
  64    * The durations must be strictly increasing. Furthermore a special level can
  65    * be provided with a duration of '0' -- this will be an "all-time" level. If
  66    * an all-time level is provided, it MUST be the last level present.
  67    */
  68   MultiLevelTimeSeries(size_t numBuckets,
  69                        size_t numLevels,
  70                        const TimeType levelDurations[]);
  71
  72   /*
  73    * Return the number of buckets used to track time series at each level.
  74    */
  75   size_t numBuckets() const {
  76     // The constructor ensures that levels_ has at least one item
  77     return levels_[0].numBuckets();
  78   }
  79
  80   /*
  81    * Return the number of levels tracked by MultiLevelTimeSeries.
  82    */
  83   size_t numLevels() const { return levels_.size(); }
  84
  85   /*
  86    * Get the BucketedTimeSeries backing the specified level.
  87    *
  88    * Note: you should generally call update() or flush() before accessing the
  89    * data. Otherwise you may be reading stale data if update() or flush() has
  90    * not been called recently.
  91    */
  92   const Level& getLevel(int level) const {
  93     CHECK(level >= 0);
  94     CHECK_LT(level, levels_.size());
  95     return levels_[level];
  96   }
  97
  98   /*
  99    * Get the highest granularity level that is still large enough to contain
 100    * data going back to the specified start time.
 101    *
 102    * Note: you should generally call update() or flush() before accessing the
 103    * data. Otherwise you may be reading stale data if update() or flush() has
 104    * not been called recently.
 105    */
 106   const Level& getLevel(TimeType start) const {
 107     for (const auto& level : levels_) {
 108       if (level.isAllTime()) {
 109         return level;
 110       }
 111       // Note that we use duration() here rather than elapsed().
 112       // If duration is large enough to contain the start time then this level
 113       // is good enough, even if elapsed() indicates that no data was recorded
 114       // before the specified start time.
 115       if (level.getLatestTime() - level.duration() <= start) {
 116         return level;
 117       }
 118     }
 119     // We should always have an all-time level, so this is never reached.
 120     LOG(FATAL) << "No level of timeseries covers internval"
 121                << " from " << start.count() << " to now";
 122     return levels_.back();
 123   }
 124
 125   /*
 126    * Return the sum of all the data points currently tracked at this level.
 127    *
 128    * Note: you should generally call update() or flush() before accessing the
 129    * data. Otherwise you may be reading stale data if update() or flush() has
 130    * not been called recently.
 131    */
 132   ValueType sum(int level) const {
 133     return getLevel(level).sum();
 134   }
 135
 136   /*
 137    * Return the average (sum / count) of all the data points currently tracked
 138    * at this level.
 139    *
 140    * The return type may be specified to control whether floating-point or
 141    * integer division should be performed.
 142    *
 143    * Note: you should generally call update() or flush() before accessing the
 144    * data. Otherwise you may be reading stale data if update() or flush() has
 145    * not been called recently.
 146    */
 147   template <typename ReturnType=double>
 148   ReturnType avg(int level) const {
 149     return getLevel(level).template avg<ReturnType>();
 150   }
 151
 152   /*
 153    * Return the rate (sum divided by elaspsed time) of the all data points
 154    * currently tracked at this level.
 155    *
 156    * Note: you should generally call update() or flush() before accessing the
 157    * data. Otherwise you may be reading stale data if update() or flush() has
 158    * not been called recently.
 159    */
 160   template <typename ReturnType=double, typename Interval=TimeType>
 161   ReturnType rate(int level) const {
 162     return getLevel(level).template rate<ReturnType, Interval>();
 163   }
 164
 165   /*
 166    * Return the number of data points currently tracked at this level.
 167    *
 168    * Note: you should generally call update() or flush() before accessing the
 169    * data. Otherwise you may be reading stale data if update() or flush() has
 170    * not been called recently.
 171    */
 172   int64_t count(int level) const {
 173     return getLevel(level).count();
 174   }
 175
 176   /*
 177    * Return the count divided by the elapsed time tracked at this level.
 178    *
 179    * Note: you should generally call update() or flush() before accessing the
 180    * data. Otherwise you may be reading stale data if update() or flush() has
 181    * not been called recently.
 182    */
 183   template <typename ReturnType=double, typename Interval=TimeType>
 184   ReturnType countRate(int level) const {
 185     return getLevel(level).template countRate<ReturnType, Interval>();
 186   }
 187
 188   /*
 189    * Estimate the sum of the data points that occurred in the specified time
 190    * period at this level.
 191    *
 192    * The range queried is [start, end).
 193    * That is, start is inclusive, and end is exclusive.
 194    *
 195    * Note that data outside of the timeseries duration will no longer be
 196    * available for use in the estimation.  Specifying a start time earlier than
 197    * getEarliestTime() will not have much effect, since only data points after
 198    * that point in time will be counted.
 199    *
 200    * Note that the value returned is an estimate, and may not be precise.
 201    *
 202    * Note: you should generally call update() or flush() before accessing the
 203    * data. Otherwise you may be reading stale data if update() or flush() has
 204    * not been called recently.
 205    */
 206   ValueType sum(TimeType start, TimeType end) const {
 207     return getLevel(start).sum(start, end);
 208   }
 209
 210   /*
 211    * Estimate the average value during the specified time period.
 212    *
 213    * The same caveats documented in the sum(TimeType start, TimeType end)
 214    * comments apply here as well.
 215    *
 216    * Note: you should generally call update() or flush() before accessing the
 217    * data. Otherwise you may be reading stale data if update() or flush() has
 218    * not been called recently.
 219    */
 220   template <typename ReturnType=double>
 221   ReturnType avg(TimeType start, TimeType end) const {
 222     return getLevel(start).template avg<ReturnType>(start, end);
 223   }
 224
 225   /*
 226    * Estimate the rate during the specified time period.
 227    *
 228    * The same caveats documented in the sum(TimeType start, TimeType end)
 229    * comments apply here as well.
 230    *
 231    * Note: you should generally call update() or flush() before accessing the
 232    * data. Otherwise you may be reading stale data if update() or flush() has
 233    * not been called recently.
 234    */
 235   template <typename ReturnType=double>
 236   ReturnType rate(TimeType start, TimeType end) const {
 237     return getLevel(start).template rate<ReturnType>(start, end);
 238   }
 239
 240   /*
 241    * Estimate the count during the specified time period.
 242    *
 243    * The same caveats documented in the sum(TimeType start, TimeType end)
 244    * comments apply here as well.
 245    *
 246    * Note: you should generally call update() or flush() before accessing the
 247    * data. Otherwise you may be reading stale data if update() or flush() has
 248    * not been called recently.
 249    */
 250   int64_t count(TimeType start, TimeType end) const {
 251     return getLevel(start).count(start, end);
 252   }
 253
 254   /*
 255    * Adds the value 'val' at time 'now' to all levels.
 256    *
 257    * Data points added at the same time point is cached internally here and not
 258    * propagated to the underlying levels until either flush() is called or when
 259    * update from a different time comes.
 260    *
 261    * This function expects time to always move forwards: it cannot be used to
 262    * add historical data points that have occurred in the past.  If now is
 263    * older than the another timestamp that has already been passed to
 264    * addValue() or update(), now will be ignored and the latest timestamp will
 265    * be used.
 266    */
 267   void addValue(TimeType now, const ValueType& val);
 268
 269   /*
 270    * Adds the value 'val' at time 'now' to all levels.
 271    */
 272   void addValue(TimeType now, const ValueType& val, int64_t times);
 273
 274   /*
 275    * Adds the value 'val' at time 'now' to all levels as the sum of 'nsamples'
 276    * samples.
 277    */
 278   void addValueAggregated(TimeType now, const ValueType& sum, int64_t nsamples);
 279
 280   /*
 281    * Update all the levels to the specified time, doing all the necessary
 282    * work to rotate the buckets and remove any stale data points.
 283    *
 284    * When reading data from the timeseries, you should make sure to manually
 285    * call update() before accessing the data. Otherwise you may be reading
 286    * stale data if update() has not been called recently.
 287    */
 288   void update(TimeType now);
 289
 290   /*
 291    * Reset all the timeseries to an empty state as if no data points have ever
 292    * been added to it.
 293    */
 294   void clear();
 295
 296   /*
 297    * Flush all cached updates.
 298    */
 299   void flush();
 300
 301  private:
 302   std::vector<Level> levels_;
 303
 304   // Updates within the same time interval are cached
 305   // They are flushed out when updates from a different time comes,
 306   // or flush() is called.
 307   TimeType cachedTime_;
 308   ValueType cachedSum_;
 309   int cachedCount_;
 310 };
 311
 312 } // folly