Include the correct headers in various tests
[folly.git] / folly / test / TimeseriesHistogramTest.cpp
1 /*
2  * Copyright 2016 Facebook, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
#include <folly/stats/TimeseriesHistogram.h>
#include <folly/stats/TimeseriesHistogram-defs.h>

#include <chrono>
#include <ctime>
#include <limits>
#include <random>

#include <gtest/gtest.h>
23
24 using namespace std;
25 using namespace folly;
26 using std::chrono::seconds;
27
// Level layout for the four-level histograms used in these tests: minute,
// ten-minute, hour and all-time.  Each enumerator is also the index of its
// entry in kDurations, and NUM_LEVELS is the number of levels.
namespace IntMTMHTS {
  enum Levels {
    MINUTE,
    TEN_MINUTE,
    HOUR,
    ALLTIME,
    NUM_LEVELS,
  };

  // Window length of each level, listed in Levels order.  The ALLTIME entry
  // is seconds(0).
  const seconds kDurations[] = {
    seconds(60), seconds(600), seconds(3600), seconds(0)
  };

  // NUM_LEVELS and kDurations are always handed to MultiLevelTimeSeries
  // together, so they must describe the same number of levels.
  static_assert(sizeof(kDurations) / sizeof(kDurations[0]) == NUM_LEVELS,
                "IntMTMHTS::kDurations needs exactly one entry per level");
} // namespace IntMTMHTS
41
// Level layout for the three-level histogram used in these tests: minute,
// hour and all-time.  Each enumerator is also the index of its entry in
// kDurations, and NUM_LEVELS is the number of levels.
namespace IntMHTS {
  enum Levels {
    MINUTE,
    HOUR,
    ALLTIME,
    NUM_LEVELS,
  };

  // Window length of each level, listed in Levels order.  The ALLTIME entry
  // is seconds(0).
  const seconds kDurations[] = {
    seconds(60), seconds(3600), seconds(0)
  };

  // NUM_LEVELS and kDurations are always handed to MultiLevelTimeSeries
  // together, so they must describe the same number of levels.
  static_assert(sizeof(kDurations) / sizeof(kDurations[0]) == NUM_LEVELS,
                "IntMHTS::kDurations needs exactly one entry per level");
} // namespace IntMHTS
54
// Pseudo-random generator used by the tests; always constructed with a fixed
// seed below.
using RandomInt32 = std::mt19937;
56
57 TEST(TimeseriesHistogram, Percentile) {
58   RandomInt32 random(5);
59   // [10, 109], 12 buckets including above and below
60   {
61     TimeseriesHistogram<int> h(10, 10, 110,
62                                MultiLevelTimeSeries<int>(
63                                  60, IntMTMHTS::NUM_LEVELS,
64                                  IntMTMHTS::kDurations));
65
66     EXPECT_EQ(0, h.getPercentileEstimate(0, IntMTMHTS::ALLTIME));
67
68     EXPECT_EQ(12, h.getNumBuckets());
69     EXPECT_EQ(10, h.getBucketSize());
70     EXPECT_EQ(10, h.getMin());
71     EXPECT_EQ(110, h.getMax());
72
73     for (int i = 0; i < h.getNumBuckets(); ++i) {
74       EXPECT_EQ(4, h.getBucket(i).numLevels());
75     }
76
77     int maxVal = 120;
78     h.addValue(seconds(0), 0);
79     h.addValue(seconds(0), maxVal);
80     for (int i = 0; i < 98; i++) {
81       h.addValue(seconds(0), random() % maxVal);
82     }
83
84     h.update(std::chrono::duration_cast<std::chrono::seconds>(
85                std::chrono::system_clock::now().time_since_epoch()));
86     // bucket 0 stores everything below min, so its minimum
87     // is the lowest possible number
88     EXPECT_EQ(std::numeric_limits<int>::min(),
89               h.getPercentileBucketMin(1, IntMTMHTS::ALLTIME));
90     EXPECT_EQ(110, h.getPercentileBucketMin(99, IntMTMHTS::ALLTIME));
91
92     EXPECT_EQ(-2, h.getPercentileEstimate(0, IntMTMHTS::ALLTIME));
93     EXPECT_EQ(-1, h.getPercentileEstimate(1, IntMTMHTS::ALLTIME));
94     EXPECT_EQ(119, h.getPercentileEstimate(99, IntMTMHTS::ALLTIME));
95     EXPECT_EQ(120, h.getPercentileEstimate(100, IntMTMHTS::ALLTIME));
96   }
97 }
98
99 TEST(TimeseriesHistogram, String) {
100   RandomInt32 random(5);
101   // [10, 109], 12 buckets including above and below
102   {
103     TimeseriesHistogram<int> hist(10, 10, 110,
104                                   MultiLevelTimeSeries<int>(
105                                     60, IntMTMHTS::NUM_LEVELS,
106                                     IntMTMHTS::kDurations));
107
108     int maxVal = 120;
109     hist.addValue(seconds(0), 0);
110     hist.addValue(seconds(0), maxVal);
111     for (int i = 0; i < 98; i++) {
112       hist.addValue(seconds(0), random() % maxVal);
113     }
114
115     hist.update(seconds(0));
116
117     const char* const kStringValues1[IntMTMHTS::NUM_LEVELS] =  {
118       "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
119         "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
120       "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
121         "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
122       "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
123         "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
124       "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
125         "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
126     };
127
128     CHECK_EQ(IntMTMHTS::NUM_LEVELS, hist.getNumLevels());
129
130     for (int level = 0; level < hist.getNumLevels(); ++level) {
131       EXPECT_EQ(kStringValues1[level], hist.getString(level));
132     }
133
134     const char* const kStringValues2[IntMTMHTS::NUM_LEVELS] =  {
135       "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
136         "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
137       "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
138         "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
139       "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
140         "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
141       "-2147483648:12:4,10:8:13,20:8:24,30:6:34,40:13:46,50:8:54,60:7:64,"
142         "70:7:74,80:8:84,90:10:94,100:3:103,110:10:115",
143     };
144
145     CHECK_EQ(IntMTMHTS::NUM_LEVELS, hist.getNumLevels());
146
147     for (int level = 0; level < hist.getNumLevels(); ++level) {
148       EXPECT_EQ(kStringValues2[level], hist.getString(level));
149     }
150   }
151 }
152
153 TEST(TimeseriesHistogram, Clear) {
154   {
155     TimeseriesHistogram<int> hist(10, 0, 100,
156                                   MultiLevelTimeSeries<int>(
157                                     60, IntMTMHTS::NUM_LEVELS,
158                                     IntMTMHTS::kDurations));
159
160     for (int now = 0; now < 3600; now++) {
161       for (int i = 0; i < 100; i++) {
162         hist.addValue(seconds(now), i, 2);  // adds each item 2 times
163       }
164     }
165
166     // check clearing
167     hist.clear();
168
169     for (int b = 0; b < hist.getNumBuckets(); ++b) {
170       EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::MINUTE));
171       EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
172       EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::HOUR));
173       EXPECT_EQ(0, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
174     }
175
176     for (int pct = 0; pct <= 100; pct++) {
177       EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
178       EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::TEN_MINUTE));
179       EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
180       EXPECT_EQ(0, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
181
182       EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::MINUTE));
183       EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::TEN_MINUTE));
184       EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::HOUR));
185       EXPECT_EQ(0, hist.getPercentileEstimate(pct, IntMTMHTS::ALLTIME));
186     }
187   }
188 }
189
190
191 TEST(TimeseriesHistogram, Basic) {
192   {
193     TimeseriesHistogram<int> hist(10, 0, 100,
194                                   MultiLevelTimeSeries<int>(
195                                     60, IntMTMHTS::NUM_LEVELS,
196                                     IntMTMHTS::kDurations));
197
198     for (int now = 0; now < 3600; now++) {
199       for (int i = 0; i < 100; i++) {
200         hist.addValue(seconds(now), i);
201       }
202     }
203
204     hist.update(seconds(3599));
205     for (int pct = 1; pct <= 100; pct++) {
206       int expected = (pct - 1) / 10 * 10;
207       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
208       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
209                                                       IntMTMHTS::TEN_MINUTE));
210       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
211       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
212     }
213
214     for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) {
215       EXPECT_EQ(600, hist.getBucket(b).count(IntMTMHTS::MINUTE));
216       EXPECT_EQ(6000, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
217       EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::HOUR));
218       EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
219     }
220     EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
221     EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
222                 IntMTMHTS::MINUTE));
223   }
224
225   // -----------------
226
227   {
228     TimeseriesHistogram<int> hist(10, 0, 100,
229                                   MultiLevelTimeSeries<int>(
230                                     60, IntMTMHTS::NUM_LEVELS,
231                                     IntMTMHTS::kDurations));
232
233     for (int now = 0; now < 3600; now++) {
234       for (int i = 0; i < 100; i++) {
235         hist.addValue(seconds(now), i, 2);  // adds each item 2 times
236       }
237     }
238
239     hist.update(seconds(3599));
240     for (int pct = 1; pct <= 100; pct++) {
241       int expected = (pct - 1) / 10 * 10;
242       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
243       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
244                                                       IntMTMHTS::TEN_MINUTE));
245       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
246       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
247    }
248
249     for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) {
250       EXPECT_EQ(600 * 2, hist.getBucket(b).count(IntMTMHTS::MINUTE));
251       EXPECT_EQ(6000 * 2, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
252       EXPECT_EQ(36000 * 2, hist.getBucket(b).count(IntMTMHTS::HOUR));
253       EXPECT_EQ(36000 * 2, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
254     }
255     EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
256     EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
257                 IntMTMHTS::MINUTE));
258   }
259
260   // -----------------
261
262   {
263     TimeseriesHistogram<int> hist(10, 0, 100,
264                                   MultiLevelTimeSeries<int>(
265                                     60, IntMTMHTS::NUM_LEVELS,
266                                     IntMTMHTS::kDurations));
267
268     for (int now = 0; now < 3600; now++) {
269       for (int i = 0; i < 50; i++) {
270         hist.addValue(seconds(now), i * 2, 2);  // adds each item 2 times
271       }
272     }
273
274     hist.update(seconds(3599));
275     for (int pct = 1; pct <= 100; pct++) {
276       int expected = (pct - 1) / 10 * 10;
277       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::MINUTE));
278       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct,
279                                                       IntMTMHTS::TEN_MINUTE));
280       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::HOUR));
281       EXPECT_EQ(expected, hist.getPercentileBucketMin(pct, IntMTMHTS::ALLTIME));
282     }
283
284     EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::MINUTE));
285     EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::TEN_MINUTE));
286     EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::HOUR));
287     EXPECT_EQ(0, hist.getBucket(0).count(IntMTMHTS::ALLTIME));
288     EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
289                 IntMTMHTS::MINUTE));
290     EXPECT_EQ(0,
291               hist.getBucket(hist.getNumBuckets() - 1).
292                 count(IntMTMHTS::TEN_MINUTE));
293     EXPECT_EQ(0, hist.getBucket(hist.getNumBuckets() - 1).count(
294                 IntMTMHTS::HOUR));
295     EXPECT_EQ(0,
296               hist.getBucket(hist.getNumBuckets() - 1).count(
297                 IntMTMHTS::ALLTIME));
298
299     for (int b = 1; (b + 1) < hist.getNumBuckets(); ++b) {
300       EXPECT_EQ(600, hist.getBucket(b).count(IntMTMHTS::MINUTE));
301       EXPECT_EQ(6000, hist.getBucket(b).count(IntMTMHTS::TEN_MINUTE));
302       EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::HOUR));
303       EXPECT_EQ(36000, hist.getBucket(b).count(IntMTMHTS::ALLTIME));
304     }
305
306     for (int i = 0; i < 100; ++i) {
307       hist.addValue(seconds(3599), 200 + i);
308     }
309     hist.update(seconds(3599));
310     EXPECT_EQ(100,
311               hist.getBucket(hist.getNumBuckets() - 1).count(
312                 IntMTMHTS::ALLTIME));
313
314   }
315 }
316
317 TEST(TimeseriesHistogram, QueryByInterval) {
318   TimeseriesHistogram<int> mhts(8, 8, 120,
319                                 MultiLevelTimeSeries<int>(
320                                   60, IntMHTS::NUM_LEVELS,
321                                   IntMHTS::kDurations));
322
323   mhts.update(seconds(0));
324
325   int curTime;
326   for (curTime = 0; curTime < 7200; curTime++) {
327     mhts.addValue(seconds(curTime), 1);
328   }
329   for (curTime = 7200; curTime < 7200 + 3540; curTime++) {
330     mhts.addValue(seconds(curTime), 10);
331   }
332   for (curTime = 7200 + 3540; curTime < 7200 + 3600; curTime++) {
333     mhts.addValue(seconds(curTime), 100);
334   }
335
336   mhts.update(seconds(7200 + 3600 - 1));
337
338   struct TimeInterval {
339     TimeInterval(int s, int e)
340       : start(s), end(e) {}
341
342     std::chrono::seconds start;
343     std::chrono::seconds end;
344   };
345   TimeInterval intervals[12] = {
346     { curTime - 60, curTime },
347     { curTime - 3600, curTime },
348     { curTime - 7200, curTime },
349     { curTime - 3600, curTime - 60 },
350     { curTime - 7200, curTime - 60 },
351     { curTime - 7200, curTime - 3600 },
352     { curTime - 50, curTime - 20 },
353     { curTime - 3020, curTime - 20 },
354     { curTime - 7200, curTime - 20 },
355     { curTime - 3000, curTime - 1000 },
356     { curTime - 7200, curTime - 1000 },
357     { curTime - 7200, curTime - 3600 },
358   };
359
360   int expectedSums[12] = {
361     6000, 41400, 32400, 35400, 32129, 16200, 3000, 33600, 32308, 20000, 27899,
362     16200
363   };
364
365   int expectedCounts[12] = {
366     60, 3600, 7200, 3540, 7139, 3600, 30, 3000, 7178, 2000, 6199, 3600
367   };
368
369   // The first 7200 values added all fell below the histogram minimum,
370   // and went into the bucket that tracks all of the too-small values.
371   // This bucket reports a minimum value of the smallest possible integer.
372   int belowMinBucket = std::numeric_limits<int>::min();
373
374   int expectedValues[12][3] = {
375     {96, 96, 96},
376     { 8,  8, 96},
377     { belowMinBucket,  belowMinBucket,  8}, // alltime
378     { 8,  8,  8},
379     { belowMinBucket,  belowMinBucket,  8}, // alltime
380     { belowMinBucket,  belowMinBucket,  8}, // alltime
381     {96, 96, 96},
382     { 8,  8, 96},
383     { belowMinBucket,  belowMinBucket,  8}, // alltime
384     { 8,  8,  8},
385     { belowMinBucket,  belowMinBucket,  8}, // alltime
386     { belowMinBucket,  belowMinBucket,  8}  // alltime
387   };
388
389   for (int i = 0; i < 12; i++) {
390     const auto& itv = intervals[i];
391     int s = mhts.sum(itv.start, itv.end);
392     EXPECT_EQ(expectedSums[i], s);
393
394     int c = mhts.count(itv.start, itv.end);
395     EXPECT_EQ(expectedCounts[i], c);
396   }
397
398   // 3 levels
399   for (int i = 1; i <= 100; i++) {
400     EXPECT_EQ(96, mhts.getPercentileBucketMin(i, 0));
401     EXPECT_EQ(96, mhts.getPercentileBucketMin(i, seconds(curTime - 60),
402                                               seconds(curTime)));
403     EXPECT_EQ(8, mhts.getPercentileBucketMin(i, seconds(curTime - 3540),
404                                              seconds(curTime - 60)));
405   }
406
407   EXPECT_EQ(8, mhts.getPercentileBucketMin(1, 1));
408   EXPECT_EQ(8, mhts.getPercentileBucketMin(98, 1));
409   EXPECT_EQ(96, mhts.getPercentileBucketMin(99, 1));
410   EXPECT_EQ(96, mhts.getPercentileBucketMin(100, 1));
411
412   EXPECT_EQ(belowMinBucket, mhts.getPercentileBucketMin(1, 2));
413   EXPECT_EQ(belowMinBucket, mhts.getPercentileBucketMin(66, 2));
414   EXPECT_EQ(8, mhts.getPercentileBucketMin(67, 2));
415   EXPECT_EQ(8, mhts.getPercentileBucketMin(99, 2));
416   EXPECT_EQ(96, mhts.getPercentileBucketMin(100, 2));
417
418   // 0 is currently the value for bucket 0 (below min)
419   for (int i = 0; i < 12; i++) {
420     const auto& itv = intervals[i];
421     int v = mhts.getPercentileBucketMin(1, itv.start, itv.end);
422     EXPECT_EQ(expectedValues[i][0], v);
423
424     v = mhts.getPercentileBucketMin(50, itv.start, itv.end);
425     EXPECT_EQ(expectedValues[i][1], v);
426
427     v = mhts.getPercentileBucketMin(99, itv.start, itv.end);
428     EXPECT_EQ(expectedValues[i][2], v);
429   }
430
431   for (int i = 0; i < 12; i++) {
432     const auto& itv = intervals[i];
433     // Some of the older intervals that fall in the alltime bucket
434     // are off by 1 or 2 in their estimated counts.
435     size_t tolerance = 0;
436     if (itv.start <= seconds(curTime - 7200)) {
437       tolerance = 2;
438     } else if (itv.start <= seconds(curTime - 3000)) {
439       tolerance = 1;
440     }
441     size_t actualCount = (itv.end - itv.start).count();
442     size_t estimatedCount = mhts.count(itv.start, itv.end);
443     EXPECT_GE(actualCount, estimatedCount);
444     EXPECT_LE(actualCount - tolerance, estimatedCount);
445   }
446 }
447
448 TEST(TimeseriesHistogram, SingleUniqueValue) {
449   int values[] = {-1, 0, 500, 1000, 1500};
450   for (int ii = 0; ii < 5; ++ii) {
451     int value = values[ii];
452     TimeseriesHistogram<int> h(10, 0, 1000,
453                                MultiLevelTimeSeries<int>(
454                                  60, IntMTMHTS::NUM_LEVELS,
455                                  IntMTMHTS::kDurations));
456
457     const int kNumIters = 1000;
458     for (int jj = 0; jj < kNumIters; ++jj) {
459       h.addValue(seconds(time(nullptr)), value);
460     }
461     h.update(seconds(time(nullptr)));
462     // since we've only added one unique value, all percentiles should
463     // be that value
464     EXPECT_EQ(h.getPercentileEstimate(10, 0), value);
465     EXPECT_EQ(h.getPercentileEstimate(50, 0), value);
466     EXPECT_EQ(h.getPercentileEstimate(99, 0), value);
467
468     // Things get trickier if there are multiple unique values.
469     const int kNewValue = 750;
470     for (int kk = 0; kk < 2*kNumIters; ++kk) {
471       h.addValue(seconds(time(nullptr)), kNewValue);
472     }
473     h.update(seconds(time(nullptr)));
474     EXPECT_NEAR(h.getPercentileEstimate(50, 0), kNewValue+5, 5);
475     if (value >= 0 && value <= 1000) {
476       // only do further testing if value is within our bucket range,
477       // else estimates can be wildly off
478       if (kNewValue > value) {
479         EXPECT_NEAR(h.getPercentileEstimate(10, 0), value+5, 5);
480         EXPECT_NEAR(h.getPercentileEstimate(99, 0), kNewValue+5, 5);
481       } else {
482         EXPECT_NEAR(h.getPercentileEstimate(10, 0), kNewValue+5, 5);
483         EXPECT_NEAR(h.getPercentileEstimate(99, 0), value+5, 5);
484       }
485     }
486   }
487 }