2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 #ifndef __STDC_FORMAT_MACROS
17 #define __STDC_FORMAT_MACROS
20 #include <folly/io/async/test/TimeUtil.h>
25 #include <sys/types.h>
27 #include <sys/utsname.h>
34 #include <folly/Conv.h>
35 #include <folly/portability/SysSyscall.h>
36 #include <folly/portability/Unistd.h>
37 #include <folly/portability/Windows.h>
39 #include <glog/logging.h>
42 using namespace std::chrono;
47 static pid_t gettid() {
48 return pid_t(GetCurrentThreadId());
52 * glibc doesn't provide gettid(), so define it ourselves.
54 static pid_t gettid() {
55 return syscall(FOLLY_SYS_gettid);
59 * The /proc/<pid>/schedstat file reports time values in jiffies.
61 * Determine how many jiffies are in a second.
62 * Returns -1 if the number of jiffies/second cannot be determined.
64 static int64_t determineJiffiesHZ() {
65 // It seems like the only real way to figure out the CONFIG_HZ value used by
66 // this kernel is to look it up in the config file.
68 // Look in /boot/config-<kernel_release>
69 struct utsname unameInfo;
70 if (uname(&unameInfo) != 0) {
71 LOG(ERROR) << "unable to determine jiffies/second: uname failed: %s"
77 snprintf(configPath, sizeof(configPath), "/boot/config-%s",
80 FILE* f = fopen(configPath, "r");
82 LOG(ERROR) << "unable to determine jiffies/second: "
83 "cannot open kernel config file %s" << configPath;
89 while (fgets(buf, sizeof(buf), f) != nullptr) {
90 if (strcmp(buf, "CONFIG_NO_HZ=y\n") == 0) {
91 // schedstat info seems to be reported in nanoseconds on tickless
94 // The CONFIG_HZ value doesn't matter for our purposes,
95 // so return as soon as we see CONFIG_NO_HZ.
98 } else if (strcmp(buf, "CONFIG_HZ=1000\n") == 0) {
100 } else if (strcmp(buf, "CONFIG_HZ=300\n") == 0) {
102 } else if (strcmp(buf, "CONFIG_HZ=250\n") == 0) {
104 } else if (strcmp(buf, "CONFIG_HZ=100\n") == 0) {
111 LOG(ERROR) << "unable to determine jiffies/second: no CONFIG_HZ setting "
112 "found in %s" << configPath;
121 * Determine how long this process has spent waiting to get scheduled on the
124 * Returns the number of milliseconds spent waiting, or -1 if the amount of
125 * time cannot be determined.
127 static milliseconds getTimeWaitingMS(pid_t tid) {
129 return milliseconds(0);
131 static int64_t jiffiesHZ = 0;
132 if (jiffiesHZ == 0) {
133 jiffiesHZ = determineJiffiesHZ();
137 // We couldn't figure out how many jiffies there are in a second.
138 // Don't bother reading the schedstat info if we can't interpret it.
139 return milliseconds(0);
144 char schedstatFile[256];
145 snprintf(schedstatFile, sizeof(schedstatFile),
146 "/proc/%d/schedstat", tid);
147 fd = open(schedstatFile, O_RDONLY);
149 throw std::runtime_error(
150 folly::to<string>("failed to open process schedstat file", errno));
154 ssize_t bytesReadRet = read(fd, buf, sizeof(buf) - 1);
155 if (bytesReadRet <= 0) {
156 throw std::runtime_error(
157 folly::to<string>("failed to read process schedstat file", errno));
159 size_t bytesRead = size_t(bytesReadRet);
161 if (buf[bytesRead - 1] != '\n') {
162 throw std::runtime_error("expected newline at end of schedstat data");
164 assert(bytesRead < sizeof(buf));
165 buf[bytesRead] = '\0';
167 uint64_t activeJiffies = 0;
168 uint64_t waitingJiffies = 0;
169 uint64_t numTasks = 0;
170 int rc = sscanf(buf, "%" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
171 &activeJiffies, &waitingJiffies, &numTasks);
173 throw std::runtime_error("failed to parse schedstat data");
177 return milliseconds((waitingJiffies * 1000) / jiffiesHZ);
178 } catch (const std::runtime_error& e) {
182 LOG(ERROR) << "error determining process wait time: %s" << e.what();
183 return milliseconds(0);
188 void TimePoint::reset() {
189 // Remember the current time
190 timeStart_ = system_clock::now();
192 // Remember how long this process has spent waiting to be scheduled
194 timeWaiting_ = getTimeWaitingMS(tid_);
196 // In case it took a while to read the schedstat info,
197 // also record the time after the schedstat check
198 timeEnd_ = system_clock::now();
201 std::ostream& operator<<(std::ostream& os, const TimePoint& timePoint) {
202 os << "TimePoint(" << timePoint.getTimeStart().time_since_epoch().count()
203 << ", " << timePoint.getTimeEnd().time_since_epoch().count() << ", "
204 << timePoint.getTimeWaiting().count() << ")";
209 checkTimeout(const TimePoint& start, const TimePoint& end,
210 milliseconds expectedMS, bool allowSmaller,
211 milliseconds tolerance) {
212 auto elapsedMS = end.getTimeStart() - start.getTimeEnd();
215 // Timeouts should never fire before the time was up.
216 // Allow 1ms of wiggle room for rounding errors.
217 if (elapsedMS < expectedMS - milliseconds(1)) {
222 // Check that the event fired within a reasonable time of the timout.
224 // If the system is under heavy load, our process may have had to wait for a
225 // while to be run. The time spent waiting for the processor shouldn't
226 // count against us, so exclude this time from the check.
227 milliseconds excludedMS;
228 if (end.getTid() != start.getTid()) {
229 // We can only correctly compute the amount of time waiting to be scheduled
230 // if both TimePoints were set in the same thread.
231 excludedMS = milliseconds(0);
233 excludedMS = end.getTimeWaiting() - start.getTimeWaiting();
234 assert(end.getTimeWaiting() >= start.getTimeWaiting());
235 // Add a tolerance here due to precision issues on linux, see below note.
236 assert( (elapsedMS + tolerance) >= excludedMS);
239 milliseconds effectiveElapsedMS = milliseconds(0);
240 if (elapsedMS > excludedMS) {
241 effectiveElapsedMS = duration_cast<milliseconds>(elapsedMS) - excludedMS;
244 // On x86 Linux, sleep calls generally have precision only to the nearest
245 // millisecond. The tolerance parameter lets users allow a few ms of slop.
246 milliseconds overrun = effectiveElapsedMS - expectedMS;
247 if (overrun > tolerance) {