#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
-#ifndef _MSC_VER
+#ifdef __linux__
#include <sys/utsname.h>
#endif
#include <stdexcept>
#include <folly/Conv.h>
-#include <folly/portability/SysSyscall.h>
+#include <folly/ScopeGuard.h>
+#include <folly/ThreadId.h>
#include <folly/portability/Unistd.h>
-#include <folly/portability/Windows.h>
#include <glog/logging.h>
namespace folly {
-#ifdef _MSC_VER
-static pid_t gettid() {
- return pid_t(GetCurrentThreadId());
-}
-#else
-/**
- * glibc doesn't provide gettid(), so define it ourselves.
- */
-static pid_t gettid() {
- return syscall(FOLLY_SYS_gettid);
+static int getLinuxVersion(StringPiece release) {
+ auto dot1 = release.find('.');
+ if (dot1 == StringPiece::npos) {
+ throw std::invalid_argument("could not find first dot");
+ }
+ auto v1 = folly::to<int>(release.subpiece(0, dot1));
+
+ auto dot2 = release.find('.', dot1 + 1);
+ if (dot2 == StringPiece::npos) {
+ throw std::invalid_argument("could not find second dot");
+ }
+ auto v2 = folly::to<int>(release.subpiece(dot1 + 1, dot2 - (dot1 + 1)));
+
+ int dash = release.find('-', dot2 + 1);
+ auto v3 = folly::to<int>(release.subpiece(dot2 + 1, dash - (dot2 + 1)));
+
+ return ((v1 * 1000 + v2) * 1000) + v3;
}
+#ifdef __linux__
/**
- * The /proc/<pid>/schedstat file reports time values in jiffies.
+ * Determine the time units used in /proc/<pid>/schedstat
*
- * Determine how many jiffies are in a second.
- * Returns -1 if the number of jiffies/second cannot be determined.
+ * Returns the number of nanoseconds per time unit,
+ * or -1 if we cannot determine the units.
*/
-static int64_t determineJiffiesHZ() {
- // It seems like the only real way to figure out the CONFIG_HZ value used by
- // this kernel is to look it up in the config file.
- //
- // Look in /boot/config-<kernel_release>
+static int64_t determineSchedstatUnits() {
struct utsname unameInfo;
if (uname(&unameInfo) != 0) {
LOG(ERROR) << "unable to determine jiffies/second: uname failed: %s"
return -1;
}
+ // In Linux version 2.6.23 and later, time time values are always
+ // reported in nanoseconds.
+ //
+ // This change appears to have been made in commit 425e0968a25f, which
+ // moved some of the sched stats code to a new file. Despite the commit
+ // message claiming "no code changes are caused by this patch", it changed
+ // the task.sched_info.cpu_time and task.sched_info.run_delay counters to be
+ // computed using sched_clock() rather than jiffies.
+ int linuxVersion;
+ try {
+ linuxVersion = getLinuxVersion(unameInfo.release);
+ } catch (const std::exception&) {
+ LOG(ERROR) << "unable to determine jiffies/second: failed to parse "
+ << "kernel release string \"" << unameInfo.release << "\"";
+ return -1;
+ }
+ if (linuxVersion >= 2006023) {
+ // The units are nanoseconds
+ return 1;
+ }
+
+ // In Linux versions prior to 2.6.23, the time values are reported in
+ // jiffies. This is somewhat unfortunate, as the number of jiffies per
+ // second is configurable. We have to determine the units being used.
+ //
+ // It seems like the only real way to figure out the CONFIG_HZ value used by
+ // this kernel is to look it up in the config file.
+ //
+ // Look in /boot/config-<kernel_release>
char configPath[256];
snprintf(configPath, sizeof(configPath), "/boot/config-%s",
unameInfo.release);
"cannot open kernel config file %s" << configPath;
return -1;
}
+ SCOPE_EXIT {
+ fclose(f);
+ };
int64_t hz = -1;
char buf[1024];
while (fgets(buf, sizeof(buf), f) != nullptr) {
if (strcmp(buf, "CONFIG_NO_HZ=y\n") == 0) {
- // schedstat info seems to be reported in nanoseconds on tickless
- // kernels.
- //
- // The CONFIG_HZ value doesn't matter for our purposes,
- // so return as soon as we see CONFIG_NO_HZ.
- fclose(f);
- return 1000000000;
+ LOG(ERROR) << "unable to determine jiffies/second: tickless kernel";
+ return -1;
} else if (strcmp(buf, "CONFIG_HZ=1000\n") == 0) {
hz = 1000;
} else if (strcmp(buf, "CONFIG_HZ=300\n") == 0) {
hz = 100;
}
}
- fclose(f);
if (hz == -1) {
LOG(ERROR) << "unable to determine jiffies/second: no CONFIG_HZ setting "
* time cannot be determined.
*/
static milliseconds getTimeWaitingMS(pid_t tid) {
-#ifdef _MSC_VER
+#ifndef __linux__
+ (void)tid;
return milliseconds(0);
#else
- static int64_t jiffiesHZ = 0;
- if (jiffiesHZ == 0) {
- jiffiesHZ = determineJiffiesHZ();
- }
-
- if (jiffiesHZ < 0) {
+ static int64_t timeUnits = determineSchedstatUnits();
+ if (timeUnits < 0) {
// We couldn't figure out how many jiffies there are in a second.
// Don't bother reading the schedstat info if we can't interpret it.
return milliseconds(0);
}
close(fd);
- return milliseconds((waitingJiffies * 1000) / jiffiesHZ);
+ return duration_cast<milliseconds>(nanoseconds(waitingJiffies * timeUnits));
} catch (const std::runtime_error& e) {
if (fd >= 0) {
close(fd);
timeStart_ = system_clock::now();
// Remember how long this process has spent waiting to be scheduled
- tid_ = gettid();
+ tid_ = getOSThreadID();
timeWaiting_ = getTimeWaitingMS(tid_);
// In case it took a while to read the schedstat info,
checkTimeout(const TimePoint& start, const TimePoint& end,
milliseconds expectedMS, bool allowSmaller,
milliseconds tolerance) {
- auto elapsedMS = end.getTimeStart() - start.getTimeEnd();
+ auto elapsedTime = end.getTimeStart() - start.getTimeEnd();
if (!allowSmaller) {
// Timeouts should never fire before the time was up.
// Allow 1ms of wiggle room for rounding errors.
- if (elapsedMS < expectedMS - milliseconds(1)) {
+ if (elapsedTime < (expectedMS - milliseconds(1))) {
return false;
}
}
excludedMS = end.getTimeWaiting() - start.getTimeWaiting();
assert(end.getTimeWaiting() >= start.getTimeWaiting());
// Add a tolerance here due to precision issues on linux, see below note.
- assert( (elapsedMS + tolerance) >= excludedMS);
+ assert((elapsedTime + tolerance) >= excludedMS);
}
milliseconds effectiveElapsedMS = milliseconds(0);
- if (elapsedMS > excludedMS) {
- effectiveElapsedMS = duration_cast<milliseconds>(elapsedMS) - excludedMS;
+ if (elapsedTime > excludedMS) {
+ effectiveElapsedMS = duration_cast<milliseconds>(elapsedTime) - excludedMS;
}
// On x86 Linux, sleep calls generally have precision only to the nearest