2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 // This is heavily inspired by the signal handler from google-glog
19 #include <folly/experimental/symbolizer/SignalHandler.h>
23 #include <sys/types.h>
32 #include <glog/logging.h>
34 #include <folly/Conv.h>
35 #include <folly/FileUtil.h>
36 #include <folly/Portability.h>
37 #include <folly/ScopeGuard.h>
38 #include <folly/experimental/symbolizer/ElfCache.h>
39 #include <folly/experimental/symbolizer/Symbolizer.h>
40 #include <folly/portability/SysSyscall.h>
42 namespace folly { namespace symbolizer {
47 * Fatal signal handler registry.
// Holds the user callbacks that are run from the fatal signal handler.
// Callbacks are registered with add() and stored in handlers_.
49 class FatalSignalCallbackRegistry {
51 FatalSignalCallbackRegistry();
// Registers func as a callback.
53 void add(SignalCallback func);
// Flag that markInstalled() flips to true.
58 std::atomic<bool> installed_;
// Registered callbacks; appended to by add() and iterated by run().
60 std::vector<SignalCallback> handlers_;
63 FatalSignalCallbackRegistry::FatalSignalCallbackRegistry()
// Appends func to handlers_ while holding mutex_. As the diagnostic message
// below states, add() may not be used once the signal handlers have been
// installed.
67 void FatalSignalCallbackRegistry::add(SignalCallback func) {
68 std::lock_guard<std::mutex> lock(mutex_);
70 << "FatalSignalCallbackRegistry::add may not be used "
71 "after installing the signal handlers.";
72 handlers_.push_back(func);
// Marks the registry as installed, holding mutex_ while doing so.
75 void FatalSignalCallbackRegistry::markInstalled() {
76 std::lock_guard<std::mutex> lock(mutex_);
// exchange(true) returns the previous value, so the CHECK fails if the flag
// was already set, i.e. on a second call.
77 CHECK(!installed_.exchange(true))
78 << "FatalSignalCallbackRegistry::markInstalled must be called "
// Visits every callback stored in handlers_, in the order they were added.
// Called from innerSignalHandler() after the stack trace has been dumped.
82 void FatalSignalCallbackRegistry::run() {
87 for (auto& fn : handlers_) {
92 // Leak it so we don't have to worry about destruction order
// The single registry instance used by addFatalSignalCallback(),
// installFatalSignalCallbacks() and innerSignalHandler().
93 FatalSignalCallbackRegistry* gFatalSignalCallbackRegistry =
94 new FatalSignalCallbackRegistry;
// Disposition that was in effect before our handler was installed; written
// by installFatalSignalHandler() and restored by callPreviousSignalHandler().
99 struct sigaction oldAction;
// Table of the signals treated as fatal: {number, printable name, saved old
// action}. Code that walks the table stops at the first entry whose name is
// null.
100 } kFatalSignals[] = {
101 { SIGSEGV, "SIGSEGV", {} },
102 { SIGILL, "SIGILL", {} },
103 { SIGFPE, "SIGFPE", {} },
104 { SIGABRT, "SIGABRT", {} },
105 { SIGBUS, "SIGBUS", {} },
106 { SIGTERM, "SIGTERM", {} },
// Reinstalls the disposition that was saved for signum in kFatalSignals.
// For a signal that is not in the table, the disposition is reset to SIG_DFL.
110 void callPreviousSignalHandler(int signum) {
111 // Restore disposition to old disposition, then kill ourselves with the same
112 // signal. The signal will be blocked until we return from our handler,
113 // then it will invoke the default handler and abort.
114 for (auto p = kFatalSignals; p->name; ++p) {
115 if (p->number == signum) {
// Put back the action recorded when our handler was installed.
116 sigaction(signum, &p->oldAction, nullptr);
122 // Not one of the signals we know about. Oh well. Reset to default.
// Start from an all-zero sigaction so only the handler field is meaningful.
124 memset(&sa, 0, sizeof(sa));
125 sa.sa_handler = SIG_DFL;
126 sigaction(signum, &sa, nullptr);
130 // Note: not thread-safe, but that's okay, as we only let one thread
131 // in our signal handler at a time.
133 // Leak it so we don't have to worry about destruction order
// Lower bound for the size argument passed to the SignalSafeElfCache below.
134 constexpr size_t kMinSignalSafeElfCacheSize = 500;
// ELF cache handed to the Symbolizer in dumpStackTrace(). Constructed with
// the larger of countLoadedElfFiles() and kMinSignalSafeElfCacheSize.
135 auto gSignalSafeElfCache = new SignalSafeElfCache(
136 std::max(countLoadedElfFiles(), kMinSignalSafeElfCacheSize));
138 // Buffered writer (using a fixed-size buffer). We try to write only once
139 // to prevent interleaving with messages written from other threads.
141 // Leak it so we don't have to worry about destruction order.
// Printer used for all handler output; it writes to STDERR_FILENO.
// NOTE(review): the third argument is presumably the fixed buffer size in
// bytes — confirm against FDSymbolizePrinter's constructor.
142 auto gPrinter = new FDSymbolizePrinter(STDERR_FILENO,
143 SymbolizePrinter::COLOR_IF_TTY,
144 size_t(64) << 10); // 64KiB
146 // Flush gPrinter, also fsync, in case we're about to crash again...
149 fsyncNoInt(STDERR_FILENO);
// Formats val into a local buffer with uint64ToBufferUnsafe() and prints the
// resulting characters through gPrinter.
152 void printDec(uint64_t val) {
// n is the number of characters written into buf.
154 uint32_t n = uint64ToBufferUnsafe(val, buf);
155 gPrinter->print(StringPiece(buf, n));
// Lookup table mapping a 4-bit value to its lowercase hexadecimal digit.
158 const char kHexChars[] = "0123456789abcdef";
// Prints val through gPrinter in hexadecimal. Digits are written into a
// stack buffer from its end backwards, and the range [p, end) is printed.
159 void printHex(uint64_t val) {
160 // TODO(tudorb): Add this to folly/Conv.h
161 char buf[2 + 2 * sizeof(uint64_t)]; // "0x" prefix, 2 digits for each byte
// One-past-the-end of buf; the digits are laid down right-to-left from here.
163 char* end = buf + sizeof(buf);
// Emit the digit for the lowest four bits of val.
166 *--p = kHexChars[val & 0x0f];
172 gPrinter->print(StringPiece(p, end));
175 void print(StringPiece sp) {
// Prints the "*** Aborted at ..." banner based on the current Unix time.
179 void dumpTimeInfo() {
// Make sure the output is flushed however this function is left.
180 SCOPE_EXIT { flush(); };
181 time_t now = time(nullptr);
182 print("*** Aborted at ");
184 print(" (Unix time, try 'date -d @");
// Returns a short static description for the si_code of a SIGILL; used by
// signal_reason().
189 const char* sigill_reason(int si_code) {
192 return "illegal opcode";
194 return "illegal operand";
196 return "illegal addressing mode";
198 return "illegal trap";
200 return "privileged opcode";
202 return "privileged register";
204 return "coprocessor error";
206 return "internal stack error";
// Returns a short static description for the si_code of a SIGFPE; used by
// signal_reason().
213 const char* sigfpe_reason(int si_code) {
216 return "integer divide by zero";
218 return "integer overflow";
220 return "floating-point divide by zero";
222 return "floating-point overflow";
224 return "floating-point underflow";
226 return "floating-point inexact result";
228 return "floating-point invalid operation";
230 return "subscript out of range";
// Returns a short static description for the si_code of a SIGSEGV; used by
// signal_reason().
237 const char* sigsegv_reason(int si_code) {
240 return "address not mapped to object";
242 return "invalid permissions for mapped object";
// Returns a short static description for the si_code of a SIGBUS; used by
// signal_reason().
249 const char* sigbus_reason(int si_code) {
252 return "invalid address alignment";
254 return "nonexistent physical address";
256 return "object-specific hardware error";
258 // MCEERR_AR and MCEERR_AO: in sigaction(2) but not in headers.
// Returns a short static description for the si_code of a SIGTRAP; used by
// signal_reason().
265 const char* sigtrap_reason(int si_code) {
268 return "process breakpoint";
270 return "process trace trap";
272 // TRAP_BRANCH and TRAP_HWBKPT: in sigaction(2) but not in headers.
// Returns a short static description for the si_code of a SIGCHLD; used by
// signal_reason().
279 const char* sigchld_reason(int si_code) {
282 return "child has exited";
284 return "child was killed";
286 return "child terminated abnormally";
288 return "traced child has trapped";
290 return "child has stopped";
292 return "stopped child has continued";
// Returns a short static description for the si_code of a SIGIO (also known
// as SIGPOLL); used by signal_reason().
299 const char* sigio_reason(int si_code) {
302 return "data input available";
304 return "output buffers available";
306 return "input message available";
310 return "high priority input available";
312 return "device disconnected";
// Returns the description produced by one of the per-signal helpers
// (sigill_reason ... sigio_reason) for si_code. The caller, dumpSignalInfo(),
// checks the result against nullptr before using it.
319 const char* signal_reason(int signum, int si_code) {
322 return sigill_reason(si_code);
324 return sigfpe_reason(si_code);
326 return sigsegv_reason(si_code);
328 return sigbus_reason(si_code);
330 return sigtrap_reason(si_code);
332 return sigchld_reason(si_code);
334 return sigio_reason(si_code); // aka SIGPOLL
// Prints the "*** Signal ..." header for the signal being handled: the
// address from siginfo->si_addr, the pthread and Linux thread ids, the
// sending PID/UID when the signal did not come from the kernel, and a textual
// reason when signal_reason() provides one.
341 void dumpSignalInfo(int signum, siginfo_t* siginfo) {
// Flush the printer however this function is left.
342 SCOPE_EXIT { flush(); };
343 // Get the signal name, if possible.
344 const char* name = nullptr;
// Search the fatal-signal table for an entry matching signum.
345 for (auto p = kFatalSignals; p->name; ++p) {
346 if (p->number == signum) {
352 print("*** Signal ");
361 printHex(reinterpret_cast<uint64_t>(siginfo->si_addr));
362 print(") received by PID ");
364 print(" (pthread TID ");
365 printHex((uint64_t)pthread_self());
366 print(") (linux TID ");
// The kernel thread id is obtained with a raw gettid syscall.
367 printDec(syscall(__NR_gettid));
369 // Kernel-sourced signals don't give us useful info for pid/uid.
370 if (siginfo->si_code != SI_KERNEL) {
371 print(") (maybe from PID ");
372 printDec(siginfo->si_pid);
374 printDec(siginfo->si_uid);
377 auto reason = signal_reason(signum, siginfo->si_code);
// reason may be null, in which case there is no description to print.
379 if (reason != nullptr) {
384 print("), stack trace: ***\n");
// Captures up to kMaxStackTraceDepth frames of the current stack with
// getStackTraceSafe() and prints them. When symbolize is true the frames are
// symbolized via Symbolizer; otherwise the raw addresses are formatted with
// AddressFormatter.
// NOTE(review): FOLLY_NOINLINE is presumably there so that the number of
// frames skipped below stays accurate — confirm.
387 FOLLY_NOINLINE void dumpStackTrace(bool symbolize);
389 void dumpStackTrace(bool symbolize) {
// Flush the printer however this function is left.
390 SCOPE_EXIT { flush(); };
391 // Get and symbolize stack trace
392 constexpr size_t kMaxStackTraceDepth = 100;
393 FrameArray<kMaxStackTraceDepth> addresses;
395 // Skip the getStackTrace frame
396 if (!getStackTraceSafe(addresses)) {
397 print("(error retrieving stack trace)\n");
398 } else if (symbolize) {
399 // Do our best to populate location info, process is going to terminate,
400 // so performance isn't critical.
401 Symbolizer symbolizer(gSignalSafeElfCache, Dwarf::LocationInfoMode::FULL);
402 symbolizer.symbolize(addresses);
404 // Skip the top 2 frames:
406 // dumpStackTrace (here)
408 // Leaving signalHandler on the stack for clarity, I think.
409 gPrinter->println(addresses, 2);
// Fallback path: no symbolization, print each captured address as-is.
411 print("(safe mode, symbolizer not available)\n");
412 AddressFormatter formatter;
413 for (size_t i = 0; i < addresses.frameCount; ++i) {
414 print(formatter.format(addresses.addresses[i]));
420 // On Linux, pthread_t is a pointer, so 0 is an invalid value, which we
421 // take to indicate "no thread in the signal handler".
423 // POSIX defines PTHREAD_NULL for this purpose, but that's not available.
// Sentinel meaning "no thread is currently inside the signal handler".
424 constexpr pthread_t kInvalidThreadId = 0;
// Id of the thread that currently owns the signal handler, or
// kInvalidThreadId. Claimed in innerSignalHandler(), released in
// signalHandler().
426 std::atomic<pthread_t> gSignalThread(kInvalidThreadId);
// Set to true the first time a thread re-enters the handler recursively.
427 std::atomic<bool> gInRecursiveSignalHandler(false);
// Core of the fatal signal handler. Claims gSignalThread for the calling
// thread, then dumps the signal info and a symbolized stack trace and runs
// the registered callbacks. If the owning thread re-enters, it prints a
// warning and an unsymbolized trace the first time. A failed attempt to claim
// the handler sleeps 100ms before trying again.
430 void innerSignalHandler(int signum, siginfo_t* info, void* /* uctx */) {
431 // First, let's only let one thread in here at a time.
432 pthread_t myId = pthread_self();
434 pthread_t prevSignalThread = kInvalidThreadId;
// On failure compare_exchange_strong stores the current owner in
// prevSignalThread, which the recursion check below relies on.
435 while (!gSignalThread.compare_exchange_strong(prevSignalThread, myId)) {
436 if (pthread_equal(prevSignalThread, myId)) {
437 // First time here. Try to dump the stack trace without symbolization.
438 // If we still fail, well, we're mightily screwed, so we do nothing the
440 if (!gInRecursiveSignalHandler.exchange(true)) {
441 print("Entered fatal signal handler recursively. We're in trouble.\n");
442 dumpStackTrace(false); // no symbolization
447 // Wait a while, try again.
450 ts.tv_nsec = 100L * 1000 * 1000; // 100ms
451 nanosleep(&ts, nullptr);
// Reset the expected value so the next attempt again compares against
// "no owner".
453 prevSignalThread = kInvalidThreadId;
457 dumpSignalInfo(signum, info);
458 dumpStackTrace(true); // with symbolization
460 // Run user callbacks
461 gFatalSignalCallbackRegistry->run();
// The sa_sigaction entry point registered by installFatalSignalHandler().
// Runs innerSignalHandler(), releases ownership of the handler, then hands
// the signal to the previously installed disposition.
464 void signalHandler(int signum, siginfo_t* info, void* uctx) {
// Flush the printer however this function is left.
465 SCOPE_EXIT { flush(); };
466 innerSignalHandler(signum, info, uctx);
// Mark the handler as free again so another thread can claim it.
468 gSignalThread = kInvalidThreadId;
469 // Kill ourselves with the previous handler.
470 callPreviousSignalHandler(signum);
// Registers cb with the global callback registry. The registry's add()
// states that it may not be used after the signal handlers are installed.
475 void addFatalSignalCallback(SignalCallback cb) {
476 gFatalSignalCallbackRegistry->add(cb);
// Marks the global callback registry as installed. The registry CHECKs that
// this happens only once.
479 void installFatalSignalCallbacks() {
480 gFatalSignalCallbackRegistry->markInstalled();
// Set to true by the first call to installFatalSignalHandler(); later calls
// observe true from the exchange below.
485 std::atomic<bool> gAlreadyInstalled;
// Installs signalHandler for every signal listed in kFatalSignals, saving the
// previous disposition of each signal in the table's oldAction slot.
489 void installFatalSignalHandler() {
490 if (gAlreadyInstalled.exchange(true)) {
496 memset(&sa, 0, sizeof(sa));
497 sigemptyset(&sa.sa_mask);
498 // By default signal handlers are run on the signaled thread's stack.
499 // In case of stack overflow running the SIGSEGV signal handler on
500 // the same stack leads to another SIGSEGV and crashes the program.
501 // Use SA_ONSTACK, so alternate stack is used (only if configured via
503 sa.sa_flags |= SA_SIGINFO | SA_ONSTACK;
504 sa.sa_sigaction = &signalHandler;
// Register the handler for each table entry; CHECK_ERR checks the sigaction
// result.
506 for (auto p = kFatalSignals; p->name; ++p) {
507 CHECK_ERR(sigaction(p->number, &sa, &p->oldAction));