sancov -not-covered-functions.
[oota-llvm.git] / tools / sancov / sancov.cc
index 9b54575..450c21b 100644 (file)
 //===----------------------------------------------------------------------===//
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/raw_ostream.h"
 
 #include <set>
 #include <stdio.h>
+#include <string>
 #include <vector>
 
 using namespace llvm;
@@ -35,13 +50,19 @@ namespace {
 
 // --------- COMMAND LINE FLAGS ---------
 
-enum ActionType { PrintAction, CoveredFunctionsAction };
+enum ActionType {
+  PrintAction,
+  CoveredFunctionsAction,
+  NotCoveredFunctionsAction
+};
 
 cl::opt<ActionType> Action(
     cl::desc("Action (required)"), cl::Required,
     cl::values(clEnumValN(PrintAction, "print", "Print coverage addresses"),
-               clEnumValN(CoveredFunctionsAction, "covered_functions",
+               clEnumValN(CoveredFunctionsAction, "covered-functions",
                           "Print all covered funcions."),
+               clEnumValN(NotCoveredFunctionsAction, "not-covered-functions",
+                          "Print all not covered funcions."),
                clEnumValEnd));
 
 static cl::list<std::string> ClInputFiles(cl::Positional, cl::OneOrMore,
@@ -55,6 +76,10 @@ static cl::opt<bool>
     ClDemangle("demangle", cl::init(true),
         cl::desc("Print demangled function name."));
 
+static cl::opt<std::string> ClStripPathPrefix(
+    "strip_path_prefix", cl::init(""),
+    cl::desc("Strip this prefix from file paths in reports."));
+
 // --------- FORMAT SPECIFICATION ---------
 
 struct FileHeader {
@@ -68,31 +93,256 @@ static const uint32_t Bitness64 = 0xFFFFFF64;
 
 // ---------
 
+static void FailIfError(std::error_code Error) {
+  if (!Error)
+    return;
+  errs() << "Error: " << Error.message() << "(" << Error.value() << ")\n";
+  exit(1);
+}
+
 template <typename T> static void FailIfError(const ErrorOr<T> &E) {
-  if (E)
+  FailIfError(E.getError());
+}
+
+static void FailIfNotEmpty(const std::string &E) {
+  if (E.empty())
     return;
+  errs() << "Error: " << E << "\n";
+  exit(1);
+}
 
-  auto Error = E.getError();
-  errs() << "Error: " << Error.message() << "(" << Error.value() << ")\n";
-  exit(-2);
+template <typename T>
+static void FailIfEmpty(const std::unique_ptr<T> &Ptr,
+                        const std::string &Message) {
+  if (Ptr.get())
+    return;
+  errs() << "Error: " << Message << "\n";
+  exit(1);
 }
 
 template <typename T>
 static void readInts(const char *Start, const char *End,
-                     std::vector<uint64_t> *V) {
+                     std::set<uint64_t> *Ints) {
   const T *S = reinterpret_cast<const T *>(Start);
   const T *E = reinterpret_cast<const T *>(End);
-  V->reserve(E - S);
-  std::copy(S, E, std::back_inserter(*V));
+  std::copy(S, E, std::inserter(*Ints, Ints->end()));
+}
+
+struct FileLoc {
+  bool operator<(const FileLoc &RHS) const {
+    return std::tie(FileName, Line) < std::tie(RHS.FileName, RHS.Line);
+  }
+
+  std::string FileName;
+  uint32_t Line;
+};
+
+struct FunctionLoc {
+  bool operator<(const FunctionLoc &RHS) const {
+    return std::tie(Loc, FunctionName) < std::tie(RHS.Loc, RHS.FunctionName);
+  }
+
+  FileLoc Loc;
+  std::string FunctionName;
+};
+
+std::string stripPathPrefix(std::string Path) {
+  if (ClStripPathPrefix.empty())
+    return Path;
+  size_t Pos = Path.find(ClStripPathPrefix);
+  if (Pos == std::string::npos)
+    return Path;
+  return Path.substr(Pos + ClStripPathPrefix.size());
+}
+
+// Compute [FileLoc -> FunctionName] map for given addresses.
+static std::map<FileLoc, std::string>
+computeFunctionsMap(const std::set<uint64_t> &Addrs) {
+  std::map<FileLoc, std::string> Fns;
+
+  symbolize::LLVMSymbolizer::Options SymbolizerOptions;
+  SymbolizerOptions.Demangle = ClDemangle;
+  SymbolizerOptions.UseSymbolTable = true;
+  symbolize::LLVMSymbolizer Symbolizer(SymbolizerOptions);
+
+  // Fill in Fns map.
+  for (auto Addr : Addrs) {
+    auto InliningInfo = Symbolizer.symbolizeInlinedCode(ClBinaryName, Addr);
+    FailIfError(InliningInfo);
+    for (uint32_t i = 0; i < InliningInfo->getNumberOfFrames(); ++i) {
+      auto FrameInfo = InliningInfo->getFrame(i);
+      SmallString<256> FileName(FrameInfo.FileName);
+      sys::path::remove_dots(FileName, /* remove_dot_dot */ true);
+      FileLoc Loc = {FileName.str(), FrameInfo.Line};
+      Fns[Loc] = FrameInfo.FunctionName;
+    }
+  }
+
+  return Fns;
+}
+
+// Compute functions for given addresses. It keeps only the first
+// occurence of a function within a file.
+std::set<FunctionLoc> computeFunctionLocs(const std::set<uint64_t> &Addrs) {
+  std::map<FileLoc, std::string> Fns = computeFunctionsMap(Addrs);
+
+  std::set<FunctionLoc> result;
+  std::string LastFileName;
+  std::set<std::string> ProcessedFunctions;
+
+  for (const auto &P : Fns) {
+    std::string FileName = P.first.FileName;
+    std::string FunctionName = P.second;
+
+    if (LastFileName != FileName)
+      ProcessedFunctions.clear();
+    LastFileName = FileName;
+
+    if (!ProcessedFunctions.insert(FunctionName).second)
+      continue;
+
+    result.insert(FunctionLoc{P.first, P.second});
+  }
+
+  return result;
+}
+
+// Locate __sanitizer_cov function address.
+static uint64_t findSanitizerCovFunction(const object::ObjectFile &O) {
+  for (const object::SymbolRef &Symbol : O.symbols()) {
+    ErrorOr<uint64_t> AddressOrErr = Symbol.getAddress();
+    FailIfError(AddressOrErr);
+
+    ErrorOr<StringRef> Name = Symbol.getName();
+    FailIfError(Name);
+
+    if (Name.get() == "__sanitizer_cov") {
+      return AddressOrErr.get();
+    }
+  }
+  FailIfNotEmpty("__sanitizer_cov not found");
+  return 0; // not reachable.
+}
+
+// Locate addresses of all coverage points in a file. Coverage point
+// is defined as the 'address of instruction following __sanitizer_cov
+// call - 1'.
+static void getObjectCoveragePoints(const object::ObjectFile &O,
+                                    std::set<uint64_t> *Addrs) {
+  Triple TheTriple("unknown-unknown-unknown");
+  TheTriple.setArch(Triple::ArchType(O.getArch()));
+  auto TripleName = TheTriple.getTriple();
+
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+  FailIfNotEmpty(Error);
+
+  std::unique_ptr<const MCSubtargetInfo> STI(
+      TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+  FailIfEmpty(STI, "no subtarget info for target " + TripleName);
+
+  std::unique_ptr<const MCRegisterInfo> MRI(
+      TheTarget->createMCRegInfo(TripleName));
+  FailIfEmpty(MRI, "no register info for target " + TripleName);
+
+  std::unique_ptr<const MCAsmInfo> AsmInfo(
+      TheTarget->createMCAsmInfo(*MRI, TripleName));
+  FailIfEmpty(AsmInfo, "no asm info for target " + TripleName);
+
+  std::unique_ptr<const MCObjectFileInfo> MOFI(new MCObjectFileInfo);
+  MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
+  std::unique_ptr<MCDisassembler> DisAsm(
+      TheTarget->createMCDisassembler(*STI, Ctx));
+  FailIfEmpty(DisAsm, "no disassembler info for target " + TripleName);
+
+  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
+  FailIfEmpty(MII, "no instruction info for target " + TripleName);
+
+  std::unique_ptr<const MCInstrAnalysis> MIA(
+      TheTarget->createMCInstrAnalysis(MII.get()));
+  FailIfEmpty(MIA, "no instruction analysis info for target " + TripleName);
+
+  uint64_t SanCovAddr = findSanitizerCovFunction(O);
+
+  for (const auto Section : O.sections()) {
+    if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same.
+      continue;
+    uint64_t SectionAddr = Section.getAddress();
+    uint64_t SectSize = Section.getSize();
+    if (!SectSize)
+      continue;
+
+    StringRef SectionName;
+    FailIfError(Section.getName(SectionName));
+
+    StringRef BytesStr;
+    FailIfError(Section.getContents(BytesStr));
+    ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
+                            BytesStr.size());
+
+    for (uint64_t Index = 0, Size = 0; Index < Section.getSize();
+         Index += Size) {
+      MCInst Inst;
+      if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
+                                  SectionAddr + Index, nulls(), nulls())) {
+        if (Size == 0)
+          Size = 1;
+        continue;
+      }
+      uint64_t Target;
+      if (MIA->isCall(Inst) &&
+          MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target)) {
+        if (Target == SanCovAddr) {
+          // Sanitizer coverage uses the address of the next instruction - 1.
+          Addrs->insert(Index + SectionAddr + Size - 1);
+        }
+      }
+    }
+  }
 }
 
-static std::string CommonPrefix(std::string A, std::string B) {
-  if (A.size() > B.size())
-    return std::string(B.begin(),
-                       std::mismatch(B.begin(), B.end(), A.begin()).first);
+static void getArchiveCoveragePoints(const object::Archive &A,
+                                     std::set<uint64_t> *Addrs) {
+  for (auto &ErrorOrChild : A.children()) {
+    FailIfError(ErrorOrChild);
+    const object::Archive::Child &C = *ErrorOrChild;
+    ErrorOr<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary();
+    FailIfError(ChildOrErr);
+    if (object::ObjectFile *O =
+            dyn_cast<object::ObjectFile>(&*ChildOrErr.get()))
+      getObjectCoveragePoints(*O, Addrs);
+    else
+      FailIfError(object::object_error::invalid_file_type);
+  }
+}
+
+// Locate addresses of all coverage points in a file. Coverage point
+// is defined as the 'address of instruction following __sanitizer_cov
+// call - 1'.
+std::set<uint64_t> getCoveragePoints(std::string FileName) {
+  std::set<uint64_t> Result;
+
+  ErrorOr<object::OwningBinary<object::Binary>> BinaryOrErr =
+      object::createBinary(FileName);
+  FailIfError(BinaryOrErr);
+
+  object::Binary &Binary = *BinaryOrErr.get().getBinary();
+  if (object::Archive *A = dyn_cast<object::Archive>(&Binary))
+    getArchiveCoveragePoints(*A, &Result);
+  else if (object::ObjectFile *O = dyn_cast<object::ObjectFile>(&Binary))
+    getObjectCoveragePoints(*O, &Result);
   else
-    return std::string(A.begin(),
-                       std::mismatch(A.begin(), A.end(), B.begin()).first);
+    FailIfError(object::object_error::invalid_file_type);
+
+  return Result;
+}
+
+static void printFunctionLocs(const std::set<FunctionLoc> &FnLocs,
+                              raw_ostream &OS) {
+  for (const FunctionLoc &FnLoc : FnLocs) {
+    OS << stripPathPrefix(FnLoc.Loc.FileName) << ":" << FnLoc.Loc.Line << " "
+       << FnLoc.FunctionName << "\n";
+  }
 }
 
 class CoverageData {
@@ -116,7 +366,7 @@ class CoverageData {
       return make_error_code(errc::illegal_byte_sequence);
     }
 
-    auto Addrs = llvm::make_unique<std::vector<uint64_t>>();
+    auto Addrs = llvm::make_unique<std::set<uint64_t>>();
 
     switch (Header->Bitness) {
     case Bitness64:
@@ -138,15 +388,12 @@ class CoverageData {
   // Merge multiple coverage data together.
   static std::unique_ptr<CoverageData>
   merge(const std::vector<std::unique_ptr<CoverageData>> &Covs) {
-    std::set<uint64_t> Addrs;
+    auto Addrs = llvm::make_unique<std::set<uint64_t>>();
 
     for (const auto &Cov : Covs)
-      Addrs.insert(Cov->Addrs->begin(), Cov->Addrs->end());
+      Addrs->insert(Cov->Addrs->begin(), Cov->Addrs->end());
 
-    auto AddrsVector = llvm::make_unique<std::vector<uint64_t>>(
-        Addrs.begin(), Addrs.end());
-    return std::unique_ptr<CoverageData>(
-        new CoverageData(std::move(AddrsVector)));
+    return std::unique_ptr<CoverageData>(new CoverageData(std::move(Addrs)));
   }
 
   // Read list of files and merges their coverage info.
@@ -163,83 +410,39 @@ class CoverageData {
   }
 
   // Print coverage addresses.
-  void printAddrs(raw_ostream &out) {
+  void printAddrs(raw_ostream &OS) {
     for (auto Addr : *Addrs) {
-      out << "0x";
-      out.write_hex(Addr);
-      out << "\n";
+      OS << "0x";
+      OS.write_hex(Addr);
+      OS << "\n";
     }
   }
 
   // Print list of covered functions.
   // Line format: <file_name>:<line> <function_name>
-  void printCoveredFunctions(raw_ostream &out) {
-    if (Addrs->empty())
-      return;
-    symbolize::LLVMSymbolizer::Options SymbolizerOptions;
-    SymbolizerOptions.Demangle = ClDemangle;
-    symbolize::LLVMSymbolizer Symbolizer(SymbolizerOptions);
-
-    struct FileLoc {
-      std::string FileName;
-      uint32_t Line;
-      bool operator<(const FileLoc &Rhs) const {
-        return std::tie(FileName, Line) < std::tie(Rhs.FileName, Rhs.Line);
-      }
-    };
-
-    // FileLoc -> FunctionName
-    std::map<FileLoc, std::string> Fns;
-
-    // Fill in Fns map.
-    for (auto Addr : *Addrs) {
-      auto InliningInfo = Symbolizer.symbolizeInlinedCode(ClBinaryName, Addr);
-      FailIfError(InliningInfo);
-      for (uint32_t i = 0; i < InliningInfo->getNumberOfFrames(); ++i) {
-        auto FrameInfo = InliningInfo->getFrame(i);
-        SmallString<256> FileName(FrameInfo.FileName);
-        sys::path::remove_dots(FileName, /* remove_dot_dot */ true);
-        FileLoc Loc = { FileName.str(), FrameInfo.Line };
-        Fns[Loc] = FrameInfo.FunctionName;
-      }
-    }
-
-    // Compute file names common prefix.
-    std::string FilePrefix = Fns.begin()->first.FileName;
-    for (const auto &P : Fns)
-      FilePrefix = CommonPrefix(FilePrefix, P.first.FileName);
-
-    // Print first function occurence in a file.
-    {
-      std::string LastFileName;
-      std::set<std::string> ProcessedFunctions;
-
-      for (const auto &P : Fns) {
-        std::string FileName = P.first.FileName;
-        std::string FunctionName = P.second;
-        uint32_t Line = P.first.Line;
-
-        if (LastFileName != FileName)
-          ProcessedFunctions.clear();
-        LastFileName = FileName;
-
-        if (!ProcessedFunctions.insert(FunctionName).second)
-          continue;
-
-        // Don't strip prefix if we only have a single file.
-        if (FileName.size() > FilePrefix.size())
-          FileName = FileName.substr(FilePrefix.size());
+  void printCoveredFunctions(raw_ostream &OS) {
+    printFunctionLocs(computeFunctionLocs(*Addrs), OS);
+  }
 
-        out << FileName << ":" << Line << " " << FunctionName << "\n";
-      }
-    }
+  // Print list of not covered functions.
+  // Line format: <file_name>:<line> <function_name>
+  void printNotCoveredFunctions(raw_ostream &OS) {
+    std::set<FunctionLoc> AllFns =
+        computeFunctionLocs(getCoveragePoints(ClBinaryName));
+    std::set<FunctionLoc> CoveredFns = computeFunctionLocs(*Addrs);
+
+    std::set<FunctionLoc> NotCoveredFns;
+    std::set_difference(AllFns.begin(), AllFns.end(), CoveredFns.begin(),
+                        CoveredFns.end(),
+                        std::inserter(NotCoveredFns, NotCoveredFns.end()));
+    printFunctionLocs(NotCoveredFns, OS);
   }
 
- private:
-  explicit CoverageData(std::unique_ptr<std::vector<uint64_t>> Addrs)
+private:
+  explicit CoverageData(std::unique_ptr<std::set<uint64_t>> Addrs)
       : Addrs(std::move(Addrs)) {}
 
-  std::unique_ptr<std::vector<uint64_t>> Addrs;
+  std::unique_ptr<std::set<uint64_t>> Addrs;
 };
 } // namespace
 
@@ -249,6 +452,10 @@ int main(int argc, char **argv) {
   PrettyStackTraceProgram X(argc, argv);
   llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
 
+  llvm::InitializeAllTargetInfos();
+  llvm::InitializeAllTargetMCs();
+  llvm::InitializeAllDisassemblers();
+
   cl::ParseCommandLineOptions(argc, argv, "Sanitizer Coverage Processing Tool");
 
   auto CovData = CoverageData::readAndMerge(ClInputFiles);
@@ -263,5 +470,11 @@ int main(int argc, char **argv) {
     CovData.get()->printCoveredFunctions(outs());
     return 0;
   }
+  case NotCoveredFunctionsAction: {
+    CovData.get()->printNotCoveredFunctions(outs());
+    return 0;
+  }
   }
+
+  llvm_unreachable("unsupported action");
 }