InstrProf: Do a better job of reading coverage mapping data.
author Justin Bogner <mail@justinbogner.com>
Mon, 16 Mar 2015 06:55:45 +0000 (06:55 +0000)
committer Justin Bogner <mail@justinbogner.com>
Mon, 16 Mar 2015 06:55:45 +0000 (06:55 +0000)
This code was casting regions of a memory buffer to a couple of
different structs. This is wrong in a few ways:

1. It breaks aliasing rules.
2. If the buffer isn't aligned, it hits undefined behaviour.
3. It completely ignores endianness differences.
4. The structs defined for this don't specify their padding properly,
   so they don't even represent the data correctly on some
   platforms.

This commit is mostly NFC, except that it fixes reading coverage for
32 bit binaries as a side effect of getting rid of the mispadded
structs. I've included a test for that.

I've also made it more explicit that we only handle little-endian data,
since that was already true in practice. I'll fix this to handle
endianness properly in a followup commit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232346 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Support/Endian.h
lib/ProfileData/CoverageMappingReader.cpp
test/tools/llvm-cov/Inputs/binary-formats.macho32l [new file with mode: 0755]
test/tools/llvm-cov/Inputs/binary-formats.macho64l [new file with mode: 0755]
test/tools/llvm-cov/Inputs/binary-formats.proftext [new file with mode: 0644]
test/tools/llvm-cov/binary-formats.c [new file with mode: 0644]
test/tools/llvm-cov/lit.local.cfg

index 17ae651b6b406a78887d654e6ba2589a2ddefc97..e9fe22e5eda828a050fe10b8d09e10953a2e87dd 100644 (file)
@@ -58,8 +58,9 @@ inline value_type read(const void *memory) {
 
 /// Read a value of a particular endianness from a buffer, and increment the
 /// buffer past that value.
-template<typename value_type, endianness endian, std::size_t alignment>
-inline value_type readNext(const unsigned char *&memory) {
+template<typename value_type, endianness endian, std::size_t alignment,
+         typename CharT>
+inline value_type readNext(const CharT *&memory) {
   value_type ret = read<value_type, endian, alignment>(memory);
   memory += sizeof(value_type);
   return ret;
index 3f8f76f60949975a305333cae35c596c7c96e018..fde6874fe404ac4ab0e0b5aa235862267c851a2d 100644 (file)
@@ -17,6 +17,7 @@
 #include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/LEB128.h"
 
 using namespace llvm;
@@ -288,24 +289,6 @@ std::error_code RawCoverageMappingReader::read() {
 }
 
 namespace {
-/// \brief The coverage mapping data for a single function.
-/// It points to the function's name.
-template <typename IntPtrT> struct CoverageMappingFunctionRecord {
-  IntPtrT FunctionNamePtr;
-  uint32_t FunctionNameSize;
-  uint32_t CoverageMappingSize;
-  uint64_t FunctionHash;
-};
-
-/// \brief The coverage mapping data for a single translation unit.
-/// It points to the array of function coverage mapping records and the encoded
-/// filenames array.
-template <typename IntPtrT> struct CoverageMappingTURecord {
-  uint32_t FunctionRecordsSize;
-  uint32_t FilenamesSize;
-  uint32_t CoverageMappingsSize;
-  uint32_t Version;
-};
 
 /// \brief A helper structure to access the data from a section
 /// in an object file.
@@ -337,72 +320,71 @@ std::error_code readCoverageMappingData(
     SectionData &ProfileNames, StringRef Data,
     std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records,
     std::vector<StringRef> &Filenames) {
+  using namespace support;
   llvm::DenseSet<T> UniqueFunctionMappingData;
 
   // Read the records in the coverage data section.
-  while (!Data.empty()) {
-    if (Data.size() < sizeof(CoverageMappingTURecord<T>))
+  for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) {
+    if (Buf + 4 * sizeof(uint32_t) > End)
       return instrprof_error::malformed;
-    auto TU = reinterpret_cast<const CoverageMappingTURecord<T> *>(Data.data());
-    Data = Data.substr(sizeof(CoverageMappingTURecord<T>));
-    switch (TU->Version) {
+    uint32_t NRecords = endian::readNext<uint32_t, little, unaligned>(Buf);
+    uint32_t FilenamesSize = endian::readNext<uint32_t, little, unaligned>(Buf);
+    uint32_t CoverageSize = endian::readNext<uint32_t, little, unaligned>(Buf);
+    uint32_t Version = endian::readNext<uint32_t, little, unaligned>(Buf);
+
+    switch (Version) {
     case CoverageMappingVersion1:
       break;
     default:
       return instrprof_error::unsupported_version;
     }
-    auto Version = CoverageMappingVersion(TU->Version);
 
-    // Get the function records.
-    auto FunctionRecords =
-        reinterpret_cast<const CoverageMappingFunctionRecord<T> *>(Data.data());
-    if (Data.size() <
-        sizeof(CoverageMappingFunctionRecord<T>) * TU->FunctionRecordsSize)
-      return instrprof_error::malformed;
-    Data = Data.substr(sizeof(CoverageMappingFunctionRecord<T>) *
-                       TU->FunctionRecordsSize);
+    // Skip past the function records, saving the start and end for later.
+    const char *FunBuf = Buf;
+    Buf += NRecords * (sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t));
+    const char *FunEnd = Buf;
 
     // Get the filenames.
-    if (Data.size() < TU->FilenamesSize)
+    if (Buf + FilenamesSize > End)
       return instrprof_error::malformed;
-    auto RawFilenames = Data.substr(0, TU->FilenamesSize);
-    Data = Data.substr(TU->FilenamesSize);
     size_t FilenamesBegin = Filenames.size();
-    RawCoverageFilenamesReader Reader(RawFilenames, Filenames);
+    RawCoverageFilenamesReader Reader(StringRef(Buf, FilenamesSize), Filenames);
     if (auto Err = Reader.read())
       return Err;
+    Buf += FilenamesSize;
 
-    // Get the coverage mappings.
-    if (Data.size() < TU->CoverageMappingsSize)
+    // We'll read the coverage mapping records in the loop below.
+    const char *CovBuf = Buf;
+    Buf += CoverageSize;
+    const char *CovEnd = Buf;
+    if (Buf > End)
       return instrprof_error::malformed;
-    auto CoverageMappings = Data.substr(0, TU->CoverageMappingsSize);
-    Data = Data.substr(TU->CoverageMappingsSize);
 
-    for (unsigned I = 0; I < TU->FunctionRecordsSize; ++I) {
-      auto &MappingRecord = FunctionRecords[I];
+    while (FunBuf < FunEnd) {
+      // Read the function information
+      T NamePtr = endian::readNext<T, little, unaligned>(FunBuf);
+      uint32_t NameSize = endian::readNext<uint32_t, little, unaligned>(FunBuf);
+      uint32_t DataSize = endian::readNext<uint32_t, little, unaligned>(FunBuf);
+      uint64_t FuncHash = endian::readNext<uint64_t, little, unaligned>(FunBuf);
 
-      // Get the coverage mapping.
-      if (CoverageMappings.size() < MappingRecord.CoverageMappingSize)
+      // Now use that to read the coverage data.
+      if (CovBuf + DataSize > CovEnd)
         return instrprof_error::malformed;
-      auto Mapping =
-          CoverageMappings.substr(0, MappingRecord.CoverageMappingSize);
-      CoverageMappings =
-          CoverageMappings.substr(MappingRecord.CoverageMappingSize);
+      auto Mapping = StringRef(CovBuf, DataSize);
+      CovBuf += DataSize;
 
       // Ignore this record if we already have a record that points to the same
-      // function name.
-      // This is useful to ignore the redundant records for the functions
-      // with ODR linkage.
-      if (!UniqueFunctionMappingData.insert(MappingRecord.FunctionNamePtr)
-               .second)
+      // function name. This is useful to ignore the redundant records for the
+      // functions with ODR linkage.
+      if (!UniqueFunctionMappingData.insert(NamePtr).second)
         continue;
-      StringRef FunctionName;
-      if (auto Err =
-              ProfileNames.get(MappingRecord.FunctionNamePtr,
-                               MappingRecord.FunctionNameSize, FunctionName))
-        return Err;
+
+      // Finally, grab the name and create a record.
+      StringRef FuncName;
+      if (std::error_code EC = ProfileNames.get(NamePtr, NameSize, FuncName))
+        return EC;
       Records.push_back(BinaryCoverageReader::ProfileMappingRecord(
-          Version, FunctionName, MappingRecord.FunctionHash, Mapping,
+          CoverageMappingVersion(Version), FuncName, FuncHash, Mapping,
           FilenamesBegin, Filenames.size() - FilenamesBegin));
     }
   }
diff --git a/test/tools/llvm-cov/Inputs/binary-formats.macho32l b/test/tools/llvm-cov/Inputs/binary-formats.macho32l
new file mode 100755 (executable)
index 0000000..2dd4c44
Binary files /dev/null and b/test/tools/llvm-cov/Inputs/binary-formats.macho32l differ
diff --git a/test/tools/llvm-cov/Inputs/binary-formats.macho64l b/test/tools/llvm-cov/Inputs/binary-formats.macho64l
new file mode 100755 (executable)
index 0000000..0045c43
Binary files /dev/null and b/test/tools/llvm-cov/Inputs/binary-formats.macho64l differ
diff --git a/test/tools/llvm-cov/Inputs/binary-formats.proftext b/test/tools/llvm-cov/Inputs/binary-formats.proftext
new file mode 100644 (file)
index 0000000..97816c2
--- /dev/null
@@ -0,0 +1,4 @@
+main
+0x0
+1
+100
diff --git a/test/tools/llvm-cov/binary-formats.c b/test/tools/llvm-cov/binary-formats.c
new file mode 100644 (file)
index 0000000..4140ff8
--- /dev/null
@@ -0,0 +1,11 @@
+// Checks for reading various formats.
+
+// CHECK: 100| [[@LINE+1]]|int main
+int main(int argc, const char *argv[]) {}
+
+// RUN: llvm-profdata merge %S/Inputs/binary-formats.proftext -o %t.profdata
+// RUN: llvm-cov show %S/Inputs/binary-formats.macho32l -instr-profile %t.profdata -no-colors -filename-equivalence %s | FileCheck %s
+// RUN: llvm-cov show %S/Inputs/binary-formats.macho64l -instr-profile %t.profdata -no-colors -filename-equivalence %s | FileCheck %s
+
+// llvm-cov doesn't work on big endian yet
+// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
index 5510deda06ca6a67b08f931ad5dadb88b10907a3..05aef8645e4d577e41780693766b7a0187236050 100644 (file)
@@ -1,5 +1 @@
 config.suffixes = ['.test', '.m', '.cpp', '.c']
-
-# http://llvm.org/bugs/show_bug.cgi?id=20979
-if 'ubsan' in config.available_features:
-  config.unsupported = True