[llvm-objdump] support -rebase option for mach-o to dump rebasing info
authorNick Kledzik <kledzik@apple.com>
Fri, 12 Sep 2014 21:34:15 +0000 (21:34 +0000)
committerNick Kledzik <kledzik@apple.com>
Fri, 12 Sep 2014 21:34:15 +0000 (21:34 +0000)
Similar to my previous -exports-trie option, the -rebase option dumps info from
the LC_DYLD_INFO load command. The rebasing info is a list of the the locations
that dyld needs to adjust if a mach-o image is not loaded at its preferred
address. Since ASLR is now the default, images almost never load at their
preferred address, and thus need to be rebased by dyld.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217709 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Object/MachO.h
lib/Object/MachOObjectFile.cpp
test/tools/llvm-objdump/Inputs/rebase.macho-x86_64 [new file with mode: 0755]
test/tools/llvm-objdump/macho-rebase.test [new file with mode: 0644]
tools/llvm-objdump/MachODump.cpp
tools/llvm-objdump/llvm-objdump.cpp
tools/llvm-objdump/llvm-objdump.h

index dddc9c1c8c650501fdb0e423bffac37fbbf7e29b..29f737571fb323b28c7c1f09d843abddf63af428 100644 (file)
@@ -100,6 +100,42 @@ private:
 };
 typedef content_iterator<ExportEntry> export_iterator;
 
+/// MachORebaseEntry encapsulates the current state in the decompression of   
+/// rebasing opcodes. This allows you to iterate through the compressed table of
+/// rebasing using:
+///    for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable()) {
+///    }
+class MachORebaseEntry {
+public:
+  MachORebaseEntry(ArrayRef<uint8_t> opcodes, bool is64Bit);
+
+  uint32_t segmentIndex() const;
+  uint64_t segmentOffset() const;
+  StringRef typeName() const;
+
+  bool operator==(const MachORebaseEntry &) const;
+
+  void moveNext();
+  
+private:
+  friend class MachOObjectFile;
+  void moveToFirst();
+  void moveToEnd();
+  uint64_t readULEB128();
+
+  ArrayRef<uint8_t> Opcodes;
+  const uint8_t *Ptr;
+  uint64_t SegmentOffset;
+  uint32_t SegmentIndex;
+  uint64_t RemainingLoopCount;
+  uint64_t AdvanceAmount;
+  uint8_t  RebaseType;
+  uint8_t  PointerSize;
+  bool     Malformed;
+  bool     Done;
+};
+typedef content_iterator<MachORebaseEntry> rebase_iterator;
+
 class MachOObjectFile : public ObjectFile {
 public:
   struct LoadCommandInfo {
@@ -202,6 +238,13 @@ public:
   /// For use examining a trie not in a MachOObjectFile.
   static iterator_range<export_iterator> exports(ArrayRef<uint8_t> Trie);
 
+  /// For use iterating over all rebase table entries.
+  iterator_range<rebase_iterator> rebaseTable() const;
+
+  /// For use examining rebase opcodes not in a MachOObjectFile.
+  static iterator_range<rebase_iterator> rebaseTable(ArrayRef<uint8_t> Opcodes,
+                                                     bool is64);
+
   // In a MachO file, sections have a segment name. This is used in the .o
   // files. They have a single segment, but this field specifies which segment
   // a section should be put in in the final object.
index c6815ea27d24cfd02f8289a5a872cfc10c6cf2b5..7e7e8c493154e4f46139ca515942da8bb7b24bb5 100644 (file)
@@ -17,6 +17,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/LEB128.h"
@@ -1688,6 +1689,177 @@ iterator_range<export_iterator> MachOObjectFile::exports() const {
 }
 
 
+MachORebaseEntry::MachORebaseEntry(ArrayRef<uint8_t> Bytes, bool is64Bit)
+    : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0),
+      RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0),
+      PointerSize(is64Bit ? 8 : 4), Malformed(false), Done(false) {}
+
+void MachORebaseEntry::moveToFirst() {
+  Ptr = Opcodes.begin();
+  moveNext();
+}
+
+void MachORebaseEntry::moveToEnd() {
+  Ptr = Opcodes.end();
+  RemainingLoopCount = 0;
+  Done = true;
+}
+
+void MachORebaseEntry::moveNext() {
+  // If in the middle of some loop, move to next rebasing in loop.
+  SegmentOffset += AdvanceAmount;
+  if (RemainingLoopCount) {
+    --RemainingLoopCount;
+    return;
+  }
+  if (Ptr == Opcodes.end()) {
+    Done = true;
+    return;
+  }
+  bool More = true;
+  while (More && !Malformed) {
+    // Parse next opcode and set up next loop.
+    uint8_t Byte = *Ptr++;
+    uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK;
+    uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK;
+    switch (Opcode) {
+    case MachO::REBASE_OPCODE_DONE:
+      More = false;
+      Done = true;
+      moveToEnd();
+      DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DONE\n");
+      break;
+    case MachO::REBASE_OPCODE_SET_TYPE_IMM:
+      RebaseType = ImmValue;
+      DEBUG_WITH_TYPE(
+          "mach-o-rebase",
+          llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: "
+                       << "RebaseType=" << (int) RebaseType << "\n");
+      break;
+    case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
+      SegmentIndex = ImmValue;
+      SegmentOffset = readULEB128();
+      DEBUG_WITH_TYPE(
+          "mach-o-rebase",
+          llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
+                       << "SegmentIndex=" << SegmentIndex << ", "
+                       << format("SegmentOffset=0x%06X", SegmentOffset)
+                       << "\n");
+      break;
+    case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
+      SegmentOffset += readULEB128();
+      DEBUG_WITH_TYPE("mach-o-rebase",
+                      llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: "
+                                   << format("SegmentOffset=0x%06X",
+                                             SegmentOffset) << "\n");
+      break;
+    case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
+      SegmentOffset += ImmValue * PointerSize;
+      DEBUG_WITH_TYPE("mach-o-rebase",
+                      llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: "
+                                   << format("SegmentOffset=0x%06X",
+                                             SegmentOffset) << "\n");
+      break;
+    case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES:
+      AdvanceAmount = PointerSize;
+      RemainingLoopCount = ImmValue - 1;
+      DEBUG_WITH_TYPE(
+          "mach-o-rebase",
+          llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: "
+                       << format("SegmentOffset=0x%06X", SegmentOffset)
+                       << ", AdvanceAmount=" << AdvanceAmount
+                       << ", RemainingLoopCount=" << RemainingLoopCount
+                       << "\n");
+      return;
+    case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
+      AdvanceAmount = PointerSize;
+      RemainingLoopCount = readULEB128() - 1;
+      DEBUG_WITH_TYPE(
+          "mach-o-rebase",
+          llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: "
+                       << format("SegmentOffset=0x%06X", SegmentOffset)
+                       << ", AdvanceAmount=" << AdvanceAmount
+                       << ", RemainingLoopCount=" << RemainingLoopCount
+                       << "\n");
+      return;
+    case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
+      AdvanceAmount = readULEB128() + PointerSize;
+      RemainingLoopCount = 0;
+      DEBUG_WITH_TYPE(
+          "mach-o-rebase",
+          llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: "
+                       << format("SegmentOffset=0x%06X", SegmentOffset)
+                       << ", AdvanceAmount=" << AdvanceAmount
+                       << ", RemainingLoopCount=" << RemainingLoopCount
+                       << "\n");
+      return;
+    case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
+      RemainingLoopCount = readULEB128() - 1;
+      AdvanceAmount = readULEB128() + PointerSize;
+      DEBUG_WITH_TYPE(
+          "mach-o-rebase",
+          llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: "
+                       << format("SegmentOffset=0x%06X", SegmentOffset)
+                       << ", AdvanceAmount=" << AdvanceAmount
+                       << ", RemainingLoopCount=" << RemainingLoopCount
+                       << "\n");
+      return;
+    default:
+      Malformed = true;
+    }
+  }
+}
+
+uint64_t MachORebaseEntry::readULEB128() {
+  unsigned Count;
+  uint64_t Result = decodeULEB128(Ptr, &Count);
+  Ptr += Count;
+  if (Ptr > Opcodes.end()) {
+    Ptr = Opcodes.end();
+    Malformed = true;
+  }
+  return Result;
+}
+
+uint32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; }
+
+uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; }
+
+StringRef MachORebaseEntry::typeName() const {
+  switch (RebaseType) {
+  case MachO::REBASE_TYPE_POINTER:
+    return "pointer";
+  case MachO::REBASE_TYPE_TEXT_ABSOLUTE32:
+    return "text abs32";
+  case MachO::REBASE_TYPE_TEXT_PCREL32:
+    return "text rel32";
+  }
+  return "unknown";
+}
+
+bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const {
+  assert(Opcodes == Other.Opcodes && "compare iterators of different files");
+  return (Ptr == Other.Ptr) &&
+         (RemainingLoopCount == Other.RemainingLoopCount) &&
+         (Done == Other.Done);
+}
+
+iterator_range<rebase_iterator>
+MachOObjectFile::rebaseTable(ArrayRef<uint8_t> Opcodes, bool is64) {
+  MachORebaseEntry Start(Opcodes, is64);
+  Start.moveToFirst();
+
+  MachORebaseEntry Finish(Opcodes, is64);
+  Finish.moveToEnd();
+
+  return iterator_range<rebase_iterator>(rebase_iterator(Start),
+                                         rebase_iterator(Finish));
+}
+
+iterator_range<rebase_iterator> MachOObjectFile::rebaseTable() const {
+  return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit());
+}
+
 StringRef
 MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
   ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
diff --git a/test/tools/llvm-objdump/Inputs/rebase.macho-x86_64 b/test/tools/llvm-objdump/Inputs/rebase.macho-x86_64
new file mode 100755 (executable)
index 0000000..05062d8
Binary files /dev/null and b/test/tools/llvm-objdump/Inputs/rebase.macho-x86_64 differ
diff --git a/test/tools/llvm-objdump/macho-rebase.test b/test/tools/llvm-objdump/macho-rebase.test
new file mode 100644 (file)
index 0000000..98e7f32
--- /dev/null
@@ -0,0 +1,15 @@
+# RUN: llvm-objdump -macho -rebase -arch x86_64 \
+# RUN:   %p/Inputs/rebase.macho-x86_64 2>/dev/null | FileCheck %s
+
+
+# CHECK: segment  section            address     type
+# CHECK: __DATA   __data             0x00001010  pointer
+# CHECK: __DATA   __data             0x00001028  pointer
+# CHECK: __DATA   __data             0x00001030  pointer
+# CHECK: __DATA   __data             0x00001038  pointer
+# CHECK: __DATA   __data             0x00001040  pointer
+# CHECK: __DATA   __data             0x00001258  pointer
+# CHECK: __DATA   __mystuff          0x00001278  pointer
+# CHECK: __DATA   __mystuff          0x00001288  pointer
+# CHECK: __DATA   __mystuff          0x00001298  pointer
+# CHECK: __DATA   __mystuff          0x000012A8  pointer
index 34933eb70715e959755184bfd0d3b2f71ccf38cb..debe33e9cfa8c4fee830b77d49da89ae3982026e 100644 (file)
@@ -2146,3 +2146,109 @@ void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) {
     outs() << "\n";
   }
 }
+
+
+//===----------------------------------------------------------------------===//
+// rebase table dumping
+//===----------------------------------------------------------------------===//
+
+namespace {
+class SegInfo {
+public:
+  SegInfo(const object::MachOObjectFile *Obj);
+
+  StringRef segmentName(uint32_t SegIndex);
+  StringRef sectionName(uint32_t SegIndex, uint64_t SegOffset);
+  uint64_t address(uint32_t SegIndex, uint64_t SegOffset);
+
+private:
+  struct SectionInfo {
+    uint64_t Address;
+    uint64_t Size;
+    StringRef SectionName;
+    StringRef SegmentName;
+    uint64_t OffsetInSegment;
+    uint64_t SegmentStartAddress;
+    uint32_t SegmentIndex;
+  };
+  const SectionInfo &findSection(uint32_t SegIndex, uint64_t SegOffset);
+  SmallVector<SectionInfo, 32> Sections;
+};
+}
+
+SegInfo::SegInfo(const object::MachOObjectFile *Obj) {
+  // Build table of sections so segIndex/offset pairs can be translated.
+  uint32_t CurSegIndex = 0;
+  StringRef CurSegName;
+  uint64_t CurSegAddress;
+  for (const SectionRef &Section : Obj->sections()) {
+    SectionInfo Info;
+    if (error(Section.getName(Info.SectionName)))
+      return;
+    if (error(Section.getAddress(Info.Address)))
+      return;
+    if (error(Section.getSize(Info.Size)))
+      return;
+    Info.SegmentName =
+        Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl());
+    if (!Info.SegmentName.equals(CurSegName)) {
+      ++CurSegIndex;
+      CurSegName = Info.SegmentName;
+      CurSegAddress = Info.Address;
+    }
+    Info.SegmentIndex = CurSegIndex - 1;
+    Info.OffsetInSegment = Info.Address - CurSegAddress;
+    Info.SegmentStartAddress = CurSegAddress;
+    Sections.push_back(Info);
+  }
+}
+
+StringRef SegInfo::segmentName(uint32_t SegIndex) {
+  for (const SectionInfo &SI : Sections) {
+    if (SI.SegmentIndex == SegIndex)
+      return SI.SegmentName;
+  }
+  llvm_unreachable("invalid segIndex");
+}
+
+const SegInfo::SectionInfo &SegInfo::findSection(uint32_t SegIndex,
+                                                 uint64_t OffsetInSeg) {
+  for (const SectionInfo &SI : Sections) {
+    if (SI.SegmentIndex != SegIndex)
+      continue;
+    if (SI.OffsetInSegment > OffsetInSeg)
+      continue;
+    if (OffsetInSeg >= (SI.OffsetInSegment + SI.Size))
+      continue;
+    return SI;
+  }
+  llvm_unreachable("segIndex and offset not in any section");
+}
+
+StringRef SegInfo::sectionName(uint32_t SegIndex, uint64_t OffsetInSeg) {
+  return findSection(SegIndex, OffsetInSeg).SectionName;
+}
+
+uint64_t SegInfo::address(uint32_t SegIndex, uint64_t OffsetInSeg) {
+  const SectionInfo &SI = findSection(SegIndex, OffsetInSeg);
+  return SI.SegmentStartAddress + OffsetInSeg;
+}
+
+void llvm::printMachORebaseTable(const object::MachOObjectFile *Obj) {
+  // Build table of sections so names can used in final output.
+  SegInfo sectionTable(Obj);
+
+  outs() << "segment  section            address     type\n";
+  for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable()) {
+    uint32_t SegIndex = Entry.segmentIndex();
+    uint64_t OffsetInSeg = Entry.segmentOffset();
+    StringRef SegmentName = sectionTable.segmentName(SegIndex);
+    StringRef SectionName = sectionTable.sectionName(SegIndex, OffsetInSeg);
+    uint64_t Address = sectionTable.address(SegIndex, OffsetInSeg);
+
+    // Table lines look like: __DATA  __nl_symbol_ptr  0x0000F00C  pointer
+    outs() << format("%-8s %-18s 0x%08X  %s\n", SegmentName.str().c_str(),
+                     SectionName.str().c_str(), Address,
+                     Entry.typeName().str().c_str());
+  }
+}
index 7debbe9a961bf91cf66d3ec39d501e2b89c76f0d..2b70167d135338afe3759a4987b071e01b03d92b 100644 (file)
@@ -81,6 +81,9 @@ SymbolTable("t", cl::desc("Display the symbol table"));
 static cl::opt<bool>
 ExportsTrie("exports-trie", cl::desc("Display mach-o exported symbols"));
 
+static cl::opt<bool>
+Rebase("rebase", cl::desc("Display mach-o rebasing info"));
+
 static cl::opt<bool>
 MachOOpt("macho", cl::desc("Use MachO specific object file parser"));
 static cl::alias
@@ -720,6 +723,18 @@ static void printExportsTrie(const ObjectFile *o) {
   }
 }
 
+static void printRebaseTable(const ObjectFile *o) {
+  outs() << "Rebase table:\n";
+  if (const MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(o))
+    printMachORebaseTable(MachO);
+  else {
+    errs() << "This operation is only currently supported "
+              "for Mach-O executable files.\n";
+    return;
+  }
+}
+
+
 static void printPrivateFileHeader(const ObjectFile *o) {
   if (o->isELF()) {
     printELFFileHeader(o);
@@ -751,6 +766,8 @@ static void DumpObject(const ObjectFile *o) {
     printPrivateFileHeader(o);
   if (ExportsTrie)
     printExportsTrie(o);
+  if (Rebase)
+    printRebaseTable(o);
 }
 
 /// @brief Dump each object file in \a a;
@@ -833,7 +850,8 @@ int main(int argc, char **argv) {
       && !SymbolTable
       && !UnwindInfo
       && !PrivateHeaders
-      && !ExportsTrie) {
+      && !ExportsTrie
+      && !Rebase) {
     cl::PrintHelpMessage();
     return 2;
   }
index c259042c39dae14910c14639867f8aad23dd6cfb..a328e635ee00ee0ca5c8eb3394102c41cafc61c0 100644 (file)
@@ -36,6 +36,7 @@ void DisassembleInputMachO(StringRef Filename);
 void printCOFFUnwindInfo(const object::COFFObjectFile* o);
 void printMachOUnwindInfo(const object::MachOObjectFile* o);
 void printMachOExportsTrie(const object::MachOObjectFile* o);
+void printMachORebaseTable(const object::MachOObjectFile* o);
 void printELFFileHeader(const object::ObjectFile *o);
 void printCOFFFileHeader(const object::ObjectFile *o);
 void printMachOFileHeader(const object::ObjectFile *o);