Basic support for parsing Mach-O universal binaries in LLVMObject library
author Alexey Samsonov <samsonov@google.com>
Tue, 18 Jun 2013 15:03:28 +0000 (15:03 +0000)
committer Alexey Samsonov <samsonov@google.com>
Tue, 18 Jun 2013 15:03:28 +0000 (15:03 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184191 91177308-0d34-0410-b5e6-96231b3b80d8

17 files changed:
include/llvm/Object/Binary.h
include/llvm/Object/Error.h
include/llvm/Object/MachO.h
include/llvm/Object/MachOFormat.h
include/llvm/Object/MachOUniversal.h [new file with mode: 0644]
include/llvm/Support/FileSystem.h
lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
lib/Object/Binary.cpp
lib/Object/CMakeLists.txt
lib/Object/Error.cpp
lib/Object/MachOObjectFile.cpp
lib/Object/MachOUniversal.cpp [new file with mode: 0644]
lib/Object/ObjectFile.cpp
lib/Support/PathV2.cpp
test/Object/Inputs/macho-universal.x86_64.i386 [new file with mode: 0755]
test/Object/nm-universal-binary.test [new file with mode: 0644]
tools/llvm-nm/llvm-nm.cpp

index 78fcf6feb8519e663614ebbef7c189e91dd71612..a3f5625cc9b7fe2f42bac73f4617353cdd65b539 100644 (file)
@@ -38,6 +38,7 @@ protected:
 
   enum {
     ID_Archive,
+    ID_MachOUniversalBinary,
     // Object and children.
     ID_StartObjects,
     ID_COFF,
@@ -87,6 +88,10 @@ public:
     return TypeID == ID_Archive;
   }
 
+  bool isMachOUniversalBinary() const {  // True only when TypeID is exactly ID_MachOUniversalBinary.
+    return TypeID == ID_MachOUniversalBinary;
+  }
+
   bool isELF() const {
     return TypeID >= ID_ELF32L && TypeID <= ID_ELF64B;
   }
index 32b834f21543fe804ad8e305cf215d41843c5acd..8b0570b02f8b163f00b5274b832ffd8ff5c6c210 100644 (file)
@@ -24,6 +24,7 @@ const error_category &object_category();
 struct object_error {
   enum Impl {
     success = 0,
+    arch_not_found,
     invalid_file_type,
     parse_failed,
     unexpected_eof
index 1b9faaa9fd99f035acab2aac6aa02dd2fb9e0f93..50435d6fe227d9b5c74fe901f4078bf609ad5cc0 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Object/MachOFormat.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/MachO.h"
@@ -196,6 +197,8 @@ public:
   bool is64Bit() const;
   void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;
 
+  static Triple::ArchType getArch(uint32_t CPUType);
+
   static bool classof(const Binary *v) {
     return v->isMachO();
   }
index ffca391ea228780419db2dabb6f59e8db6efbce6..96ee8a766b871732e23dc2bec060294692eed3b3 100644 (file)
@@ -95,6 +95,8 @@ namespace macho {
   enum StructureSizes {
     Header32Size = 28,
     Header64Size = 32,
+    FatHeaderSize = 8,
+    FatArchHeaderSize = 20,
     SegmentLoadCommand32Size = 56,
     SegmentLoadCommand64Size = 72,
     Section32Size = 68,
@@ -130,6 +132,22 @@ namespace macho {
     uint32_t Reserved;
   };
 
+  /// \brief Header found at the very start of a universal (fat) object file.
+  struct FatHeader {
+    uint32_t Magic;  // Compared against HM_Universal by the universal binary parser.
+    uint32_t NumFatArch;  // Number of FatArchHeader records that follow this header.
+  };
+
+  /// \brief Header for a single-architecture object file in a
+  /// universal binary. One record per contained object.
+  struct FatArchHeader {
+    uint32_t CPUType;  // CPU type of the object; used to select an object by architecture.
+    uint32_t CPUSubtype;  // presumably the CPU subtype; not read by the parser shown in this change.
+    uint32_t Offset;  // Byte offset of the object from the start of the universal file.
+    uint32_t Size;  // Size of the object in bytes.
+    uint32_t Align;  // presumably the required alignment of the object; TODO confirm its encoding.
+  };
+
   // See <mach-o/loader.h>.
   enum HeaderFileType {
     HFT_Object = 0x1
diff --git a/include/llvm/Object/MachOUniversal.h b/include/llvm/Object/MachOUniversal.h
new file mode 100644 (file)
index 0000000..5743282
--- /dev/null
@@ -0,0 +1,102 @@
+//===- MachOUniversal.h - Mach-O universal binaries -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Mach-O fat/universal binaries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_MACHOUNIVERSAL_H
+#define LLVM_OBJECT_MACHOUNIVERSAL_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/MachOFormat.h"
+
+namespace llvm {
+namespace object {
+
+class ObjectFile;
+
+class MachOUniversalBinary : public Binary {  // Binary subclass for a Mach-O fat/universal file.
+  virtual void anchor();  // Defined out of line in MachOUniversal.cpp with an empty body.
+
+  uint32_t NumberOfObjects;  // Taken from the fat header's NumFatArch by the constructor.
+public:
+  class ObjectForArch {  // Describes one per-architecture object inside the parent binary.
+    const MachOUniversalBinary *Parent;  // Null once clear() has run.
+    /// \brief Index of object in the universal binary.
+    uint32_t Index;
+    /// \brief Descriptor of the object. Note that clear() does not reset it.
+    macho::FatArchHeader Header;
+
+  public:
+    ObjectForArch(const MachOUniversalBinary *Parent, uint32_t Index);
+
+    void clear() {  // Resets Parent and Index so the object compares equal to end_objects().
+      Parent = 0;
+      Index = 0;
+    }
+
+    bool operator==(const ObjectForArch &Other) const {  // Header is deliberately not compared.
+      return (Parent == Other.Parent) && (Index == Other.Index);
+    }
+
+    ObjectForArch getNext() const { return ObjectForArch(Parent, Index + 1); }  // The constructor decides whether Index + 1 is valid.
+    uint32_t getCPUType() const { return Header.CPUType; }  // Header is not filled in when the constructor rejects Parent/Index.
+
+    error_code getAsObjectFile(OwningPtr<ObjectFile> &Result) const;  // On success stores the parsed object in Result.
+  };
+
+  class object_iterator {  // Forward-only iterator that holds its ObjectForArch by value.
+    ObjectForArch Obj;
+  public:
+    object_iterator(const ObjectForArch &Obj) : Obj(Obj) {}  // Non-explicit: begin_objects()/end_objects() rely on this conversion.
+    const ObjectForArch* operator->() const {
+      return &Obj;
+    }
+
+    bool operator==(const object_iterator &Other) const {
+      return Obj == Other.Obj;
+    }
+    bool operator!=(const object_iterator &Other) const {
+      return !(*this == Other);
+    }
+
+    object_iterator& operator++() {  // Preincrement
+      Obj = Obj.getNext();
+      return *this;
+    }
+  };
+
+  MachOUniversalBinary(MemoryBuffer *Source, error_code &ec);  // Reports header validation failures through ec.
+
+  object_iterator begin_objects() const {  // Iterator at object index 0.
+    return ObjectForArch(this, 0);
+  }
+  object_iterator end_objects() const {  // Sentinel: null parent, index 0.
+    return ObjectForArch(0, 0);
+  }
+
+  uint32_t getNumberOfObjects() const { return NumberOfObjects; }
+
+  // Cast methods (support for isa<>/cast<>/dyn_cast<> on Binary pointers).
+  static inline bool classof(Binary const *V) {
+    return V->isMachOUniversalBinary();
+  }
+
+  error_code getObjectForArch(Triple::ArchType Arch,
+                              OwningPtr<ObjectFile> &Result) const;  // Returns arch_not_found when no contained object matches Arch.
+};
+
+}
+}
+
+#endif
index c1c1fc6ed6ef5630ca1feebb32fea0eb65fa5870..79f85539c0328e474a41ced1ace3cbb2795a985d 100644 (file)
@@ -199,6 +199,7 @@ struct file_magic {
     macho_bundle,             ///< Mach-O Bundle file
     macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub
     macho_dsym_companion,     ///< Mach-O dSYM companion file
+    macho_universal_binary,   ///< Mach-O universal binary
     coff_object,              ///< COFF object file
     pecoff_executable         ///< PECOFF executable file
   };
index f0bd4e34a866cb3dea7f8c0b43f8035b155913e9..ee5d7226f45f71c204482a0a49e7cb017cf10401 100644 (file)
@@ -527,6 +527,7 @@ ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) {
     case sys::fs::file_magic::archive:
     case sys::fs::file_magic::coff_object:
     case sys::fs::file_magic::pecoff_executable:
+    case sys::fs::file_magic::macho_universal_binary:
       report_fatal_error("Incompatible object format!");
     }
   } else {
index a1497708c1e8a06f5508dc637766134ec9b28db4..177c86c5465f55709a68e321cd7aedf2d57addb5 100644 (file)
@@ -20,6 +20,7 @@
 // Include headers for createBinary.
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/COFF.h"
+#include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/ObjectFile.h"
 
 using namespace llvm;
@@ -82,6 +83,12 @@ error_code object::createBinary(MemoryBuffer *Source,
       Result.swap(ret);
       return object_error::success;
     }
+    case sys::fs::file_magic::macho_universal_binary: {
+      OwningPtr<Binary> ret(new MachOUniversalBinary(scopedSource.take(), ec));
+      if (ec) return ec;
+      Result.swap(ret);
+      return object_error::success;
+    }
     case sys::fs::file_magic::coff_object:
     case sys::fs::file_magic::pecoff_executable: {
       OwningPtr<Binary> ret(
index cec0e283379a6528b0767ec43f422071454df1aa..2c2cc8e4fb6fdb86c4e0c685fe06cd3fb9c8344e 100644 (file)
@@ -7,6 +7,7 @@ add_llvm_library(LLVMObject
   ELFYAML.cpp
   Error.cpp
   MachOObjectFile.cpp
+  MachOUniversal.cpp
   Object.cpp
   ObjectFile.cpp
   YAML.cpp
index 7005a72d68b93e1590e767037f1e374fd52b91ac..47ce38c88839483313f928aaff2a4afe4d443481 100644 (file)
@@ -34,6 +34,8 @@ std::string _object_error_category::message(int ev) const {
   object_error::Impl E = static_cast<object_error::Impl>(ev);
   switch (E) {
   case object_error::success: return "Success";
+  case object_error::arch_not_found:
+    return "No object file for requested architecture";
   case object_error::invalid_file_type:
     return "The file was not recognized as a valid object file";
   case object_error::parse_failed:
index e62b5a48190d5c7040048182928e4ceffcd6eec3..12090d6c300b4faeb361ec06a432d04628b2255f 100644 (file)
@@ -1297,8 +1297,8 @@ StringRef MachOObjectFile::getFileFormatName() const {
   }
 }
 
-unsigned MachOObjectFile::getArch() const {
-  switch (getCPUType(this)) {
+Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
+  switch (CPUType) {
   case llvm::MachO::CPUTypeI386:
     return Triple::x86;
   case llvm::MachO::CPUTypeX86_64:
@@ -1314,6 +1314,10 @@ unsigned MachOObjectFile::getArch() const {
   }
 }
 
+/// Returns the architecture of this object file by mapping its CPU type
+/// through the static getArch(uint32_t) overload.
+unsigned MachOObjectFile::getArch() const {
+  const uint32_t CPUType = getCPUType(this);
+  return getArch(CPUType);
+}
+
 StringRef MachOObjectFile::getLoadName() const {
   // TODO: Implement
   report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
new file mode 100644 (file)
index 0000000..98f7198
--- /dev/null
@@ -0,0 +1,139 @@
+//===- MachOUniversal.cpp - Mach-O universal binary -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachOUniversalBinary class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/MachOUniversal.h"
+
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace object;
+
+/// Replaces \p Value with its byte-swapped representation.
+template<typename T>
+static void SwapValue(T &Value) {
+  const T Swapped = sys::SwapByteOrder(Value);
+  Value = Swapped;
+}
+
+template<typename T>
+static void SwapStruct(T &Value);  // Declaration only; each header type supplies its own explicit specialization.
+
+/// Byte-swaps every field of a fat header in place.
+template<>
+void SwapStruct(macho::FatHeader &H) {
+  uint32_t *const Fields[] = { &H.Magic, &H.NumFatArch };
+  for (unsigned I = 0; I != sizeof(Fields) / sizeof(Fields[0]); ++I)
+    SwapValue(*Fields[I]);
+}
+
+/// Byte-swaps every field of a fat arch header in place.
+template<>
+void SwapStruct(macho::FatArchHeader &H) {
+  // Same field order as the struct declaration.
+  uint32_t *const Fields[] = {
+    &H.CPUType, &H.CPUSubtype, &H.Offset, &H.Size, &H.Align
+  };
+  for (unsigned I = 0; I != sizeof(Fields) / sizeof(Fields[0]); ++I)
+    SwapValue(*Fields[I]);
+}
+
+/// Copies sizeof(T) bytes starting at \p Ptr into a T and converts it to host
+/// byte order. The caller must guarantee that many bytes are readable.
+template<typename T>
+static T getUniversalBinaryStruct(const char *Ptr) {
+  T Parsed;
+  memcpy(&Parsed, Ptr, sizeof(T));
+  // Universal binary headers have big-endian byte order, so only
+  // little-endian hosts need to swap.
+  if (!sys::IsLittleEndianHost)
+    return Parsed;
+  SwapStruct(Parsed);
+  return Parsed;
+}
+
+MachOUniversalBinary::ObjectForArch::ObjectForArch(
+    const MachOUniversalBinary *Parent, uint32_t Index)
+    : Parent(Parent), Index(Index) {
+  if (Parent == 0 || Index > Parent->getNumberOfObjects()) {
+    clear();
+  } else {
+    // Parse object header.
+    StringRef ParentData = Parent->getData();
+    const char *HeaderPos = ParentData.begin() + macho::FatHeaderSize +
+                            Index * macho::FatArchHeaderSize;
+    Header = getUniversalBinaryStruct<macho::FatArchHeader>(HeaderPos);
+    if (ParentData.size() < Header.Offset + Header.Size) {
+      clear();
+    }
+  }
+}
+
+error_code MachOUniversalBinary::ObjectForArch::getAsObjectFile(
+    OwningPtr<ObjectFile> &Result) const {
+  if (Parent) {
+    StringRef ParentData = Parent->getData();
+    StringRef ObjectData = ParentData.substr(Header.Offset, Header.Size);
+    Twine ObjectName =
+        Twine(Parent->getFileName()) + ":" +
+        Triple::getArchTypeName(MachOObjectFile::getArch(Header.CPUType));
+    MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer(
+        ObjectData, ObjectName.str(), false);
+    if (ObjectFile *Obj = ObjectFile::createMachOObjectFile(ObjBuffer)) {
+      Result.reset(Obj);
+      return object_error::success;
+    }
+  }
+  return object_error::parse_failed;
+}
+
+void MachOUniversalBinary::anchor() { }  // Intentionally empty; presumably pins the class's vtable to this file (LLVM anchor convention).
+
+MachOUniversalBinary::MachOUniversalBinary(MemoryBuffer *Source,
+                                           error_code &ec)
+  : Binary(Binary::ID_MachOUniversalBinary, Source),
+    NumberOfObjects(0) {
+  if (Source->getBufferSize() < macho::FatHeaderSize) {
+    ec = object_error::invalid_file_type;
+    return;
+  }
+  // Check for magic value and sufficient header size.
+  StringRef Buf = getData();
+  macho::FatHeader H = getUniversalBinaryStruct<macho::FatHeader>(Buf.begin());
+  NumberOfObjects = H.NumFatArch;
+  uint32_t MinSize = macho::FatHeaderSize +
+                     macho::FatArchHeaderSize * NumberOfObjects;
+  if (H.Magic != macho::HM_Universal || Buf.size() < MinSize) {
+    ec = object_error::parse_failed;
+    return;
+  }
+  ec = object_error::success;
+}
+
+static bool getCTMForArch(Triple::ArchType Arch, mach::CPUTypeMachine &CTM) {
+  switch (Arch) {
+    case Triple::x86:    CTM = mach::CTM_i386; return true;
+    case Triple::x86_64: CTM = mach::CTM_x86_64; return true;
+    case Triple::arm:    CTM = mach::CTM_ARM; return true;
+    case Triple::sparc:  CTM = mach::CTM_SPARC; return true;
+    case Triple::ppc:    CTM = mach::CTM_PowerPC; return true;
+    case Triple::ppc64:  CTM = mach::CTM_PowerPC64; return true;
+    default: return false;
+  }
+}
+
+error_code
+MachOUniversalBinary::getObjectForArch(Triple::ArchType Arch,
+                                       OwningPtr<ObjectFile> &Result) const {
+  mach::CPUTypeMachine CTM;
+  if (!getCTMForArch(Arch, CTM))
+    return object_error::arch_not_found;
+  for (object_iterator I = begin_objects(), E = end_objects(); I != E; ++I) {
+    if (I->getCPUType() == static_cast<uint32_t>(CTM))
+      return I->getAsObjectFile(Result);
+  }
+  return object_error::arch_not_found;
+}
index 3ec29bf7e75e78cb57049ffd5d9474fb4668fc62..8dfc26508bcc08bebd39c8f80e5de0d5676f7163 100644 (file)
@@ -46,6 +46,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
   case sys::fs::file_magic::unknown:
   case sys::fs::file_magic::bitcode:
   case sys::fs::file_magic::archive:
+  case sys::fs::file_magic::macho_universal_binary:
     return 0;
   case sys::fs::file_magic::elf_relocatable:
   case sys::fs::file_magic::elf_executable:
index 24eac47eca01b64006e0af1db776f97f02dc87fa..05366202559ee47cdb4861242ca228f7ed8b96bf 100644 (file)
@@ -810,8 +810,7 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
         // This is complicated by an overlap with Java class files.
         // See the Mach-O section in /usr/share/file/magic for details.
         if (Magic.size() >= 8 && Magic[7] < 43)
-          // FIXME: Universal Binary of any type.
-          return file_magic::macho_dynamically_linked_shared_lib;
+          return file_magic::macho_universal_binary;
       }
       break;
 
diff --git a/test/Object/Inputs/macho-universal.x86_64.i386 b/test/Object/Inputs/macho-universal.x86_64.i386
new file mode 100755 (executable)
index 0000000..36d5fc2
Binary files /dev/null and b/test/Object/Inputs/macho-universal.x86_64.i386 differ
diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test
new file mode 100644 (file)
index 0000000..8febfdf
--- /dev/null
@@ -0,0 +1,6 @@
+RUN: llvm-nm %p/Inputs/macho-universal.x86_64.i386 | FileCheck %s
+
+CHECK: macho-universal.x86_64.i386:x86_64
+CHECK: main
+CHECK: macho-universal.x86_64.i386:i386
+CHECK: main
index aa782aae40fa62a677c486af7e969f1a6c5f7f04..cb465207631456fa7f78f5de7c46ca31e9d9d3c2 100644 (file)
@@ -20,6 +20,7 @@
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Object/Archive.h"
+#include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
@@ -402,6 +403,23 @@ static void DumpSymbolNamesFromFile(std::string &Filename) {
         }
       }
     }
+  } else if (magic == sys::fs::file_magic::macho_universal_binary) {
+    OwningPtr<Binary> Bin;
+    if (error(object::createBinary(Buffer.take(), Bin), Filename))
+      return;
+
+    object::MachOUniversalBinary *UB =
+        cast<object::MachOUniversalBinary>(Bin.get());
+    for (object::MachOUniversalBinary::object_iterator
+             I = UB->begin_objects(),
+             E = UB->end_objects();
+         I != E; ++I) {
+      OwningPtr<ObjectFile> Obj;
+      if (!I->getAsObjectFile(Obj)) {
+        outs() << Obj->getFileName() << ":\n";
+        DumpSymbolNamesFromObject(Obj.get());
+      }
+    }
   } else if (magic.is_object()) {
     OwningPtr<Binary> obj;
     if (error(object::createBinary(Buffer.take(), obj), Filename))