Remove redundant symbolization support from MCDisassembler interface.
author Lang Hames <lhames@gmail.com>
Fri, 11 Apr 2014 20:07:58 +0000 (20:07 +0000)
committer Lang Hames <lhames@gmail.com>
Fri, 11 Apr 2014 20:07:58 +0000 (20:07 +0000)
MCDisassembler has an MCSymbolizer member that is meant to take care of
symbolizing during disassembly, but it also has several methods that enable the
disassembler to do symbolization internally (i.e. without an attached symbolizer
object). There is no need for this duplication, but ARM64 had been making use of
it. This patch moves the ARM64 symbolization logic out of ARM64Disassembler and
into an ARM64ExternalSymbolizer class, and removes the duplicated MCSymbolizer
functionality from the MCDisassembler interface. Symbolization will now be
done exclusively through MCSymbolizers.

There should be no impact on disassembly for any platform, but this allows us to
tidy up the MCDisassembler interface and simplify the process of (and invariants
related to) disassembler setup.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206063 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/MC/MCDisassembler.h
include/llvm/MC/MCExternalSymbolizer.h
lib/MC/MCDisassembler.cpp
lib/MC/MCDisassembler/Disassembler.cpp
lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp
lib/Target/ARM64/Disassembler/ARM64Disassembler.h
lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp [new file with mode: 0644]
lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h [new file with mode: 0644]
lib/Target/ARM64/Disassembler/CMakeLists.txt

index d545fc7e4ed91db9ef9e43621a7edba6cbbaa59a..410d6d36f799bb61d614285f5ef613fe075af578 100644 (file)
@@ -57,8 +57,7 @@ public:
 
   /// Constructor     - Performs initial setup for the disassembler.
   MCDisassembler(const MCSubtargetInfo &STI)
-      : GetOpInfo(0), SymbolLookUp(0), DisInfo(0), Ctx(0), STI(STI),
-        Symbolizer(), CommentStream(0) {}
+      : STI(STI), Symbolizer(), CommentStream(0) {}
 
   virtual ~MCDisassembler();
 
@@ -84,19 +83,6 @@ public:
                                        uint64_t address,
                                        raw_ostream &vStream,
                                        raw_ostream &cStream) const = 0;
-private:
-  //
-  // Hooks for symbolic disassembly via the public 'C' interface.
-  //
-  // The function to get the symbolic information for operands.
-  LLVMOpInfoCallback GetOpInfo;
-  // The function to lookup a symbol name.
-  LLVMSymbolLookupCallback SymbolLookUp;
-  // The pointer to the block of symbolic information for above call back.
-  void *DisInfo;
-  // The assembly context for creating symbols and MCExprs in place of
-  // immediate operands when there is symbolic information.
-  MCContext *Ctx;
 
 protected:
   // Subtarget information, for instruction decoding predicates if required.
@@ -116,20 +102,6 @@ public:
   /// This takes ownership of \p Symzer, and deletes the previously set one.
   void setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer);
 
-  /// Sets up an external symbolizer that uses the C API callbacks.
-  void setupForSymbolicDisassembly(LLVMOpInfoCallback GetOpInfo,
-                                   LLVMSymbolLookupCallback SymbolLookUp,
-                                   void *DisInfo,
-                                   MCContext *Ctx,
-                                   std::unique_ptr<MCRelocationInfo> &RelInfo);
-
-  LLVMOpInfoCallback getLLVMOpInfoCallback() const { return GetOpInfo; }
-  LLVMSymbolLookupCallback getLLVMSymbolLookupCallback() const {
-    return SymbolLookUp;
-  }
-  void *getDisInfoBlock() const { return DisInfo; }
-  MCContext *getMCContext() const { return Ctx; }
-
   const MCSubtargetInfo& getSubtargetInfo() const { return STI; }
 
   // Marked mutable because we cache it inside the disassembler, rather than
index cab915234f39dbc713b0870e0254556baec5b5c7..2c7d23707c95682727ef8ecc0fac2b9c895b69c0 100644 (file)
@@ -26,7 +26,7 @@ namespace llvm {
 ///
 /// See llvm-c/Disassembler.h.
 class MCExternalSymbolizer : public MCSymbolizer {
-
+protected:
   /// \name Hooks for symbolic disassembly via the public 'C' interface.
   /// @{
   /// The function to get the symbolic information for operands.
index 7a2b1a14a5bb2db9a2a1a902007c9123d3107aa3..77d9ce167548b61b28612bb416ab1190ba037151 100644 (file)
@@ -16,20 +16,6 @@ using namespace llvm;
 MCDisassembler::~MCDisassembler() {
 }
 
-void MCDisassembler::setupForSymbolicDisassembly(
-    LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp,
-    void *DisInfo, MCContext *Ctx, std::unique_ptr<MCRelocationInfo> &RelInfo) {
-  this->GetOpInfo = GetOpInfo;
-  this->SymbolLookUp = SymbolLookUp;
-  this->DisInfo = DisInfo;
-  this->Ctx = Ctx;
-  assert(Ctx != 0 && "No MCContext given for symbolic disassembly");
-  if (!Symbolizer)
-    Symbolizer.reset(new MCExternalSymbolizer(*Ctx, std::move(RelInfo),
-                                              GetOpInfo, SymbolLookUp,
-                                              DisInfo));
-}
-
 bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
                                               uint64_t Address, bool IsBranch,
                                               uint64_t Offset,
index b935b839bd7bf8a37c95711f8facd174d7fd877d..b57b8aaa113ea92d64aca1ba089273c5209ba714 100644 (file)
@@ -82,8 +82,7 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
   std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer(
       Triple, GetOpInfo, SymbolLookUp, DisInfo, Ctx, RelInfo.release()));
   DisAsm->setSymbolizer(std::move(Symbolizer));
-  DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo,
-                                      Ctx, RelInfo);
+
   // Set up the instruction printer.
   int AsmPrinterVariant = MAI->getAssemblerDialect();
   MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
index 8f9b79c90a81314ceb782873415d0e40ed3750be..34cc1b630c3b7b7a709de545922971563937d958 100644 (file)
 #define DEBUG_TYPE "arm64-disassembler"
 
 #include "ARM64Disassembler.h"
+#include "ARM64ExternalSymbolizer.h"
 #include "ARM64Subtarget.h"
 #include "MCTargetDesc/ARM64AddressingModes.h"
 #include "Utils/ARM64BaseInfo.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
 
 // Pull DecodeStatus and its enum values into the global namespace.
 typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
@@ -219,205 +216,23 @@ DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
   return Success;
 }
 
-static MCSymbolRefExpr::VariantKind
-getVariant(uint64_t LLVMDisassembler_VariantKind) {
-  switch (LLVMDisassembler_VariantKind) {
-  case LLVMDisassembler_VariantKind_None:
-    return MCSymbolRefExpr::VK_None;
-  case LLVMDisassembler_VariantKind_ARM64_PAGE:
-    return MCSymbolRefExpr::VK_PAGE;
-  case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
-    return MCSymbolRefExpr::VK_PAGEOFF;
-  case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
-    return MCSymbolRefExpr::VK_GOTPAGE;
-  case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
-    return MCSymbolRefExpr::VK_GOTPAGEOFF;
-  case LLVMDisassembler_VariantKind_ARM64_TLVP:
-  case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
-  default:
-    assert(0 && "bad LLVMDisassembler_VariantKind");
-    return MCSymbolRefExpr::VK_None;
-  }
-}
-
-/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
-/// operand in place of the immediate Value in the MCInst.  The immediate
-/// Value has not had any PC adjustment made by the caller. If the instruction
-/// is a branch that adds the PC to the immediate Value then isBranch is
-/// Success, else Fail.  If the getOpInfo() function was set as part of the
-/// setupForSymbolicDisassembly() call then that function is called to get any
-/// symbolic information at the Address for this instrution.  If that returns
-/// non-zero then the symbolic information it returns is used to create an
-/// MCExpr and that is added as an operand to the MCInst.  If getOpInfo()
-/// returns zero and isBranch is Success then a symbol look up for
-/// Address + Value is done and if a symbol is found an MCExpr is created with
-/// that, else an MCExpr with Address + Value is created.  If getOpInfo()
-/// returns zero and isBranch is Fail then the the Opcode of the MCInst is
-/// tested and for ADRP an other instructions that help to load of pointers
-/// a symbol look up is done to see it is returns a specific reference type
-/// to add to the comment stream.  This function returns Success if it adds
-/// an operand to the MCInst and Fail otherwise.
-bool ARM64Disassembler::tryAddingSymbolicOperand(uint64_t Address, int Value,
-                                                 bool isBranch,
-                                                 uint64_t InstSize, MCInst &MI,
-                                                 uint32_t insn) const {
-  LLVMOpInfoCallback getOpInfo = getLLVMOpInfoCallback();
-
-  struct LLVMOpInfo1 SymbolicOp;
-  memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
-  SymbolicOp.Value = Value;
-  void *DisInfo = getDisInfoBlock();
-  uint64_t ReferenceType;
-  const char *ReferenceName;
-  const char *Name;
-  LLVMSymbolLookupCallback SymbolLookUp = getLLVMSymbolLookupCallback();
-  if (!getOpInfo ||
-      !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
-    if (isBranch) {
-      if (SymbolLookUp) {
-        ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
-        Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
-                            &ReferenceName);
-        if (Name) {
-          SymbolicOp.AddSymbol.Name = Name;
-          SymbolicOp.AddSymbol.Present = Success;
-          SymbolicOp.Value = 0;
-        } else {
-          SymbolicOp.Value = Address + Value;
-        }
-        if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
-          (*CommentStream) << "symbol stub for: " << ReferenceName;
-        else if (ReferenceType ==
-                 LLVMDisassembler_ReferenceType_Out_Objc_Message)
-          (*CommentStream) << "Objc message: " << ReferenceName;
-      } else {
-        return false;
-      }
-    } else if (MI.getOpcode() == ARM64::ADRP) {
-      if (SymbolLookUp) {
-        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
-        Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address,
-                            &ReferenceName);
-        (*CommentStream) << format("0x%llx",
-                                   0xfffffffffffff000LL & (Address + Value));
-      } else {
-        return false;
-      }
-    } else if (MI.getOpcode() == ARM64::ADDXri ||
-               MI.getOpcode() == ARM64::LDRXui ||
-               MI.getOpcode() == ARM64::LDRXl || MI.getOpcode() == ARM64::ADR) {
-      if (SymbolLookUp) {
-        if (MI.getOpcode() == ARM64::ADDXri)
-          ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
-        else if (MI.getOpcode() == ARM64::LDRXui)
-          ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
-        if (MI.getOpcode() == ARM64::LDRXl) {
-          ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
-          Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
-                              &ReferenceName);
-        } else if (MI.getOpcode() == ARM64::ADR) {
-          ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
-          Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
-                              &ReferenceName);
-        } else {
-          Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address,
-                              &ReferenceName);
-        }
-        if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
-          (*CommentStream) << "literal pool symbol address: " << ReferenceName;
-        else if (ReferenceType ==
-                 LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
-          (*CommentStream) << "literal pool for: \"" << ReferenceName << "\"";
-        else if (ReferenceType ==
-                 LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
-          (*CommentStream) << "Objc cfstring ref: @\"" << ReferenceName << "\"";
-        else if (ReferenceType ==
-                 LLVMDisassembler_ReferenceType_Out_Objc_Message)
-          (*CommentStream) << "Objc message: " << ReferenceName;
-        else if (ReferenceType ==
-                 LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
-          (*CommentStream) << "Objc message ref: " << ReferenceName;
-        else if (ReferenceType ==
-                 LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
-          (*CommentStream) << "Objc selector ref: " << ReferenceName;
-        else if (ReferenceType ==
-                 LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
-          (*CommentStream) << "Objc class ref: " << ReferenceName;
-        // For these instructions, the SymbolLookUp() above is just to get the
-        // ReferenceType and ReferenceName.  We want to make sure not to
-        // fall through so we don't build an MCExpr to leave the disassembly
-        // of the immediate values of these instructions to the InstPrinter.
-        return false;
-      } else {
-        return false;
-      }
-    } else {
-      return false;
-    }
-  }
-
-  MCContext *Ctx = getMCContext();
-  const MCExpr *Add = NULL;
-  if (SymbolicOp.AddSymbol.Present) {
-    if (SymbolicOp.AddSymbol.Name) {
-      StringRef Name(SymbolicOp.AddSymbol.Name);
-      MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
-      MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
-      if (Variant != MCSymbolRefExpr::VK_None)
-        Add = MCSymbolRefExpr::Create(Sym, Variant, *Ctx);
-      else
-        Add = MCSymbolRefExpr::Create(Sym, *Ctx);
-    } else {
-      Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
-    }
-  }
-
-  const MCExpr *Sub = NULL;
-  if (SymbolicOp.SubtractSymbol.Present) {
-    if (SymbolicOp.SubtractSymbol.Name) {
-      StringRef Name(SymbolicOp.SubtractSymbol.Name);
-      MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
-      Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
-    } else {
-      Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
-    }
-  }
-
-  const MCExpr *Off = NULL;
-  if (SymbolicOp.Value != 0)
-    Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
-
-  const MCExpr *Expr;
-  if (Sub) {
-    const MCExpr *LHS;
-    if (Add)
-      LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
-    else
-      LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
-    if (Off != 0)
-      Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
-    else
-      Expr = LHS;
-  } else if (Add) {
-    if (Off != 0)
-      Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
-    else
-      Expr = Add;
-  } else {
-    if (Off != 0)
-      Expr = Off;
-    else
-      Expr = MCConstantExpr::Create(0, *Ctx);
-  }
-
-  MI.addOperand(MCOperand::CreateExpr(Expr));
-
-  return true;
+MCSymbolizer *createARM64ExternalSymbolizer(
+                                          StringRef TT,
+                                          LLVMOpInfoCallback GetOpInfo,
+                                          LLVMSymbolLookupCallback SymbolLookUp,
+                                          void *DisInfo, MCContext *Ctx,
+                                          MCRelocationInfo *RelInfo) {
+  return new llvm::ARM64ExternalSymbolizer(
+                                     *Ctx,
+                                     std::unique_ptr<MCRelocationInfo>(RelInfo),
+                                     GetOpInfo, SymbolLookUp, DisInfo);
 }
 
 extern "C" void LLVMInitializeARM64Disassembler() {
   TargetRegistry::RegisterMCDisassembler(TheARM64Target,
                                          createARM64Disassembler);
+  TargetRegistry::RegisterMCSymbolizer(TheARM64Target,
+                                       createARM64ExternalSymbolizer);
 }
 
 static const unsigned FPR128DecoderTable[] = {
@@ -773,8 +588,8 @@ static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm,
   if (ImmVal & (1 << (19 - 1)))
     ImmVal |= ~((1LL << 19) - 1);
 
-  if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal << 2,
-                                     Inst.getOpcode() != ARM64::LDRXl, 4, Inst))
+  if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal << 2, Addr,
+                                     Inst.getOpcode() != ARM64::LDRXl, 0, 4))
     Inst.addOperand(MCOperand::CreateImm(ImmVal));
   return Success;
 }
@@ -1023,7 +838,7 @@ static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
   }
 
   DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
-  if (!Dis->tryAddingSymbolicOperand(Addr, offset, Fail, 4, Inst, insn))
+  if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
     Inst.addOperand(MCOperand::CreateImm(offset));
   return Success;
 }
@@ -1535,7 +1350,7 @@ static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
     imm |= ~((1LL << 21) - 1);
 
   DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
-  if (!Dis->tryAddingSymbolicOperand(Addr, imm, Fail, 4, Inst, insn))
+  if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
     Inst.addOperand(MCOperand::CreateImm(imm));
 
   return Success;
@@ -1571,7 +1386,7 @@ static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
     DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
   }
 
-  if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal, Fail, 4, Inst, insn))
+  if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
     Inst.addOperand(MCOperand::CreateImm(ImmVal));
   Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal));
   return Success;
@@ -1588,7 +1403,7 @@ static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
   if (imm & (1 << (26 - 1)))
     imm |= ~((1LL << 26) - 1);
 
-  if (!Dis->tryAddingSymbolicOperand(Addr, imm << 2, true, 4, Inst))
+  if (!Dis->tryAddingSymbolicOperand(Inst, imm << 2, Addr, true, 0, 4))
     Inst.addOperand(MCOperand::CreateImm(imm));
 
   return Success;
@@ -1627,7 +1442,7 @@ static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
 
   DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
   Inst.addOperand(MCOperand::CreateImm(bit));
-  if (!Dis->tryAddingSymbolicOperand(Addr, dst << 2, true, 4, Inst))
+  if (!Dis->tryAddingSymbolicOperand(Inst, dst << 2, Addr, true, 0, 4))
     Inst.addOperand(MCOperand::CreateImm(dst));
 
   return Success;
index 35efc8de42acd31d3be97b6a39361551261f9540..95848d55fa4b7eb6c5fc4912201b870c27aaeba6 100644 (file)
@@ -33,20 +33,6 @@ public:
                                               uint64_t address,
                                               raw_ostream &vStream,
                                               raw_ostream &cStream) const;
-
-  /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
-  /// operand in place of the immediate Value in the MCInst.  The immediate
-  /// Value has not had any PC adjustment made by the caller. If the instruction
-  /// adds the PC to the immediate Value then InstsAddsAddressToValue is true,
-  /// else false.  If the getOpInfo() function was set as part of the
-  /// setupForSymbolicDisassembly() call then that function is called to get any
-  /// symbolic information at the Address for this instrution.  If that returns
-  /// non-zero then the symbolic information it returns is used to create an
-  /// MCExpr and that is added as an operand to the MCInst.  This function
-  /// returns true if it adds an operand to the MCInst and false otherwise.
-  bool tryAddingSymbolicOperand(uint64_t Address, int Value,
-                                bool InstsAddsAddressToValue, uint64_t InstSize,
-                                MCInst &MI, uint32_t insn = 0) const;
 };
 
 } // namespace llvm
diff --git a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp b/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.cpp
new file mode 100644 (file)
index 0000000..65d4220
--- /dev/null
@@ -0,0 +1,226 @@
+//===- ARM64ExternalSymbolizer.cpp - Symbolizer for ARM64 -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm64-disassembler"
+
+#include "ARM64ExternalSymbolizer.h"
+#include "ARM64Subtarget.h"
+#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "Utils/ARM64BaseInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static MCSymbolRefExpr::VariantKind
+getVariant(uint64_t LLVMDisassembler_VariantKind) {
+  switch (LLVMDisassembler_VariantKind) {
+  case LLVMDisassembler_VariantKind_None:
+    return MCSymbolRefExpr::VK_None;
+  case LLVMDisassembler_VariantKind_ARM64_PAGE:
+    return MCSymbolRefExpr::VK_PAGE;
+  case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
+    return MCSymbolRefExpr::VK_PAGEOFF;
+  case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
+    return MCSymbolRefExpr::VK_GOTPAGE;
+  case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
+    return MCSymbolRefExpr::VK_GOTPAGEOFF;
+  case LLVMDisassembler_VariantKind_ARM64_TLVP:
+  case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
+  default:
+    assert(0 && "bad LLVMDisassembler_VariantKind");
+    return MCSymbolRefExpr::VK_None;
+  }
+}
+
+/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
+/// operand in place of the immediate Value in the MCInst.  The immediate
+/// Value has not had any PC adjustment made by the caller. If the instruction
+/// is a branch that adds the PC to the immediate Value then IsBranch is
+/// true, else false. If GetOpInfo is non-null, then it is called to get any
+/// symbolic information at the Address for this instruction.  If that returns
+/// non-zero then the symbolic information it returns is used to create an
+/// MCExpr and that is added as an operand to the MCInst.  If GetOpInfo()
+/// returns zero and IsBranch is true then a symbol look up for
+/// Address + Value is done and if a symbol is found an MCExpr is created with
+/// that, else an MCExpr with Address + Value is created.  If GetOpInfo()
+/// returns zero and IsBranch is false then the Opcode of the MCInst is
+/// tested and for ADRP and other instructions that help to load pointers
+/// a symbol look up is done to see if it returns a specific reference type
+/// to add to the comment stream.  This function returns true if it adds
+/// an operand to the MCInst and false otherwise.
+bool ARM64ExternalSymbolizer::tryAddingSymbolicOperand(
+                                                     MCInst &MI,
+                                                     raw_ostream &CommentStream,
+                                                     int64_t Value,
+                                                     uint64_t Address,
+                                                     bool IsBranch,
+                                                     uint64_t Offset,
+                                                     uint64_t InstSize) {
+  // FIXME: This method shares a lot of code with
+  //        MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
+  //        to refactor the MCExternalSymbolizer interface to allow more of this
+  //        implementation to be shared.
+  //
+  struct LLVMOpInfo1 SymbolicOp;
+  memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+  SymbolicOp.Value = Value;
+  uint64_t ReferenceType;
+  const char *ReferenceName;
+  if (!GetOpInfo ||
+      !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+    if (IsBranch) {
+      ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+      const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
+                                      Address, &ReferenceName);
+      if (Name) {
+        SymbolicOp.AddSymbol.Name = Name;
+        SymbolicOp.AddSymbol.Present = true;
+        SymbolicOp.Value = 0;
+      } else {
+        SymbolicOp.Value = Address + Value;
+      }
+      if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+        CommentStream << "symbol stub for: " << ReferenceName;
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_Message)
+        CommentStream << "Objc message: " << ReferenceName;
+    } else if (MI.getOpcode() == ARM64::ADRP) {
+        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
+        // otool expects the fully encoded ADRP instruction to be passed in as
+        // the value here, so reconstruct it:
+        const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
+        uint32_t EncodedInst = 0x90000000;
+        EncodedInst |= (Value & 0x3) << 29; // immlo
+        EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
+        EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
+        SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
+                     &ReferenceName);
+        CommentStream << format("0x%llx",
+                                0xfffffffffffff000LL & (Address + Value));
+    } else if (MI.getOpcode() == ARM64::ADDXri ||
+               MI.getOpcode() == ARM64::LDRXui ||
+               MI.getOpcode() == ARM64::LDRXl ||
+               MI.getOpcode() == ARM64::ADR) {
+      if (MI.getOpcode() == ARM64::ADDXri)
+        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
+      else if (MI.getOpcode() == ARM64::LDRXui)
+        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
+      if (MI.getOpcode() == ARM64::LDRXl) {
+        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
+        SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
+                     &ReferenceName);
+      } else if (MI.getOpcode() == ARM64::ADR) {
+        ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
+        SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
+                            &ReferenceName);
+      } else {
+        const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
+        // otool expects the fully encoded ADD/LDR instruction to be passed in
+        // as the value here, so reconstruct it:
+        unsigned EncodedInst =
+          MI.getOpcode() == ARM64::ADDXri ? 0x91000000: 0xF9400000;
+        EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
+        EncodedInst |=
+          MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
+        EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
+
+        SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
+                     &ReferenceName);
+      }
+      if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
+        CommentStream << "literal pool symbol address: " << ReferenceName;
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+        CommentStream << "literal pool for: \"" << ReferenceName << "\"";
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
+        CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_Message)
+        CommentStream << "Objc message: " << ReferenceName;
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
+        CommentStream << "Objc message ref: " << ReferenceName;
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
+        CommentStream << "Objc selector ref: " << ReferenceName;
+      else if (ReferenceType ==
+               LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
+        CommentStream << "Objc class ref: " << ReferenceName;
+      // For these instructions, the SymbolLookUp() above is just to get the
+      // ReferenceType and ReferenceName.  We want to make sure not to
+      // fall through so we don't build an MCExpr to leave the disassembly
+      // of the immediate values of these instructions to the InstPrinter.
+      return false;
+    } else {
+      return false;
+    }
+  }
+
+  const MCExpr *Add = NULL;
+  if (SymbolicOp.AddSymbol.Present) {
+    if (SymbolicOp.AddSymbol.Name) {
+      StringRef Name(SymbolicOp.AddSymbol.Name);
+      MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+      MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
+      if (Variant != MCSymbolRefExpr::VK_None)
+        Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx);
+      else
+        Add = MCSymbolRefExpr::Create(Sym, Ctx);
+    } else {
+      Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, Ctx);
+    }
+  }
+
+  const MCExpr *Sub = NULL;
+  if (SymbolicOp.SubtractSymbol.Present) {
+    if (SymbolicOp.SubtractSymbol.Name) {
+      StringRef Name(SymbolicOp.SubtractSymbol.Name);
+      MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+      Sub = MCSymbolRefExpr::Create(Sym, Ctx);
+    } else {
+      Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx);
+    }
+  }
+
+  const MCExpr *Off = NULL;
+  if (SymbolicOp.Value != 0)
+    Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx);
+
+  const MCExpr *Expr;
+  if (Sub) {
+    const MCExpr *LHS;
+    if (Add)
+      LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx);
+    else
+      LHS = MCUnaryExpr::CreateMinus(Sub, Ctx);
+    if (Off != 0)
+      Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx);
+    else
+      Expr = LHS;
+  } else if (Add) {
+    if (Off != 0)
+      Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx);
+    else
+      Expr = Add;
+  } else {
+    if (Off != 0)
+      Expr = Off;
+    else
+      Expr = MCConstantExpr::Create(0, Ctx);
+  }
+
+  MI.addOperand(MCOperand::CreateExpr(Expr));
+
+  return true;
+}
diff --git a/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h b/lib/Target/ARM64/Disassembler/ARM64ExternalSymbolizer.h
new file mode 100644 (file)
index 0000000..45f07a5
--- /dev/null
@@ -0,0 +1,37 @@
+//===- ARM64ExternalSymbolizer.h - Symbolizer for ARM64 ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Symbolize ARM64 assembly code during disassembly using callbacks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM64EXTERNALSYMBOLIZER_H
+#define ARM64EXTERNALSYMBOLIZER_H
+
+#include "llvm/MC/MCExternalSymbolizer.h"
+
+namespace llvm {
+
+class ARM64ExternalSymbolizer : public MCExternalSymbolizer {
+public:
+  ARM64ExternalSymbolizer(MCContext &Ctx,
+                          std::unique_ptr<MCRelocationInfo> RelInfo,
+                          LLVMOpInfoCallback GetOpInfo,
+                          LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo)
+    : MCExternalSymbolizer(Ctx, std::move(RelInfo), GetOpInfo, SymbolLookUp,
+                           DisInfo) {}
+
+  bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
+                                int64_t Value, uint64_t Address, bool IsBranch,
+                                uint64_t Offset, uint64_t InstSize) override;
+};
+
+} // namespace llvm
+
+#endif
index ad998c28c493a14edbae2642ab722329fb569300..43ade66be1448965c2d2ed88e1ace87cbd5da492 100644 (file)
@@ -2,6 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 
 add_llvm_library(LLVMARM64Disassembler
   ARM64Disassembler.cpp
+  ARM64ExternalSymbolizer.cpp
   )
 # workaround for hanging compilation on MSVC8, 9 and 10
 #if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )