From: Kevin Enderby Date: Mon, 11 Apr 2011 18:08:50 +0000 (+0000) Subject: Adding support for printing operands symbolically to llvm's public 'C' X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=bd3327654b5708f1ba92aff3ab25b1bbf5034797;p=oota-llvm.git Adding support for printing operands symbolically to llvm's public 'C' disassembler API. Hooked this up to the ARM target so such tools as Darwin's otool(1) can now print things like branch targets for example this: blx _puts instead of this: blx #-36 And even print the expression encoded in the Mach-O relocation entried for things like this: movt r0, :upper16:((_foo-_bar)+1234) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129284 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h index 63ed9df88e2..dda7111ca21 100644 --- a/include/llvm-c/Disassembler.h +++ b/include/llvm-c/Disassembler.h @@ -47,6 +47,49 @@ typedef int (*LLVMOpInfoCallback)(void *DisInfo, int TagType, void *TagBuf); +/** + * The initial support in LLVM MC for the most general form of a relocatable + * expression is "AddSymbol - SubtractSymbol + Offset". For some Darwin targets + * this full form is encoded in the relocation information so that AddSymbol and + * SubtractSymbol can be link edited independent of each other. Many other + * platforms only allow a relocatable expression of the form AddSymbol + Offset + * to be encoded. + * + * The LLVMOpInfoCallback() for the TagType value of 1 uses the struct + * LLVMOpInfo1. The value of the relocatable expression for the operand, + * including any PC adjustment, is passed in to the call back in the Value + * field. The symbolic information about the operand is returned using all + * the fields of the structure with the Offset of the relocatable expression + * returned in the Value field. It is possible that some symbols in the + * relocatable expression were assembly temporary symbols, for example + * "Ldata - LpicBase + constant", and only the Values of the symbols without + * symbol names are present in the relocation information. The VariantKind + * type is one of the Target specific #defines below and is used to print + * operands like "_foo@GOT", ":lower16:_foo", etc. + */ +struct LLVMOpInfoSymbol1 { + uint64_t Present; /* 1 if this symbol is present */ + char *Name; /* symbol name if not NULL */ + uint64_t Value; /* symbol value if name is NULL */ +}; +struct LLVMOpInfo1 { + struct LLVMOpInfoSymbol1 AddSymbol; + struct LLVMOpInfoSymbol1 SubtractSymbol; + uint64_t Value; + uint64_t VariantKind; +}; + +/** + * The operand VariantKinds for symbolic disassembly. + */ +#define LLVMDisassembler_VariantKind_None 0 /* all targets */ + +/** + * The ARM target VariantKinds. + */ +#define LLVMDisassembler_VariantKind_ARM_HI16 1 /* :upper16: */ +#define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */ + /** * The type for the symbol lookup function. This may be called by the * disassembler for such things like adding a comment for a PC plus a constant diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h index 0fdb51b11bf..ce8759a882e 100644 --- a/include/llvm/MC/MCDisassembler.h +++ b/include/llvm/MC/MCDisassembler.h @@ -10,12 +10,14 @@ #define MCDISASSEMBLER_H #include "llvm/Support/DataTypes.h" +#include "llvm-c/Disassembler.h" namespace llvm { class MCInst; class MemoryObject; class raw_ostream; +class MCContext; struct EDInstInfo; @@ -24,7 +26,7 @@ struct EDInstInfo; class MCDisassembler { public: /// Constructor - Performs initial setup for the disassembler. - MCDisassembler() {} + MCDisassembler() : GetOpInfo(0), DisInfo(0), Ctx(0) {} virtual ~MCDisassembler(); @@ -53,6 +55,30 @@ public: /// each MCInst opcode this disassembler returns. /// NULL if there is no info for this target. virtual EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; } + +private: + // + // Hooks for symbolic disassembly via the public 'C' interface. + // + // The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + // The pointer to the block of symbolic information for above call back. + void *DisInfo; + // The assembly context for creating symbols and MCExprs in place of + // immediate operands when there is symbolic information. + MCContext *Ctx; + +public: + void setupForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo, + void *disInfo, + MCContext *ctx) { + GetOpInfo = getOpInfo; + DisInfo = disInfo; + Ctx = ctx; + } + LLVMOpInfoCallback getLLVMOpInfoCallback() const { return GetOpInfo; } + void *getDisInfoBlock() const { return DisInfo; } + MCContext *getMCContext() const { return Ctx; } }; } // namespace llvm diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 6ccade99aa0..95c7b845b49 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -77,8 +77,9 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, assert(Ctx && "Unable to create MCContext!"); // Set up disassembler. - const MCDisassembler *DisAsm = TheTarget->createMCDisassembler(); + MCDisassembler *DisAsm = TheTarget->createMCDisassembler(); assert(DisAsm && "Unable to create disassembler!"); + DisAsm->setupForSymbolicDisassembly(GetOpInfo, DisInfo, Ctx); // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h index f84495182d1..6208b1c8a06 100644 --- a/lib/MC/MCDisassembler/Disassembler.h +++ b/lib/MC/MCDisassembler/Disassembler.h @@ -69,7 +69,7 @@ private: public: LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType, - LLVMOpInfoCallback getOpInfo, + LLVMOpInfoCallback getOpInfo, LLVMSymbolLookupCallback symbolLookUp, const Target *theTarget, const MCAsmInfo *mAI, llvm::TargetMachine *tM, const TargetAsmInfo *tai, diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 74f8b53b48c..579e1a49b12 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -422,6 +422,10 @@ bool ARMDisassembler::getInstruction(MCInst &MI, if (!Builder) return false; + Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), + getDisInfoBlock(), getMCContext(), + Address); + if (!Builder->Build(MI, insn)) return false; @@ -504,6 +508,10 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, Builder->SetSession(const_cast(&SO)); + Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), + getDisInfoBlock(), getMCContext(), + Address); + if (!Builder->Build(MI, insn)) return false; diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index 06a77736caa..d5675d26fcb 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -17,6 +17,7 @@ #include "ARMDisassemblerCore.h" #include "ARMAddressingModes.h" +#include "ARMMCExpr.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -1066,7 +1067,8 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}). assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0); - MI.addOperand(MCOperand::CreateImm(Imm16)); + if (!B->tryAddingSymbolicOperand(Imm16, 4, MI)) + MI.addOperand(MCOperand::CreateImm(Imm16)); ++OpIdx; } else { // We have a reg/imm form. @@ -3628,3 +3630,80 @@ ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { return new ARMBasicMCBuilder(Opcode, Format, ARMInsts[Opcode].getNumOperands()); } + +/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic +/// operand in place of the immediate Value in the MCInst. The immediate +/// Value has had any PC adjustment made by the caller. If the getOpInfo() +/// function was set as part of the setupBuilderForSymbolicDisassembly() call +/// then that function is called to get any symbolic information at the +/// builder's Address for this instrution. If that returns non-zero then the +/// symbolic information is returns is used to create an MCExpr and that is +/// added as an operand to the MCInst. This function returns true if it adds +/// an operand to the MCInst and false otherwise. +bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value, + uint64_t InstSize, + MCInst &MI) { + if (!GetOpInfo) + return false; + + struct LLVMOpInfo1 SymbolicOp; + SymbolicOp.Value = Value; + if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) + return false; + + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Add = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); + else + LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); + else + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); + else + Expr = Add; + } else + Expr = Off; + + if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) + MI.addOperand(MCOperand::CreateExpr(Expr)); + else + assert("bad SymbolicOp.VariantKind"); + + return true; +} diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 56dd85a7d99..f720e14d23a 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -22,12 +22,17 @@ #define ARMDISASSEMBLERCORE_H #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm-c/Disassembler.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMDisassembler.h" namespace llvm { +class MCContext; class ARMUtils { public: @@ -202,7 +207,7 @@ private: public: ARMBasicMCBuilder(ARMBasicMCBuilder &B) : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm), - SP(B.SP) { + SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) { Err = 0; } @@ -261,6 +266,44 @@ private: assert(SP); return slice(SP->ITState, 7, 4); } + +private: + // + // Hooks for symbolic disassembly via the public 'C' interface. + // + // The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + // The pointer to the block of symbolic information for above call back. + void *DisInfo; + // The assembly context for creating symbols and MCExprs in place of + // immediate operands when there is symbolic information. + MCContext *Ctx; + // The address of the instruction being disassembled. + uint64_t Address; + +public: + void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo, + void *disInfo, MCContext *ctx, + uint64_t address) { + GetOpInfo = getOpInfo; + DisInfo = disInfo; + Ctx = ctx; + Address = address; + } + + uint64_t getBuilderAddress() const { return Address; } + + /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic + /// operand in place of the immediate Value in the MCInst. The immediate + /// Value has had any PC adjustment made by the caller. If the getOpInfo() + /// function was set as part of the setupBuilderForSymbolicDisassembly() call + /// then that function is called to get any symbolic information at the + /// builder's Address for this instrution. If that returns non-zero then the + /// symbolic information is returns is used to create an MCExpr and that is + /// added as an operand to the MCInst. This function returns true if it adds + /// an operand to the MCInst and false otherwise. + bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI); + }; } // namespace llvm diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 618526816f7..61c4a13bcfd 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -1570,9 +1570,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12 || Opcode == ARM::t2LEApcrel) MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); - else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) - MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { + else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) { + if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI)) + MI.addOperand(MCOperand::CreateImm(getImm16(insn))); + } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { uint32_t mask = 0; if (getBitfieldInvMask(insn, mask)) MI.addOperand(MCOperand::CreateImm(mask)); @@ -1756,7 +1757,9 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, Offset = decodeImm32_BLX(insn); break; } - MI.addOperand(MCOperand::CreateImm(Offset)); + + if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI)) + MI.addOperand(MCOperand::CreateImm(Offset)); // This is an increment as some predicate operands may have been added first. NumOpsAdded += 1;