From ee5dfd42f1361da9e44b768a12495be235d6043f Mon Sep 17 00:00:00 2001 From: Sean Callanan Date: Mon, 1 Feb 2010 08:49:35 +0000 Subject: [PATCH] Added the enhanced disassembly library's implementation and fleshed out the .exports file. I still have to fix several details of operand parsing, but the basic functionality is there and usable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94974 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/ed/EDDisassembler.cpp | 445 +++++++++++++++++++++++++++ tools/ed/EDDisassembler.h | 248 +++++++++++++++ tools/ed/EDInst.cpp | 205 ++++++++++++ tools/ed/EDInst.h | 171 ++++++++++ tools/ed/EDMain.cpp | 235 +++++++++++++- tools/ed/EDOperand.cpp | 148 +++++++++ tools/ed/EDOperand.h | 65 ++++ tools/ed/EDToken.cpp | 185 +++++++++++ tools/ed/EDToken.h | 135 ++++++++ tools/ed/EnhancedDisassembly.exports | 30 ++ 10 files changed, 1865 insertions(+), 2 deletions(-) create mode 100644 tools/ed/EDDisassembler.cpp create mode 100644 tools/ed/EDDisassembler.h create mode 100644 tools/ed/EDInst.cpp create mode 100644 tools/ed/EDInst.h create mode 100644 tools/ed/EDOperand.cpp create mode 100644 tools/ed/EDOperand.h create mode 100644 tools/ed/EDToken.cpp create mode 100644 tools/ed/EDToken.h diff --git a/tools/ed/EDDisassembler.cpp b/tools/ed/EDDisassembler.cpp new file mode 100644 index 00000000000..3aade1e94ea --- /dev/null +++ b/tools/ed/EDDisassembler.cpp @@ -0,0 +1,445 @@ +//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's disassembler class. +// The disassembler is responsible for vending individual instructions according +// to a given architecture and disassembly syntax. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/MC/MCParser/AsmParser.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetAsmLexer.h" +#include "llvm/Target/TargetAsmParser.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSelect.h" + +#include "EDDisassembler.h" +#include "EDInst.h" + +#include "../../lib/Target/X86/X86GenEDInfo.inc" + +using namespace llvm; + +bool EDDisassembler::sInitialized = false; +EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; + +struct InfoMap { + Triple::ArchType Arch; + const char *String; + const InstInfo *Info; +}; + +static struct InfoMap infomap[] = { + { Triple::x86, "i386-unknown-unknown", instInfoX86 }, + { Triple::x86_64, "x86_64-unknown-unknown", instInfoX86 }, + { Triple::InvalidArch, NULL, NULL } +}; + +/// infoFromArch - Returns the InfoMap corresponding to a given architecture, +/// or NULL if there is an error +/// +/// @arg arch - The Triple::ArchType for the desired architecture +static const InfoMap *infoFromArch(Triple::ArchType arch) { + unsigned int infoIndex; + + for (infoIndex = 0; infomap[infoIndex].String != NULL; ++infoIndex) { + if(arch == infomap[infoIndex].Arch) + return &infomap[infoIndex]; + } + + return NULL; +} + +/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer +/// for the desired assembly syntax, suitable for passing to +/// Target::createMCInstPrinter() +/// +/// @arg arch - The target architecture +/// @arg syntax - The assembly syntax in sd form +static int getLLVMSyntaxVariant(Triple::ArchType arch, + EDAssemblySyntax_t syntax) { + switch (syntax) { + default: + return -1; + // Mappings below from X86AsmPrinter.cpp + case kEDAssemblySyntaxX86ATT: + if (arch == Triple::x86 || arch == Triple::x86_64) + return 0; + else + return -1; + case kEDAssemblySyntaxX86Intel: + if (arch == Triple::x86 || arch == Triple::x86_64) + return 1; + else + return -1; + } +} + +#define BRINGUP_TARGET(tgt) \ + LLVMInitialize##tgt##TargetInfo(); \ + LLVMInitialize##tgt##Target(); \ + LLVMInitialize##tgt##AsmPrinter(); \ + LLVMInitialize##tgt##AsmParser(); \ + LLVMInitialize##tgt##Disassembler(); + +void EDDisassembler::initialize() { + if (sInitialized) + return; + + sInitialized = true; + + BRINGUP_TARGET(X86) +} + +#undef BRINGUP_TARGET + +EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, + EDAssemblySyntax_t syntax) { + CPUKey key; + key.Arch = arch; + key.Syntax = syntax; + + EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key); + + if (i != sDisassemblers.end()) { + return i->second; + } + else { + EDDisassembler* sdd = new EDDisassembler(key); + if(!sdd->valid()) { + delete sdd; + return NULL; + } + + sDisassemblers[key] = sdd; + + return sdd; + } + + return NULL; +} + +EDDisassembler *EDDisassembler::getDisassembler(StringRef str, + EDAssemblySyntax_t syntax) { + Triple triple(str); + + return getDisassembler(triple.getArch(), syntax); +} + +namespace { + class EDAsmParser : public MCAsmParser { + AsmLexer Lexer; + MCContext Context; + OwningPtr Streamer; + public: + // Mandatory functions + EDAsmParser(const MCAsmInfo &MAI) : Lexer(MAI) { + Streamer.reset(createNullStreamer(Context)); + } + virtual ~EDAsmParser() { } + MCAsmLexer &getLexer() { return Lexer; } + MCContext &getContext() { return Context; } + MCStreamer &getStreamer() { return *Streamer; } + void Warning(SMLoc L, const Twine &Msg) { } + bool Error(SMLoc L, const Twine &Msg) { return true; } + const AsmToken &Lex() { return Lexer.Lex(); } + bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { + AsmToken token = Lex(); + if(token.isNot(AsmToken::Integer)) + return true; + Res = MCConstantExpr::Create(token.getIntVal(), Context); + return false; + } + bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { + assert(0 && "I can't ParseParenExpression()s!"); + } + bool ParseAbsoluteExpression(int64_t &Res) { + assert(0 && "I can't ParseAbsoluteExpression()s!"); + } + + /// setBuffer - loads a buffer into the parser + /// @arg buf - The buffer to read tokens from + void setBuffer(const MemoryBuffer &buf) { Lexer.setBuffer(&buf); } + /// parseInstName - When the lexer is positioned befor an instruction + /// name (with possible intervening whitespace), reads past the name, + /// returning 0 on success and -1 on failure + /// @arg name - A reference to a string that is filled in with the + /// instruction name + /// @arg loc - A reference to a location that is filled in with the + /// position of the instruction name + int parseInstName(StringRef &name, SMLoc &loc) { + AsmToken tok = Lexer.Lex(); + if(tok.isNot(AsmToken::Identifier)) { + return -1; + } + name = tok.getString(); + loc = tok.getLoc(); + return 0; + } + }; +} + +EDDisassembler::EDDisassembler(CPUKey &key) : + Valid(false), ErrorString(), ErrorStream(ErrorString), Key(key) { + const InfoMap *infoMap = infoFromArch(key.Arch); + + if (!infoMap) + return; + + const char *triple = infoMap->String; + + int syntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax); + + if (syntaxVariant < 0) + return; + + std::string tripleString(triple); + std::string errorString; + + Tgt = TargetRegistry::lookupTarget(tripleString, + errorString); + + if (!Tgt) + return; + + std::string featureString; + + OwningPtr + targetMachine(Tgt->createTargetMachine(tripleString, + featureString)); + + const TargetRegisterInfo *registerInfo = targetMachine->getRegisterInfo(); + + if (!registerInfo) + return; + + AsmInfo.reset(Tgt->createAsmInfo(tripleString)); + + if (!AsmInfo) + return; + + Disassembler.reset(Tgt->createMCDisassembler()); + + if (!Disassembler) + return; + + InstString.reset(new std::string); + InstStream.reset(new raw_string_ostream(*InstString)); + + InstPrinter.reset(Tgt->createMCInstPrinter(syntaxVariant, + *AsmInfo, + *InstStream)); + + if (!InstPrinter) + return; + + GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); + SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo)); + SpecificAsmLexer->InstallLexer(*GenericAsmLexer); + + InstInfos = infoMap->Info; + + Valid = true; +} + +EDDisassembler::~EDDisassembler() { + if(!valid()) + return; +} + +namespace { + /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback + /// as provided by the sd interface. See MemoryObject. + class EDMemoryObject : public llvm::MemoryObject { + private: + EDByteReaderCallback Callback; + void *Arg; + public: + EDMemoryObject(EDByteReaderCallback callback, + void *arg) : Callback(callback), Arg(arg) { } + ~EDMemoryObject() { } + uint64_t getBase() const { return 0x0; } + uint64_t getExtent() const { return (uint64_t)-1; } + int readByte(uint64_t address, uint8_t *ptr) const { + if(!Callback) + return -1; + + if(Callback(ptr, address, Arg)) + return -1; + + return 0; + } + }; +} + +EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, + uint64_t address, + void *arg) { + EDMemoryObject memoryObject(byteReader, arg); + + MCInst* inst = new MCInst; + uint64_t byteSize; + + if (!Disassembler->getInstruction(*inst, + byteSize, + memoryObject, + address, + ErrorStream)) { + delete inst; + return NULL; + } + else { + const InstInfo *thisInstInfo = &InstInfos[inst->getOpcode()]; + + EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); + return sdInst; + } +} + +void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) { + unsigned numRegisters = registerInfo.getNumRegs(); + unsigned registerIndex; + + for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { + const char* registerName = registerInfo.get(registerIndex).Name; + + RegVec.push_back(registerName); + RegRMap[registerName] = registerIndex; + } + + if (Key.Arch == Triple::x86 || + Key.Arch == Triple::x86_64) { + stackPointers.insert(registerIDWithName("SP")); + stackPointers.insert(registerIDWithName("ESP")); + stackPointers.insert(registerIDWithName("RSP")); + + programCounters.insert(registerIDWithName("IP")); + programCounters.insert(registerIDWithName("EIP")); + programCounters.insert(registerIDWithName("RIP")); + } +} + +const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const { + if (registerID >= RegVec.size()) + return NULL; + else + return RegVec[registerID].c_str(); +} + +unsigned EDDisassembler::registerIDWithName(const char *name) const { + regrmap_t::const_iterator iter = RegRMap.find(std::string(name)); + if (iter == RegRMap.end()) + return 0; + else + return (*iter).second; +} + +bool EDDisassembler::registerIsStackPointer(unsigned registerID) { + return (stackPointers.find(registerID) != stackPointers.end()); +} + +bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { + return (programCounters.find(registerID) != programCounters.end()); +} + +int EDDisassembler::printInst(std::string& str, + MCInst& inst) { + PrinterMutex.acquire(); + + InstPrinter->printInst(&inst); + InstStream->flush(); + str = *InstString; + InstString->clear(); + + PrinterMutex.release(); + + return 0; +} + +int EDDisassembler::parseInst(SmallVectorImpl &operands, + SmallVectorImpl &tokens, + const std::string &str) { + int ret = 0; + + const char *cStr = str.c_str(); + MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr)); + + StringRef instName; + SMLoc instLoc; + + SourceMgr sourceMgr; + sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over + MCContext context; + OwningPtr streamer + (createNullStreamer(context)); + AsmParser genericParser(sourceMgr, context, *streamer, *AsmInfo); + OwningPtr specificParser + (Tgt->createAsmParser(genericParser)); + + AsmToken OpcodeToken = genericParser.Lex(); + + if(OpcodeToken.is(AsmToken::Identifier)) { + instName = OpcodeToken.getString(); + instLoc = OpcodeToken.getLoc(); + if (specificParser->ParseInstruction(instName, instLoc, operands)) + ret = -1; + } + else { + ret = -1; + } + + SmallVectorImpl::iterator oi; + + for(oi = operands.begin(); oi != operands.end(); ++oi) { + printf("Operand start %p, end %p\n", + (*oi)->getStartLoc().getPointer(), + (*oi)->getEndLoc().getPointer()); + } + + ParserMutex.acquire(); + + if (!ret) { + GenericAsmLexer->setBuffer(buf); + + while (SpecificAsmLexer->Lex(), + SpecificAsmLexer->isNot(AsmToken::Eof) && + SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) { + if (SpecificAsmLexer->is(AsmToken::Error)) { + ret = -1; + break; + } + tokens.push_back(SpecificAsmLexer->getTok()); + } + } + + ParserMutex.release(); + + return ret; +} + +int EDDisassembler::llvmSyntaxVariant() const { + return LLVMSyntaxVariant; +} diff --git a/tools/ed/EDDisassembler.h b/tools/ed/EDDisassembler.h new file mode 100644 index 00000000000..6be9152facb --- /dev/null +++ b/tools/ed/EDDisassembler.h @@ -0,0 +1,248 @@ +//===-EDDisassembler.h - LLVM Enhanced Disassembler -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// disassembler class. The disassembler is responsible for vending individual +// instructions according to a given architecture and disassembly syntax. +// +//===----------------------------------------------------------------------===// + +#ifndef EDDisassembler_ +#define EDDisassembler_ + +#include "EDInfo.inc" + +#include "llvm-c/EnhancedDisassembly.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" + +#include +#include +#include +#include + +namespace llvm { +class AsmLexer; +class AsmToken; +class MCContext; +class MCAsmInfo; +class MCAsmLexer; +class AsmParser; +class TargetAsmLexer; +class TargetAsmParser; +class MCDisassembler; +class MCInstPrinter; +class MCInst; +class MCParsedAsmOperand; +class MCStreamer; +template class SmallVectorImpl; +class SourceMgr; +class Target; +class TargetRegisterInfo; +} + +/// EDDisassembler - Encapsulates a disassembler for a single architecture and +/// disassembly syntax. Also manages the static disassembler registry. +struct EDDisassembler { + //////////////////// + // Static members // + //////////////////// + + /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax + /// pair + struct CPUKey { + /// The architecture type + llvm::Triple::ArchType Arch; + + /// The assembly syntax + EDAssemblySyntax_t Syntax; + + /// operator== - Equality operator + bool operator==(const CPUKey &key) const { + return (Arch == key.Arch && + Syntax == key.Syntax); + } + + /// operator< - Less-than operator + bool operator<(const CPUKey &key) const { + if(Arch > key.Arch) + return false; + if(Syntax >= key.Syntax) + return false; + return true; + } + }; + + typedef std::map DisassemblerMap_t; + + /// True if the disassembler registry has been initialized; false if not + static bool sInitialized; + /// A map from disassembler specifications to disassemblers. Populated + /// lazily. + static DisassemblerMap_t sDisassemblers; + + /// getDisassembler - Returns the specified disassemble, or NULL on failure + /// + /// @arg arch - The desired architecture + /// @arg syntax - The desired disassembly syntax + static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch, + EDAssemblySyntax_t syntax); + + /// getDisassembler - Returns the disassembler for a given combination of + /// CPU type, CPU subtype, and assembly syntax, or NULL on failure + /// + /// @arg str - The string representation of the architecture triple, e.g., + /// "x86_64-apple-darwin" + /// @arg syntax - The disassembly syntax for the required disassembler + static EDDisassembler *getDisassembler(llvm::StringRef str, + EDAssemblySyntax_t syntax); + + /// initialize - Initializes the disassembler registry and the LLVM backend + static void initialize(); + + //////////////////////// + // Per-object members // + //////////////////////// + + /// True only if the object has been fully and successfully initialized + bool Valid; + + /// The string that stores disassembler errors from the backend + std::string ErrorString; + /// The stream that wraps the ErrorString + llvm::raw_string_ostream ErrorStream; + + /// The architecture/syntax pair for the current architecture + CPUKey Key; + /// The LLVM target corresponding to the disassembler + const llvm::Target *Tgt; + /// The assembly information for the target architecture + llvm::OwningPtr AsmInfo; + /// The disassembler for the target architecture + llvm::OwningPtr Disassembler; + /// The output string for the instruction printer; must be guarded with + /// PrinterMutex + llvm::OwningPtr InstString; + /// The output stream for the disassembler; must be guarded with + /// PrinterMutex + llvm::OwningPtr InstStream; + /// The instruction printer for the target architecture; must be guarded with + /// PrinterMutex when printing + llvm::OwningPtr InstPrinter; + /// The mutex that guards the instruction printer's printing functions, which + /// use a shared stream + llvm::sys::Mutex PrinterMutex; + /// The array of instruction information provided by the TableGen backend for + /// the target architecture + const InstInfo *InstInfos; + /// The target-specific lexer for use in tokenizing strings, in + /// target-independent and target-specific portions + llvm::OwningPtr GenericAsmLexer; + llvm::OwningPtr SpecificAsmLexer; + /// The guard for the above + llvm::sys::Mutex ParserMutex; + /// The LLVM number used for the target disassembly syntax variant + int LLVMSyntaxVariant; + + typedef std::vector regvec_t; + typedef std::map regrmap_t; + + /// A vector of registers for quick mapping from LLVM register IDs to names + regvec_t RegVec; + /// A map of registers for quick mapping from register names to LLVM IDs + regrmap_t RegRMap; + + /// A set of register IDs for aliases of the stack pointer for the current + /// architecture + std::set stackPointers; + /// A set of register IDs for aliases of the program counter for the current + /// architecture + std::set programCounters; + + /// Constructor - initializes a disassembler with all the necessary objects, + /// which come pre-allocated from the registry accessor function + /// + /// @arg key - the architecture and disassembly syntax for the + /// disassembler + EDDisassembler(CPUKey& key); + + /// valid - reports whether there was a failure in the constructor. + bool valid() { + return Valid; + } + + ~EDDisassembler(); + + /// createInst - creates and returns an instruction given a callback and + /// memory address, or NULL on failure + /// + /// @arg byteReader - A callback function that provides machine code bytes + /// @arg address - The address of the first byte of the instruction, + /// suitable for passing to byteReader + /// @arg arg - An opaque argument for byteReader + EDInst *createInst(EDByteReaderCallback byteReader, + uint64_t address, + void *arg); + + /// initMaps - initializes regVec and regRMap using the provided register + /// info + /// + /// @arg registerInfo - the register information to use as a source + void initMaps(const llvm::TargetRegisterInfo ®isterInfo); + /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a + /// register for a given register ID, or NULL on failure + /// + /// @arg registerID - the ID of the register to be queried + const char *nameWithRegisterID(unsigned registerID) const; + /// registerIDWithName - Returns the ID of a register for a given register + /// name, or (unsigned)-1 on failure + /// + /// @arg name - The name of the register + unsigned registerIDWithName(const char *name) const; + + /// registerIsStackPointer - reports whether a register ID is an alias for the + /// stack pointer register + /// + /// @arg registerID - The LLVM register ID + bool registerIsStackPointer(unsigned registerID); + /// registerIsStackPointer - reports whether a register ID is an alias for the + /// stack pointer register + /// + /// @arg registerID - The LLVM register ID + bool registerIsProgramCounter(unsigned registerID); + + /// printInst - prints an MCInst to a string, returning 0 on success, or -1 + /// otherwise + /// + /// @arg str - A reference to a string which is filled in with the string + /// representation of the instruction + /// @arg inst - A reference to the MCInst to be printed + int printInst(std::string& str, + llvm::MCInst& inst); + + /// parseInst - extracts operands and tokens from a string for use in + /// tokenizing the string. Returns 0 on success, or -1 otherwise. + /// + /// @arg operands - A reference to a vector that will be filled in with the + /// parsed operands + /// @arg tokens - A reference to a vector that will be filled in with the + /// tokens + /// @arg str - The string representation of the instruction + int parseInst(llvm::SmallVectorImpl &operands, + llvm::SmallVectorImpl &tokens, + const std::string &str); + + /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler + int llvmSyntaxVariant() const; +}; + +#endif diff --git a/tools/ed/EDInst.cpp b/tools/ed/EDInst.cpp new file mode 100644 index 00000000000..9ed27002ad5 --- /dev/null +++ b/tools/ed/EDInst.cpp @@ -0,0 +1,205 @@ +//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's instruction class. +// The instruction is responsible for vending the string representation, +// individual tokens, and operands for a single instruction. +// +//===----------------------------------------------------------------------===// + +#include "EDDisassembler.h" +#include "EDInst.h" +#include "EDOperand.h" +#include "EDToken.h" + +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +EDInst::EDInst(llvm::MCInst *inst, + uint64_t byteSize, + EDDisassembler &disassembler, + const InstInfo *info) : + Disassembler(disassembler), + Inst(inst), + ThisInstInfo(info), + ByteSize(byteSize), + BranchTarget(-1), + MoveSource(-1), + MoveTarget(-1) { +} + +EDInst::~EDInst() { + unsigned int index; + unsigned int numOperands = Operands.size(); + + for (index = 0; index < numOperands; ++index) + delete Operands[index]; + + unsigned int numTokens = Tokens.size(); + + for (index = 0; index < numTokens; ++index) + delete Tokens[index]; + + delete Inst; +} + +uint64_t EDInst::byteSize() { + return ByteSize; +} + +int EDInst::stringify() { + if (StringifyResult.valid()) + return StringifyResult.result(); + + if (Disassembler.printInst(String, *Inst)) + return StringifyResult.setResult(-1); + + OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()]; + + return StringifyResult.setResult(0); +} + +int EDInst::getString(const char*& str) { + if (stringify()) + return -1; + + str = String.c_str(); + + return 0; +} + +unsigned EDInst::instID() { + return Inst->getOpcode(); +} + +bool EDInst::isBranch() { + if (ThisInstInfo) + return ThisInstInfo->instructionFlags & kInstructionFlagBranch; + else + return false; +} + +bool EDInst::isMove() { + if (ThisInstInfo) + return ThisInstInfo->instructionFlags & kInstructionFlagMove; + else + return false; +} + +int EDInst::parseOperands() { + if (ParseResult.valid()) + return ParseResult.result(); + + if (!ThisInstInfo) + return ParseResult.setResult(-1); + + unsigned int opIndex; + unsigned int mcOpIndex = 0; + + for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) { + if (isBranch() && + (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) { + BranchTarget = opIndex; + } + else if (isMove()) { + if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource) + MoveSource = opIndex; + else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget) + MoveTarget = opIndex; + } + + EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex); + + Operands.push_back(operand); + } + + return ParseResult.setResult(0); +} + +int EDInst::branchTargetID() { + if (parseOperands()) + return -1; + return BranchTarget; +} + +int EDInst::moveSourceID() { + if (parseOperands()) + return -1; + return MoveSource; +} + +int EDInst::moveTargetID() { + if (parseOperands()) + return -1; + return MoveTarget; +} + +int EDInst::numOperands() { + if (parseOperands()) + return -1; + return Operands.size(); +} + +int EDInst::getOperand(EDOperand *&operand, unsigned int index) { + if (parseOperands()) + return -1; + + if (index >= Operands.size()) + return -1; + + operand = Operands[index]; + return 0; +} + +int EDInst::tokenize() { + if (TokenizeResult.valid()) + return TokenizeResult.result(); + + if (stringify()) + return TokenizeResult.setResult(-1); + + return TokenizeResult.setResult(EDToken::tokenize(Tokens, + String, + OperandOrder, + Disassembler)); + +} + +int EDInst::numTokens() { + if (tokenize()) + return -1; + return Tokens.size(); +} + +int EDInst::getToken(EDToken *&token, unsigned int index) { + if (tokenize()) + return -1; + token = Tokens[index]; + return 0; +} + +#ifdef __BLOCKS__ +int EDInst::visitTokens(EDTokenVisitor_t visitor) { + if (tokenize()) + return -1; + + tokvec_t::iterator iter; + + for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) { + int ret = visitor(*iter); + if (ret == 1) + return 0; + if (ret != 0) + return -1; + } + + return 0; +} +#endif diff --git a/tools/ed/EDInst.h b/tools/ed/EDInst.h new file mode 100644 index 00000000000..db03a7852ee --- /dev/null +++ b/tools/ed/EDInst.h @@ -0,0 +1,171 @@ +//===-EDInst.h - LLVM Enhanced Disassembler ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// instruction class. The instruction is responsible for vending the string +// representation, individual tokens and operands for a single instruction. +// +//===----------------------------------------------------------------------===// + +#ifndef EDInst_ +#define EDInst_ + +#include "llvm-c/EnhancedDisassembly.h" + +#include "llvm/ADT/SmallVector.h" + +#include +#include + +/// CachedResult - Encapsulates the result of a function along with the validity +/// of that result, so that slow functions don't need to run twice +struct CachedResult { + /// True if the result has been obtained by executing the function + bool Valid; + /// The result last obtained from the function + int Result; + + /// Constructor - Initializes an invalid result + CachedResult() : Valid(false) { } + /// valid - Returns true if the result has been obtained by executing the + /// function and false otherwise + bool valid() { return Valid; } + /// result - Returns the result of the function or an undefined value if + /// valid() is false + int result() { return Result; } + /// setResult - Sets the result of the function and declares it valid + /// returning the result (so that setResult() can be called from inside a + /// return statement) + /// @arg result - The result of the function + int setResult(int result) { Result = result; Valid = true; return result; } +}; + +/// EDInst - Encapsulates a single instruction, which can be queried for its +/// string representation, as well as its operands and tokens +struct EDInst { + /// The parent disassembler + EDDisassembler &Disassembler; + /// The containing MCInst + llvm::MCInst *Inst; + /// The instruction information provided by TableGen for this instruction + const InstInfo *ThisInstInfo; + /// The number of bytes for the machine code representation of the instruction + uint64_t ByteSize; + + /// The result of the stringify() function + CachedResult StringifyResult; + /// The string representation of the instruction + std::string String; + /// The order in which operands from the InstInfo's operand information appear + /// in String + const char* OperandOrder; + + /// The result of the parseOperands() function + CachedResult ParseResult; + typedef llvm::SmallVector opvec_t; + /// The instruction's operands + opvec_t Operands; + /// The operand corresponding to the target, if the instruction is a branch + int BranchTarget; + /// The operand corresponding to the source, if the instruction is a move + int MoveSource; + /// The operand corresponding to the target, if the instruction is a move + int MoveTarget; + + /// The result of the tokenize() function + CachedResult TokenizeResult; + typedef std::vector tokvec_t; + /// The instruction's tokens + tokvec_t Tokens; + + /// Constructor - initializes an instruction given the output of the LLVM + /// C++ disassembler + /// + /// @arg inst - The MCInst, which will now be owned by this object + /// @arg byteSize - The size of the consumed instruction, in bytes + /// @arg disassembler - The parent disassembler + /// @arg instInfo - The instruction information produced by the table + /// generator for this instruction + EDInst(llvm::MCInst *inst, + uint64_t byteSize, + EDDisassembler &disassembler, + const InstInfo *instInfo); + ~EDInst(); + + /// byteSize - returns the number of bytes consumed by the machine code + /// representation of the instruction + uint64_t byteSize(); + /// instID - returns the LLVM instruction ID of the instruction + unsigned instID(); + + /// stringify - populates the String and AsmString members of the instruction, + /// returning 0 on success or -1 otherwise + int stringify(); + /// getString - retrieves a pointer to the string representation of the + /// instructinon, returning 0 on success or -1 otherwise + /// + /// @arg str - A reference to a pointer that, on success, is set to point to + /// the string representation of the instruction; this string is still owned + /// by the instruction and will be deleted when it is + int getString(const char *&str); + + /// isBranch - Returns true if the instruction is a branch + bool isBranch(); + /// isMove - Returns true if the instruction is a move + bool isMove(); + + /// parseOperands - populates the Operands member of the instruction, + /// returning 0 on success or -1 otherwise + int parseOperands(); + /// branchTargetID - returns the ID (suitable for use with getOperand()) of + /// the target operand if the instruction is a branch, or -1 otherwise + int branchTargetID(); + /// moveSourceID - returns the ID of the source operand if the instruction + /// is a move, or -1 otherwise + int moveSourceID(); + /// moveTargetID - returns the ID of the target operand if the instruction + /// is a move, or -1 otherwise + int moveTargetID(); + + /// numOperands - returns the number of operands available to retrieve, or -1 + /// on error + int numOperands(); + /// getOperand - retrieves an operand from the instruction's operand list by + /// index, returning 0 on success or -1 on error + /// + /// @arg operand - A reference whose target is pointed at the operand on + /// success, although the operand is still owned by the EDInst + /// @arg index - The index of the operand in the instruction + int getOperand(EDOperand *&operand, unsigned int index); + + /// tokenize - populates the Tokens member of the instruction, returning 0 on + /// success or -1 otherwise + int tokenize(); + /// numTokens - returns the number of tokens in the instruction, or -1 on + /// error + int numTokens(); + /// getToken - retrieves a token from the instruction's token list by index, + /// returning 0 on success or -1 on error + /// + /// @arg token - A reference whose target is pointed at the token on success, + /// although the token is still owned by the EDInst + /// @arg index - The index of the token in the instrcutino + int getToken(EDToken *&token, unsigned int index); + +#ifdef __BLOCKS__ + /// visitTokens - Visits each token in turn and applies a block to it, + /// returning 0 if all blocks are visited and/or the block signals + /// termination by returning 1; returns -1 on error + /// + /// @arg visitor - The visitor block to apply to all tokens. + int visitTokens(EDTokenVisitor_t visitor); +#endif +}; + +#endif diff --git a/tools/ed/EDMain.cpp b/tools/ed/EDMain.cpp index 69285f343a0..2ac6c79296a 100644 --- a/tools/ed/EDMain.cpp +++ b/tools/ed/EDMain.cpp @@ -11,10 +11,241 @@ // //===----------------------------------------------------------------------===// -#include +#include "EDDisassembler.h" +#include "EDInst.h" +#include "EDOperand.h" +#include "EDToken.h" + +#include "llvm-c/EnhancedDisassembly.h" int EDGetDisassembler(EDDisassemblerRef *disassembler, const char *triple, EDAssemblySyntax_t syntax) { - return -1; + EDDisassembler::initialize(); + + EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, + syntax); + + if (ret) { + *disassembler = ret; + return 0; + } + else { + return -1; + } +} + +int EDGetRegisterName(const char** regName, + EDDisassemblerRef disassembler, + unsigned regID) { + const char* name = disassembler->nameWithRegisterID(regID); + if(!name) + return -1; + *regName = name; + return 0; +} + +int EDRegisterIsStackPointer(EDDisassemblerRef disassembler, + unsigned regID) { + return disassembler->registerIsStackPointer(regID) ? 1 : 0; +} + +int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler, + unsigned regID) { + return disassembler->registerIsProgramCounter(regID) ? 1 : 0; +} + +unsigned int EDCreateInsts(EDInstRef *insts, + unsigned int count, + EDDisassemblerRef disassembler, + EDByteReaderCallback byteReader, + uint64_t address, + void *arg) { + unsigned int index; + + for (index = 0; index < count; index++) { + EDInst *inst = disassembler->createInst(byteReader, address, arg); + + if(!inst) + return index; + + insts[index] = inst; + address += inst->byteSize(); + } + + return count; +} + +void EDReleaseInst(EDInstRef inst) { + delete inst; +} + +int EDInstByteSize(EDInstRef inst) { + return inst->byteSize(); +} + +int EDGetInstString(const char **buf, + EDInstRef inst) { + return inst->getString(*buf); +} + +int EDInstID(unsigned *instID, EDInstRef inst) { + *instID = inst->instID(); + return 0; +} + +int EDInstIsBranch(EDInstRef inst) { + return inst->isBranch(); +} + +int EDInstIsMove(EDInstRef inst) { + return inst->isMove(); +} + +int EDBranchTargetID(EDInstRef inst) { + return inst->branchTargetID(); +} + +int EDMoveSourceID(EDInstRef inst) { + return inst->moveSourceID(); +} + +int EDMoveTargetID(EDInstRef inst) { + return inst->moveTargetID(); +} + +int EDNumTokens(EDInstRef inst) { + return inst->numTokens(); +} + +int EDGetToken(EDTokenRef *token, + EDInstRef inst, + int index) { + return inst->getToken(*token, index); +} + +int EDGetTokenString(const char **buf, + EDTokenRef token) { + return token->getString(*buf); +} + +int EDOperandIndexForToken(EDTokenRef token) { + return token->operandID(); +} + +int EDTokenIsWhitespace(EDTokenRef token) { + if(token->type() == EDToken::kTokenWhitespace) + return 1; + else + return 0; +} + +int EDTokenIsPunctuation(EDTokenRef token) { + if(token->type() == EDToken::kTokenPunctuation) + return 1; + else + return 0; +} + +int EDTokenIsOpcode(EDTokenRef token) { + if(token->type() == EDToken::kTokenOpcode) + return 1; + else + return 0; +} + +int EDTokenIsLiteral(EDTokenRef token) { + if(token->type() == EDToken::kTokenLiteral) + return 1; + else + return 0; +} + +int EDTokenIsRegister(EDTokenRef token) { + if(token->type() == EDToken::kTokenRegister) + return 1; + else + return 0; +} + +int EDTokenIsNegativeLiteral(EDTokenRef token) { + if(token->type() != EDToken::kTokenLiteral) + return -1; + + return token->literalSign(); +} + +int EDLiteralTokenAbsoluteValue(uint64_t *value, + EDTokenRef token) { + if(token->type() != EDToken::kTokenLiteral) + return -1; + + return token->literalAbsoluteValue(*value); +} + +int EDRegisterTokenValue(unsigned *registerID, + EDTokenRef token) { + if(token->type() != EDToken::kTokenRegister) + return -1; + + return token->registerID(*registerID); } + +int EDNumOperands(EDInstRef inst) { + return inst->numOperands(); +} + +int EDGetOperand(EDOperandRef *operand, + EDInstRef inst, + int index) { + return inst->getOperand(*operand, index); +} + +int EDEvaluateOperand(uint64_t *result, + EDOperandRef operand, + EDRegisterReaderCallback regReader, + void *arg) { + return operand->evaluate(*result, regReader, arg); +} + +#ifdef __BLOCKS__ + +struct ByteReaderWrapper { + EDByteBlock_t byteBlock; +}; + +static int readerWrapperCallback(uint8_t *byte, + uint64_t address, + void *arg) { + struct ByteReaderWrapper *wrapper = (struct ByteReaderWrapper *)arg; + return wrapper->byteBlock(byte, address); +} + +unsigned int EDBlockCreateInsts(EDInstRef *insts, + int count, + EDDisassemblerRef disassembler, + EDByteBlock_t byteBlock, + uint64_t address) { + struct ByteReaderWrapper wrapper; + wrapper.byteBlock = byteBlock; + + return EDCreateInsts(insts, + count, + disassembler, + readerWrapperCallback, + address, + (void*)&wrapper); +} + +int EDBlockEvaluateOperand(uint64_t *result, + EDOperandRef operand, + EDRegisterBlock_t regBlock) { + return operand->evaluate(*result, regBlock); +} + +int EDBlockVisitTokens(EDInstRef inst, + EDTokenVisitor_t visitor) { + return inst->visitTokens(visitor); +} + +#endif diff --git a/tools/ed/EDOperand.cpp b/tools/ed/EDOperand.cpp new file mode 100644 index 00000000000..c15860affda --- /dev/null +++ b/tools/ed/EDOperand.cpp @@ -0,0 +1,148 @@ +//===-EDOperand.cpp - LLVM Enhanced Disassembler --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's operand class. The +// operand is responsible for allowing evaluation given a particular register +// context. +// +//===----------------------------------------------------------------------===// + +#include "EDDisassembler.h" +#include "EDInst.h" +#include "EDOperand.h" + +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +EDOperand::EDOperand(const EDDisassembler &disassembler, + const EDInst &inst, + unsigned int opIndex, + unsigned int &mcOpIndex) : + Disassembler(disassembler), + Inst(inst), + OpIndex(opIndex), + MCOpIndex(mcOpIndex) { + unsigned int numMCOperands = 0; + + if(Disassembler.Key.Arch == Triple::x86 || + Disassembler.Key.Arch == Triple::x86_64) { + uint8_t operandFlags = inst.ThisInstInfo->operandFlags[opIndex]; + + if (operandFlags & kOperandFlagImmediate) { + numMCOperands = 1; + } + else if (operandFlags & kOperandFlagRegister) { + numMCOperands = 1; + } + else if (operandFlags & kOperandFlagMemory) { + if (operandFlags & kOperandFlagPCRelative) { + numMCOperands = 1; + } + else { + numMCOperands = 5; + } + } + else if (operandFlags & kOperandFlagEffectiveAddress) { + numMCOperands = 4; + } + } + + mcOpIndex += numMCOperands; +} + +EDOperand::~EDOperand() { +} + +int EDOperand::evaluate(uint64_t &result, + EDRegisterReaderCallback callback, + void *arg) { + if (Disassembler.Key.Arch == Triple::x86 || + Disassembler.Key.Arch == Triple::x86_64) { + uint8_t operandFlags = Inst.ThisInstInfo->operandFlags[OpIndex]; + + if (operandFlags & kOperandFlagImmediate) { + result = Inst.Inst->getOperand(MCOpIndex).getImm(); + return 0; + } + if (operandFlags & kOperandFlagRegister) { + unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); + return callback(&result, reg, arg); + } + if (operandFlags & kOperandFlagMemory || + operandFlags & kOperandFlagEffectiveAddress){ + if(operandFlags & kOperandFlagPCRelative) { + int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); + + uint64_t ripVal; + + // TODO fix how we do this + + if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg)) + return -1; + + result = ripVal + displacement; + return 0; + } + else { + unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg(); + uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm(); + unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg(); + int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm(); + //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg(); + + uint64_t addr = 0; + + if(baseReg) { + uint64_t baseVal; + if (callback(&baseVal, baseReg, arg)) + return -1; + addr += baseVal; + } + + if(indexReg) { + uint64_t indexVal; + if (callback(&indexVal, indexReg, arg)) + return -1; + addr += (scaleAmount * indexVal); + } + + addr += displacement; + + result = addr; + return 0; + } + } + return -1; + } + + return -1; +} + +#ifdef __BLOCKS__ +struct RegisterReaderWrapper { + EDRegisterBlock_t regBlock; +}; + +int readerWrapperCallback(uint64_t *value, + unsigned regID, + void *arg) { + struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg; + return wrapper->regBlock(value, regID); +} + +int EDOperand::evaluate(uint64_t &result, + EDRegisterBlock_t regBlock) { + struct RegisterReaderWrapper wrapper; + wrapper.regBlock = regBlock; + return evaluate(result, + readerWrapperCallback, + (void*)&wrapper); +} +#endif diff --git a/tools/ed/EDOperand.h b/tools/ed/EDOperand.h new file mode 100644 index 00000000000..32d3a5ef83d --- /dev/null +++ b/tools/ed/EDOperand.h @@ -0,0 +1,65 @@ +//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// operand class. The operand is responsible for allowing evaluation given a +// particular register context. +// +//===----------------------------------------------------------------------===// + +#ifndef EDOperand_ +#define EDOperand_ + +#include "llvm-c/EnhancedDisassembly.h" + +/// EDOperand - Encapsulates a single operand, which can be evaluated by the +/// client +struct EDOperand { + /// The parent disassembler + const EDDisassembler &Disassembler; + /// The parent instruction + const EDInst &Inst; + + /// The index of the operand in the EDInst + unsigned int OpIndex; + /// The index of the first component of the operand in the MCInst + unsigned int MCOpIndex; + + /// Constructor - Initializes an EDOperand + /// + /// @arg disassembler - The disassembler responsible for the operand + /// @arg inst - The instruction containing this operand + /// @arg opIndex - The index of the operand in inst + /// @arg mcOpIndex - The index of the operand in the original MCInst + EDOperand(const EDDisassembler &disassembler, + const EDInst &inst, + unsigned int opIndex, + unsigned int &mcOpIndex); + ~EDOperand(); + + /// evaluate - Returns the numeric value of an operand to the extent possible, + /// returning 0 on success or -1 if there was some problem (such as a + /// register not being readable) + /// + /// @arg result - A reference whose target is filled in with the value of + /// the operand (the address if it is a memory operand) + /// @arg callback - A function to call to obtain register values + /// @arg arg - An opaque argument to pass to callback + int evaluate(uint64_t &result, + EDRegisterReaderCallback callback, + void *arg); + +#ifdef __BLOCKS__ + /// evaluate - Like evaluate for a callback, but uses a block instead + int evaluate(uint64_t &result, + EDRegisterBlock_t regBlock); +#endif +}; + +#endif diff --git a/tools/ed/EDToken.cpp b/tools/ed/EDToken.cpp new file mode 100644 index 00000000000..6b67e624e8c --- /dev/null +++ b/tools/ed/EDToken.cpp @@ -0,0 +1,185 @@ +//===-EDToken.cpp - LLVM Enhanced Disassembler ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembler library's token class. The +// token is responsible for vending information about the token, such as its +// type and logical value. +// +//===----------------------------------------------------------------------===// + +#include "EDDisassembler.h" +#include "EDToken.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" + +using namespace llvm; + +EDToken::EDToken(StringRef str, + enum tokenType type, + uint64_t localType, + EDDisassembler &disassembler) : + Disassembler(disassembler), + Str(str), + Type(type), + LocalType(localType), + OperandID(-1) { +} + +EDToken::~EDToken() { +} + +void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) { + Type = kTokenLiteral; + LiteralSign = sign; + LiteralAbsoluteValue = absoluteValue; +} + +void EDToken::makeRegister(unsigned registerID) { + Type = kTokenRegister; + RegisterID = registerID; +} + +void EDToken::setOperandID(int operandID) { + OperandID = operandID; +} + +enum EDToken::tokenType EDToken::type() const { + return Type; +} + +uint64_t EDToken::localType() const { + return LocalType; +} + +StringRef EDToken::string() const { + return Str; +} + +int EDToken::operandID() const { + return OperandID; +} + +int EDToken::literalSign() const { + if(Type != kTokenLiteral) + return -1; + return (LiteralSign ? 1 : 0); +} + +int EDToken::literalAbsoluteValue(uint64_t &value) const { + if(Type != kTokenLiteral) + return -1; + value = LiteralAbsoluteValue; + return 0; +} + +int EDToken::registerID(unsigned ®isterID) const { + if(Type != kTokenRegister) + return -1; + registerID = RegisterID; + return 0; +} + +int EDToken::tokenize(std::vector &tokens, + std::string &str, + const char *operandOrder, + EDDisassembler &disassembler) { + SmallVector parsedOperands; + SmallVector asmTokens; + + disassembler.parseInst(parsedOperands, asmTokens, str); + + SmallVectorImpl::iterator operandIterator; + unsigned int operandIndex; + SmallVectorImpl::iterator tokenIterator; + + operandIterator = parsedOperands.begin(); + operandIndex = 0; + + bool readOpcode = false; + + for (tokenIterator = asmTokens.begin(); + tokenIterator != asmTokens.end(); + ++tokenIterator) { + SMLoc tokenLoc = tokenIterator->getLoc(); + + while (operandIterator != parsedOperands.end() && + tokenLoc.getPointer() > + (*operandIterator)->getEndLoc().getPointer()) { + ++operandIterator; + ++operandIndex; + } + + EDToken *token; + + switch (tokenIterator->getKind()) { + case AsmToken::Identifier: + if (!readOpcode) { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenOpcode, + (uint64_t)tokenIterator->getKind(), + disassembler); + readOpcode = true; + break; + } + // any identifier that isn't an opcode is mere punctuation; so we fall + // through + default: + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenPunctuation, + (uint64_t)tokenIterator->getKind(), + disassembler); + break; + case AsmToken::Integer: + { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenLiteral, + (uint64_t)tokenIterator->getKind(), + disassembler); + + int64_t intVal = tokenIterator->getIntVal(); + + if(intVal < 0) + token->makeLiteral(true, -intVal); + else + token->makeLiteral(false, intVal); + break; + } + case AsmToken::Register: + { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenLiteral, + (uint64_t)tokenIterator->getKind(), + disassembler); + + token->makeRegister((unsigned)tokenIterator->getRegVal()); + break; + } + } + + if(operandIterator != parsedOperands.end() && + tokenLoc.getPointer() >= + (*operandIterator)->getStartLoc().getPointer()) { + token->setOperandID(operandOrder[operandIndex]); + } + + tokens.push_back(token); + } + + return 0; +} + +int EDToken::getString(const char*& buf) { + if(PermStr.length() == 0) { + PermStr = Str.str(); + } + buf = PermStr.c_str(); + return 0; +} diff --git a/tools/ed/EDToken.h b/tools/ed/EDToken.h new file mode 100644 index 00000000000..e4ae91f7ec3 --- /dev/null +++ b/tools/ed/EDToken.h @@ -0,0 +1,135 @@ +//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's token +// class. The token is responsible for vending information about the token, +// such as its type and logical value. +// +//===----------------------------------------------------------------------===// + +#ifndef EDToken_ +#define EDToken_ + +#include "llvm-c/EnhancedDisassembly.h" +#include "llvm/ADT/StringRef.h" + +#include +#include + +/// EDToken - Encapsulates a single token, which can provide a string +/// representation of itself or interpret itself in various ways, depending +/// on the token type. +struct EDToken { + enum tokenType { + kTokenWhitespace, + kTokenOpcode, + kTokenLiteral, + kTokenRegister, + kTokenPunctuation + }; + + /// The parent disassembler + EDDisassembler &Disassembler; + + /// The token's string representation + llvm::StringRef Str; + /// The token's string representation, but in a form suitable for export + std::string PermStr; + /// The type of the token, as exposed through the external API + enum tokenType Type; + /// The type of the token, as recorded by the syntax-specific tokenizer + uint64_t LocalType; + /// The operand corresponding to the token, or (unsigned int)-1 if not + /// part of an operand. + int OperandID; + + /// The sign if the token is a literal (1 if negative, 0 otherwise) + bool LiteralSign; + /// The absolute value if the token is a literal + uint64_t LiteralAbsoluteValue; + /// The LLVM register ID if the token is a register name + unsigned RegisterID; + + /// Constructor - Initializes an EDToken with the information common to all + /// tokens + /// + /// @arg str - The string corresponding to the token + /// @arg type - The token's type as exposed through the public API + /// @arg localType - The token's type as recorded by the tokenizer + /// @arg disassembler - The disassembler responsible for the token + EDToken(llvm::StringRef str, + enum tokenType type, + uint64_t localType, + EDDisassembler &disassembler); + + /// makeLiteral - Adds the information specific to a literal + /// @arg sign - The sign of the literal (1 if negative, 0 + /// otherwise) + /// + /// @arg absoluteValue - The absolute value of the literal + void makeLiteral(bool sign, uint64_t absoluteValue); + /// makeRegister - Adds the information specific to a register + /// + /// @arg registerID - The LLVM register ID + void makeRegister(unsigned registerID); + + /// setOperandID - Links the token to a numbered operand + /// + /// @arg operandID - The operand ID to link to + void setOperandID(int operandID); + + ~EDToken(); + + /// type - Returns the public type of the token + enum tokenType type() const; + /// localType - Returns the tokenizer-specific type of the token + uint64_t localType() const; + /// string - Returns the string representation of the token + llvm::StringRef string() const; + /// operandID - Returns the operand ID of the token + int operandID() const; + + /// literalSign - Returns the sign of the token + /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal) + int literalSign() const; + /// literalAbsoluteValue - Retrieves the absolute value of the token, and + /// returns -1 if the token is not a literal + /// @arg value - A reference to a value that is filled in with the absolute + /// value, if it is valid + int literalAbsoluteValue(uint64_t &value) const; + /// registerID - Retrieves the register ID of the token, and returns -1 if the + /// token is not a register + /// + /// @arg registerID - A reference to a value that is filled in with the + /// register ID, if it is valid + int registerID(unsigned ®isterID) const; + + /// tokenize - Tokenizes a string using the platform- and syntax-specific + /// tokenizer, and returns 0 on success (-1 on failure) + /// + /// @arg tokens - A vector that will be filled in with pointers to + /// allocated tokens + /// @arg str - The string, as outputted by the AsmPrinter + /// @arg operandOrder - The order of the operands from the operandFlags array + /// as they appear in str + /// @arg disassembler - The disassembler for the desired target and + // assembly syntax + static int tokenize(std::vector &tokens, + std::string &str, + const char *operandOrder, + EDDisassembler &disassembler); + + /// getString - Directs a character pointer to the string, returning 0 on + /// success (-1 on failure) + /// @arg buf - A reference to a pointer that is set to point to the string. + /// The string is still owned by the token. + int getString(const char*& buf); +}; + +#endif diff --git a/tools/ed/EnhancedDisassembly.exports b/tools/ed/EnhancedDisassembly.exports index 7c9da030d05..63738e53665 100644 --- a/tools/ed/EnhancedDisassembly.exports +++ b/tools/ed/EnhancedDisassembly.exports @@ -1 +1,31 @@ _EDGetDisassembler +_EDGetRegisterName +_EDRegisterIsStackPointer +_EDRegisterIsProgramCounter +_EDCreateInsts +_EDReleaseInst +_EDInstByteSize +_EDGetInstString +_EDInstIsBranch +_EDInstIsMove +_EDBranchTargetID +_EDMoveSourceID +_EDMoveTargetID +_EDNumTokens +_EDGetToken +_EDGetTokenString +_EDOperandIndexForToken +_EDTokenIsWhitespace +_EDTokenIsPunctuation +_EDTokenIsOpcode +_EDTokenIsLiteral +_EDTokenIsRegister +_EDTokenIsNegativeLiteral +_EDLiteralTokenAbsoluteValue +_EDRegisterTokenValue +_EDNumOperands +_EDGetOperand +_EDEvaluateOperand +_EDBlockCreateInsts +_EDBlockEvaluateOperand +_EDBlockVisitTokens -- 2.34.1