1 //===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the interface for the Enhanced Disassembly library's
11 // disassembler class. The disassembler is responsible for vending individual
12 // instructions according to a given architecture and disassembly syntax.
14 //===----------------------------------------------------------------------===//
16 #ifndef LLVM_EDDISASSEMBLER_H
17 #define LLVM_EDDISASSEMBLER_H
21 #include "llvm/ADT/OwningPtr.h"
22 #include "llvm/ADT/Triple.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include "llvm/Support/Mutex.h"
// Forward declarations of types that are defined elsewhere; only their names
// are needed by the declarations in this header.
40 class MCParsedAsmOperand;
43 class MCSubtargetInfo;
44 template <typename T> class SmallVectorImpl;
48 class TargetAsmParser;
50 class TargetRegisterInfo;
/// EDByteReaderCallback - Function-pointer type of the byte reader that is
/// handed to EDDisassembler::createInst to provide machine code bytes.
///
/// @arg byte - Pointer to a single byte (presumably filled in by the
///             callback; confirm against the callback implementations)
/// @arg address - The address of the byte being requested
/// @arg arg - An opaque argument passed through to the callback
/// NOTE(review): the meaning of the int return value is not stated in this
/// header.
57 typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
59 /// EDDisassembler - Encapsulates a disassembler for a single architecture and
60 /// disassembly syntax. Also manages the static disassembler registry.
61 struct EDDisassembler {
63 /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
64 kEDAssemblySyntaxX86Intel = 0,
65 /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
66 kEDAssemblySyntaxX86ATT = 1,
/*! @constant kEDAssemblySyntaxARMUAL Syntax for ARM (presumably the Unified
    Assembler Language, going by the name; confirm). */
67 kEDAssemblySyntaxARMUAL = 2
75 /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
/// pair. It is the key type of DisassemblerMap_t.
78 /// The architecture type
79 llvm::Triple::ArchType Arch;
81 /// The assembly syntax paired with Arch
82 AssemblySyntax Syntax;
84 /// operator== - Equality operator; two keys compare equal only when both the
/// architecture and the syntax match
85 bool operator==(const CPUKey &key) const {
86 return (Arch == key.Arch &&
87 Syntax == key.Syntax);
90 /// operator< - Less-than operator; orders keys by Arch first and breaks ties
/// on Syntax, so that CPUKey can serve as the key of the std::map behind
/// DisassemblerMap_t
91 bool operator<(const CPUKey &key) const {
92 return ((Arch < key.Arch) ||
93 ((Arch == key.Arch) && Syntax < (key.Syntax)));
/// The registry type: maps an architecture/syntax key to its disassembler
97 typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
99 /// True if the disassembler registry has been initialized; false if not
100 static bool sInitialized;
101 /// A map from disassembler specifications (CPUKey) to disassemblers.
/// NOTE(review): the original comment went on to say when the map is
/// populated, but that text is missing here; confirm and restore it.
103 static DisassemblerMap_t sDisassemblers;
105 /// getDisassembler - Returns the specified disassembler, or NULL on failure
///
107 /// @arg arch - The desired architecture
108 /// @arg syntax - The desired disassembly syntax
109 static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
110 AssemblySyntax syntax);
112 /// getDisassembler - Returns the disassembler for a given combination of
113 /// architecture triple string and assembly syntax, or NULL on failure
///
115 /// @arg str - The string representation of the architecture triple, e.g.,
116 /// "x86_64-apple-darwin"
117 /// @arg syntax - The disassembly syntax for the required disassembler
118 static EDDisassembler *getDisassembler(llvm::StringRef str,
119 AssemblySyntax syntax);
121 /// initialize - Initializes the disassembler registry and the LLVM backend
/// NOTE(review): presumably this is what sets sInitialized; confirm in the
/// implementation file.
122 static void initialize();
124 ////////////////////////
125 // Per-object members //
126 ////////////////////////
128 /// True only if the object has been successfully initialized
130 /// True if the disassembler can provide semantic information
133 /// The stream to write errors to (held by reference)
134 llvm::raw_ostream &ErrorStream;
136 /// The architecture/syntax pair for the current architecture
138 /// The LLVM target corresponding to the disassembler
139 const llvm::Target *Tgt;
140 /// The target machine instance
141 llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
142 /// The assembly information for the target architecture
143 llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
144 /// The register information for the target architecture
145 llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
146 /// The disassembler for the target architecture
147 llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
148 /// The output string for the instruction printer; must be guarded with
/// PrinterMutex (NOTE(review): the guard name is missing from the original
/// comment and is taken from the InstPrinter comment below; confirm)
150 llvm::OwningPtr<std::string> InstString;
151 /// The output stream for the disassembler; must be guarded with
/// PrinterMutex (NOTE(review): guard name restored as for InstString; confirm)
153 llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
154 /// The instruction printer for the target architecture; must be guarded with
155 /// PrinterMutex when printing
156 llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
157 /// The mutex that guards the instruction printer's printing functions, which
158 /// use a shared stream
159 llvm::sys::Mutex PrinterMutex;
160 /// The array of instruction information provided by the TableGen backend for
161 /// the target architecture
162 const llvm::EDInstInfo *InstInfos;
163 /// The target-specific lexer for use in tokenizing strings, in
164 /// target-independent and target-specific portions
/// (presumably GenericAsmLexer is the target-independent part and
/// SpecificAsmLexer the target-specific part, going by the names; confirm)
165 llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
166 llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
167 /// The guard for the lexers above
168 llvm::sys::Mutex ParserMutex;
169 /// The LLVM number used for the target disassembly syntax variant
170 int LLVMSyntaxVariant;
/// Container types for the register name tables: a vector of names and a map
/// from name to unsigned ID
172 typedef std::vector<std::string> regvec_t;
173 typedef std::map<std::string, unsigned> regrmap_t;
175 /// A vector of registers for quick mapping from LLVM register IDs to names
177 /// A map of registers for quick mapping from register names to LLVM IDs
180 /// A set of register IDs for aliases of the stack pointer for the current
/// architecture
182 std::set<unsigned> stackPointers;
183 /// A set of register IDs for aliases of the program counter for the current
/// architecture
185 std::set<unsigned> programCounters;
187 /// Constructor - initializes a disassembler with all the necessary objects,
188 /// which come pre-allocated from the registry accessor function
///
190 /// @arg key - the architecture and disassembly syntax for the
/// disassembler being constructed
192 EDDisassembler(CPUKey& key);
194 /// valid - reports whether there was a failure in the constructor.
199 /// hasSemantics - reports whether the disassembler can provide operands and
/// other semantic information
201 bool hasSemantics() {
207 /// createInst - creates and returns an instruction given a callback and
208 /// memory address, or NULL on failure
///
210 /// @arg byteReader - A callback function that provides machine code bytes
211 /// @arg address - The address of the first byte of the instruction,
212 /// suitable for passing to byteReader
213 /// @arg arg - An opaque argument for byteReader
/// NOTE(review): who owns the returned EDInst is not stated here; confirm.
214 EDInst *createInst(EDByteReaderCallback byteReader,
218 /// initMaps - initializes regVec and regRMap using the provided register
/// information
///
221 /// @arg registerInfo - the register information to use as a source
222 void initMaps(const llvm::TargetRegisterInfo &registerInfo);
223 /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
224 /// register for a given register ID, or NULL on failure. Because the
/// EDDisassembler owns the string, the caller must not free it.
///
226 /// @arg registerID - the ID of the register to be queried
227 const char *nameWithRegisterID(unsigned registerID) const;
228 /// registerIDWithName - Returns the ID of a register for a given register
229 /// name, or (unsigned)-1 on failure
///
231 /// @arg name - The name of the register
232 unsigned registerIDWithName(const char *name) const;
234 /// registerIsStackPointer - reports whether a register ID is an alias for the
235 /// stack pointer register
///
237 /// @arg registerID - The LLVM register ID
238 bool registerIsStackPointer(unsigned registerID);
239 /// registerIsProgramCounter - reports whether a register ID is an alias for
240 /// the program counter register
///
242 /// @arg registerID - The LLVM register ID
243 bool registerIsProgramCounter(unsigned registerID);
245 /// printInst - prints an MCInst to a string, returning 0 on success, or -1
/// otherwise
///
248 /// @arg str - A reference to a string which is filled in with the string
249 /// representation of the instruction
250 /// @arg inst - A reference to the MCInst to be printed
251 int printInst(std::string& str,
254 /// parseInst - extracts operands and tokens from a string for use in
255 /// tokenizing the string. Returns 0 on success, or -1 otherwise.
///
257 /// @arg operands - A reference to a vector that will be filled in with the
/// operands extracted from str
259 /// @arg tokens - A reference to a vector that will be filled in with the
/// tokens extracted from str
261 /// @arg str - The string representation of the instruction
262 int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
263 llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
264 const std::string &str);
266 /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
/// (presumably the value stored in LLVMSyntaxVariant; confirm in the
/// implementation file)
267 int llvmSyntaxVariant() const;
270 } // end namespace llvm