X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FDisassembler%2FX86DisassemblerDecoder.h;h=457b3820b461c060b8792cc2077dd5240acb8ff2;hp=a9c90f8f9bda12b3d8d80aef08e9415b0168fbae;hb=00e08fcaa02286dd7da9cf9a8d158545532ab832;hpb=7a2bdde0a0eebcd2125055e0eacaca040f0b766c diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index a9c90f8f9bd..457b3820b46 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -1,39 +1,28 @@ -/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==* - * - * The LLVM Compiler Infrastructure - * - * This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===* - * - * This file is part of the X86 Disassembler. - * It contains the public interface of the instruction decoder. - * Documentation for the disassembler can be found in X86Disassembler.h. - * - *===----------------------------------------------------------------------===*/ - -#ifndef X86DISASSEMBLERDECODER_H -#define X86DISASSEMBLERDECODER_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define INSTRUCTION_SPECIFIER_FIELDS \ - const char* name; - -#define INSTRUCTION_IDS \ - const InstrUID *instructionIDs; +//===-- X86DisassemblerDecoderInternal.h - Disassembler decoder -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler. +// It contains the public interface of the instruction decoder. +// Documentation for the disassembler can be found in X86Disassembler.h. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H +#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H #include "X86DisassemblerDecoderCommon.h" - -#undef INSTRUCTION_SPECIFIER_FIELDS -#undef INSTRUCTION_IDS - -/* - * Accessor functions for various fields of an Intel instruction - */ +#include "llvm/ADT/ArrayRef.h" + +namespace llvm { +namespace X86Disassembler { + +// Accessor functions for various fields of an Intel instruction #define modFromModRM(modRM) (((modRM) & 0xc0) >> 6) #define regFromModRM(modRM) (((modRM) & 0x38) >> 3) #define rmFromModRM(modRM) ((modRM) & 0x7) @@ -44,7 +33,22 @@ extern "C" { #define rFromREX(rex) (((rex) & 0x4) >> 2) #define xFromREX(rex) (((rex) & 0x2) >> 1) #define bFromREX(rex) ((rex) & 0x1) - + +#define rFromEVEX2of4(evex) (((~(evex)) & 0x80) >> 7) +#define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6) +#define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5) +#define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4) +#define mmFromEVEX2of4(evex) ((evex) & 0x3) +#define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7) +#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3) +#define ppFromEVEX3of4(evex) ((evex) & 0x3) +#define zFromEVEX4of4(evex) (((evex) & 0x80) >> 7) +#define l2FromEVEX4of4(evex) (((evex) & 0x40) >> 6) +#define lFromEVEX4of4(evex) (((evex) & 0x20) >> 5) +#define bFromEVEX4of4(evex) (((evex) & 0x10) >> 4) +#define v2FromEVEX4of4(evex) (((~evex) & 0x8) >> 3) +#define aaaFromEVEX4of4(evex) ((evex) & 0x7) + #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) @@ -59,10 +63,16 @@ extern "C" { #define lFromVEX2of2(vex) (((vex) & 0x4) >> 2) #define ppFromVEX2of2(vex) ((vex) & 0x3) -/* - * These enums represent Intel registers for use by the decoder. - */ +#define rFromXOP2of3(xop) (((~(xop)) & 0x80) >> 7) +#define xFromXOP2of3(xop) (((~(xop)) & 0x40) >> 6) +#define bFromXOP2of3(xop) (((~(xop)) & 0x20) >> 5) +#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f) +#define wFromXOP3of3(xop) (((xop) & 0x80) >> 7) +#define vvvvFromXOP3of3(vex) (((~(vex)) & 0x78) >> 3) +#define lFromXOP3of3(xop) (((xop) & 0x4) >> 2) +#define ppFromXOP3of3(xop) ((xop) & 0x3) +// These enums represent Intel registers for use by the decoder. #define REGS_8BIT \ ENTRY(AL) \ ENTRY(CL) \ @@ -219,7 +229,23 @@ extern "C" { ENTRY(XMM12) \ ENTRY(XMM13) \ ENTRY(XMM14) \ - ENTRY(XMM15) + ENTRY(XMM15) \ + ENTRY(XMM16) \ + ENTRY(XMM17) \ + ENTRY(XMM18) \ + ENTRY(XMM19) \ + ENTRY(XMM20) \ + ENTRY(XMM21) \ + ENTRY(XMM22) \ + ENTRY(XMM23) \ + ENTRY(XMM24) \ + ENTRY(XMM25) \ + ENTRY(XMM26) \ + ENTRY(XMM27) \ + ENTRY(XMM28) \ + ENTRY(XMM29) \ + ENTRY(XMM30) \ + ENTRY(XMM31) #define REGS_YMM \ ENTRY(YMM0) \ @@ -237,8 +263,68 @@ extern "C" { ENTRY(YMM12) \ ENTRY(YMM13) \ ENTRY(YMM14) \ - ENTRY(YMM15) - + ENTRY(YMM15) \ + ENTRY(YMM16) \ + ENTRY(YMM17) \ + ENTRY(YMM18) \ + ENTRY(YMM19) \ + ENTRY(YMM20) \ + ENTRY(YMM21) \ + ENTRY(YMM22) \ + ENTRY(YMM23) \ + ENTRY(YMM24) \ + ENTRY(YMM25) \ + ENTRY(YMM26) \ + ENTRY(YMM27) \ + ENTRY(YMM28) \ + ENTRY(YMM29) \ + ENTRY(YMM30) \ + ENTRY(YMM31) + +#define REGS_ZMM \ + ENTRY(ZMM0) \ + ENTRY(ZMM1) \ + ENTRY(ZMM2) \ + ENTRY(ZMM3) \ + ENTRY(ZMM4) \ + ENTRY(ZMM5) \ + ENTRY(ZMM6) \ + ENTRY(ZMM7) \ + ENTRY(ZMM8) \ + ENTRY(ZMM9) \ + ENTRY(ZMM10) \ + ENTRY(ZMM11) \ + ENTRY(ZMM12) \ + ENTRY(ZMM13) \ + ENTRY(ZMM14) \ + ENTRY(ZMM15) \ + ENTRY(ZMM16) \ + ENTRY(ZMM17) \ + ENTRY(ZMM18) \ + ENTRY(ZMM19) \ + ENTRY(ZMM20) \ + ENTRY(ZMM21) \ + ENTRY(ZMM22) \ + ENTRY(ZMM23) \ + ENTRY(ZMM24) \ + ENTRY(ZMM25) \ + ENTRY(ZMM26) \ + ENTRY(ZMM27) \ + ENTRY(ZMM28) \ + ENTRY(ZMM29) \ + ENTRY(ZMM30) \ + ENTRY(ZMM31) + +#define REGS_MASKS \ + ENTRY(K0) \ + ENTRY(K1) \ + ENTRY(K2) \ + ENTRY(K3) \ + ENTRY(K4) \ + ENTRY(K5) \ + ENTRY(K6) \ + ENTRY(K7) + #define REGS_SEGMENT \ ENTRY(ES) \ ENTRY(CS) \ @@ -246,7 +332,7 @@ extern "C" { ENTRY(DS) \ ENTRY(FS) \ ENTRY(GS) - + #define REGS_DEBUG \ ENTRY(DR0) \ ENTRY(DR1) \ @@ -267,12 +353,12 @@ extern "C" { ENTRY(CR6) \ ENTRY(CR7) \ ENTRY(CR8) - + #define ALL_EA_BASES \ EA_BASES_16BIT \ EA_BASES_32BIT \ EA_BASES_64BIT - + #define ALL_SIB_BASES \ REGS_32BIT \ REGS_64BIT @@ -285,18 +371,18 @@ extern "C" { REGS_MMX \ REGS_XMM \ REGS_YMM \ + REGS_ZMM \ + REGS_MASKS \ REGS_SEGMENT \ REGS_DEBUG \ REGS_CONTROL \ ENTRY(RIP) -/* - * EABase - All possible values of the base field for effective-address - * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We - * distinguish between bases (EA_BASE_*) and registers that just happen to be - * referred to when Mod == 0b11 (EA_REG_*). - */ -typedef enum { +/// \brief All possible values of the base field for effective-address +/// computations, a.k.a. the Mod and R/M fields of the ModR/M byte. +/// We distinguish between bases (EA_BASE_*) and registers that just happen +/// to be referred to when Mod == 0b11 (EA_REG_*). +enum EABase { EA_BASE_NONE, #define ENTRY(x) EA_BASE_##x, ALL_EA_BASES @@ -305,36 +391,33 @@ typedef enum { ALL_REGS #undef ENTRY EA_max -} EABase; - -/* - * SIBIndex - All possible values of the SIB index field. - * Borrows entries from ALL_EA_BASES with the special case that - * sib is synonymous with NONE. - */ -typedef enum { +}; + +/// \brief All possible values of the SIB index field. +/// borrows entries from ALL_EA_BASES with the special case that +/// sib is synonymous with NONE. +/// Vector SIB: index can be XMM or YMM. +enum SIBIndex { SIB_INDEX_NONE, #define ENTRY(x) SIB_INDEX_##x, ALL_EA_BASES + REGS_XMM + REGS_YMM + REGS_ZMM #undef ENTRY SIB_INDEX_max -} SIBIndex; - -/* - * SIBBase - All possible values of the SIB base field. - */ -typedef enum { +}; + +/// \brief All possible values of the SIB base field. +enum SIBBase { SIB_BASE_NONE, #define ENTRY(x) SIB_BASE_##x, ALL_SIB_BASES #undef ENTRY SIB_BASE_max -} SIBBase; +}; -/* - * EADisplacement - Possible displacement types for effective-address - * computations. - */ +/// \brief Possible displacement types for effective-address computations. typedef enum { EA_DISP_NONE, EA_DISP_8, @@ -342,20 +425,16 @@ typedef enum { EA_DISP_32 } EADisplacement; -/* - * Reg - All possible values of the reg field in the ModR/M byte. - */ -typedef enum { +/// \brief All possible values of the reg field in the ModR/M byte. +enum Reg { #define ENTRY(x) MODRM_REG_##x, ALL_REGS #undef ENTRY MODRM_REG_max -} Reg; - -/* - * SegmentOverride - All possible segment overrides. - */ -typedef enum { +}; + +/// \brief All possible segment overrides. +enum SegmentOverride { SEG_OVERRIDE_NONE, SEG_OVERRIDE_CS, SEG_OVERRIDE_SS, @@ -364,212 +443,220 @@ typedef enum { SEG_OVERRIDE_FS, SEG_OVERRIDE_GS, SEG_OVERRIDE_max -} SegmentOverride; - -/* - * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field - */ +}; -typedef enum { +/// \brief Possible values for the VEX.m-mmmm field +enum VEXLeadingOpcodeByte { VEX_LOB_0F = 0x1, VEX_LOB_0F38 = 0x2, VEX_LOB_0F3A = 0x3 -} VEXLeadingOpcodeByte; +}; -/* - * VEXPrefixCode - Possible values for the VEX.pp field - */ +enum XOPMapSelect { + XOP_MAP_SELECT_8 = 0x8, + XOP_MAP_SELECT_9 = 0x9, + XOP_MAP_SELECT_A = 0xA +}; -typedef enum { +/// \brief Possible values for the VEX.pp/EVEX.pp field +enum VEXPrefixCode { VEX_PREFIX_NONE = 0x0, VEX_PREFIX_66 = 0x1, VEX_PREFIX_F3 = 0x2, VEX_PREFIX_F2 = 0x3 -} VEXPrefixCode; - -typedef uint8_t BOOL; - -/* - * byteReader_t - Type for the byte reader that the consumer must provide to - * the decoder. Reads a single byte from the instruction's address space. - * @param arg - A baton that the consumer can associate with any internal - * state that it needs. - * @param byte - A pointer to a single byte in memory that should be set to - * contain the value at address. - * @param address - The address in the instruction's address space that should - * be read from. - * @return - -1 if the byte cannot be read for any reason; 0 otherwise. - */ -typedef int (*byteReader_t)(void* arg, uint8_t* byte, uint64_t address); - -/* - * dlog_t - Type for the logging function that the consumer can provide to - * get debugging output from the decoder. - * @param arg - A baton that the consumer can associate with any internal - * state that it needs. - * @param log - A string that contains the message. Will be reused after - * the logger returns. - */ -typedef void (*dlog_t)(void* arg, const char *log); - -/* - * The x86 internal instruction, which is produced by the decoder. - */ +}; + +enum VectorExtensionType { + TYPE_NO_VEX_XOP = 0x0, + TYPE_VEX_2B = 0x1, + TYPE_VEX_3B = 0x2, + TYPE_EVEX = 0x3, + TYPE_XOP = 0x4 +}; + +/// \brief Type for the byte reader that the consumer must provide to +/// the decoder. Reads a single byte from the instruction's address space. +/// \param arg A baton that the consumer can associate with any internal +/// state that it needs. +/// \param byte A pointer to a single byte in memory that should be set to +/// contain the value at address. +/// \param address The address in the instruction's address space that should +/// be read from. +/// \return -1 if the byte cannot be read for any reason; 0 otherwise. +typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address); + +/// \brief Type for the logging function that the consumer can provide to +/// get debugging output from the decoder. +/// \param arg A baton that the consumer can associate with any internal +/// state that it needs. +/// \param log A string that contains the message. Will be reused after +/// the logger returns. +typedef void (*dlog_t)(void *arg, const char *log); + +/// The specification for how to extract and interpret a full instruction and +/// its operands. +struct InstructionSpecifier { + uint16_t operands; +}; + +/// The x86 internal instruction, which is produced by the decoder. struct InternalInstruction { - /* Reader interface (C) */ + // Reader interface (C) byteReader_t reader; - /* Opaque value passed to the reader */ - void* readerArg; - /* The address of the next byte to read via the reader */ + // Opaque value passed to the reader + const void* readerArg; + // The address of the next byte to read via the reader uint64_t readerCursor; - /* Logger interface (C) */ + // Logger interface (C) dlog_t dlog; - /* Opaque value passed to the logger */ + // Opaque value passed to the logger void* dlogArg; - /* General instruction information */ - - /* The mode to disassemble for (64-bit, protected, real) */ + // General instruction information + + // The mode to disassemble for (64-bit, protected, real) DisassemblerMode mode; - /* The start of the instruction, usable with the reader */ + // The start of the instruction, usable with the reader uint64_t startLocation; - /* The length of the instruction, in bytes */ + // The length of the instruction, in bytes size_t length; - - /* Prefix state */ - - /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ + + // Prefix state + + // 1 if the prefix byte corresponding to the entry is present; 0 if not uint8_t prefixPresent[0x100]; - /* contains the location (for use with the reader) of the prefix byte */ + // contains the location (for use with the reader) of the prefix byte uint64_t prefixLocations[0x100]; - /* The value of the VEX prefix, if present */ - uint8_t vexPrefix[3]; - /* The length of the VEX prefix (0 if not present) */ - uint8_t vexSize; - /* The value of the REX prefix, if present */ + // The value of the vector extension prefix(EVEX/VEX/XOP), if present + uint8_t vectorExtensionPrefix[4]; + // The type of the vector extension prefix + VectorExtensionType vectorExtensionType; + // The value of the REX prefix, if present uint8_t rexPrefix; - /* The location where a mandatory prefix would have to be (i.e., right before - the opcode, or right before the REX prefix if one is present) */ + // The location where a mandatory prefix would have to be (i.e., right before + // the opcode, or right before the REX prefix if one is present). uint64_t necessaryPrefixLocation; - /* The segment override type */ + // The segment override type SegmentOverride segmentOverride; - - /* Sizes of various critical pieces of data, in bytes */ + // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease + bool xAcquireRelease; + + // Sizes of various critical pieces of data, in bytes uint8_t registerSize; uint8_t addressSize; uint8_t displacementSize; uint8_t immediateSize; - - /* opcode state */ - - /* The value of the two-byte escape prefix (usually 0x0f) */ - uint8_t twoByteEscape; - /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */ - uint8_t threeByteEscape; - /* The last byte of the opcode, not counting any ModR/M extension */ + + // Offsets from the start of the instruction to the pieces of data, which is + // needed to find relocation entries for adding symbolic operands. + uint8_t displacementOffset; + uint8_t immediateOffset; + + // opcode state + + // The last byte of the opcode, not counting any ModR/M extension uint8_t opcode; - /* The ModR/M byte of the instruction, if it is an opcode extension */ + // The ModR/M byte of the instruction, if it is an opcode extension uint8_t modRMExtension; - - /* decode state */ - - /* The type of opcode, used for indexing into the array of decode tables */ + + // decode state + + // The type of opcode, used for indexing into the array of decode tables OpcodeType opcodeType; - /* The instruction ID, extracted from the decode table */ + // The instruction ID, extracted from the decode table uint16_t instructionID; - /* The specifier for the instruction, from the instruction info table */ - const struct InstructionSpecifier *spec; - - /* state for additional bytes, consumed during operand decode. Pattern: - consumed___ indicates that the byte was already consumed and does not - need to be consumed again */ - - /* The VEX.vvvv field, which contains a third register operand for some AVX - instructions */ + // The specifier for the instruction, from the instruction info table + const InstructionSpecifier *spec; + + // state for additional bytes, consumed during operand decode. Pattern: + // consumed___ indicates that the byte was already consumed and does not + // need to be consumed again. + + // The VEX.vvvv field, which contains a third register operand for some AVX + // instructions. Reg vvvv; - - /* The ModR/M byte, which contains most register operands and some portion of - all memory operands */ - BOOL consumedModRM; + + // The writemask for AVX-512 instructions which is contained in EVEX.aaa + Reg writemask; + + // The ModR/M byte, which contains most register operands and some portion of + // all memory operands. + bool consumedModRM; uint8_t modRM; - - /* The SIB byte, used for more complex 32- or 64-bit memory operands */ - BOOL consumedSIB; + + // The SIB byte, used for more complex 32- or 64-bit memory operands + bool consumedSIB; uint8_t sib; - /* The displacement, used for memory operands */ - BOOL consumedDisplacement; + // The displacement, used for memory operands + bool consumedDisplacement; int32_t displacement; - - /* Immediates. There can be two in some cases */ + + // Immediates. There can be two in some cases uint8_t numImmediatesConsumed; uint8_t numImmediatesTranslated; uint64_t immediates[2]; - - /* A register or immediate operand encoded into the opcode */ - BOOL consumedOpcodeModifier; - uint8_t opcodeModifier; + + // A register or immediate operand encoded into the opcode Reg opcodeRegister; - - /* Portions of the ModR/M byte */ - - /* These fields determine the allowable values for the ModR/M fields, which - depend on operand and address widths */ + + // Portions of the ModR/M byte + + // These fields determine the allowable values for the ModR/M fields, which + // depend on operand and address widths. EABase eaBaseBase; EABase eaRegBase; Reg regBase; - /* The Mod and R/M fields can encode a base for an effective address, or a - register. These are separated into two fields here */ + // The Mod and R/M fields can encode a base for an effective address, or a + // register. These are separated into two fields here. EABase eaBase; EADisplacement eaDisplacement; - /* The reg field always encodes a register */ + // The reg field always encodes a register Reg reg; - - /* SIB state */ + + // SIB state SIBIndex sibIndex; uint8_t sibScale; SIBBase sibBase; + + ArrayRef operands; }; -/* decodeInstruction - Decode one instruction and store the decoding results in - * a buffer provided by the consumer. - * @param insn - The buffer to store the instruction in. Allocated by the - * consumer. - * @param reader - The byteReader_t for the bytes to be read. - * @param readerArg - An argument to pass to the reader for storing context - * specific to the consumer. May be NULL. - * @param logger - The dlog_t to be used in printing status messages from the - * disassembler. May be NULL. - * @param loggerArg - An argument to pass to the logger for storing context - * specific to the logger. May be NULL. - * @param startLoc - The address (in the reader's address space) of the first - * byte in the instruction. - * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in. - * @return - Nonzero if there was an error during decode, 0 otherwise. - */ -int decodeInstruction(struct InternalInstruction* insn, +/// \brief Decode one instruction and store the decoding results in +/// a buffer provided by the consumer. +/// \param insn The buffer to store the instruction in. Allocated by the +/// consumer. +/// \param reader The byteReader_t for the bytes to be read. +/// \param readerArg An argument to pass to the reader for storing context +/// specific to the consumer. May be NULL. +/// \param logger The dlog_t to be used in printing status messages from the +/// disassembler. May be NULL. +/// \param loggerArg An argument to pass to the logger for storing context +/// specific to the logger. May be NULL. +/// \param startLoc The address (in the reader's address space) of the first +/// byte in the instruction. +/// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in. +/// \return Nonzero if there was an error during decode, 0 otherwise. +int decodeInstruction(InternalInstruction *insn, byteReader_t reader, - void* readerArg, + const void *readerArg, dlog_t logger, - void* loggerArg, + void *loggerArg, + const void *miiArg, uint64_t startLoc, DisassemblerMode mode); -/* x86DisassemblerDebug - C-accessible function for printing a message to - * debugs() - * @param file - The name of the file printing the debug message. - * @param line - The line number that printed the debug message. - * @param s - The message to print. - */ - -void x86DisassemblerDebug(const char *file, - unsigned line, - const char *s); - -#ifdef __cplusplus -} -#endif - +/// \brief Print a message to debugs() +/// \param file The name of the file printing the debug message. +/// \param line The line number that printed the debug message. +/// \param s The message to print. +void Debug(const char *file, unsigned line, const char *s); + +const char *GetInstrName(unsigned Opcode, const void *mii); + +} // namespace X86Disassembler +} // namespace llvm + #endif