diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 7f24766f678..0e18a4e9f65 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- X86/X86MCCodeEmitter.cpp - Convert X86 code to machine code -------===//
+//===-- X86MCCodeEmitter.cpp - Convert X86 code to machine code -----------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,6 +16,7 @@
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86FixupKinds.h"
 #include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -28,8 +29,8 @@ using namespace llvm;
 namespace {
 class X86MCCodeEmitter : public MCCodeEmitter {
-  X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
-  void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
+  X86MCCodeEmitter(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION;
+  void operator=(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION;
   const MCInstrInfo &MCII;
   const MCSubtargetInfo &STI;
   MCContext &Ctx;
@@ -46,8 +47,36 @@ public:
     return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
   }
 
-  static unsigned GetX86RegNum(const MCOperand &MO) {
-    return X86_MC::getX86RegNum(MO.getReg());
+  bool is32BitMode() const {
+    // FIXME: Can tablegen auto-generate this?
+    return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
+  }
+
+  bool is16BitMode() const {
+    // FIXME: Can tablegen auto-generate this?
+    return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
+  }
+
+  /// Is16BitMemOperand - Return true if the specified instruction has
+  /// a 16-bit memory operand. Op specifies the operand # of the memoperand.
+  bool Is16BitMemOperand(const MCInst &MI, unsigned Op) const {
+    const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+    const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+    const MCOperand &Disp = MI.getOperand(Op+X86::AddrDisp);
+
+    if (is16BitMode() && BaseReg.getReg() == 0 &&
+        Disp.isImm() && Disp.getImm() < 0x10000)
+      return true;
+    if ((BaseReg.getReg() != 0 &&
+         X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
+        (IndexReg.getReg() != 0 &&
+         X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+      return true;
+    return false;
+  }
+
+  unsigned GetX86RegNum(const MCOperand &MO) const {
+    return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()) & 0x7;
   }
 
   // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
@@ -59,8 +88,8 @@ public:
   //  VEX.VVVV    => XMM9 => ~9
   //
   // See table 4-35 of Intel AVX Programming Reference for details.
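// To make the 1's-complement rule above concrete, a minimal stand-alone
// sketch (illustration only; encodeVVVV is a made-up name). Given the 4-bit
// register number that GetX86RegNum plus the extended-register bit would
// produce, VEX.VVVV is the bitwise inverse, truncated to 4 bits:

#include <cassert>

static unsigned char encodeVVVV(unsigned RegNum) { // hypothetical helper
  return (~RegNum) & 0xf; // 1's complement, keep the low 4 bits
}

int main() {
  assert(encodeVVVV(9) == 0x6); // VEX.VVVV => XMM9 => ~9 => 0b0110
  assert(encodeVVVV(0) == 0xf); // an unused VVVV field reads as all ones
  return 0;
}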
- static unsigned char getVEXRegisterEncoding(const MCInst &MI, - unsigned OpNum) { + unsigned char getVEXRegisterEncoding(const MCInst &MI, + unsigned OpNum) const { unsigned SrcReg = MI.getOperand(OpNum).getReg(); unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum)); if (X86II::isX86_64ExtendedReg(SrcReg)) @@ -71,6 +100,14 @@ public: return (~SrcRegNum) & 0xf; } + unsigned char getWriteMaskRegisterEncoding(const MCInst &MI, + unsigned OpNum) const { + assert(X86::K0 != MI.getOperand(OpNum).getReg() && + "Invalid mask register as write-mask!"); + unsigned MaskRegNum = GetX86RegNum(MI.getOperand(OpNum)); + return MaskRegNum; + } + void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const { OS << (char)C; ++CurByte; @@ -121,9 +158,8 @@ public: const MCInst &MI, const MCInstrDesc &Desc, raw_ostream &OS) const; - void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte, - int MemOperand, const MCInst &MI, - raw_ostream &OS) const; + void EmitSegmentOverridePrefix(unsigned &CurByte, unsigned SegOperand, + const MCInst &MI, raw_ostream &OS) const; void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, const MCInst &MI, const MCInstrDesc &Desc, @@ -134,6 +170,7 @@ public: MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, MCContext &Ctx) { return new X86MCCodeEmitter(MCII, STI, Ctx); @@ -145,6 +182,52 @@ static bool isDisp8(int Value) { return Value == (signed char)Value; } +/// isCDisp8 - Return true if this signed displacement fits in a 8-bit +/// compressed dispacement field. +static bool isCDisp8(uint64_t TSFlags, int Value, int& CValue) { + assert(((TSFlags >> X86II::VEXShift) & X86II::EVEX) && + "Compressed 8-bit displacement is only valid for EVEX inst."); + + unsigned CD8E = (TSFlags >> X86II::EVEX_CD8EShift) & X86II::EVEX_CD8EMask; + unsigned CD8V = (TSFlags >> X86II::EVEX_CD8VShift) & X86II::EVEX_CD8VMask; + + if (CD8V == 0 && CD8E == 0) { + CValue = Value; + return isDisp8(Value); + } + + unsigned MemObjSize = 1U << CD8E; + if (CD8V & 4) { + // Fixed vector length + MemObjSize *= 1U << (CD8V & 0x3); + } else { + // Modified vector length + bool EVEX_b = (TSFlags >> X86II::VEXShift) & X86II::EVEX_B; + if (!EVEX_b) { + unsigned EVEX_LL = ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) ? 1 : 0; + EVEX_LL += ((TSFlags >> X86II::VEXShift) & X86II::EVEX_L2) ? 2 : 0; + assert(EVEX_LL < 3 && ""); + + unsigned NumElems = (1U << (EVEX_LL + 4)) / MemObjSize; + NumElems /= 1U << (CD8V & 0x3); + + MemObjSize *= NumElems; + } + } + + unsigned MemObjMask = MemObjSize - 1; + assert((MemObjSize & MemObjMask) == 0 && "Invalid memory object size."); + + if (Value & MemObjMask) // Unaligned offset + return false; + Value /= MemObjSize; + bool Ret = (Value == (signed char)Value); + + if (Ret) + CValue = Value; + return Ret; +} + /// getImmFixupKind - Return the appropriate fixup kind to use for an immediate /// in an instruction with the specified TSFlags. static MCFixupKind getImmFixupKind(uint64_t TSFlags) { @@ -154,9 +237,8 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) { return MCFixup::getKindForSize(Size, isPCRel); } -/// Is32BitMemOperand - Return true if the specified instruction with a memory -/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit -/// memory operand. Op specifies the operand # of the memoperand. +/// Is32BitMemOperand - Return true if the specified instruction has +/// a 32-bit memory operand. 
Op specifies the operand # of the memoperand.
 static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
   const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
   const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
@@ -169,6 +251,22 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
   return false;
 }
 
+/// Is64BitMemOperand - Return true if the specified instruction has
+/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
+#ifndef NDEBUG
+static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) {
+  const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+  const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+  if ((BaseReg.getReg() != 0 &&
+       X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
+      (IndexReg.getReg() != 0 &&
+       X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
+    return true;
+  return false;
+}
+#endif
+
 /// StartsWithGlobalOffsetTable - Check if this expression starts with
 /// _GLOBAL_OFFSET_TABLE_ and if it is of the form
 /// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
@@ -201,6 +299,14 @@ StartsWithGlobalOffsetTable(const MCExpr *Expr) {
   return GOT_Normal;
 }
 
+static bool HasSecRelSymbolRef(const MCExpr *Expr) {
+  if (Expr->getKind() == MCExpr::SymbolRef) {
+    const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
+    return Ref->getKind() == MCSymbolRefExpr::VK_SECREL;
+  }
+  return false;
+}
+
 void X86MCCodeEmitter::
 EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
               MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
@@ -222,6 +328,7 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
 
   // If we have an immoffset, add it to the expression.
   if ((FixupKind == FK_Data_4 ||
+       FixupKind == FK_Data_8 ||
       FixupKind == MCFixupKind(X86::reloc_signed_4byte))) {
     GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr);
     if (Kind != GOT_None) {
@@ -230,6 +337,16 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
       FixupKind = MCFixupKind(X86::reloc_global_offset_table);
       if (Kind == GOT_Normal)
         ImmOffset = CurByte;
+    } else if (Expr->getKind() == MCExpr::SymbolRef) {
+      if (HasSecRelSymbolRef(Expr)) {
+        FixupKind = MCFixupKind(FK_SecRel_4);
+      }
+    } else if (Expr->getKind() == MCExpr::Binary) {
+      const MCBinaryExpr *Bin = static_cast<const MCBinaryExpr *>(Expr);
+      if (HasSecRelSymbolRef(Bin->getLHS())
+          || HasSecRelSymbolRef(Bin->getRHS())) {
+        FixupKind = MCFixupKind(FK_SecRel_4);
+      }
     }
   }
 
@@ -263,6 +380,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
   const MCOperand &Scale = MI.getOperand(Op+X86::AddrScaleAmt);
   const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
   unsigned BaseReg = Base.getReg();
+  bool HasEVEX = (TSFlags >> X86II::VEXShift) & X86II::EVEX;
 
   // Handle %rip relative addressing.
   if (BaseReg == X86::RIP) {    // [disp32+RIP] in X86-64 mode
@@ -292,6 +410,66 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
 
   unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;
 
+  // 16-bit addressing forms of the ModR/M byte have a different encoding for
+  // the R/M field and are far more limited in which registers can be used.
+  if (Is16BitMemOperand(MI, Op)) {
+    if (BaseReg) {
+      // For 32-bit addressing, the row and column values in Table 2-2 are
+      // basically the same. It's AX/CX/DX/BX/SP/BP/SI/DI in that order, with
+      // some special cases. And GetX86RegNum reflects that numbering.
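// For reference while reading the addressing-mode dispatch below: a ModR/M
// byte is three packed fields, mod (2 bits), reg/opcode (3 bits) and r/m
// (3 bits). A stand-alone sketch of the packing, which is presumably what
// the ModRMByte helper used throughout this file does (illustration only):

#include <cassert>

static unsigned char modRM(unsigned Mod, unsigned RegOpcode, unsigned RM) {
  assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "field out of range");
  return (unsigned char)(Mod << 6 | RegOpcode << 3 | RM); // hypothetical helper
}

int main() {
  // mod=01 (disp8 follows), reg=/2, rm=110: e.g. [BP]+disp8 in 16-bit mode.
  assert(modRM(1, 2, 6) == 0x56);
  return 0;
}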
+ // For 16-bit addressing it's more fun, as shown in the SDM Vol 2A, + // Table 2-1 "16-Bit Addressing Forms with the ModR/M byte". We can only + // use SI/DI/BP/BX, which have "row" values 4-7 in no particular order, + // while values 0-3 indicate the allowed combinations (base+index) of + // those: 0 for BX+SI, 1 for BX+DI, 2 for BP+SI, 3 for BP+DI. + // + // R16Table[] is a lookup from the normal RegNo, to the row values from + // Table 2-1 for 16-bit addressing modes. Where zero means disallowed. + static const unsigned R16Table[] = { 0, 0, 0, 7, 0, 6, 4, 5 }; + unsigned RMfield = R16Table[BaseRegNo]; + + assert(RMfield && "invalid 16-bit base register"); + + if (IndexReg.getReg()) { + unsigned IndexReg16 = R16Table[GetX86RegNum(IndexReg)]; + + assert(IndexReg16 && "invalid 16-bit index register"); + // We must have one of SI/DI (4,5), and one of BP/BX (6,7). + assert(((IndexReg16 ^ RMfield) & 2) && + "invalid 16-bit base/index register combination"); + assert(Scale.getImm() == 1 && + "invalid scale for 16-bit memory reference"); + + // Allow base/index to appear in either order (although GAS doesn't). + if (IndexReg16 & 2) + RMfield = (RMfield & 1) | ((7 - IndexReg16) << 1); + else + RMfield = (IndexReg16 & 1) | ((7 - RMfield) << 1); + } + + if (Disp.isImm() && isDisp8(Disp.getImm())) { + if (Disp.getImm() == 0 && BaseRegNo != N86::EBP) { + // There is no displacement; just the register. + EmitByte(ModRMByte(0, RegOpcodeField, RMfield), CurByte, OS); + return; + } + // Use the [REG]+disp8 form, including for [BP] which cannot be encoded. + EmitByte(ModRMByte(1, RegOpcodeField, RMfield), CurByte, OS); + EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups); + return; + } + // This is the [REG]+disp16 case. + EmitByte(ModRMByte(2, RegOpcodeField, RMfield), CurByte, OS); + } else { + // There is no BaseReg; this is the plain [disp16] case. + EmitByte(ModRMByte(0, RegOpcodeField, 6), CurByte, OS); + } + + // Emit 16-bit displacement for plain disp16 or [REG]+disp16 cases. + EmitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, CurByte, OS, Fixups); + return; + } + // Determine whether a SIB byte is needed. // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table @@ -323,10 +501,21 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, } // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. - if (Disp.isImm() && isDisp8(Disp.getImm())) { - EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups); - return; + if (Disp.isImm()) { + if (!HasEVEX && isDisp8(Disp.getImm())) { + EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); + EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups); + return; + } + // Try EVEX compressed 8-bit displacement first; if failed, fall back to + // 32-bit displacement. 
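// A simplified stand-alone model of the compression attempted here (a
// sketch: it assumes a fixed memory-object size instead of the CD8E/CD8V
// decoding that isCDisp8 performs; fitsCDisp8 is a made-up name):

#include <cassert>

static bool fitsCDisp8(int Disp, int MemObjSize, int &Compressed) {
  if (Disp % MemObjSize != 0) // unaligned offsets cannot be compressed
    return false;
  int Scaled = Disp / MemObjSize;
  if (Scaled != (signed char)Scaled) // scaled value must still fit in 8 bits
    return false;
  Compressed = Scaled;
  return true;
}

int main() {
  int C;
  assert(fitsCDisp8(256, 64, C) && C == 4); // 256 = 4 x 64-byte objects
  assert(!fitsCDisp8(260, 64, C));          // unaligned: needs full disp32
  return 0;
}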
+ int CDisp8 = 0; + if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) { + EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); + EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups, + CDisp8 - Disp.getImm()); + return; + } } // Otherwise, emit the most general non-SIB encoding: [REG+disp32] @@ -342,6 +531,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, bool ForceDisp32 = false; bool ForceDisp8 = false; + int CDisp8 = 0; + int ImmOffset = 0; if (BaseReg == 0) { // If there is no base register, we emit the special case SIB byte with // MOD=0, BASE=5, to JUST get the index, scale, and displacement. @@ -357,10 +548,15 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, BaseRegNo != N86::EBP) { // Emit no displacement ModR/M byte EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS); - } else if (isDisp8(Disp.getImm())) { + } else if (!HasEVEX && isDisp8(Disp.getImm())) { + // Emit the disp8 encoding. + EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS); + ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + } else if (HasEVEX && isCDisp8(TSFlags, Disp.getImm(), CDisp8)) { // Emit the disp8 encoding. EmitByte(ModRMByte(1, RegOpcodeField, 4), CurByte, OS); ForceDisp8 = true; // Make sure to force 8 bit disp if Base=EBP + ImmOffset = CDisp8 - Disp.getImm(); } else { // Emit the normal disp32 encoding. EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); @@ -390,7 +586,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Do we need to output a displacement? if (ForceDisp8) - EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups); + EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups, ImmOffset); else if (ForceDisp32 || Disp.getImm() != 0) EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS, Fixups); @@ -402,8 +598,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, const MCInst &MI, const MCInstrDesc &Desc, raw_ostream &OS) const { + bool HasEVEX = (TSFlags >> X86II::VEXShift) & X86II::EVEX; + bool HasEVEX_K = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K); bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; + bool HasEVEX_RC = (TSFlags >> X86II::VEXShift) & X86II::EVEX_RC; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -412,6 +612,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // 0: Same as REX_R=1 (64 bit mode only) // unsigned char VEX_R = 0x1; + unsigned char EVEX_R2 = 0x1; // VEX_X: equivalent to REX.X, only used when a // register is used for index in SIB Byte. @@ -432,7 +633,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, unsigned char VEX_W = 0; // XOP: Use XOP prefix byte 0x8f instead of VEX. 
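// Putting the inverted R/X/B bits and VEX_W described above together: the
// 3-byte VEX form is the 0xC4 escape, then RXB over m-mmmmm, then
// W/vvvv/L/pp, mirroring the byte diagram further down in this function.
// A stand-alone sketch of that packing (emitVEX3 is a made-up name;
// illustration only):

#include <cstdio>

static void emitVEX3(unsigned char R, unsigned char X, unsigned char B,
                     unsigned char M5, unsigned char W, unsigned char VVVV,
                     unsigned char L, unsigned char PP, unsigned char Out[3]) {
  Out[0] = 0xC4;                                           // 3-byte VEX escape
  Out[1] = (unsigned char)(R << 7 | X << 6 | B << 5 | M5); // RXB | m-mmmmm
  Out[2] = (unsigned char)(W << 7 | VVVV << 3 | L << 2 | PP);
}

int main() {
  unsigned char Prefix[3];
  // 0F map (m-mmmmm = 1), W=0, vvvv unused (all ones), 128-bit, no SIMD prefix.
  emitVEX3(1, 1, 1, 0x1, 0, 0xf, 0, 0, Prefix);
  std::printf("%02x %02x %02x\n", Prefix[0], Prefix[1], Prefix[2]); // c4 e1 78
  return 0;
}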
- unsigned char XOP = 0; + bool XOP = (TSFlags >> X86II::VEXShift) & X86II::XOP; // VEX_5M (VEX m-mmmmm field): // @@ -442,12 +643,14 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // 0b00011: implied 0F 3A leading opcode bytes // 0b00100-0b11111: Reserved for future use // 0b01000: XOP map select - 08h instructions with imm byte - // 0b10001: XOP map select - 09h instructions with no imm byte + // 0b01001: XOP map select - 09h instructions with no imm byte + // 0b01010: XOP map select - 0Ah instructions with imm dword unsigned char VEX_5M = 0x1; // VEX_4V (VEX vvvv field): a register specifier // (in 1's complement form) or 1111 if unused. unsigned char VEX_4V = 0xf; + unsigned char EVEX_V2 = 0x1; // VEX_L (Vector Length): // @@ -455,6 +658,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // 1: 256-bit vector // unsigned char VEX_L = 0; + unsigned char EVEX_L2 = 0; // VEX_PP: opcode extension providing equivalent // functionality of a SIMD prefix @@ -466,18 +670,36 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // unsigned char VEX_PP = 0; - // Encode the operand size opcode prefix as needed. - if (TSFlags & X86II::OpSize) - VEX_PP = 0x01; + // EVEX_U + unsigned char EVEX_U = 1; // Always '1' so far + + // EVEX_z + unsigned char EVEX_z = 0; + + // EVEX_b + unsigned char EVEX_b = 0; + + // EVEX_rc + unsigned char EVEX_rc = 0; + + // EVEX_aaa + unsigned char EVEX_aaa = 0; + + bool EncodeRC = false; if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) VEX_W = 1; - if ((TSFlags >> X86II::VEXShift) & X86II::XOP) - XOP = 1; - if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) VEX_L = 1; + if (HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_L2)) + EVEX_L2 = 1; + + if (HasEVEX_K && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_Z)) + EVEX_z = 1; + + if (HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_B)) + EVEX_b = 1; switch (TSFlags & X86II::Op0Mask) { default: llvm_unreachable("Invalid prefix!"); @@ -487,6 +709,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::TA: // 0F 3A VEX_5M = 0x3; break; + case X86II::T8PD: // 66 0F 38 + VEX_PP = 0x1; + VEX_5M = 0x2; + break; case X86II::T8XS: // F3 0F 38 VEX_PP = 0x2; VEX_5M = 0x2; @@ -495,10 +721,17 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_PP = 0x3; VEX_5M = 0x2; break; + case X86II::TAPD: // 66 0F 3A + VEX_PP = 0x1; + VEX_5M = 0x3; + break; case X86II::TAXD: // F2 0F 3A VEX_PP = 0x3; VEX_5M = 0x3; break; + case X86II::PD: // 66 0F + VEX_PP = 0x1; + break; case X86II::XS: // F3 0F VEX_PP = 0x2; break; @@ -511,45 +744,57 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::XOP9: VEX_5M = 0x9; break; - case X86II::A6: // Bypass: Not used by VEX - case X86II::A7: // Bypass: Not used by VEX - case X86II::TB: // Bypass: Not used by VEX - case 0: - break; // No prefix! 
+ case X86II::XOPA: + VEX_5M = 0xA; + break; + case X86II::TB: // VEX_5M/VEX_PP already correct + break; } - // Set the vector length to 256-bit if YMM0-YMM15 is used - for (unsigned i = 0; i != MI.getNumOperands(); ++i) { - if (!MI.getOperand(i).isReg()) - continue; - unsigned SrcReg = MI.getOperand(i).getReg(); - if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) - VEX_L = 1; - } - // Classify VEX_B, VEX_4V, VEX_R, VEX_X - unsigned CurOp = 0; + unsigned NumOps = Desc.getNumOperands(); + unsigned CurOp = X86II::getOperandBias(Desc); + switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!"); + default: llvm_unreachable("Unexpected form in EmitVEXOpcodePrefix!"); + case X86II::RawFrm: + break; case X86II::MRMDestMem: { // MRMDestMem instructions forms: // MemAddr, src1(ModR/M) // MemAddr, src1(VEX_4V), src2(ModR/M) // MemAddr, src1(ModR/M), imm8 // - if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg())) + if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand + + X86::AddrBaseReg).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) + if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand + + X86::AddrIndexReg).getReg())) VEX_X = 0x0; + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(MemOperand + + X86::AddrIndexReg).getReg())) + EVEX_V2 = 0x0; - CurOp = X86::AddrNumOperands; - if (HasVEX_4V) - VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + CurOp += X86::AddrNumOperands; + + if (HasEVEX_K) + EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); + + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + EVEX_V2 = 0x0; + CurOp++; + } const MCOperand &MO = MI.getOperand(CurOp); - if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) - VEX_R = 0x0; + if (MO.isReg()) { + if (X86II::isX86_64ExtendedReg(MO.getReg())) + VEX_R = 0x0; + if (HasEVEX && X86II::is32ExtendedReg(MO.getReg())) + EVEX_R2 = 0x0; + } break; } case X86II::MRMSrcMem: @@ -562,11 +807,21 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // FMA4: // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + EVEX_R2 = 0x0; + CurOp++; - if (HasVEX_4V) - VEX_4V = getVEXRegisterEncoding(MI, 1); + if (HasEVEX_K) + EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); + + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + EVEX_V2 = 0x0; + CurOp++; + } if (X86II::isX86_64ExtendedReg( MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) @@ -574,9 +829,17 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if (X86II::isX86_64ExtendedReg( MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) VEX_X = 0x0; + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(MemOperand + + X86::AddrIndexReg).getReg())) + EVEX_V2 = 0x0; if (HasVEX_4VOp3) - VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1); + // Instruction format for 4VOp3: + // src1(ModR/M), MemAddr, src3(VEX_4V) + // CurOp points to start of the MemoryOperand, + // it skips TIED_TO operands if exist, then increments past src1. 
+ // CurOp + X86::AddrNumOperands will point to src3. + VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands); break; case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: @@ -585,8 +848,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // MRM[0-9]m instructions forms: // MemAddr // src1(VEX_4V), MemAddr - if (HasVEX_4V) - VEX_4V = getVEXRegisterEncoding(MI, 0); + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + EVEX_V2 = 0x0; + CurOp++; + } + + if (HasEVEX_K) + EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); if (X86II::isX86_64ExtendedReg( MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) @@ -602,26 +872,71 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // dst(ModR/M), src1(ModR/M) // dst(ModR/M), src1(ModR/M), imm8 // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + EVEX_R2 = 0x0; CurOp++; - if (HasVEX_4V) - VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + if (HasEVEX_K) + EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); + + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + EVEX_V2 = 0x0; + CurOp++; + } + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_X = 0x0; CurOp++; if (HasVEX_4VOp3) - VEX_4V = getVEXRegisterEncoding(MI, CurOp); + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + if (EVEX_b) { + if (HasEVEX_RC) { + unsigned RcOperand = NumOps-1; + assert(RcOperand >= CurOp); + EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3; + } + EncodeRC = true; + } break; case X86II::MRMDestReg: // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_X = 0x0; + CurOp++; + + if (HasEVEX_K) + EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); + + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + EVEX_V2 = 0x0; + CurOp++; + } + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + EVEX_R2 = 0x0; + if (EVEX_b) + EncodeRC = true; break; case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: @@ -629,40 +944,85 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::MRM6r: case X86II::MRM7r: // MRM0r-MRM7r instructions forms: // dst(VEX_4V), src(ModR/M), imm8 - VEX_4V = getVEXRegisterEncoding(MI, 0); - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) 
+ EVEX_V2 = 0x0; + CurOp++; + } + if (HasEVEX_K) + EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - break; - default: // RawFrm + if (HasEVEX && X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_X = 0x0; break; } // Emit segment override opcode prefix as needed. - EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS); + if (MemOperand >= 0) + EmitSegmentOverridePrefix(CurByte, MemOperand+X86::AddrSegmentReg, MI, OS); - // VEX opcode prefix can have 2 or 3 bytes - // - // 3 bytes: - // +-----+ +--------------+ +-------------------+ - // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | - // +-----+ +--------------+ +-------------------+ - // 2 bytes: - // +-----+ +-------------------+ - // | C5h | | R | vvvv | L | pp | - // +-----+ +-------------------+ - // - unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); + if (!HasEVEX) { + // VEX opcode prefix can have 2 or 3 bytes + // + // 3 bytes: + // +-----+ +--------------+ +-------------------+ + // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | + // +-----+ +--------------+ +-------------------+ + // 2 bytes: + // +-----+ +-------------------+ + // | C5h | | R | vvvv | L | pp | + // +-----+ +-------------------+ + // + unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); - if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix - EmitByte(0xC5, CurByte, OS); - EmitByte(LastByte | (VEX_R << 7), CurByte, OS); - return; - } + if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix + EmitByte(0xC5, CurByte, OS); + EmitByte(LastByte | (VEX_R << 7), CurByte, OS); + return; + } - // 3 byte VEX prefix - EmitByte(XOP ? 0x8F : 0xC4, CurByte, OS); - EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS); - EmitByte(LastByte | (VEX_W << 7), CurByte, OS); + // 3 byte VEX prefix + EmitByte(XOP ? 
0x8F : 0xC4, CurByte, OS); + EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS); + EmitByte(LastByte | (VEX_W << 7), CurByte, OS); + } else { + // EVEX opcode prefix can have 4 bytes + // + // +-----+ +--------------+ +-------------------+ +------------------------+ + // | 62h | | RXBR' | 00mm | | W | vvvv | U | pp | | z | L'L | b | v' | aaa | + // +-----+ +--------------+ +-------------------+ +------------------------+ + assert((VEX_5M & 0x3) == VEX_5M + && "More than 2 significant bits in VEX.m-mmmm fields for EVEX!"); + + VEX_5M &= 0x3; + + EmitByte(0x62, CurByte, OS); + EmitByte((VEX_R << 7) | + (VEX_X << 6) | + (VEX_B << 5) | + (EVEX_R2 << 4) | + VEX_5M, CurByte, OS); + EmitByte((VEX_W << 7) | + (VEX_4V << 3) | + (EVEX_U << 2) | + VEX_PP, CurByte, OS); + if (EncodeRC) + EmitByte((EVEX_z << 7) | + (EVEX_rc << 5) | + (EVEX_b << 4) | + (EVEX_V2 << 3) | + EVEX_aaa, CurByte, OS); + else + EmitByte((EVEX_z << 7) | + (EVEX_L2 << 6) | + (VEX_L << 5) | + (EVEX_b << 4) | + (EVEX_V2 << 3) | + EVEX_aaa, CurByte, OS); + } } /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 @@ -695,7 +1055,6 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, } switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!"); case X86II::MRMSrcReg: if (MI.getOperand(0).isReg() && X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) @@ -760,33 +1119,20 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, } /// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed -void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags, - unsigned &CurByte, int MemOperand, - const MCInst &MI, - raw_ostream &OS) const { - switch (TSFlags & X86II::SegOvrMask) { - default: llvm_unreachable("Invalid segment!"); - case 0: - // No segment override, check for explicit one on memory operand. - if (MemOperand != -1) { // If the instruction has a memory operand. - switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) { - default: llvm_unreachable("Unknown segment register!"); - case 0: break; - case X86::CS: EmitByte(0x2E, CurByte, OS); break; - case X86::SS: EmitByte(0x36, CurByte, OS); break; - case X86::DS: EmitByte(0x3E, CurByte, OS); break; - case X86::ES: EmitByte(0x26, CurByte, OS); break; - case X86::FS: EmitByte(0x64, CurByte, OS); break; - case X86::GS: EmitByte(0x65, CurByte, OS); break; - } - } - break; - case X86II::FS: - EmitByte(0x64, CurByte, OS); - break; - case X86II::GS: - EmitByte(0x65, CurByte, OS); - break; +void X86MCCodeEmitter::EmitSegmentOverridePrefix(unsigned &CurByte, + unsigned SegOperand, + const MCInst &MI, + raw_ostream &OS) const { + // Check for explicit segment override on memory operand. + switch (MI.getOperand(SegOperand).getReg()) { + default: llvm_unreachable("Unknown segment register!"); + case 0: break; + case X86::CS: EmitByte(0x2E, CurByte, OS); break; + case X86::SS: EmitByte(0x36, CurByte, OS); break; + case X86::DS: EmitByte(0x3E, CurByte, OS); break; + case X86::ES: EmitByte(0x26, CurByte, OS); break; + case X86::FS: EmitByte(0x64, CurByte, OS); break; + case X86::GS: EmitByte(0x65, CurByte, OS); break; } } @@ -804,19 +1150,45 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, EmitByte(0xF0, CurByte, OS); // Emit segment override opcode prefix as needed. 
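// The EVEX path shown above emits the fixed 0x62 escape plus three payload
// bytes. A compact stand-alone model of that packing, with field names as in
// the byte diagram above (packEVEX is a made-up name; illustration only):

#include <cstdio>

struct EVEXPayload { unsigned char P0, P1, P2; };

static EVEXPayload packEVEX(unsigned char R, unsigned char X, unsigned char B,
                            unsigned char R2, unsigned char MM, unsigned char W,
                            unsigned char VVVV, unsigned char U,
                            unsigned char PP, unsigned char Z, unsigned char L2,
                            unsigned char L, unsigned char EB, unsigned char V2,
                            unsigned char AAA) {
  EVEXPayload P;
  P.P0 = (unsigned char)(R << 7 | X << 6 | B << 5 | R2 << 4 | MM);
  P.P1 = (unsigned char)(W << 7 | VVVV << 3 | U << 2 | PP);
  P.P2 = (unsigned char)(Z << 7 | L2 << 6 | L << 5 | EB << 4 | V2 << 3 | AAA);
  return P;
}

int main() {
  // A 512-bit, unmasked, 0F-map form with all register bits at their defaults.
  EVEXPayload P = packEVEX(1, 1, 1, 1, 0x1, 0, 0xf, 1, 0, 0, 1, 0, 0, 1, 0);
  std::printf("62 %02x %02x %02x\n", P.P0, P.P1, P.P2); // 62 f1 7c 48
  return 0;
}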
- EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS); + if (MemOperand >= 0) + EmitSegmentOverridePrefix(CurByte, MemOperand+X86::AddrSegmentReg, MI, OS); // Emit the repeat opcode prefix as needed. if ((TSFlags & X86II::Op0Mask) == X86II::REP) EmitByte(0xF3, CurByte, OS); // Emit the address size opcode prefix as needed. - if ((TSFlags & X86II::AdSize) || - (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand))) + bool need_address_override; + // The AdSize prefix is only for 32-bit and 64-bit modes. Hm, perhaps we + // should introduce an AdSize16 bit instead of having seven special cases? + if ((!is16BitMode() && TSFlags & X86II::AdSize) || + (is16BitMode() && (MI.getOpcode() == X86::JECXZ_32 || + MI.getOpcode() == X86::MOV8o8a || + MI.getOpcode() == X86::MOV16o16a || + MI.getOpcode() == X86::MOV32o32a || + MI.getOpcode() == X86::MOV8ao8 || + MI.getOpcode() == X86::MOV16ao16 || + MI.getOpcode() == X86::MOV32ao32))) { + need_address_override = true; + } else if (MemOperand == -1) { + need_address_override = false; + } else if (is64BitMode()) { + assert(!Is16BitMemOperand(MI, MemOperand)); + need_address_override = Is32BitMemOperand(MI, MemOperand); + } else if (is32BitMode()) { + assert(!Is64BitMemOperand(MI, MemOperand)); + need_address_override = Is16BitMemOperand(MI, MemOperand); + } else { + assert(is16BitMode()); + assert(!Is64BitMemOperand(MI, MemOperand)); + need_address_override = !Is16BitMemOperand(MI, MemOperand); + } + + if (need_address_override) EmitByte(0x67, CurByte, OS); // Emit the operand size opcode prefix as needed. - if (TSFlags & X86II::OpSize) + if (TSFlags & (is16BitMode() ? X86II::OpSize16 : X86II::OpSize)) EmitByte(0x66, CurByte, OS); bool Need0FPrefix = false; @@ -831,34 +1203,34 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::A7: // 0F A7 Need0FPrefix = true; break; - case X86II::T8XS: // F3 0F 38 - EmitByte(0xF3, CurByte, OS); - Need0FPrefix = true; - break; - case X86II::T8XD: // F2 0F 38 - EmitByte(0xF2, CurByte, OS); - Need0FPrefix = true; - break; - case X86II::TAXD: // F2 0F 3A - EmitByte(0xF2, CurByte, OS); + case X86II::PD: // 66 0F + case X86II::T8PD: // 66 0F 38 + case X86II::TAPD: // 66 0F 3A + EmitByte(0x66, CurByte, OS); Need0FPrefix = true; break; case X86II::XS: // F3 0F + case X86II::T8XS: // F3 0F 38 EmitByte(0xF3, CurByte, OS); Need0FPrefix = true; break; case X86II::XD: // F2 0F + case X86II::T8XD: // F2 0F 38 + case X86II::TAXD: // F2 0F 3A EmitByte(0xF2, CurByte, OS); Need0FPrefix = true; break; - case X86II::D8: EmitByte(0xD8, CurByte, OS); break; - case X86II::D9: EmitByte(0xD9, CurByte, OS); break; - case X86II::DA: EmitByte(0xDA, CurByte, OS); break; - case X86II::DB: EmitByte(0xDB, CurByte, OS); break; - case X86II::DC: EmitByte(0xDC, CurByte, OS); break; - case X86II::DD: EmitByte(0xDD, CurByte, OS); break; - case X86II::DE: EmitByte(0xDE, CurByte, OS); break; - case X86II::DF: EmitByte(0xDF, CurByte, OS); break; + case X86II::D8: + case X86II::D9: + case X86II::DA: + case X86II::DB: + case X86II::DC: + case X86II::DD: + case X86II::DE: + case X86II::DF: + EmitByte(0xD8+(((TSFlags & X86II::Op0Mask) - X86II::D8) >> X86II::Op0Shift), + CurByte, OS); + break; } // Handle REX prefix. @@ -874,11 +1246,13 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // FIXME: Pull this up into previous switch if REX can be moved earlier. 
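// The REX value computed by DetermineREXPrefix above holds only the four low
// bits (W/R/X/B); the byte that actually reaches the stream presumably OR's
// them under the fixed 0100 high nibble, as in this stand-alone sketch
// (rexByte is a made-up name; illustration only):

#include <cassert>

static unsigned char rexByte(bool W, bool R, bool X, bool B) {
  return (unsigned char)(0x40 | (W << 3) | (R << 2) | (X << 1) | B);
}

int main() {
  assert(rexByte(true, false, false, false) == 0x48); // REX.W: 64-bit operand
  assert(rexByte(false, false, false, true) == 0x41); // REX.B: extends r/m or base
  return 0;
}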
switch (TSFlags & X86II::Op0Mask) { + case X86II::T8PD: // 66 0F 38 case X86II::T8XS: // F3 0F 38 case X86II::T8XD: // F2 0F 38 case X86II::T8: // 0F 38 EmitByte(0x38, CurByte, OS); break; + case X86II::TAPD: // 66 0F 3A case X86II::TAXD: // F2 0F 3A case X86II::TA: // 0F 3A EmitByte(0x3A, CurByte, OS); @@ -903,15 +1277,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if ((TSFlags & X86II::FormMask) == X86II::Pseudo) return; - // If this is a two-address instruction, skip one of the register operands. - // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); - unsigned CurOp = 0; - if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1) - ++CurOp; - else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; + unsigned CurOp = X86II::getOperandBias(Desc); // Keep track of the current byte being emitted. unsigned CurByte = 0; @@ -925,6 +1292,11 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; const unsigned MemOp4_I8IMMOperand = 2; + // It uses the EVEX.aaa field? + bool HasEVEX = (TSFlags >> X86II::VEXShift) & X86II::EVEX; + bool HasEVEX_K = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K); + bool HasEVEX_RC = HasEVEX && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_RC); + // Determine where the memory operand starts, if present. int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode); if (MemoryOperand != -1) MemoryOperand += CurOp; @@ -941,15 +1313,62 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned SrcRegNum = 0; switch (TSFlags & X86II::FormMask) { - case X86II::MRMInitReg: - llvm_unreachable("FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!"); case X86II::Pseudo: llvm_unreachable("Pseudo instruction shouldn't be emitted"); + case X86II::RawFrmDstSrc: { + unsigned siReg = MI.getOperand(1).getReg(); + assert(((siReg == X86::SI && MI.getOperand(0).getReg() == X86::DI) || + (siReg == X86::ESI && MI.getOperand(0).getReg() == X86::EDI) || + (siReg == X86::RSI && MI.getOperand(0).getReg() == X86::RDI)) && + "SI and DI register sizes do not match"); + // Emit segment override opcode prefix as needed (not for %ds). + if (MI.getOperand(2).getReg() != X86::DS) + EmitSegmentOverridePrefix(CurByte, 2, MI, OS); + // Emit OpSize prefix as needed. + if ((!is32BitMode() && siReg == X86::ESI) || + (is32BitMode() && siReg == X86::SI)) + EmitByte(0x67, CurByte, OS); + CurOp += 3; // Consume operands. + EmitByte(BaseOpcode, CurByte, OS); + break; + } + case X86II::RawFrmSrc: { + unsigned siReg = MI.getOperand(0).getReg(); + // Emit segment override opcode prefix as needed (not for %ds). + if (MI.getOperand(1).getReg() != X86::DS) + EmitSegmentOverridePrefix(CurByte, 1, MI, OS); + // Emit OpSize prefix as needed. + if ((!is32BitMode() && siReg == X86::ESI) || + (is32BitMode() && siReg == X86::SI)) + EmitByte(0x67, CurByte, OS); + CurOp += 2; // Consume operands. + EmitByte(BaseOpcode, CurByte, OS); + break; + } + case X86II::RawFrmDst: { + unsigned siReg = MI.getOperand(0).getReg(); + // Emit OpSize prefix as needed. + if ((!is32BitMode() && siReg == X86::EDI) || + (is32BitMode() && siReg == X86::DI)) + EmitByte(0x67, CurByte, OS); + ++CurOp; // Consume operand. 
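// The 0x67 checks in the string-instruction cases above all follow one rule:
// the prefix is required whenever the width of the implicit pointer register
// does not match the current mode's default address size. A simplified model
// (plain integer widths; the unencodable 16-bit-mode-with-64-bit-pointer
// combination is not modeled):

#include <cassert>

static bool needsAddrSizeOverride(int ModeBits, int PointerBits) {
  return ModeBits != PointerBits; // 0x67 toggles to the alternate width
}

int main() {
  assert(needsAddrSizeOverride(64, 32));  // e.g. movsb with %esi/%edi in 64-bit mode
  assert(needsAddrSizeOverride(32, 16));  // e.g. movsb with %si/%di in 32-bit mode
  assert(!needsAddrSizeOverride(32, 32));
  return 0;
}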
+ EmitByte(BaseOpcode, CurByte, OS); + break; + } case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; + case X86II::RawFrmMemOffs: + // Emit segment override opcode prefix as needed. + EmitSegmentOverridePrefix(CurByte, 1, MI, OS); + EmitByte(BaseOpcode, CurByte, OS); + EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + ++CurOp; // skip segment operand + break; case X86II::RawFrmImm8: EmitByte(BaseOpcode, CurByte, OS); EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), @@ -973,18 +1392,29 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); + SrcRegNum = CurOp + 1; + + if (HasEVEX_K) // Skip writemask + SrcRegNum++; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + EmitRegModRMByte(MI.getOperand(CurOp), - GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); - CurOp += 2; + GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS); + CurOp = SrcRegNum + 1; break; case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); SrcRegNum = CurOp + X86::AddrNumOperands; - if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + if (HasEVEX_K) // Skip writemask SrcRegNum++; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + EmitMemModRMByte(MI, CurOp, GetX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, CurByte, OS, Fixups); @@ -995,29 +1425,41 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); SrcRegNum = CurOp + 1; - if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + if (HasEVEX_K) // Skip writemask SrcRegNum++; - if(HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) - SrcRegNum++; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + + if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM) + ++SrcRegNum; EmitRegModRMByte(MI.getOperand(SrcRegNum), GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); - // 2 operands skipped with HasMemOp4, comensate accordingly + // 2 operands skipped with HasMemOp4, compensate accordingly CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1; if (HasVEX_4VOp3) ++CurOp; + // do not count the rounding control operand + if (HasEVEX_RC) + NumOps--; break; case X86II::MRMSrcMem: { int AddrOperands = X86::AddrNumOperands; unsigned FirstMemOp = CurOp+1; + + if (HasEVEX_K) { // Skip writemask + ++AddrOperands; + ++FirstMemOp; + } + if (HasVEX_4V) { ++AddrOperands; ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). } - if(HasMemOp4) // Skip second register source (encoded in I8IMM) + if (HasMemOp4) // Skip second register source (encoded in I8IMM) ++FirstMemOp; EmitByte(BaseOpcode, CurByte, OS); @@ -1035,7 +1477,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). - CurOp++; + ++CurOp; EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp++), (TSFlags & X86II::FormMask)-X86II::MRM0r, @@ -1046,79 +1488,74 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). 
- CurOp++; + ++CurOp; EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, TSFlags, CurByte, OS, Fixups); CurOp += X86::AddrNumOperands; break; - case X86II::MRM_C1: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC1, CurByte, OS); - break; - case X86II::MRM_C2: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC2, CurByte, OS); - break; - case X86II::MRM_C3: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC3, CurByte, OS); - break; - case X86II::MRM_C4: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC4, CurByte, OS); - break; - case X86II::MRM_C8: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC8, CurByte, OS); - break; - case X86II::MRM_C9: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xC9, CurByte, OS); - break; - case X86II::MRM_E8: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xE8, CurByte, OS); - break; - case X86II::MRM_F0: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xF0, CurByte, OS); - break; - case X86II::MRM_F8: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xF8, CurByte, OS); - break; + case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: + case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: + case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_D0: + case X86II::MRM_D1: case X86II::MRM_D4: case X86II::MRM_D5: + case X86II::MRM_D6: case X86II::MRM_D8: case X86II::MRM_D9: + case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC: + case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: + case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xF9, CurByte, OS); - break; - case X86II::MRM_D0: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xD0, CurByte, OS); - break; - case X86II::MRM_D1: - EmitByte(BaseOpcode, CurByte, OS); - EmitByte(0xD1, CurByte, OS); + + unsigned char MRM; + switch (TSFlags & X86II::FormMask) { + default: llvm_unreachable("Invalid Form"); + case X86II::MRM_C1: MRM = 0xC1; break; + case X86II::MRM_C2: MRM = 0xC2; break; + case X86II::MRM_C3: MRM = 0xC3; break; + case X86II::MRM_C4: MRM = 0xC4; break; + case X86II::MRM_C8: MRM = 0xC8; break; + case X86II::MRM_C9: MRM = 0xC9; break; + case X86II::MRM_CA: MRM = 0xCA; break; + case X86II::MRM_CB: MRM = 0xCB; break; + case X86II::MRM_D0: MRM = 0xD0; break; + case X86II::MRM_D1: MRM = 0xD1; break; + case X86II::MRM_D4: MRM = 0xD4; break; + case X86II::MRM_D5: MRM = 0xD5; break; + case X86II::MRM_D6: MRM = 0xD6; break; + case X86II::MRM_D8: MRM = 0xD8; break; + case X86II::MRM_D9: MRM = 0xD9; break; + case X86II::MRM_DA: MRM = 0xDA; break; + case X86II::MRM_DB: MRM = 0xDB; break; + case X86II::MRM_DC: MRM = 0xDC; break; + case X86II::MRM_DD: MRM = 0xDD; break; + case X86II::MRM_DE: MRM = 0xDE; break; + case X86II::MRM_DF: MRM = 0xDF; break; + case X86II::MRM_E8: MRM = 0xE8; break; + case X86II::MRM_F0: MRM = 0xF0; break; + case X86II::MRM_F8: MRM = 0xF8; break; + case X86II::MRM_F9: MRM = 0xF9; break; + } + EmitByte(MRM, CurByte, OS); break; } // If there is a remaining operand, it must be a trailing immediate. Emit it - // according to the right size for the instruction. - if (CurOp != NumOps) { + // according to the right size for the instruction. Some instructions + // (SSE4a extrq and insertq) have two trailing immediates. + while (CurOp != NumOps && NumOps - CurOp <= 2) { // The last source register of a 4 operand instruction in AVX is encoded // in bits[7:4] of a immediate byte. 
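// A stand-alone sketch of that packing: the 4-bit register number occupies
// bits[7:4] (bit 7 doubling as the extended-register bit), and a small
// immediate may occupy bits[3:0] (packRegImm8 is a made-up name;
// illustration only):

#include <cassert>

static unsigned char packRegImm8(unsigned RegNum, bool ExtendedReg,
                                 unsigned Imm4) {
  assert(RegNum < 8 && Imm4 < 16 && "field out of range");
  unsigned char Byte = (unsigned char)(RegNum << 4 | Imm4);
  if (ExtendedReg)
    Byte |= 1 << 7; // extends the register number to XMM8-XMM15
  return Byte;
}

int main() {
  assert(packRegImm8(2, false, 0) == 0x20); // e.g. XMM2 in bits[7:4]
  assert(packRegImm8(1, true, 5) == 0x95);  // e.g. XMM9 plus immediate 5
  return 0;
}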
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { const MCOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand - : CurOp); - CurOp++; - bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg()); - unsigned RegNum = (IsExtReg ? (1 << 7) : 0); - RegNum |= GetX86RegNum(MO) << 4; + : CurOp); + ++CurOp; + unsigned RegNum = GetX86RegNum(MO) << 4; + if (X86II::isX86_64ExtendedReg(MO.getReg())) + RegNum |= 1 << 7; // If there is an additional 5th operand it must be an immediate, which // is encoded in bits[3:0] - if(CurOp != NumOps) { + if (CurOp != NumOps) { const MCOperand &MIMM = MI.getOperand(CurOp++); - if(MIMM.isImm()) { + if (MIMM.isImm()) { unsigned Val = MIMM.getImm(); assert(Val < 16 && "Immediate operand value out of range"); RegNum |= Val;
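// Tying the pieces together, a worked example: for vaddps %ymm1, %ymm2, %ymm3
// (VEX.256.0F 58 /r) the two-byte VEX form applies, since B and X are at their
// defaults and the 0F map is implied. The byte values below were derived by
// hand from the rules in this file (illustration only):

#include <cassert>

int main() {
  unsigned char VEX_R = 1;          // ymm3 (dest) is not an extended register
  unsigned char VEX_4V = ~2u & 0xf; // vvvv holds ymm2, inverted: 0b1101
  unsigned char VEX_L = 1;          // 256-bit vector length
  unsigned char VEX_PP = 0;         // no implied SIMD prefix
  unsigned char LastByte =
      (unsigned char)(VEX_PP | (VEX_L << 2) | (VEX_4V << 3));

  unsigned char B0 = 0xC5;                                    // 2-byte VEX escape
  unsigned char B1 = (unsigned char)(LastByte | (VEX_R << 7));
  unsigned char Opcode = 0x58;                                // ADDPS
  unsigned char ModRM = (unsigned char)(3 << 6 | 3 << 3 | 1); // reg=ymm3, rm=ymm1

  assert(B0 == 0xC5 && B1 == 0xEC && Opcode == 0x58 && ModRM == 0xD9);
  return 0;
}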