+ case X86II::T8XD: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
+ case X86II::T8: // 0F 38
+ MCE.emitByte(0x38);
+ break;
+ case X86II::TAXD: // F2 0F 38
+ case X86II::TA: // 0F 3A
+ MCE.emitByte(0x3A);
+ break;
+ case X86II::A6: // 0F A6
+ MCE.emitByte(0xA6);
+ break;
+ case X86II::A7: // 0F A7
+ MCE.emitByte(0xA7);
+ break;
+ }
+}
+
+static unsigned GetX86RegNum(const MachineOperand &MO) {
+ return X86_MC::getX86RegNum(MO.getReg());
+}
+
+// On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+// 0-7 and the difference between the 2 groups is given by the REX prefix.
+// In the VEX prefix, registers are seen sequencially from 0-15 and encoded
+// in 1's complement form, example:
+//
+// ModRM field => XMM9 => 1
+// VEX.VVVV => XMM9 => ~9
+//
+// See table 4-35 of Intel AVX Programming Reference for details.
+static unsigned char getVEXRegisterEncoding(const MachineInstr &MI,
+ unsigned OpNum) {
+ unsigned SrcReg = MI.getOperand(OpNum).getReg();
+ unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+ if (X86II::isX86_64ExtendedReg(SrcReg))
+ SrcRegNum |= 8;
+
+ // The registers represented through VEX_VVVV should
+ // be encoded in 1's complement form.
+ return (~SrcRegNum) & 0xf;
+}
+
+/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitSegmentOverridePrefix(uint64_t TSFlags,
+ int MemOperand,
+ const MachineInstr &MI) const {
+ switch (TSFlags & X86II::SegOvrMask) {
+ default: llvm_unreachable("Invalid segment!");
+ case 0:
+ // No segment override, check for explicit one on memory operand.
+ if (MemOperand != -1) { // If the instruction has a memory operand.
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+ default: llvm_unreachable("Unknown segment register!");
+ case 0: break;
+ case X86::CS: MCE.emitByte(0x2E); break;
+ case X86::SS: MCE.emitByte(0x36); break;
+ case X86::DS: MCE.emitByte(0x3E); break;
+ case X86::ES: MCE.emitByte(0x26); break;
+ case X86::FS: MCE.emitByte(0x64); break;
+ case X86::GS: MCE.emitByte(0x65); break;
+ }
+ }
+ break;
+ case X86II::FS:
+ MCE.emitByte(0x64);
+ break;
+ case X86II::GS:
+ MCE.emitByte(0x65);
+ break;
+ }
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags,
+ int MemOperand,
+ const MachineInstr &MI,
+ const MCInstrDesc *Desc) const {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+
+ // VEX_R: opcode externsion equivalent to REX.R in
+ // 1's complement (inverted) form
+ //
+ // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX_R=1 (64 bit mode only)
+ //
+ unsigned char VEX_R = 0x1;
+
+ // VEX_X: equivalent to REX.X, only used when a
+ // register is used for index in SIB Byte.
+ //
+ // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.X=1 (64-bit mode only)
+ unsigned char VEX_X = 0x1;
+
+ // VEX_B:
+ //
+ // 1: Same as REX_B=0 (ignored in 32-bit mode)
+ // 0: Same as REX_B=1 (64 bit mode only)
+ //
+ unsigned char VEX_B = 0x1;
+
+ // VEX_W: opcode specific (use like REX.W, or used for
+ // opcode extension, or ignored, depending on the opcode byte)
+ unsigned char VEX_W = 0;
+
+ // XOP: Use XOP prefix byte 0x8f instead of VEX.
+ unsigned char XOP = 0;
+
+ // VEX_5M (VEX m-mmmmm field):
+ //
+ // 0b00000: Reserved for future use
+ // 0b00001: implied 0F leading opcode
+ // 0b00010: implied 0F 38 leading opcode bytes
+ // 0b00011: implied 0F 3A leading opcode bytes
+ // 0b00100-0b11111: Reserved for future use
+ // 0b01000: XOP map select - 08h instructions with imm byte
+ // 0b10001: XOP map select - 09h instructions with no imm byte
+ unsigned char VEX_5M = 0x1;
+
+ // VEX_4V (VEX vvvv field): a register specifier
+ // (in 1's complement form) or 1111 if unused.
+ unsigned char VEX_4V = 0xf;
+
+ // VEX_L (Vector Length):
+ //
+ // 0: scalar or 128-bit vector
+ // 1: 256-bit vector
+ //
+ unsigned char VEX_L = 0;
+
+ // VEX_PP: opcode extension providing equivalent
+ // functionality of a SIMD prefix
+ //
+ // 0b00: None
+ // 0b01: 66
+ // 0b10: F3
+ // 0b11: F2
+ //
+ unsigned char VEX_PP = 0;
+
+ // Encode the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ VEX_PP = 0x01;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
+ VEX_W = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
+ XOP = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
+ VEX_L = 1;
+
+ switch (TSFlags & X86II::Op0Mask) {
+ default: llvm_unreachable("Invalid prefix!");
+ case X86II::T8: // 0F 38
+ VEX_5M = 0x2;
+ break;
+ case X86II::TA: // 0F 3A
+ VEX_5M = 0x3;
+ break;
+ case X86II::T8XS: // F3 0F 38
+ VEX_PP = 0x2;
+ VEX_5M = 0x2;
+ break;
+ case X86II::T8XD: // F2 0F 38
+ VEX_PP = 0x3;
+ VEX_5M = 0x2;
+ break;
+ case X86II::TAXD: // F2 0F 3A
+ VEX_PP = 0x3;
+ VEX_5M = 0x3;
+ break;
+ case X86II::XS: // F3 0F
+ VEX_PP = 0x2;
+ break;
+ case X86II::XD: // F2 0F
+ VEX_PP = 0x3;
+ break;
+ case X86II::XOP8:
+ VEX_5M = 0x8;
+ break;
+ case X86II::XOP9:
+ VEX_5M = 0x9;
+ break;
+ case X86II::A6: // Bypass: Not used by VEX
+ case X86II::A7: // Bypass: Not used by VEX
+ case X86II::TB: // Bypass: Not used by VEX
+ case 0:
+ break; // No prefix!
+ }
+
+
+ // Set the vector length to 256-bit if YMM0-YMM15 is used
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
+ VEX_L = 1;
+ }
+
+ // Classify VEX_B, VEX_4V, VEX_R, VEX_X
+ unsigned CurOp = 0;
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!");
+ case X86II::MRMDestMem: {
+ // MRMDestMem instructions forms:
+ // MemAddr, src1(ModR/M)
+ // MemAddr, src1(VEX_4V), src2(ModR/M)
+ // MemAddr, src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ CurOp = X86::AddrNumOperands;
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+ const MachineOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ break;
+ }
+ case X86II::MRMSrcMem:
+ // MRMSrcMem instructions forms:
+ // src1(ModR/M), MemAddr
+ // src1(ModR/M), src2(VEX_4V), MemAddr
+ // src1(ModR/M), MemAddr, imm8
+ // src1(ModR/M), MemAddr, src2(VEX_I8IMM)
+ //
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_R = 0x0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, 1);
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
+ break;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ // MRM[0-9]m instructions forms:
+ // MemAddr
+ // src1(VEX_4V), MemAddr
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
+ VEX_X = 0x0;
+ break;
+ }
+ case X86II::MRMSrcReg:
+ // MRMSrcReg instructions forms:
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M), src1(ModR/M)
+ // dst(ModR/M), src1(ModR/M), imm8
+ //
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+ CurOp++;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0x0;
+ CurOp++;
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ break;
+ case X86II::MRMDestReg:
+ // MRMDestReg instructions forms:
+ // dst(ModR/M), src(ModR/M)
+ // dst(ModR/M), src(ModR/M), imm8
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+ VEX_B = 0x0;
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_R = 0x0;
+ break;
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ // MRM0r-MRM7r instructions forms:
+ // dst(VEX_4V), src(ModR/M), imm8
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg()))
+ VEX_B = 0x0;
+ break;
+ default: // RawFrm
+ break;
+ }
+
+ // Emit segment override opcode prefix as needed.
+ emitSegmentOverridePrefix(TSFlags, MemOperand, MI);
+
+ // VEX opcode prefix can have 2 or 3 bytes
+ //
+ // 3 bytes:
+ // +-----+ +--------------+ +-------------------+
+ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+ // 2 bytes:
+ // +-----+ +-------------------+
+ // | C5h | | R | vvvv | L | pp |
+ // +-----+ +-------------------+
+ //
+ unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+ if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
+ MCE.emitByte(0xC5);
+ MCE.emitByte(LastByte | (VEX_R << 7));
+ return;
+ }
+
+ // 3 byte VEX prefix
+ MCE.emitByte(XOP ? 0x8F : 0xC4);
+ MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M);
+ MCE.emitByte(LastByte | (VEX_W << 7));
+}
+
+template<class CodeEmitter>
+void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
+ const MCInstrDesc *Desc) {
+ DEBUG(dbgs() << MI);
+
+ // If this is a pseudo instruction, lower it.
+ switch (Desc->getOpcode()) {
+ case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break;
+ case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break;
+ case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break;
+ case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break;
+ case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break;
+ case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break;
+ case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break;
+ case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break;
+ case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break;
+ case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break;
+ case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break;
+ case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break;
+ case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break;
+ case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break;
+ case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break;
+ case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break;
+ case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break;