From 1f7a1b68a07ea6bdf521525a7928f4a8c5216713 Mon Sep 17 00:00:00 2001
From: Manman Ren
Date: Tue, 26 Jun 2012 19:47:59 +0000
Subject: [PATCH] X86: add GATHER intrinsics (AVX2) in LLVM

Support the following intrinsics:
llvm.x86.avx2.gather.d.pd, llvm.x86.avx2.gather.q.pd
llvm.x86.avx2.gather.d.pd.256, llvm.x86.avx2.gather.q.pd.256
llvm.x86.avx2.gather.d.ps, llvm.x86.avx2.gather.q.ps
llvm.x86.avx2.gather.d.ps.256, llvm.x86.avx2.gather.q.ps.256
Modified Disassembler to handle VSIB addressing mode.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159221 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/IntrinsicsX86.td                 | 36 +++++++++
 lib/Target/X86/AsmParser/X86AsmParser.cpp     |  7 +-
 .../X86/Disassembler/X86Disassembler.cpp      | 27 ++++++-
 .../X86/Disassembler/X86DisassemblerDecoder.h |  3 +
 .../X86/MCTargetDesc/X86MCCodeEmitter.cpp     |  7 +-
 lib/Target/X86/X86ISelDAGToDAG.cpp            | 47 +++++++++++
 lib/Target/X86/X86InstrInfo.td                |  8 ++
 lib/Target/X86/X86InstrSSE.td                 | 37 +++++++++
 test/CodeGen/X86/avx2-intrinsics-x86.ll       | 80 +++++++++++++++++++
 test/MC/Disassembler/X86/simple-tests.txt     |  6 ++
 test/MC/X86/x86_64-avx-encoding.s             |  8 ++
 utils/TableGen/EDEmitter.cpp                  |  2 +
 utils/TableGen/X86RecognizableInstr.cpp       |  4 +
 13 files changed, 268 insertions(+), 4 deletions(-)

diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index fe53b0aaf80..ba38c3ce13a 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1744,6 +1744,42 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
       [IntrNoMem]>;
 }
 
+// Gather ops
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
+      Intrinsic<[llvm_v2f64_ty],
+        [llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
+        [IntrReadMem]>;
+  def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
+      Intrinsic<[llvm_v4f64_ty],
+        [llvm_v4f64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+        [IntrReadMem]>;
+  def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
+      Intrinsic<[llvm_v2f64_ty],
+        [llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
+        [IntrReadMem]>;
+  def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
+      Intrinsic<[llvm_v4f64_ty],
+        [llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
+        [IntrReadMem]>;
+  def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
+      Intrinsic<[llvm_v4f32_ty],
+        [llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+        [IntrReadMem]>;
+  def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
+      Intrinsic<[llvm_v8f32_ty],
+        [llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+        [IntrReadMem]>;
+  def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
+      Intrinsic<[llvm_v4f32_ty],
+        [llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
+        [IntrReadMem]>;
+  def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
+      Intrinsic<[llvm_v8f32_ty],
+        [llvm_v8f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v8f32_ty, llvm_i8_ty],
+        [IntrReadMem]>;
+}
+
 // Misc.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 08c732c3886..e74aea26d0e 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -916,15 +916,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 
   // If we have both a base register and an index register make sure they are
   // both 64-bit or 32-bit registers.
+  // To support VSIB, IndexReg can be a 128-bit or 256-bit register.
   if (BaseReg != 0 && IndexReg != 0) {
     if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
-        !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) &&
+        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
        IndexReg != X86::RIZ) {
      Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
      return 0;
    }
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
-        !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) &&
+        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
+         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
        IndexReg != X86::EIZ){
      Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
      return 0;
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index b13a00620bb..b13e1ca41c5 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -498,7 +498,30 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
     } else {
       baseReg = MCOperand::CreateReg(0);
     }
-    
+
+    // Check whether we are handling VSIB addressing mode for GATHER.
+    // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and
+    // we should use SIB_INDEX_XMM4|YMM4 for VSIB.
+    // I don't see a way to get the correct IndexReg in readSIB:
+    // We can tell whether it is VSIB or SIB after instruction ID is decoded,
+    // but instruction ID may not be decoded yet when calling readSIB.
+    uint32_t Opcode = mcInst.getOpcode();
+    bool IsGather = (Opcode == X86::VGATHERDPDrm ||
+                     Opcode == X86::VGATHERQPDrm ||
+                     Opcode == X86::VGATHERDPSrm ||
+                     Opcode == X86::VGATHERQPSrm);
+    bool IsGatherY = (Opcode == X86::VGATHERDPDYrm ||
+                      Opcode == X86::VGATHERQPDYrm ||
+                      Opcode == X86::VGATHERDPSYrm ||
+                      Opcode == X86::VGATHERQPSYrm);
+    if (IsGather || IsGatherY) {
+      unsigned IndexOffset = insn.sibIndex -
+                  (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
+      SIBIndex IndexBase = IsGatherY ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
+      insn.sibIndex = (SIBIndex)(IndexBase +
+                  (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
+    }
+
     if (insn.sibIndex != SIB_INDEX_NONE) {
       switch (insn.sibIndex) {
       default:
@@ -509,6 +532,8 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
         indexReg = MCOperand::CreateReg(X86::x); break;
       EA_BASES_32BIT
       EA_BASES_64BIT
+      REGS_XMM
+      REGS_YMM
 #undef ENTRY
       }
     } else {
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index fae309b45d0..e2caf6a2a8b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -310,11 +310,14 @@ typedef enum {
  * SIBIndex - All possible values of the SIB index field.
  *   Borrows entries from ALL_EA_BASES with the special case that
  *   sib is synonymous with NONE.
+ * Vector SIB: index can be XMM or YMM.
  */
 typedef enum {
   SIB_INDEX_NONE,
 #define ENTRY(x) SIB_INDEX_##x,
   ALL_EA_BASES
+  REGS_XMM
+  REGS_YMM
 #undef ENTRY
   SIB_INDEX_max
 } SIBIndex;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 12f1961ed80..52506fa185f 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -621,7 +621,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
       VEX_X = 0x0;
 
     if (HasVEX_4VOp3)
-      VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
+      // Instruction format for 4VOp3:
+      //   src1(ModR/M), MemAddr, src3(VEX_4V)
+      // CurOp points to start of the MemoryOperand,
+      //   it skips TIED_TO operands if they exist, then increments past src1.
+      // CurOp + X86::AddrNumOperands will point to src3.
+      VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands);
     break;
   case X86II::MRM0m: case X86II::MRM1m:
   case X86II::MRM2m: case X86II::MRM3m:
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index d381f3da4b1..ea9e5bcf180 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -187,6 +187,7 @@ namespace {
 
   private:
     SDNode *Select(SDNode *N);
+    SDNode *SelectGather(SDNode *N, unsigned Opc);
     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
     SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
     SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
@@ -1952,6 +1953,29 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
   llvm_unreachable("unrecognized size for LdVT");
 }
 
+/// SelectGather - Customized ISel for GATHER operations.
+///
+SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
+  // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
+  SDValue Chain = Node->getOperand(0);
+  SDValue VSrc = Node->getOperand(2);
+  SDValue Base = Node->getOperand(3);
+  SDValue VIdx = Node->getOperand(4);
+  SDValue VMask = Node->getOperand(5);
+  ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
+  assert(Scale && "Scale should be a constant for GATHER operations");
+
+  // Memory Operands: Base, Scale, Index, Disp, Segment
+  SDValue Disp = CurDAG->getTargetConstant(0, MVT::i32);
+  SDValue Segment = CurDAG->getRegister(0, MVT::i32);
+  const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
+                          Disp, Segment, VMask, Chain};
+  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
+                                           VSrc.getValueType(), MVT::Other,
+                                           Ops, array_lengthof(Ops));
+  return ResNode;
+}
+
 SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
   EVT NVT = Node->getValueType(0);
   unsigned Opc, MOpc;
@@ -1967,6 +1991,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
 
   switch (Opcode) {
   default: break;
+  case ISD::INTRINSIC_W_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+    switch (IntNo) {
+    default: break;
+    case Intrinsic::x86_avx2_gather_d_pd:
+      return SelectGather(Node, X86::VGATHERDPDrm);
+    case Intrinsic::x86_avx2_gather_d_pd_256:
+      return SelectGather(Node, X86::VGATHERDPDYrm);
+    case Intrinsic::x86_avx2_gather_q_pd:
+      return SelectGather(Node, X86::VGATHERQPDrm);
+    case Intrinsic::x86_avx2_gather_q_pd_256:
+      return SelectGather(Node, X86::VGATHERQPDYrm);
+    case Intrinsic::x86_avx2_gather_d_ps:
+      return SelectGather(Node, X86::VGATHERDPSrm);
+    case Intrinsic::x86_avx2_gather_d_ps_256:
+      return SelectGather(Node, X86::VGATHERDPSYrm);
+    case Intrinsic::x86_avx2_gather_q_ps:
+      return SelectGather(Node, X86::VGATHERQPSrm);
+    case Intrinsic::x86_avx2_gather_q_ps_256:
+      return SelectGather(Node, X86::VGATHERQPSYrm);
+    }
+    break;
+  }
 
   case X86ISD::GlobalBaseReg:
     return getGlobalBaseReg();
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 9ce61409daf..0023424e660 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -325,6 +325,14 @@ def f128mem : X86MemOperand<"printf128mem"> {
   let ParserMatchClass = X86Mem128AsmOperand; }
 def f256mem : X86MemOperand<"printf256mem">{
   let ParserMatchClass = X86Mem256AsmOperand; }
+def v128mem : Operand<iPTR> {
+  let PrintMethod = "printf128mem";
+  let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
+  let ParserMatchClass = X86Mem128AsmOperand; }
+def v256mem : Operand<iPTR> {
+  let PrintMethod = "printf256mem";
+  let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
+  let ParserMatchClass = X86Mem256AsmOperand; }
 }
 
 // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 5efb501085e..8974d453522 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7994,3 +7994,40 @@ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
 defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
 defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
 defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
+
+//===----------------------------------------------------------------------===//
+// VGATHER - GATHER Operations
+//
+// [(set VR128:$dst, (IntGather128 VR128:$src1, addr:$src2, VR128:$idx,
+//                    VR128:$mask, (i8 imm:$sc)))]>, VEX_4VOp3;
+// [(set VR256:$dst, (IntGather256 VR256:$src1, addr:$src2, VR256:$idx,
+//                    VR256:$mask, (i8 imm:$sc)))]>, VEX_4VOp3;
+multiclass avx2_gather<bits<8> opc, string OpcodeStr,
+                       Intrinsic IntGather128, Intrinsic IntGather256> {
+  def rm  : AVX28I, VEX_4VOp3;
+  def Yrm : AVX28I, VEX_4VOp3;
+}
+
+//let Constraints = "$src1 = $dst, $mask = $mask_wb" in {
+let Constraints = "$src1 = $dst" in {
+  defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
+                      int_x86_avx2_gather_d_pd,
+                      int_x86_avx2_gather_d_pd_256>, VEX_W;
+  defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
+                      int_x86_avx2_gather_q_pd,
+                      int_x86_avx2_gather_q_pd_256>, VEX_W;
+  defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
+                      int_x86_avx2_gather_d_ps,
+                      int_x86_avx2_gather_d_ps_256>;
+  defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
+                      int_x86_avx2_gather_q_ps,
+                      int_x86_avx2_gather_q_ps_256>;
+}
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 3f27a0291b4..3fb3497f359 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -976,3 +976,83 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
   ret void
 }
 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
+
+define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1,
+                     <4 x i32> %idx, <2 x double> %mask) {
+  ; CHECK: vgatherdpd
+  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
+                     i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
+                     <4 x i32>, <2 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1,
+                     <8 x i32> %idx, <4 x double> %mask) {
+  ; CHECK: vgatherdpd
+  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
+                     i8* %a1, <8 x i32> %idx, <4 x double> %mask, i8 2) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
+                     <8 x i32>, <4 x double>, i8) nounwind readonly
+
+define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1,
+                     <2 x i64> %idx, <2 x double> %mask) {
+  ; CHECK: vgatherqpd
+  %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
+                     i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*,
+                     <2 x i64>, <2 x double>, i8) nounwind readonly
+
+define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1,
+                     <4 x i64> %idx, <4 x double> %mask) {
+  ; CHECK: vgatherqpd
+  %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
+                     i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*,
+                     <4 x i64>, <4 x double>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1,
+                     <4 x i32> %idx, <4 x float> %mask) {
+  ; CHECK: vgatherdps
+  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
+                     i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
+                     <4 x i32>, <4 x float>, i8) nounwind readonly
+
+define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1,
+                     <8 x i32> %idx, <8 x float> %mask) {
+  ; CHECK: vgatherdps
+  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
+                     i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
+                     <8 x i32>, <8 x float>, i8) nounwind readonly
+
+define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1,
+                     <2 x i64> %idx, <4 x float> %mask) {
+  ; CHECK: vgatherqps
+  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
+                     i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
+                     <2 x i64>, <4 x float>, i8) nounwind readonly
+
+define <8 x float> @test_x86_avx2_gather_q_ps_256(<8 x float> %a0, i8* %a1,
+                     <4 x i64> %idx, <8 x float> %mask) {
+  ; CHECK: vgatherqps
+  %res = call <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float> %a0,
+                     i8* %a1, <4 x i64> %idx, <8 x float> %mask, i8 2) ;
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float>, i8*,
+                     <4 x i64>, <8 x float>, i8) nounwind readonly
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index c0e77d0698b..c543e461732 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -725,6 +725,12 @@
 # CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
 0xc4 0xe3 0xe1 0x48 0x40 0x04 0x21
 
+# CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
+0xc4 0xe2 0xe9 0x92 0x04 0x4f
+
+# CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
+0xc4 0x02 0x2d 0x93 0x04 0x4f
+
 # rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling
 # CHECK: lock
 # CHECK-NEXT: xaddq %rcx, %rbx
diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
index cdc5da42e8c..77c0ff358e2 100644
--- a/test/MC/X86/x86_64-avx-encoding.s
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -4121,3 +4121,11 @@ _foo:
 _foo2:
   nop
   vblendvps %ymm1, _foo2(%rip), %ymm0, %ymm0
+
+// CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f]
+          vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
+
+// CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
+// CHECK: encoding: [0xc4,0x02,0x2d,0x93,0x04,0x4f]
+          vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index bd98308aeab..7099f57a966 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -316,6 +316,8 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
   MEM("i256mem");
   MEM("f128mem");
   MEM("f256mem");
+  MEM("v128mem");
+  MEM("v256mem");
   MEM("opaque512mem");
 
   // all R, I, R, I
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index afb25be7ff8..0ab21c8a545 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -1106,6 +1106,8 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
   TYPE("VR128",               TYPE_XMM128)
   TYPE("f128mem",             TYPE_M128)
   TYPE("f256mem",             TYPE_M256)
+  TYPE("v128mem",             TYPE_M128)
+  TYPE("v256mem",             TYPE_M256)
   TYPE("FR64",                TYPE_XMM64)
   TYPE("f64mem",              TYPE_M64FP)
   TYPE("sdmem",               TYPE_M64FP)
@@ -1235,6 +1237,8 @@ OperandEncoding RecognizableInstr::memoryEncodingFromString
   ENCODING("sdmem",           ENCODING_RM)
   ENCODING("f128mem",         ENCODING_RM)
   ENCODING("f256mem",         ENCODING_RM)
+  ENCODING("v128mem",         ENCODING_RM)
+  ENCODING("v256mem",         ENCODING_RM)
   ENCODING("f64mem",          ENCODING_RM)
   ENCODING("f32mem",          ENCODING_RM)
   ENCODING("i128mem",         ENCODING_RM)
-- 
2.34.1
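
Usage note (editorial, not part of the patch): each new intrinsic takes the pass-through source vector, a base pointer, an index vector, a mask vector, and an i8 scale, and SelectGather lowers the call directly to the matching VGATHER* instruction with a VSIB memory operand. Below is a minimal IR sketch of such a call; the function name gather_f64 and the scale of 8 are illustrative only, and the scale is assumed to be a constant 1, 2, 4, or 8 as the hardware encoding requires.

  ; Gather 4 doubles from %base + 8 * idx[i]. Per the AVX2 gather semantics,
  ; only lanes whose mask element has its sign bit set are loaded; the other
  ; lanes should keep the corresponding element of %src (the tied first operand).
  define <4 x double> @gather_f64(<4 x double> %src, i8* %base,
                                  <8 x i32> %idx, <4 x double> %mask) {
    %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %src,
                       i8* %base, <8 x i32> %idx, <4 x double> %mask, i8 8)
    ret <4 x double> %res
  }
  declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
                       <8 x i32>, <4 x double>, i8) nounwind readonly

Compiled with AVX2 enabled (e.g. -mattr=+avx2), this should select a single vgatherdpd, as the CodeGen tests in this patch check.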
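Encoding note (editorial, not part of the patch): the disassembler test bytes show why the VSIB handling is needed. A rough decode of "0xc4 0xe2 0xe9 0x92 0x04 0x4f", printed as "vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2":

  c4 e2   3-byte VEX escape; inverted R/X/B all 1 (no register extensions),
          m-mmmm = 00010 selects the 0F 38 opcode map
  e9      VEX.W=1, vvvv = ~1101 = 0010 -> xmm2 (the mask, encoded via VEX_4VOp3),
          L=0 (128-bit), pp=01 (66 prefix)
  92      VGATHERDPD opcode
  04      ModRM: mod=00, reg=000 -> xmm0 (tied dst/src1), rm=100 -> SIB byte follows
  4f      SIB: scale=01 (x2), index=001, base=111 (rdi)

With VSIB, the SIB index field names a vector register (xmm1 here) instead of a general-purpose register, which is why translateRMMemory remaps sibIndex into the SIB_INDEX_XMM0/YMM0 ranges added by this patch.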