From: Asaf Badouh Date: Wed, 3 Jun 2015 13:41:48 +0000 (+0000) Subject: re-apply 238809 X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=ce375dc63ae6dfd6cfb1fe56ea109573b5b9d9ec;p=oota-llvm.git re-apply 238809 AVX-512: Implemented GETEXP instruction for KNL and SKX Added rounding mode modifier for SQRTPS/PD Added tests for encoding and intrinsics. CR: http://reviews.llvm.org/D9991 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238923 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 3a8a4a643a4..0826aa2287e 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -3372,10 +3372,40 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">, + def int_x86_avx512_mask_sqrt_pd_128 : GCCBuiltin<"__builtin_ia32_sqrtpd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_sqrt_ps_128 : GCCBuiltin<"__builtin_ia32_sqrtps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_getexp_pd_128 : GCCBuiltin<"__builtin_ia32_getexppd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_getexp_pd_256 : GCCBuiltin<"__builtin_ia32_getexppd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_getexp_pd_512 : GCCBuiltin<"__builtin_ia32_getexppd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">, + def int_x86_avx512_mask_getexp_ps_128 : GCCBuiltin<"__builtin_ia32_getexpps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_getexp_ps_256 : GCCBuiltin<"__builtin_ia32_getexpps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_getexp_ps_512 : GCCBuiltin<"__builtin_ia32_getexpps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5c6fbdda25c..e0e79d7d198 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15098,12 +15098,31 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Op.getOperand(2), Op.getOperand(3)); case INTR_TYPE_1OP_MASK_RM: { SDValue Src = Op.getOperand(1); - SDValue Src0 = Op.getOperand(2); + SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); - SDValue RoundingMode = Op.getOperand(4); + SDValue RoundingMode; + if (Op.getNumOperands() == 4) + RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + else + RoundingMode = Op.getOperand(4); + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + unsigned Round = cast(RoundingMode)->getZExtValue(); + if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, + dl, Op.getValueType(), Src, RoundingMode), + Mask, PassThru, Subtarget, DAG); + } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, RoundingMode), - Mask, Src0, Subtarget, DAG); + Mask, PassThru, Subtarget, DAG); + } + case INTR_TYPE_1OP_MASK: { + SDValue Src = Op.getOperand(1); + SDValue Passthru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), + Mask, Passthru, Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK_RM: { SDValue Src1 = Op.getOperand(1); @@ -18368,6 +18387,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; + case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND"; + case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND"; case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index fc412cd9468..b5d062f72b2 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -199,6 +199,7 @@ namespace llvm { /// Combined add and sub on an FP vector. ADDSUB, + // FP vector ops with rounding mode. FADD_RND, FSUB_RND, @@ -206,6 +207,10 @@ namespace llvm { FDIV_RND, FMAX_RND, FMIN_RND, + FSQRT_RND, + + // FP vector get exponent + FGETEXP_RND, // Integer add/sub with unsigned saturation. ADDUS, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 24c72001197..5d5ab14cf46 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -4854,11 +4854,6 @@ multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, (ins _.RC:$src), OpcodeStr, "$src", "$src", (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>; - defm rb : AVX512_maskable, EVEX_B; - defm m : AVX512_maskable opc, string OpcodeStr, X86VectorVTInfo _, (i32 FROUND_CURRENT))>; defm mb : AVX512_maskable, EVEX_B; } +multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, + SDNode OpNode> { + defm rb : AVX512_maskable, EVEX_B; +} multiclass avx512_eri opc, string OpcodeStr, SDNode OpNode> { defm PS : avx512_fp28_p, - EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round, + T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; defm PD : avx512_fp28_p, - VEX_W, EVEX_CD8<32, CD8VF>; + avx512_fp28_p_round, + T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } +multiclass avx512_fp_unaryop_packed opc, string OpcodeStr, + SDNode OpNode> { + // Define only if AVX512VL feature is present. + let Predicates = [HasVLX] in { + defm PSZ128 : avx512_fp28_p, + EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; + defm PSZ256 : avx512_fp28_p, + EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; + defm PDZ128 : avx512_fp28_p, + EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + defm PDZ256 : avx512_fp28_p, + EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; + } +} let Predicates = [HasERI], hasSideEffects = 0 in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX; +} +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>, + avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX; + +multiclass avx512_sqrt_packed_round opc, string OpcodeStr, + SDNode OpNodeRnd, X86VectorVTInfo _>{ + defm rb: AVX512_maskable, + EVEX, EVEX_B, EVEX_RC; } multiclass avx512_sqrt_packed opc, string OpcodeStr, @@ -4992,20 +5021,22 @@ multiclass avx512_sqrt_packed_all opc, string OpcodeStr, } } -defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>; +multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr, + SDNode OpNodeRnd> { + defm PSZ : avx512_sqrt_packed_round, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_sqrt_packed_round, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; +} + +defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, + avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>; defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, SSE_SQRTSS, SSE_SQRTSD>; let Predicates = [HasAVX512] in { - def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), - (VSQRTPSZr VR512:$src1)>; - def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), - (VSQRTPDZr VR512:$src1)>; - def : Pat<(f32 (fsqrt FR32X:$src)), (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (fsqrt (load addr:$src))), diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index bb894bb9f3b..dfe58ef8067 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -231,6 +231,9 @@ def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>; +def SDTFPUnaryOpRound : SDTypeProfile<1, 2, [ // fsqrt_round, fgetexp_round, etc. + SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]>; + def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, @@ -304,6 +307,8 @@ def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; +def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; +def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 4af514a83ca..8bf0d445453 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -21,7 +21,7 @@ enum IntrinsicType { GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, - INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK, + INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, FMA_OP_MASK, INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, EXPAND_FROM_MEM, BLEND }; @@ -339,9 +339,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV, X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV, - X86ISD::FDIV_RND), + X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV, - X86ISD::FDIV_RND), + X86ISD::FDIV_RND), X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG, @@ -366,6 +366,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_pd_512, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_128, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_256, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM, + X86ISD::FGETEXP_RND, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX, @@ -559,6 +571,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::RNDSCALE, 0), X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RNDSCALE, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT, + X86ISD::FSQRT_RND), + X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0), + X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT, + X86ISD::FSQRT_RND), X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0), X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0), X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB, diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 471e34cdedc..9387192f8aa 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -98,18 +98,55 @@ define <4 x float> @test_rcp14_ss(<4 x float> %a0) { declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { + ; CHECK-LABEL: test_sqrt_pd_512 ; CHECK: vsqrtpd - %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1] + %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ret <8 x double> %res } -declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone +declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { + ; CHECK-LABEL: test_sqrt_ps_512 ; CHECK: vsqrtps - %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1] + %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ret <16 x float> %res } -declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone +define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) { + ; CHECK-LABEL: test_sqrt_round_ps_512 + ; CHECK: vsqrtps {rz-sae} + %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3) + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone + +define <8 x double> @test_getexp_pd_512(<8 x double> %a0) { + ; CHECK-LABEL: test_getexp_pd_512 + ; CHECK: vgetexppd + %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) + ret <8 x double> %res +} +define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) { + ; CHECK-LABEL: test_getexp_round_pd_512 + ; CHECK: vgetexppd {sae} + %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone + +define <16 x float> @test_getexp_ps_512(<16 x float> %a0) { + ; CHECK-LABEL: test_getexp_ps_512 + ; CHECK: vgetexpps + %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) + ret <16 x float> %res +} + +define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) { + ; CHECK-LABEL: test_getexp_round_ps_512 + ; CHECK: vgetexpps {sae} + %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: vsqrtss {{.*}}encoding: [0x62 diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index b93b1d04d80..9d96c272f35 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -2553,3 +2553,37 @@ define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 % ret <4 x float> %res } declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) { + ; CHECK-LABEL: test_sqrt_pd_256 + ; CHECK: vsqrtpd + %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask) + ret <4 x double> %res +} +declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + +define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) { + ; CHECK-LABEL: test_sqrt_ps_256 + ; CHECK: vsqrtps + %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask) + ret <8 x float> %res +} + +declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone + +define <4 x double> @test_getexp_pd_256(<4 x double> %a0) { + ; CHECK-LABEL: test_getexp_pd_256 + ; CHECK: vgetexppd + %res = call <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 -1) + ret <4 x double> %res +} + +declare <4 x double> @llvm.x86.avx512.mask.getexp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone + +define <8 x float> @test_getexp_ps_256(<8 x float> %a0) { + ; CHECK-LABEL: test_getexp_ps_256 + ; CHECK: vgetexpps + %res = call <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 -1) + ret <8 x float> %res +} +declare <8 x float> @llvm.x86.avx512.mask.getexp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone \ No newline at end of file diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index 4232fff5e95..5b507a70a77 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -4796,6 +4796,38 @@ // CHECK: encoding: [0x62,0x61,0x7c,0x58,0x51,0xa2,0xfc,0xfd,0xff,0xff] vsqrtps -516(%rdx){1to16}, %zmm28 +// CHECK: vsqrtpd {rn-sae}, %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xa1,0xfd,0x18,0x51,0xdb] + vsqrtpd {rn-sae}, %zmm19, %zmm19 + +// CHECK: vsqrtpd {ru-sae}, %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xa1,0xfd,0x58,0x51,0xdb] + vsqrtpd {ru-sae}, %zmm19, %zmm19 + +// CHECK: vsqrtpd {rd-sae}, %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xa1,0xfd,0x38,0x51,0xdb] + vsqrtpd {rd-sae}, %zmm19, %zmm19 + +// CHECK: vsqrtpd {rz-sae}, %zmm19, %zmm19 +// CHECK: encoding: [0x62,0xa1,0xfd,0x78,0x51,0xdb] + vsqrtpd {rz-sae}, %zmm19, %zmm19 + +// CHECK: vsqrtps {rn-sae}, %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x01,0x7c,0x18,0x51,0xe5] + vsqrtps {rn-sae}, %zmm29, %zmm28 + +// CHECK: vsqrtps {ru-sae}, %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x01,0x7c,0x58,0x51,0xe5] + vsqrtps {ru-sae}, %zmm29, %zmm28 + +// CHECK: vsqrtps {rd-sae}, %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x01,0x7c,0x38,0x51,0xe5] + vsqrtps {rd-sae}, %zmm29, %zmm28 + +// CHECK: vsqrtps {rz-sae}, %zmm29, %zmm28 +// CHECK: encoding: [0x62,0x01,0x7c,0x78,0x51,0xe5] + vsqrtps {rz-sae}, %zmm29, %zmm28 + // CHECK: vsubpd %zmm9, %zmm12, %zmm9 // CHECK: encoding: [0x62,0x51,0x9d,0x48,0x5c,0xc9] vsubpd %zmm9, %zmm12, %zmm9 @@ -8016,6 +8048,126 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2 // CHECK: encoding: [0x62,0xe1,0x7d,0x58,0x70,0x9a,0xfc,0xfd,0xff,0xff,0x7b] vpshufd $123, -516(%rdx){1to16}, %zmm19 +// CHECK: vgetexppd %zmm25, %zmm14 +// CHECK: encoding: [0x62,0x12,0xfd,0x48,0x42,0xf1] + vgetexppd %zmm25, %zmm14 + +// CHECK: vgetexppd %zmm25, %zmm14 {%k5} +// CHECK: encoding: [0x62,0x12,0xfd,0x4d,0x42,0xf1] + vgetexppd %zmm25, %zmm14 {%k5} + +// CHECK: vgetexppd %zmm25, %zmm14 {%k5} {z} +// CHECK: encoding: [0x62,0x12,0xfd,0xcd,0x42,0xf1] + vgetexppd %zmm25, %zmm14 {%k5} {z} + +// CHECK: vgetexppd {sae}, %zmm25, %zmm14 +// CHECK: encoding: [0x62,0x12,0xfd,0x18,0x42,0xf1] + vgetexppd {sae}, %zmm25, %zmm14 + +// CHECK: vgetexppd (%rcx), %zmm14 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0x31] + vgetexppd (%rcx), %zmm14 + +// CHECK: vgetexppd 291(%rax,%r14,8), %zmm14 +// CHECK: encoding: [0x62,0x32,0xfd,0x48,0x42,0xb4,0xf0,0x23,0x01,0x00,0x00] + vgetexppd 291(%rax,%r14,8), %zmm14 + +// CHECK: vgetexppd (%rcx){1to8}, %zmm14 +// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0x31] + vgetexppd (%rcx){1to8}, %zmm14 + +// CHECK: vgetexppd 8128(%rdx), %zmm14 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0x72,0x7f] + vgetexppd 8128(%rdx), %zmm14 + +// CHECK: vgetexppd 8192(%rdx), %zmm14 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0xb2,0x00,0x20,0x00,0x00] + vgetexppd 8192(%rdx), %zmm14 + +// CHECK: vgetexppd -8192(%rdx), %zmm14 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0x72,0x80] + vgetexppd -8192(%rdx), %zmm14 + +// CHECK: vgetexppd -8256(%rdx), %zmm14 +// CHECK: encoding: [0x62,0x72,0xfd,0x48,0x42,0xb2,0xc0,0xdf,0xff,0xff] + vgetexppd -8256(%rdx), %zmm14 + +// CHECK: vgetexppd 1016(%rdx){1to8}, %zmm14 +// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0x72,0x7f] + vgetexppd 1016(%rdx){1to8}, %zmm14 + +// CHECK: vgetexppd 1024(%rdx){1to8}, %zmm14 +// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0xb2,0x00,0x04,0x00,0x00] + vgetexppd 1024(%rdx){1to8}, %zmm14 + +// CHECK: vgetexppd -1024(%rdx){1to8}, %zmm14 +// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0x72,0x80] + vgetexppd -1024(%rdx){1to8}, %zmm14 + +// CHECK: vgetexppd -1032(%rdx){1to8}, %zmm14 +// CHECK: encoding: [0x62,0x72,0xfd,0x58,0x42,0xb2,0xf8,0xfb,0xff,0xff] + vgetexppd -1032(%rdx){1to8}, %zmm14 + +// CHECK: vgetexpps %zmm6, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0xce] + vgetexpps %zmm6, %zmm1 + +// CHECK: vgetexpps %zmm6, %zmm1 {%k3} +// CHECK: encoding: [0x62,0xf2,0x7d,0x4b,0x42,0xce] + vgetexpps %zmm6, %zmm1 {%k3} + +// CHECK: vgetexpps %zmm6, %zmm1 {%k3} {z} +// CHECK: encoding: [0x62,0xf2,0x7d,0xcb,0x42,0xce] + vgetexpps %zmm6, %zmm1 {%k3} {z} + +// CHECK: vgetexpps {sae}, %zmm6, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x18,0x42,0xce] + vgetexpps {sae}, %zmm6, %zmm1 + +// CHECK: vgetexpps (%rcx), %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x09] + vgetexpps (%rcx), %zmm1 + +// CHECK: vgetexpps 291(%rax,%r14,8), %zmm1 +// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x42,0x8c,0xf0,0x23,0x01,0x00,0x00] + vgetexpps 291(%rax,%r14,8), %zmm1 + +// CHECK: vgetexpps (%rcx){1to16}, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x09] + vgetexpps (%rcx){1to16}, %zmm1 + +// CHECK: vgetexpps 8128(%rdx), %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x4a,0x7f] + vgetexpps 8128(%rdx), %zmm1 + +// CHECK: vgetexpps 8192(%rdx), %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x8a,0x00,0x20,0x00,0x00] + vgetexpps 8192(%rdx), %zmm1 + +// CHECK: vgetexpps -8192(%rdx), %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x4a,0x80] + vgetexpps -8192(%rdx), %zmm1 + +// CHECK: vgetexpps -8256(%rdx), %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x42,0x8a,0xc0,0xdf,0xff,0xff] + vgetexpps -8256(%rdx), %zmm1 + +// CHECK: vgetexpps 508(%rdx){1to16}, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x4a,0x7f] + vgetexpps 508(%rdx){1to16}, %zmm1 + +// CHECK: vgetexpps 512(%rdx){1to16}, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x8a,0x00,0x02,0x00,0x00] + vgetexpps 512(%rdx){1to16}, %zmm1 + +// CHECK: vgetexpps -512(%rdx){1to16}, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x4a,0x80] + vgetexpps -512(%rdx){1to16}, %zmm1 + +// CHECK: vgetexpps -516(%rdx){1to16}, %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x58,0x42,0x8a,0xfc,0xfd,0xff,0xff] + vgetexpps -516(%rdx){1to16}, %zmm1 + // CHECK: vshuff32x4 $171, %zmm3, %zmm24, %zmm6 // CHECK: encoding: [0x62,0xf3,0x3d,0x40,0x23,0xf3,0xab] vshuff32x4 $171, %zmm3, %zmm24, %zmm6 diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s index dd2a49d7046..983e87912ed 100644 --- a/test/MC/X86/x86-64-avx512f_vl.s +++ b/test/MC/X86/x86-64-avx512f_vl.s @@ -10549,6 +10549,230 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1 // CHECK: encoding: [0x62,0xe1,0x7d,0x38,0x70,0xa2,0xfc,0xfd,0xff,0xff,0x7b] vpshufd $123, -516(%rdx){1to8}, %ymm20 +// CHECK: vgetexppd %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x42,0xca] + vgetexppd %xmm18, %xmm17 + +// CHECK: vgetexppd %xmm18, %xmm17 {%k1} +// CHECK: encoding: [0x62,0xa2,0xfd,0x09,0x42,0xca] + vgetexppd %xmm18, %xmm17 {%k1} + +// CHECK: vgetexppd %xmm18, %xmm17 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0x89,0x42,0xca] + vgetexppd %xmm18, %xmm17 {%k1} {z} + +// CHECK: vgetexppd (%rcx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x09] + vgetexppd (%rcx), %xmm17 + +// CHECK: vgetexppd 291(%rax,%r14,8), %xmm17 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x42,0x8c,0xf0,0x23,0x01,0x00,0x00] + vgetexppd 291(%rax,%r14,8), %xmm17 + +// CHECK: vgetexppd (%rcx){1to2}, %xmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x09] + vgetexppd (%rcx){1to2}, %xmm17 + +// CHECK: vgetexppd 2032(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x4a,0x7f] + vgetexppd 2032(%rdx), %xmm17 + +// CHECK: vgetexppd 2048(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x8a,0x00,0x08,0x00,0x00] + vgetexppd 2048(%rdx), %xmm17 + +// CHECK: vgetexppd -2048(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x4a,0x80] + vgetexppd -2048(%rdx), %xmm17 + +// CHECK: vgetexppd -2064(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x42,0x8a,0xf0,0xf7,0xff,0xff] + vgetexppd -2064(%rdx), %xmm17 + +// CHECK: vgetexppd 1016(%rdx){1to2}, %xmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x4a,0x7f] + vgetexppd 1016(%rdx){1to2}, %xmm17 + +// CHECK: vgetexppd 1024(%rdx){1to2}, %xmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x8a,0x00,0x04,0x00,0x00] + vgetexppd 1024(%rdx){1to2}, %xmm17 + +// CHECK: vgetexppd -1024(%rdx){1to2}, %xmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x4a,0x80] + vgetexppd -1024(%rdx){1to2}, %xmm17 + +// CHECK: vgetexppd -1032(%rdx){1to2}, %xmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x42,0x8a,0xf8,0xfb,0xff,0xff] + vgetexppd -1032(%rdx){1to2}, %xmm17 + +// CHECK: vgetexppd %ymm17, %ymm20 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x42,0xe1] + vgetexppd %ymm17, %ymm20 + +// CHECK: vgetexppd %ymm17, %ymm20 {%k3} +// CHECK: encoding: [0x62,0xa2,0xfd,0x2b,0x42,0xe1] + vgetexppd %ymm17, %ymm20 {%k3} + +// CHECK: vgetexppd %ymm17, %ymm20 {%k3} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0xab,0x42,0xe1] + vgetexppd %ymm17, %ymm20 {%k3} {z} + +// CHECK: vgetexppd (%rcx), %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0x21] + vgetexppd (%rcx), %ymm20 + +// CHECK: vgetexppd 291(%rax,%r14,8), %ymm20 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x42,0xa4,0xf0,0x23,0x01,0x00,0x00] + vgetexppd 291(%rax,%r14,8), %ymm20 + +// CHECK: vgetexppd (%rcx){1to4}, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0x21] + vgetexppd (%rcx){1to4}, %ymm20 + +// CHECK: vgetexppd 4064(%rdx), %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0x62,0x7f] + vgetexppd 4064(%rdx), %ymm20 + +// CHECK: vgetexppd 4096(%rdx), %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0xa2,0x00,0x10,0x00,0x00] + vgetexppd 4096(%rdx), %ymm20 + +// CHECK: vgetexppd -4096(%rdx), %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0x62,0x80] + vgetexppd -4096(%rdx), %ymm20 + +// CHECK: vgetexppd -4128(%rdx), %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x42,0xa2,0xe0,0xef,0xff,0xff] + vgetexppd -4128(%rdx), %ymm20 + +// CHECK: vgetexppd 1016(%rdx){1to4}, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0x62,0x7f] + vgetexppd 1016(%rdx){1to4}, %ymm20 + +// CHECK: vgetexppd 1024(%rdx){1to4}, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0xa2,0x00,0x04,0x00,0x00] + vgetexppd 1024(%rdx){1to4}, %ymm20 + +// CHECK: vgetexppd -1024(%rdx){1to4}, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0x62,0x80] + vgetexppd -1024(%rdx){1to4}, %ymm20 + +// CHECK: vgetexppd -1032(%rdx){1to4}, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x42,0xa2,0xf8,0xfb,0xff,0xff] + vgetexppd -1032(%rdx){1to4}, %ymm20 + +// CHECK: vgetexpps %xmm27, %xmm17 +// CHECK: encoding: [0x62,0x82,0x7d,0x08,0x42,0xcb] + vgetexpps %xmm27, %xmm17 + +// CHECK: vgetexpps %xmm27, %xmm17 {%k2} +// CHECK: encoding: [0x62,0x82,0x7d,0x0a,0x42,0xcb] + vgetexpps %xmm27, %xmm17 {%k2} + +// CHECK: vgetexpps %xmm27, %xmm17 {%k2} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0x8a,0x42,0xcb] + vgetexpps %xmm27, %xmm17 {%k2} {z} + +// CHECK: vgetexpps (%rcx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x09] + vgetexpps (%rcx), %xmm17 + +// CHECK: vgetexpps 291(%rax,%r14,8), %xmm17 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x42,0x8c,0xf0,0x23,0x01,0x00,0x00] + vgetexpps 291(%rax,%r14,8), %xmm17 + +// CHECK: vgetexpps (%rcx){1to4}, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x09] + vgetexpps (%rcx){1to4}, %xmm17 + +// CHECK: vgetexpps 2032(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x4a,0x7f] + vgetexpps 2032(%rdx), %xmm17 + +// CHECK: vgetexpps 2048(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x8a,0x00,0x08,0x00,0x00] + vgetexpps 2048(%rdx), %xmm17 + +// CHECK: vgetexpps -2048(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x4a,0x80] + vgetexpps -2048(%rdx), %xmm17 + +// CHECK: vgetexpps -2064(%rdx), %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x42,0x8a,0xf0,0xf7,0xff,0xff] + vgetexpps -2064(%rdx), %xmm17 + +// CHECK: vgetexpps 508(%rdx){1to4}, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x4a,0x7f] + vgetexpps 508(%rdx){1to4}, %xmm17 + +// CHECK: vgetexpps 512(%rdx){1to4}, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x8a,0x00,0x02,0x00,0x00] + vgetexpps 512(%rdx){1to4}, %xmm17 + +// CHECK: vgetexpps -512(%rdx){1to4}, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x4a,0x80] + vgetexpps -512(%rdx){1to4}, %xmm17 + +// CHECK: vgetexpps -516(%rdx){1to4}, %xmm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0x42,0x8a,0xfc,0xfd,0xff,0xff] + vgetexpps -516(%rdx){1to4}, %xmm17 + +// CHECK: vgetexpps %ymm29, %ymm30 +// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x42,0xf5] + vgetexpps %ymm29, %ymm30 + +// CHECK: vgetexpps %ymm29, %ymm30 {%k6} +// CHECK: encoding: [0x62,0x02,0x7d,0x2e,0x42,0xf5] + vgetexpps %ymm29, %ymm30 {%k6} + +// CHECK: vgetexpps %ymm29, %ymm30 {%k6} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0xae,0x42,0xf5] + vgetexpps %ymm29, %ymm30 {%k6} {z} + +// CHECK: vgetexpps (%rcx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0x31] + vgetexpps (%rcx), %ymm30 + +// CHECK: vgetexpps 291(%rax,%r14,8), %ymm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x42,0xb4,0xf0,0x23,0x01,0x00,0x00] + vgetexpps 291(%rax,%r14,8), %ymm30 + +// CHECK: vgetexpps (%rcx){1to8}, %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0x31] + vgetexpps (%rcx){1to8}, %ymm30 + +// CHECK: vgetexpps 4064(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0x72,0x7f] + vgetexpps 4064(%rdx), %ymm30 + +// CHECK: vgetexpps 4096(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0xb2,0x00,0x10,0x00,0x00] + vgetexpps 4096(%rdx), %ymm30 + +// CHECK: vgetexpps -4096(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0x72,0x80] + vgetexpps -4096(%rdx), %ymm30 + +// CHECK: vgetexpps -4128(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x42,0xb2,0xe0,0xef,0xff,0xff] + vgetexpps -4128(%rdx), %ymm30 + +// CHECK: vgetexpps 508(%rdx){1to8}, %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0x72,0x7f] + vgetexpps 508(%rdx){1to8}, %ymm30 + +// CHECK: vgetexpps 512(%rdx){1to8}, %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0xb2,0x00,0x02,0x00,0x00] + vgetexpps 512(%rdx){1to8}, %ymm30 + +// CHECK: vgetexpps -512(%rdx){1to8}, %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0x72,0x80] + vgetexpps -512(%rdx){1to8}, %ymm30 + +// CHECK: vgetexpps -516(%rdx){1to8}, %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x42,0xb2,0xfc,0xfd,0xff,0xff] + vgetexpps -516(%rdx){1to8}, %ymm30 + // CHECK: vshuff32x4 $171, %ymm18, %ymm27, %ymm29 // CHECK: encoding: [0x62,0x23,0x25,0x20,0x23,0xea,0xab] vshuff32x4 $0xab, %ymm18, %ymm27, %ymm29