From 58da66b2bf444678397acfbe9295d6d8a5be0d1e Mon Sep 17 00:00:00 2001 From: Robert Khasanov Date: Tue, 30 Sep 2014 11:19:50 +0000 Subject: [PATCH] [AVX512] Enabled intrinsics for VPCMPEQD and VPCMPEQQ. Added CMP_MASK intrinsic type git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218667 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 27 ++++++++++++++++++++- lib/Target/X86/X86IntrinsicsInfo.h | 6 +++-- test/CodeGen/X86/avx512-intrinsics.ll | 34 ++++++++++++++++++++++++++- 3 files changed, 63 insertions(+), 4 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ed542560742..98616119fa0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -15859,7 +15859,8 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } -/// \brief Return (vselect \p Mask, \p Op, \p PreservedSrc) along with the +/// \brief Return (and \p Op, \p Mask) for compare instructions or +/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the /// necessary casting for \p Mask when lowering masking intrinsics. static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, SelectionDAG &DAG) { @@ -15869,6 +15870,20 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDLoc dl(Op); assert(MaskVT.isSimple() && "invalid mask type"); + + if (isAllOnes(Mask)) + return Op; + + switch (Op.getOpcode()) { + default: break; + case X86ISD::PCMPEQM: + case X86ISD::PCMPGTM: + case X86ISD::CMPM: + case X86ISD::CMPMU: + return DAG.getNode(ISD::AND, dl, VT, Op, + DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask)); + } + return DAG.getNode(ISD::VSELECT, dl, VT, DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask), Op, PreservedSrc); @@ -15937,6 +15952,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { case INTR_TYPE_3OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case CMP_MASK: { + EVT VT = Op.getOperand(1).getValueType(); + EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + VT.getVectorNumElements()); + SDValue Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, + Op.getOperand(1), Op.getOperand(2)); + SDValue Res = getVectorMaskingNode(Cmp, Op.getOperand(3), + DAG.getTargetConstant(0, MaskVT), DAG); + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); + } case COMI: { // Comparison intrinsics ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; SDValue LHS = Op.getOperand(1); diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 171ddf44c28..b9b836c78f1 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -19,8 +19,8 @@ namespace llvm { enum IntrinsicType { INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, - INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, VSHIFT, - COMI + INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, + CMP_MASK, VSHIFT, COMI }; struct IntrinsicData { @@ -156,6 +156,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), + X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_512, CMP_MASK, X86ISD::PCMPEQM, 0), + X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_512, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0), diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 3762f60986f..38d8da78a1f 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -644,4 +644,36 @@ define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) { ret void } -declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 ) \ No newline at end of file +declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 ) + +define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) { +; CHECK-LABEL: test_pcmpeq_d +; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ## + %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1) + ret i16 %res +} + +define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_d +; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## + %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) + ret i16 %res +} + +declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16) + +define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) { +; CHECK-LABEL: test_pcmpeq_q +; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ## + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1) + ret i8 %res +} + +define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_q +; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## + %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) + ret i8 %res +} + +declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8) -- 2.34.1