From 1f0a0e314d461932aa2c626659dae3e3a6d6a00e Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky
Date: Tue, 17 Dec 2013 08:33:15 +0000
Subject: [PATCH] AVX-512: Added implementation of CONCAT_VECTORS for v8i1
 vectors (by Alexey Bader). Added implementation of "truncate" from integer
 type (i64/i32/i16/i8) to i1.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197482 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp      | 17 ++++++++++++++++-
 lib/Target/X86/X86ISelLowering.h        |  3 +++
 lib/Target/X86/X86InstrAVX512.td        |  7 +++++++
 lib/Target/X86/X86InstrFragmentsSIMD.td |  3 +++
 test/CodeGen/X86/avx512-vec-cmp.ll      | 11 +++++++++++
 5 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e87e7edab3c..3be3bf53755 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1356,7 +1356,7 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
     setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
 
-    setOperationAction(ISD::TRUNCATE, MVT::i1, Legal);
+    setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
     setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
     setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
     setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
@@ -1374,6 +1374,7 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
     setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
     setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
+    setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
 
     setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
     setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
@@ -9066,6 +9067,20 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
   SDValue In = Op.getOperand(0);
   MVT InVT = In.getSimpleValueType();
+
+  // Scalar truncate to i1: keep only bit 0, then bring the value to i32,
+  // the operand type of the X86trunc pattern added in X86InstrAVX512.td
+  // (scalar ISD::TRUNCATE from i64, ANY_EXTEND from narrower types).
+  if (VT == MVT::i1) {
+    assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) &&
+           "Invalid scalar TRUNCATE operation");
+    In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(1, InVT));
+    if (InVT.getSizeInBits() == 64)
+      In = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, In);
+    else if (InVT.getSizeInBits() < 32)
+      In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In);
+    return DAG.getNode(X86ISD::TRUNC, DL, VT, In);
+  }
   assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
          "Invalid TRUNCATE operation");
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 0b3495dc1b3..03d645c1060 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -260,6 +260,9 @@ namespace llvm {
       // VTRUNC - Vector integer truncate.
       VTRUNC,
 
+      // TRUNC - Integer truncate
+      TRUNC,
+
       // VTRUNC - Vector integer truncate with mask.
       VTRUNCM,
 
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 2a405332711..7bcbc454c9a 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -908,6 +908,9 @@ let Predicates = [HasAVX512] in {
 
   def : Pat<(v8i1 (load addr:$src)),
             (COPY_TO_REGCLASS (KMOVWkm addr:$src), VK8)>;
+
+  def : Pat<(i1 (X86trunc (i32 GR32:$src))),
+            (COPY_TO_REGCLASS (KMOVWkr $src), VK1)>;
 }
 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
 let Predicates = [HasAVX512] in {
@@ -1051,6 +1054,10 @@ multiclass avx512_mask_unpck_bw opc, string OpcodeStr> {
 }
 
 defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
+def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
+          (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
+                      (COPY_TO_REGCLASS VK8:$src1, VK16))>;
+
 
 multiclass avx512_mask_unpck_int {
   let Predicates = [HasAVX512] in
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index b56ce1122df..2157de28f14 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -108,6 +108,9 @@ def X86vsext : SDNode<"X86ISD::VSEXT",
 def X86vtrunc : SDNode<"X86ISD::VTRUNC",
                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                             SDTCisInt<0>, SDTCisInt<1>]>>;
+def X86trunc : SDNode<"X86ISD::TRUNC",
+                      SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>>;
+
 def X86vtruncm : SDNode<"X86ISD::VTRUNCM",
                         SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
                                              SDTCisInt<0>, SDTCisInt<1>,
diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll
index 6ca5bcc3b86..822809c62ed 100644
--- a/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -111,3 +111,14 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
   ret <8 x i32> %max
 }
+
+; CHECK-LABEL: test12
+; CHECK: vpcmpeqq %zmm2, %zmm0, [[LO:%k[0-7]]]
+; CHECK: vpcmpeqq %zmm3, %zmm1, [[HI:%k[0-7]]]
+; CHECK: kunpckbw [[LO]], [[HI]], {{%k[0-7]}}
+
+define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
+  %res = icmp eq <16 x i64> %a, %b
+  %res1 = bitcast <16 x i1> %res to i16
+  ret i16 %res1
+}
-- 
2.34.1