From cfa5724d507d7bf9c2ca854ee2d6fd7bca00a345 Mon Sep 17 00:00:00 2001 From: Robert Khasanov Date: Tue, 30 Sep 2014 11:32:22 +0000 Subject: [PATCH] [AVX512] Added intrinsics for VPCMPEQB and VPCMPEQW. Added new operand type for intrinsics (IIT_V64) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218668 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 24 ++++++++++----- lib/IR/Function.cpp | 41 ++++++++++++++----------- lib/Target/X86/X86IntrinsicsInfo.h | 2 ++ test/CodeGen/X86/avx512bw-intrinsics.ll | 33 ++++++++++++++++++++ test/TableGen/intrinsic-varargs.td | 2 +- utils/TableGen/IntrinsicEmitter.cpp | 38 ++++++++++++----------- 6 files changed, 96 insertions(+), 44 deletions(-) create mode 100644 test/CodeGen/X86/avx512bw-intrinsics.ll diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index e9e6730427a..3012cdf7db4 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -3234,6 +3234,23 @@ let TargetPrefix = "x86" in { [IntrNoMem]>; } +// Compares +let TargetPrefix = "x86" in { + // 512-bit + def int_x86_avx512_mask_pcmpeq_b_512 : GCCBuiltin<"__builtin_ia32_pcmpeqb512_mask">, + Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_w_512 : GCCBuiltin<"__builtin_ia32_pcmpeqw512_mask">, + Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_d_512 : GCCBuiltin<"__builtin_ia32_pcmpeqd512_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pcmpeq_q_512 : GCCBuiltin<"__builtin_ia32_pcmpeqq512_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; +} + // Misc. let TargetPrefix = "x86" in { def int_x86_avx512_mask_cmp_ps_512 : GCCBuiltin<"__builtin_ia32_cmpps512_mask">, @@ -3242,13 +3259,6 @@ let TargetPrefix = "x86" in { def int_x86_avx512_mask_cmp_pd_512 : GCCBuiltin<"__builtin_ia32_cmppd512_mask">, Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_pcmpeq_d_512 : GCCBuiltin<"__builtin_ia32_pcmpeqd512_mask">, - Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; - def int_x86_avx512_mask_pcmpeq_q_512 : GCCBuiltin<"__builtin_ia32_pcmpeqq512_mask">, - Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; def int_x86_avx512_mask_pand_d_512 : GCCBuiltin<"__builtin_ia32_pandd512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 605f3ecea50..83f71a89162 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -474,7 +474,7 @@ std::string Intrinsic::getName(ID id, ArrayRef Tys) { /// /// NOTE: This must be kept in synch with the copy in TblGen/IntrinsicEmitter! enum IIT_Info { - // Common values should be encoded with 0-15. + // Common values should be encoded with 0-16. IIT_Done = 0, IIT_I1 = 1, IIT_I8 = 2, @@ -489,23 +489,24 @@ enum IIT_Info { IIT_V8 = 11, IIT_V16 = 12, IIT_V32 = 13, - IIT_PTR = 14, - IIT_ARG = 15, - - // Values from 16+ are only encodable with the inefficient encoding. - IIT_MMX = 16, - IIT_METADATA = 17, - IIT_EMPTYSTRUCT = 18, - IIT_STRUCT2 = 19, - IIT_STRUCT3 = 20, - IIT_STRUCT4 = 21, - IIT_STRUCT5 = 22, - IIT_EXTEND_ARG = 23, - IIT_TRUNC_ARG = 24, - IIT_ANYPTR = 25, - IIT_V1 = 26, - IIT_VARARG = 27, - IIT_HALF_VEC_ARG = 28 + IIT_V64 = 14, + IIT_PTR = 15, + IIT_ARG = 16, + + // Values from 17+ are only encodable with the inefficient encoding. + IIT_MMX = 17, + IIT_METADATA = 18, + IIT_EMPTYSTRUCT = 19, + IIT_STRUCT2 = 20, + IIT_STRUCT3 = 21, + IIT_STRUCT4 = 22, + IIT_STRUCT5 = 23, + IIT_EXTEND_ARG = 24, + IIT_TRUNC_ARG = 25, + IIT_ANYPTR = 26, + IIT_V1 = 27, + IIT_VARARG = 28, + IIT_HALF_VEC_ARG = 29 }; @@ -576,6 +577,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 32)); DecodeIITType(NextElt, Infos, OutputTable); return; + case IIT_V64: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 64)); + DecodeIITType(NextElt, Infos, OutputTable); + return; case IIT_PTR: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0)); DecodeIITType(NextElt, Infos, OutputTable); diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index b9b836c78f1..4c374af1f04 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -156,8 +156,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0), + X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_d_512, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_q_512, CMP_MASK, X86ISD::PCMPEQM, 0), + X86_INTRINSIC_DATA(avx512_mask_pcmpeq_w_512, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0), diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll new file mode 100644 index 00000000000..a4ebaeb5834 --- /dev/null +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw --show-mc-encoding| FileCheck %s + +define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) { +; CHECK-LABEL: test_pcmpeq_b +; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ## + %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) + ret i64 %res +} + +define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_b +; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ## + %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask) + ret i64 %res +} + +declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64) + +define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) { +; CHECK-LABEL: test_pcmpeq_w +; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ## + %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1) + ret i32 %res +} + +define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) { +; CHECK-LABEL: test_mask_pcmpeq_w +; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ## + %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask) + ret i32 %res +} + +declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32) diff --git a/test/TableGen/intrinsic-varargs.td b/test/TableGen/intrinsic-varargs.td index 3e48f8da33b..935a6250380 100644 --- a/test/TableGen/intrinsic-varargs.td +++ b/test/TableGen/intrinsic-varargs.td @@ -26,5 +26,5 @@ class Intrinsic param_types = []> { def isVoid : ValueType<0, 56>; // Produces no value def llvm_vararg_ty : LLVMType; // this means vararg here -// CHECK: /* 0 */ 0, 27, 0, +// CHECK: /* 0 */ 0, 28, 0, def int_foo : Intrinsic<"llvm.foo", [llvm_vararg_ty]>; diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp index f305baa445c..406a5f606b7 100644 --- a/utils/TableGen/IntrinsicEmitter.cpp +++ b/utils/TableGen/IntrinsicEmitter.cpp @@ -225,7 +225,7 @@ EmitIntrinsicToOverloadTable(const std::vector &Ints, // NOTE: This must be kept in synch with the copy in lib/VMCore/Function.cpp! enum IIT_Info { - // Common values should be encoded with 0-15. + // Common values should be encoded with 0-16. IIT_Done = 0, IIT_I1 = 1, IIT_I8 = 2, @@ -240,23 +240,24 @@ enum IIT_Info { IIT_V8 = 11, IIT_V16 = 12, IIT_V32 = 13, - IIT_PTR = 14, - IIT_ARG = 15, - - // Values from 16+ are only encodable with the inefficient encoding. - IIT_MMX = 16, - IIT_METADATA = 17, - IIT_EMPTYSTRUCT = 18, - IIT_STRUCT2 = 19, - IIT_STRUCT3 = 20, - IIT_STRUCT4 = 21, - IIT_STRUCT5 = 22, - IIT_EXTEND_ARG = 23, - IIT_TRUNC_ARG = 24, - IIT_ANYPTR = 25, - IIT_V1 = 26, - IIT_VARARG = 27, - IIT_HALF_VEC_ARG = 28 + IIT_V64 = 14, + IIT_PTR = 15, + IIT_ARG = 16, + + // Values from 17+ are only encodable with the inefficient encoding. + IIT_MMX = 17, + IIT_METADATA = 18, + IIT_EMPTYSTRUCT = 19, + IIT_STRUCT2 = 20, + IIT_STRUCT3 = 21, + IIT_STRUCT4 = 22, + IIT_STRUCT5 = 23, + IIT_EXTEND_ARG = 24, + IIT_TRUNC_ARG = 25, + IIT_ANYPTR = 26, + IIT_V1 = 27, + IIT_VARARG = 28, + IIT_HALF_VEC_ARG = 29 }; @@ -356,6 +357,7 @@ static void EncodeFixedType(Record *R, std::vector &ArgCodes, case 8: Sig.push_back(IIT_V8); break; case 16: Sig.push_back(IIT_V16); break; case 32: Sig.push_back(IIT_V32); break; + case 64: Sig.push_back(IIT_V64); break; } return EncodeFixedValueType(VVT.getVectorElementType().SimpleTy, Sig); -- 2.34.1