From bf348c4e462e0253628e39678fd3f643cf0931f0 Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky Date: Tue, 22 Jul 2014 11:07:31 +0000 Subject: [PATCH] AVX-512: Fixed intrinsic of VSQRTPS/PD instructions. I set number and types of parameters according to GCC intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213640 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsX86.td | 10 +++++---- lib/Target/X86/X86InstrAVX512.td | 30 +++++++-------------------- test/CodeGen/X86/avx512-intrinsics.ll | 8 +++---- 3 files changed, 17 insertions(+), 31 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 5de950813cd..018aa9a87d0 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2955,10 +2955,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], [IntrNoMem]>; - def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], [IntrNoMem]>; + def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index d2894088b80..92d536356d4 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3629,7 +3629,6 @@ def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src), (VRCP28PDZrb VR512:$src)>; multiclass avx512_sqrt_packed opc, string OpcodeStr, SDNode OpNode, - Intrinsic V16F32Int, Intrinsic V8F64Int, OpndItins itins_s, OpndItins itins_d> { def PSZrr :AVX512PSI opc, string OpcodeStr, SDNode OpNode, (v8f64 (bitconvert (memopv16f32 addr:$src)))))], itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; -let isCodeGenOnly = 1 in { - def PSZr_Int : AVX512PSI, - EVEX, EVEX_V512; - def PSZm_Int : AVX512PSI, EVEX, - EVEX_V512, EVEX_CD8<32, CD8VF>; - def PDZr_Int : AVX512PDI, - EVEX, EVEX_V512, VEX_W; - def PDZm_Int : AVX512PDI, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -} // isCodeGenOnly = 1 } multiclass avx512_sqrt_scalar opc, string OpcodeStr, @@ -3744,10 +3722,16 @@ defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, SSE_SQRTSS, SSE_SQRTSD>, avx512_sqrt_packed<0x51, "vsqrt", fsqrt, - int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512, SSE_SQRTPS, SSE_SQRTPD>; let Predicates = [HasAVX512] in { + def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1), + (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), + (VSQRTPSZrr VR512:$src1)>; + def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1), + (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), + (VSQRTPDZrr VR512:$src1)>; + def : Pat<(f32 (fsqrt FR32X:$src)), (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (fsqrt (load addr:$src))), diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 18cfcfe78b0..6f34d4596f9 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -134,17 +134,17 @@ declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x flo define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { ; CHECK: vsqrtpd - %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1] + %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1] ret <8 x double> %res } -declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>) nounwind readnone +declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { ; CHECK: vsqrtps - %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1] ret <16 x float> %res } -declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone +declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: vsqrtss {{.*}}encoding: [0x62 -- 2.34.1