From: Elena Demikhovsky Date: Thu, 21 May 2015 14:01:32 +0000 (+0000) Subject: AVX-512: Enabled SSE intrinsics on AVX-512. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=86425451e58c7237d13e3eb3f22defc79cf4abda;p=oota-llvm.git AVX-512: Enabled SSE intrinsics on AVX-512. Predicate UseAVX depricates pattern selection on AVX-512. This predicate is necessary for DAG selection to select EVEX form. But mapping SSE intrinsics to AVX-512 instructions is not ready yet. So I replaced UseAVX with HasAVX for intrinsics patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237903 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index ed480ec60f5..331faf2fd0b 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -442,12 +442,29 @@ class SI o, Format F, dag outs, dag ins, string asm, asm)); } -// SIi8 - SSE 1 & 2 scalar instructions +// SI - SSE 1 & 2 scalar intrinsics - vex form available on AVX512 +class SI_Int o, Format F, dag outs, dag ins, string asm, + list pattern, InstrItinClass itin = NoItinerary, + Domain d = GenericDomain> + : I { + let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512], + !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX], + !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1], + !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2], + !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2], + [UseSSE1]))))); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(!eq(OpEnc.Value, EncEVEX.Value), !strconcat("v", asm), + !if(!eq(OpEnc.Value, EncVEX.Value), !strconcat("v", asm), + asm)); +} +// SIi8 - SSE 1 & 2 scalar instructions - vex form available on AVX512 class SIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8 { let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512], - !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX], + !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX], !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1], [UseSSE2]))); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d255fad6c15..b898981ec13 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -264,7 +264,7 @@ multiclass sse12_fp_scalar_int opc, string OpcodeStr, RegisterClass RC, Operand memopr, ComplexPattern mem_cpat, Domain d, OpndItins itins, bit Is2Addr = 1> { let isCodeGenOnly = 1 in { - def rr_Int : SI, Sched<[itins.Sched]>; - def rm_Int : SI, XD, VEX_4V, Requires<[UseAVX]>, + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, Sched<[WriteCvtF2F]>; def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>, + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { @@ -1936,14 +1936,14 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>, + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, Sched<[WriteCvtF2F]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>, + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, @@ -3380,7 +3380,7 @@ multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, Operand vec_memop, ComplexPattern mem_cpat, Intrinsic Intr, SDNode OpNode, Domain d, - OpndItins itins, Predicate target, string Suffix> { + OpndItins itins, string Suffix> { let hasSideEffects = 0 in { def r : I opc, string OpcodeStr, RegisterClass RC, } } - let Predicates = [target] in { + let Predicates = [UseAVX] in { def : Pat<(OpNode RC:$src), (!cast("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)), RC:$src)>; @@ -3410,6 +3410,8 @@ multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, (!cast("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)), mem_cpat:$src)>; + } + let Predicates = [HasAVX] in { def : Pat<(Intr VR128:$src), (!cast("V"#NAME#Suffix##r_Int) (vt (IMPLICIT_DEF)), VR128:$src)>; @@ -3418,7 +3420,7 @@ multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, (!cast("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)), mem_cpat:$src)>; } - let Predicates = [target, OptForSize] in + let Predicates = [UseAVX, OptForSize] in def : Pat<(ScalarVT (OpNode (load addr:$src))), (!cast("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)), addr:$src)>; @@ -3505,7 +3507,7 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, SDNode OpNode, defm V#NAME#SS : avx_fp_unop_s("int_x86_sse_"##OpcodeStr##_ss), OpNode, - SSEPackedSingle, itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG; + SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG; } multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, @@ -3517,7 +3519,7 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, defm V#NAME#SD : avx_fp_unop_s("int_x86_sse2_"##OpcodeStr##_sd), - OpNode, SSEPackedDouble, itins, UseAVX, "SD">, + OpNode, SSEPackedDouble, itins, "SD">, XD, VEX_4V, VEX_LIG; } @@ -4980,7 +4982,7 @@ def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), //===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. // -let Predicates = [UseAVX] in +let Predicates = [HasAVX] in def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src), (VMOVPQI2QImr addr:$dst, VR128:$src)>; let Predicates = [UseSSE2] in diff --git a/test/CodeGen/X86/sse-intrinsics-x86.ll b/test/CodeGen/X86/sse-intrinsics-x86.ll index 65d44bfb5ba..0857189be73 100644 --- a/test/CodeGen/X86/sse-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse-intrinsics-x86.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: addss diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll index 5afebd24bb4..53132a167fb 100644 --- a/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) { ; CHECK: addsd @@ -142,7 +143,7 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { ; CHECK: cvtsd2ss - ; CHECK-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}} + ; SSE-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}} %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } diff --git a/test/CodeGen/X86/sse41-intrinsics-x86.ll b/test/CodeGen/X86/sse41-intrinsics-x86.ll index 5f25a16380d..771e4024336 100644 --- a/test/CodeGen/X86/sse41-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse41-intrinsics-x86.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { ; CHECK: blendpd