From fb9d61a8d62e75a4a99b3fafd406699e08219775 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Wed, 15 Oct 2014 23:42:17 +0000 Subject: [PATCH] [AVX512] Add DQ subvector inserts In AVX512f we support 64x2 and 32x8 inserts via matching them to 32x4 and 64x4 respectively. These are matched by "Alt" Pat<>'s (Alt stands for alternative VTs). Since DQ has native support for these instructions, I peeled off the non-"Alt" part of the baseclass into vinsert_for_size_no_alt. The DQ instructions are derived from this multiclass. The "Alt" Pat<>'s are disabled with DQ. Fixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219874 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 43 +++++++++++++++++------ lib/Target/X86/X86InstrInfo.td | 1 + test/CodeGen/X86/avx512-insert-extract.ll | 9 +++-- test/MC/X86/avx512-encodings.s | 8 +++++ 4 files changed, 47 insertions(+), 14 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 788fcab773e..f20c8829286 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -347,11 +347,10 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; // AVX-512 - VECTOR INSERT // -multiclass vinsert_for_size { +multiclass vinsert_for_size_no_alt { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { def rr : AVX512AIi8, EVEX_4V, EVEX_V512, EVEX_CD8; } +} +multiclass vinsert_for_size : + vinsert_for_size_no_alt { // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for - // vinserti32x4 - def : Pat<(vinsert_insert:$ins - (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)), - (AltTo.VT (!cast(NAME # From.EltSize # "x4rr") - VR512:$src1, From.RC:$src2, - (INSERT_get_vinsert_imm VR512:$ins)))>; + // vinserti32x4. Only add this if 64x2 and friends are not supported + // natively via AVX512DQ. 
+ let Predicates = [NoDQI] in + def : Pat<(vinsert_insert:$ins + (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)), + (AltTo.VT (!cast(NAME # From.EltSize # "x4rr") + VR512:$src1, From.RC:$src2, + (INSERT_get_vinsert_imm VR512:$ins)))>; } multiclass vinsert_for_type, vinsert128_insert, INSERT_get_vinsert128_imm>; + let Predicates = [HasDQI] in + defm NAME # "64x2" : vinsert_for_size_no_alt, + X86VectorVTInfo< 8, EltVT64, VR512>, + vinsert128_insert, + INSERT_get_vinsert128_imm>, VEX_W; defm NAME # "64x4" : vinsert_for_size, X86VectorVTInfo< 8, EltVT64, VR512>, @@ -398,6 +413,12 @@ multiclass vinsert_for_type, vinsert256_insert, INSERT_get_vinsert256_imm>, VEX_W; + let Predicates = [HasDQI] in + defm NAME # "32x8" : vinsert_for_size_no_alt, + X86VectorVTInfo<16, EltVT32, VR512>, + vinsert256_insert, + INSERT_get_vinsert256_imm>; } defm VINSERTF : vinsert_for_type; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 4c55f7cffe4..3dbf8196249 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -710,6 +710,7 @@ def HasCDI : Predicate<"Subtarget->hasCDI()">; def HasPFI : Predicate<"Subtarget->hasPFI()">; def HasERI : Predicate<"Subtarget->hasERI()">; def HasDQI : Predicate<"Subtarget->hasDQI()">; +def NoDQI : Predicate<"!Subtarget->hasDQI()">; def HasBWI : Predicate<"Subtarget->hasBWI()">; def HasVLX : Predicate<"Subtarget->hasVLX()">, AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">; diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index fa60476ab0a..eba895ebf56 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -13,9 +13,11 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind { } ;CHECK-LABEL: test2: -;CHECK: vinsertf32x4 $0 +;KNL: vinsertf32x4 $0 +;SKX: vinsertf64x2 $0 ;CHECK: vextractf32x4 $3 -;CHECK: vinsertf32x4 $3 +;KNL: vinsertf32x4 $3 +;SKX: vinsertf64x2 
$3 ;CHECK: ret define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind { %rrr = load double* %br @@ -36,7 +38,8 @@ define <16 x float> @test3(<16 x float> %x) nounwind { ;CHECK-LABEL: test4: ;CHECK: vextracti32x4 $2 -;CHECK: vinserti32x4 $0 +;KNL: vinserti32x4 $0 +;SKX: vinserti64x2 $0 ;CHECK: ret define <8 x i64> @test4(<8 x i64> %x) nounwind { %eee = extractelement <8 x i64> %x, i32 4 diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s index 8326a7c9289..8dc89da48e5 100644 --- a/test/MC/X86/avx512-encodings.s +++ b/test/MC/X86/avx512-encodings.s @@ -4205,6 +4205,14 @@ vinserti32x4 $1, %xmm21, %zmm5, %zmm17 // CHECK: encoding: [0x62,0xe3,0x1d,0x40,0x38,0x4f,0x10,0x01] vinserti32x4 $1, 256(%rdi), %zmm28, %zmm17 +// CHECK: vinserti32x8 +// CHECK: encoding: [0x62,0xd3,0x4d,0x40,0x3a,0xdb,0x01] +vinserti32x8 $1, %ymm11, %zmm22, %zmm3 + +// CHECK: vinsertf64x2 +// CHECK: encoding: [0x62,0xf3,0xed,0x48,0x18,0x4f,0x10,0x01] +vinsertf64x2 $1, 256(%rdi), %zmm2, %zmm1 + // CHECK: vextracti32x4 // CHECK: encoding: [0x62,0x33,0x7d,0x48,0x39,0xc9,0x01] vextracti32x4 $1, %zmm9, %xmm17 -- 2.34.1