From: Adam Nemet Date: Wed, 2 Jul 2014 21:26:01 +0000 (+0000) Subject: [X86] AVX512: Allow writemask argument in vpermt* intrinsics X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=998743e1855324c274561abbed15e60373025313 [X86] AVX512: Allow writemask argument in vpermt* intrinsics git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212223 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index dd1a52facf0..c8b0ecdbe49 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -693,22 +693,32 @@ defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, multiclass avx512_perm_table_3src opc, string Suffix, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop, - SDNode OpNode, ValueType OpVT, RegisterClass KRC> : + SDNode OpNode, ValueType OpVT, RegisterClass KRC, + ValueType MaskVT, RegisterClass MRC> : avx512_perm_3src { def : Pat<(OpVT (!cast("int_x86_avx512_mask_vpermt_"##Suffix##"_512") VR512:$idx, VR512:$src1, VR512:$src2, -1)), (!cast(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>; + + def : Pat<(OpVT (!cast("int_x86_avx512_mask_vpermt_"##Suffix##"_512") + VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)), + (!cast(NAME#rrk) VR512:$src1, + (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>; } defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem, - X86VPermv3, v16i32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; + X86VPermv3, v16i32, VK16WM, v16i1, GR16>, + EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem, - X86VPermv3, v8i64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + X86VPermv3, v8i64, VK8WM, v8i1, GR8>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem, - X86VPermv3, v16f32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; + X86VPermv3, v16f32, VK16WM, v16i1, GR16>, + EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem, - X86VPermv3, v8f64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + X86VPermv3, v8f64, VK8WM, v8i1, GR8>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 51175390a25..18cfcfe78b0 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -594,6 +594,13 @@ define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%p ret <16 x float> %res } +define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) { +; CHECK-LABEL: test_vpermt2ps_mask: +; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1] + %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask) + ret <16 x float> %res +} + declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16) define <8 x i64> @test_vmovntdqa(i8 *%x) {