From: Adam Nemet Date: Thu, 14 Aug 2014 17:13:19 +0000 (+0000) Subject: [AVX512] Add masking variant for the FMA instructions X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=265d201e1931c30a309b4889644d423cea9befa2 [AVX512] Add masking variant for the FMA instructions This change further evolves the base class AVX512_masking in order to make it suitable for the masking variants of the FMA instructions. Besides AVX512_masking there is now a new base class that instructions including FMAs can use: AVX512_masking_3src. With three-source (destructive) instructions one of the sources is already tied to the destination. This difference from AVX512_masking is captured by this new class. The common bits between _masking and _masking_3src are broken out into a new super class called AVX512_masking_common. As with valign, there is some corresponding restructuring of the underlying format classes. The idea is the same: we want to derive from two classes essentially: one providing the format bits and another format-independent multiclass supplying the various masking and non-masking instruction variants. Existing fma tests in avx512-fma*.ll provide coverage here for the non-masking variants. For masking, the next patches in the series will add intrinsics and intrinsic tests. For AVX512_masking_3src to work, the (ins ...) dag has to be passed *without* the leading source operand that is tied to dst ($src1). This is necessary to properly construct the (ins ...) for the different variants. For the record, I did check that if $src1 is mistakenly included, you do get a fairly intuitive error message from the tablegen backend. 
Part of git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215660 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 3fe62d60d3c..3678255e5f6 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1,25 +1,28 @@ -multiclass AVX512_masking O, Format F, dag Outs, dag Ins, - string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm, - dag RHS, ValueType OpVT, - RegisterClass RC, RegisterClass KRC> { +// Common base class of AVX512_masking and AVX512_masking_3src. +multiclass AVX512_masking_common O, Format F, dag Outs, dag Ins, + dag MaskingIns, dag ZeroMaskingIns, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, dag MaskingRHS, ValueType OpVT, + RegisterClass RC, RegisterClass KRC, + string MaskingConstraint = ""> { def NAME: AVX512; // Prefer over VMOV*rrk Pat<> - let Constraints = "$src0 = $dst", AddedComplexity = 20 in - def NAME#k: AVX512, - EVEX_K; + [(set RC:$dst, MaskingRHS)]>, + EVEX_K { + // In case of the 3src subclass this is overridden with a let. + string Constraints = MaskingConstraint; + } let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> - def NAME#kz: AVX512 O, Format F, dag Outs, dag Ins, EVEX_KZ; } +// This multiclass generates the unconditional/non-masking, the masking and +// the zero-masking variant of the instruction. In the masking case, the +// perserved vector elements come from a new dummy input operand tied to $dst. +multiclass AVX512_masking O, Format F, dag Outs, dag Ins, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, ValueType OpVT, RegisterClass RC, + RegisterClass KRC> : + AVX512_masking_common; + +// Similar to AVX512_masking but in this case one of the source operands +// ($src1) is already tied to $dst so we just use that for the preserved +// vector elements. NOTE that the NonTiedIns (the ins dag) should exclude +// $src1. 
+multiclass AVX512_masking_3src O, Format F, dag Outs, dag NonTiedIns, + string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS, ValueType OpVT, + RegisterClass RC, RegisterClass KRC> : + AVX512_masking_common; + // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion let Predicates = [HasAVX512] in { @@ -2955,11 +2992,13 @@ let Constraints = "$src1 = $dst" in { multiclass avx512_fma3p_rm opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, - string BrdcstStr, SDNode OpNode, ValueType OpVT> { - def r: AVX512FMA3; + string BrdcstStr, SDNode OpNode, ValueType OpVT, + RegisterClass KRC> { + defm r: AVX512_masking_3src, + AVX512FMA3Base; let mayLoad = 1 in def m: AVX512FMA3 opc, string OpcodeStr, let ExeDomain = SSEPackedSingle in { defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmadd, v16f32>, EVEX_V512, + X86Fmadd, v16f32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmsub, v16f32>, EVEX_V512, + X86Fmsub, v16f32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmaddsub, v16f32>, + X86Fmaddsub, v16f32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmsubadd, v16f32>, + X86Fmsubadd, v16f32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", - X86Fnmadd, v16f32>, EVEX_V512, + X86Fnmadd, v16f32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", 
VR512, f512mem, memopv16f32, f32mem, loadf32, "{1to16}", - X86Fnmsub, v16f32>, EVEX_V512, + X86Fnmsub, v16f32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>; } let ExeDomain = SSEPackedDouble in { defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmadd, v8f64>, EVEX_V512, + X86Fmadd, v8f64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmsub, v8f64>, EVEX_V512, VEX_W, + X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmaddsub, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; + X86Fmaddsub, v8f64, VK8WM>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmsubadd, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; + X86Fmsubadd, v8f64, VK8WM>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", - X86Fnmadd, v8f64>, EVEX_V512, VEX_W, + X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem, memopv8f64, f64mem, loadf64, "{1to8}", - X86Fnmsub, v8f64>, EVEX_V512, VEX_W, + X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 61ea18d5cad..3bbb9de387d 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -722,7 +722,7 @@ class AVX512AIi8 o, Format F, dag outs, dag ins, string asm, list pattern, InstrItinClass itin = NoItinerary> : Ii8, TAPD, Requires<[HasAVX512]>; -class AVX512AIi8Base: TAPD { +class AVX512AIi8Base : TAPD { Domain 
ExeDomain = SSEPackedInt; ImmType ImmT = Imm8; } @@ -748,6 +748,7 @@ class AVX512FMA3 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary> : I, T8PD, EVEX_4V, Requires<[HasAVX512]>; +class AVX512FMA3Base : T8PD, EVEX_4V; class AVX512 o, Format F, dag outs, dag ins, string asm, listpattern, InstrItinClass itin = NoItinerary>