From e3d2fcce41c07b2b0d62f70b5ee27e3e841c1725 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Mon, 29 Sep 2014 22:54:41 +0000 Subject: [PATCH] [AVX512] Use X86VectorVTInfo in the masking helper classes and the FMAs No functionality change. Makes the code more compact (see the FMA part). This needs a new type attribute MemOpFrag in X86VectorVTInfo. For now I only defined this in the simple cases. See the commment before the attribute. Diff of X86.td.expanded before and after is empty except for the appearance of the new attribute. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218637 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 290 ++++++++++++++----------------- 1 file changed, 135 insertions(+), 155 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 90f45f97225..c2a95849b6a 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -52,6 +52,14 @@ class X86VectorVTInfo("load" # EltVT); + // Load patterns used for memory operands. We only have this defined in + // case of i64 element types for sub-512 integer vectors. For now, keep + // MemOpFrag undefined in these cases. + PatFrag MemOpFrag = + !if (!eq (TypeVariantName, "f"), !cast("memop" # VTName), + !if (!eq (EltTypeName, "i64"), !cast("memop" # VTName), + !if (!eq (VTName, "v16i32"), !cast("memop" # VTName), ?))); + // The corresponding float type, e.g. v16f32 for v16i32 // Note: For EltSize < 32, FloatVT is illegal and TableGen // fails to compile, so we choose FloatVT = VT @@ -77,6 +85,8 @@ def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">; def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">; def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">; def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">; +def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">; +def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">; // "x" in v32i8x_info means RC = VR256X def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">; @@ -107,24 +117,24 @@ def avx512vl_i64_info : AVX512VLVectorVTInfo O, Format F, dag Outs, dag Ins, - dag MaskingIns, dag ZeroMaskingIns, +multiclass AVX512_masking_common O, Format F, X86VectorVTInfo _, + dag Outs, + dag Ins, dag MaskingIns, dag ZeroMaskingIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, dag MaskingRHS, ValueType OpVT, - RegisterClass RC, RegisterClass KRC, + dag RHS, dag MaskingRHS, string MaskingConstraint = ""> { def NAME: AVX512; + [(set _.RC:$dst, RHS)]>; // Prefer over VMOV*rrk Pat<> let AddedComplexity = 20 in def NAME#k: AVX512, + [(set _.RC:$dst, MaskingRHS)]>, EVEX_K { // In case of the 3src subclass this is overridden with a let. string Constraints = MaskingConstraint; @@ -133,9 +143,9 @@ multiclass AVX512_masking_common O, Format F, dag Outs, dag Ins, def NAME#kz: AVX512, EVEX_KZ; } @@ -143,34 +153,31 @@ multiclass AVX512_masking_common O, Format F, dag Outs, dag Ins, // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the instruction. In the masking case, the // perserved vector elements come from a new dummy input operand tied to $dst. -multiclass AVX512_masking O, Format F, dag Outs, dag Ins, - string OpcodeStr, +multiclass AVX512_masking O, Format F, X86VectorVTInfo _, + dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, ValueType OpVT, RegisterClass RC, - RegisterClass KRC> : - AVX512_masking_common : + AVX512_masking_common; // Similar to AVX512_masking but in this case one of the source operands // ($src1) is already tied to $dst so we just use that for the preserved // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude // $src1. -multiclass AVX512_masking_3src O, Format F, dag Outs, dag NonTiedIns, - string OpcodeStr, +multiclass AVX512_masking_3src O, Format F, X86VectorVTInfo _, + dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, ValueType OpVT, - RegisterClass RC, RegisterClass KRC> : - AVX512_masking_common : + AVX512_masking_common; + (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>; // Bitcasts between 512-bit vector types. Return the original type since // no instruction is needed for the conversion @@ -3267,156 +3274,130 @@ let Predicates = [HasAVX512] in { // FMA - Fused Multiply Operations // let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3p_rm opc, string OpcodeStr, - RegisterClass RC, X86MemOperand x86memop, - PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, - string BrdcstStr, SDNode OpNode, ValueType OpVT, - RegisterClass KRC> { - defm r: AVX512_masking_3src opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm r: AVX512_masking_3src, + (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>, AVX512FMA3Base; let mayLoad = 1 in - def m: AVX512FMA3; - def mb: AVX512FMA3, EVEX_B; + [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2, + (_.MemOpFrag addr:$src3))))]>; + def mb: AVX512FMA3, EVEX_B; } } // Constraints = "$src1 = $dst" let ExeDomain = SSEPackedSingle in { - defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmadd, v16f32, VK16WM>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmsub, v16f32, VK16WM>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmaddsub, v16f32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmsubadd, v16f32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fnmadd, v16f32, VK16WM>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fnmsub, v16f32, VK16WM>, EVEX_V512, - EVEX_CD8<32, CD8VF>; + defm VFMADD213PSZ : avx512_fma3p_rm<0xA8, "vfmadd213ps", X86Fmadd, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMSUB213PSZ : avx512_fma3p_rm<0xAA, "vfmsub213ps", X86Fmsub, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMADDSUB213PSZ : avx512_fma3p_rm<0xA6, "vfmaddsub213ps", X86Fmaddsub, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMSUBADD213PSZ : avx512_fma3p_rm<0xA7, "vfmsubadd213ps", X86Fmsubadd, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFNMADD213PSZ : avx512_fma3p_rm<0xAC, "vfnmadd213ps", X86Fnmadd, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFNMSUB213PSZ : avx512_fma3p_rm<0xAE, "vfnmsub213ps", X86Fnmsub, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; } let ExeDomain = SSEPackedDouble in { - defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmadd, v8f64, VK8WM>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmsub, v8f64, VK8WM>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmaddsub, v8f64, VK8WM>, + defm VFMADD213PDZ : avx512_fma3p_rm<0xA8, "vfmadd213pd", X86Fmadd, + v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm VFMSUB213PDZ : avx512_fma3p_rm<0xAA, "vfmsub213pd", X86Fmsub, + v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm VFMADDSUB213PDZ : avx512_fma3p_rm<0xA6, "vfmaddsub213pd", X86Fmaddsub, + v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmsubadd, v8f64, VK8WM>, + defm VFMSUBADD213PDZ : avx512_fma3p_rm<0xA7, "vfmsubadd213pd", X86Fmsubadd, + v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", X86Fnmadd, + v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", X86Fnmsub, + v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm VFNMADD213PDZ : avx512_fma3p_rm<0xAC, "vfnmadd213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fnmadd, v8f64, VK8WM>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFNMSUB213PDZ : avx512_fma3p_rm<0xAE, "vfnmsub213pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fnmsub, v8f64, VK8WM>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; } let Constraints = "$src1 = $dst" in { -multiclass avx512_fma3p_m132 opc, string OpcodeStr, - RegisterClass RC, X86MemOperand x86memop, - PatFrag mem_frag, X86MemOperand x86scalar_mop, PatFrag scalar_mfrag, - string BrdcstStr, SDNode OpNode, ValueType OpVT> { +multiclass avx512_fma3p_m132 opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { let mayLoad = 1 in - def m: AVX512FMA3; - def mb: AVX512FMA3, EVEX_B; + [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, (_.MemOpFrag addr:$src2), + _.RC:$src3)))]>; + def mb: AVX512FMA3, EVEX_B; } } // Constraints = "$src1 = $dst" let ExeDomain = SSEPackedSingle in { - defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmadd, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmsub, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmaddsub, v16f32>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fmsubadd, v16f32>, - EVEX_V512, EVEX_CD8<32, CD8VF>; - defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fnmadd, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", VR512, f512mem, - memopv16f32, f32mem, loadf32, "{1to16}", - X86Fnmsub, v16f32>, EVEX_V512, - EVEX_CD8<32, CD8VF>; + defm VFMADD132PSZ : avx512_fma3p_m132<0x98, "vfmadd132ps", X86Fmadd, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMSUB132PSZ : avx512_fma3p_m132<0x9A, "vfmsub132ps", X86Fmsub, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMADDSUB132PSZ : avx512_fma3p_m132<0x96, "vfmaddsub132ps", X86Fmaddsub, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFMSUBADD132PSZ : avx512_fma3p_m132<0x97, "vfmsubadd132ps", X86Fmsubadd, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFNMADD132PSZ : avx512_fma3p_m132<0x9C, "vfnmadd132ps", X86Fnmadd, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; + defm VFNMSUB132PSZ : avx512_fma3p_m132<0x9E, "vfnmsub132ps", X86Fnmsub, + v16f32_info>, + EVEX_V512, EVEX_CD8<32, CD8VF>; } let ExeDomain = SSEPackedDouble in { - defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmadd, v8f64>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; - defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmsub, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmaddsub, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fmsubadd, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fnmadd, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; - defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", VR512, f512mem, - memopv8f64, f64mem, loadf64, "{1to8}", - X86Fnmsub, v8f64>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VF>; + defm VFMADD132PDZ : avx512_fma3p_m132<0x98, "vfmadd132pd", X86Fmadd, + v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm VFMSUB132PDZ : avx512_fma3p_m132<0x9A, "vfmsub132pd", X86Fmsub, + v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm VFMADDSUB132PDZ : avx512_fma3p_m132<0x96, "vfmaddsub132pd", X86Fmaddsub, + v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm VFMSUBADD132PDZ : avx512_fma3p_m132<0x97, "vfmsubadd132pd", X86Fmsubadd, + v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm VFNMADD132PDZ : avx512_fma3p_m132<0x9C, "vfnmadd132pd", X86Fnmadd, + v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm VFNMSUB132PDZ : avx512_fma3p_m132<0x9E, "vfnmsub132pd", X86Fnmsub, + v8f64_info>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } // Scalar FMA @@ -4841,13 +4822,12 @@ def : Pat<(v8i64 (X86Shufp VR512:$src1, (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; multiclass avx512_valign { - defm rri : AVX512_masking<0x03, MRMSrcReg, (outs _.RC:$dst), + defm rri : AVX512_masking<0x03, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i8imm:$src3), "valign"##_.Suffix, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86VAlign _.RC:$src2, _.RC:$src1, - (i8 imm:$src3))), - _.VT, _.RC, _.KRCWM>, + (i8 imm:$src3)))>, AVX512AIi8Base, EVEX_4V; // Also match valign of packed floats. -- 2.34.1