From: Bruno Cardoso Lopes Date: Mon, 12 Jul 2010 22:41:32 +0000 (+0000) Subject: More refactoring of basic SSE arith instructions. Open room for 256-bit instructions X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=f428fee70d34639f91f1796d8004580f0943ceb7;p=oota-llvm.git More refactoring of basic SSE arith instructions. Open room for 256-bit instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108204 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0d5d1b449e7..9f43a254a3b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -393,75 +393,103 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in { /// sse12_fp_scalar - SSE 1 & 2 scalar instructions class multiclass sse12_fp_scalar opc, string OpcodeStr, SDNode OpNode, - RegisterClass RC, X86MemOperand x86memop> { + RegisterClass RC, X86MemOperand x86memop, + bit Is2Addr = 1> { let isCommutable = 1 in { def rr : SI; + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>; } def rm : SI; + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>; } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class multiclass sse12_fp_scalar_int opc, string OpcodeStr, RegisterClass RC, - string asm, string SSEVer, string FPSizeStr, - Operand memopr, ComplexPattern mem_cpat> { + string asm, string SSEVer, string FPSizeStr, + Operand memopr, ComplexPattern mem_cpat, + bit Is2Addr = 1> { def rr_Int : SI("int_x86_sse", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) - RC:$src1, RC:$src2))]>; + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (!nameconcat("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, RC:$src2))]>; def rm_Int : SI("int_x86_sse", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) - RC:$src1, mem_cpat:$src2))]>; + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (!nameconcat("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, mem_cpat:$src2))]>; } /// sse12_fp_packed - SSE 1 & 2 packed instructions class multiclass sse12_fp_packed opc, string OpcodeStr, SDNode OpNode, RegisterClass RC, ValueType vt, X86MemOperand x86memop, PatFrag mem_frag, - Domain d, bit MayLoad = 0> { + Domain d, bit Is2Addr = 1> { let isCommutable = 1 in def rr : PI; - let mayLoad = MayLoad in + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>; + let mayLoad = 1 in def rm : PI; + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, string OpcodeStr, X86MemOperand x86memop, - list pat_rr, list pat_rm> { + list pat_rr, list pat_rm, + bit Is2Addr = 1> { let isCommutable = 1 in - def rr : PI; - def rm : PI; + def rr : PI; + def rm : PI; } /// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class multiclass sse12_fp_packed_int opc, string OpcodeStr, RegisterClass RC, - string asm, string SSEVer, string FPSizeStr, - X86MemOperand x86memop, PatFrag mem_frag, - Domain d> { + string asm, string SSEVer, string FPSizeStr, + X86MemOperand x86memop, PatFrag mem_frag, + Domain d, bit Is2Addr = 1> { def rr_Int : PI("int_x86_sse", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) - RC:$src1, RC:$src2))], d>; - def rm_Int : PI("int_x86_sse", - !strconcat(SSEVer, !strconcat("_", - !strconcat(OpcodeStr, FPSizeStr)))) - RC:$src1, (mem_frag addr:$src2)))], d>; + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (!nameconcat("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, RC:$src2))], d>; + def rm_Int : PI("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, (mem_frag addr:$src2)))], d>; } //===----------------------------------------------------------------------===// @@ -1652,36 +1680,33 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), /// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops /// multiclass sse12_fp_alias_pack_logical opc, string OpcodeStr, - SDNode OpNode, bit MayLoad = 0> { + SDNode OpNode> { let isAsmParserOnly = 1 in { - defm V#NAME#PS : sse12_fp_packed, VEX_4V; - - defm V#NAME#PD : sse12_fp_packed, OpSize, - VEX_4V; + defm V#NAME#PS : sse12_fp_packed, VEX_4V; + + defm V#NAME#PD : sse12_fp_packed, OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PS : sse12_fp_packed, TB; + defm PS : sse12_fp_packed, TB; - defm PD : sse12_fp_packed, TB, OpSize; + defm PD : sse12_fp_packed, TB, OpSize; } } // Alias bitwise logical operations using SSE logical ops on packed FP values. -defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>; -defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>; -defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>; +let mayLoad = 0 in { + defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>; + defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>; + defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>; +} let neverHasSideEffects = 1, Pattern = [], isCommutable = 0 in - defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, 1>; + defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>; /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops /// @@ -1690,31 +1715,29 @@ multiclass sse12_fp_packed_logical opc, string OpcodeStr, list> Pattern = []> { let isAsmParserOnly = 1 in { defm V#NAME#PS : sse12_fp_packed_logical_rm, + (memopv2i64 addr:$src2)))]), 0>, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm, + (memopv2i64 addr:$src2)))]), 0>, OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm opc, string OpcodeStr, (memopv2i64 addr:$src2)))])>, TB; defm PD : sse12_fp_packed_logical_rm opc, string OpcodeStr, - SDNode OpNode> { - - let isAsmParserOnly = 1 in { - defm V#NAME#SS : sse12_fp_scalar, XS, VEX_4V; - - defm V#NAME#SD : sse12_fp_scalar, XD, VEX_4V; - - defm V#NAME#PS : sse12_fp_packed, - VEX_4V; - - defm V#NAME#PD : sse12_fp_packed, - OpSize, VEX_4V; - - defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V; - - defm V#NAME#SD : sse12_fp_scalar_int, XD, VEX_4V; +multiclass basic_sse12_fp_binop_s opc, string OpcodeStr, SDNode OpNode, + bit Is2Addr = 1> { + defm SS : sse12_fp_scalar, XS; + defm SD : sse12_fp_scalar, XD; +} + +multiclass basic_sse12_fp_binop_p opc, string OpcodeStr, SDNode OpNode, + bit Is2Addr = 1> { + let mayLoad = 0 in { + defm PS : sse12_fp_packed, TB; + defm PD : sse12_fp_packed, TB, OpSize; } +} - let Constraints = "$src1 = $dst" in { - defm SS : sse12_fp_scalar, XS; - - defm SD : sse12_fp_scalar, XD; - - defm PS : sse12_fp_packed, TB; - - defm PD : sse12_fp_packed, TB, OpSize; +multiclass basic_sse12_fp_binop_s_int opc, string OpcodeStr, + bit Is2Addr = 1> { + defm SS : sse12_fp_scalar_int, XS; + defm SD : sse12_fp_scalar_int, XD; +} - defm SS : sse12_fp_scalar_int, XS; +multiclass basic_sse12_fp_binop_p_int opc, string OpcodeStr, + bit Is2Addr = 1> { + defm PS : sse12_fp_packed_int, TB; - defm SD : sse12_fp_scalar_int, XD; - } + defm PD : sse12_fp_packed_int, TB, OpSize; } // Arithmetic instructions -defm ADD : basic_sse12_fp_binop_rm<0x58, "add", fadd>; -defm MUL : basic_sse12_fp_binop_rm<0x59, "mul", fmul>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, VEX_4V; + defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, VEX_4V; -let isCommutable = 0 in { - defm SUB : basic_sse12_fp_binop_rm<0x5C, "sub", fsub>; - defm DIV : basic_sse12_fp_binop_rm<0x5E, "div", fdiv>; + let isCommutable = 0 in { + defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, VEX_4V; + defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, VEX_4V; + defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, VEX_4V; + defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, VEX_4V; + } } -/// sse12_fp_binop_rm - Other SSE 1 & 2 binops -/// -/// This multiclass is like basic_sse12_fp_binop_rm, with the addition of -/// instructions for a full-vector intrinsic form. Operations that map -/// onto C operators don't use this form since they just use the plain -/// vector form instead of having a separate vector intrinsic form. -/// -multiclass sse12_fp_binop_rm opc, string OpcodeStr, - SDNode OpNode> { - - let isAsmParserOnly = 1 in { - // Scalar operation, reg+reg. - defm V#NAME#SS : sse12_fp_scalar, XS, VEX_4V; - - defm V#NAME#SD : sse12_fp_scalar, XD, VEX_4V; - - defm V#NAME#PS : sse12_fp_packed, - VEX_4V; - - defm V#NAME#PD : sse12_fp_packed, - OpSize, VEX_4V; - - defm V#NAME#SS : sse12_fp_scalar_int, XS, VEX_4V; - - defm V#NAME#SD : sse12_fp_scalar_int, XD, VEX_4V; - - defm V#NAME#PS : sse12_fp_packed_int, VEX_4V; - - defm V#NAME#PD : sse12_fp_packed_int, OpSize, - VEX_4V; - } +let Constraints = "$src1 = $dst" in { + defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>, + basic_sse12_fp_binop_p<0x58, "add", fadd>, + basic_sse12_fp_binop_s_int<0x58, "add">; + defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>, + basic_sse12_fp_binop_p<0x59, "mul", fmul>, + basic_sse12_fp_binop_s_int<0x59, "mul">; - let Constraints = "$src1 = $dst" in { - // Scalar operation, reg+reg. - defm SS : sse12_fp_scalar, XS; - defm SD : sse12_fp_scalar, XD; - defm PS : sse12_fp_packed, TB; - - defm PD : sse12_fp_packed, TB, OpSize; - - defm SS : sse12_fp_scalar_int, XS; - - defm SD : sse12_fp_scalar_int, XD; - - defm PS : sse12_fp_packed_int, TB; - - defm PD : sse12_fp_packed_int, TB, OpSize; + let isCommutable = 0 in { + defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>, + basic_sse12_fp_binop_p<0x5C, "sub", fsub>, + basic_sse12_fp_binop_s_int<0x5C, "sub">; + defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>, + basic_sse12_fp_binop_p<0x5E, "div", fdiv>, + basic_sse12_fp_binop_s_int<0x5E, "div">; + defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmax>, + basic_sse12_fp_binop_s_int<0x5F, "max">, + basic_sse12_fp_binop_p_int<0x5F, "max">; + defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>, + basic_sse12_fp_binop_p<0x5D, "min", X86fmin>, + basic_sse12_fp_binop_s_int<0x5D, "min">, + basic_sse12_fp_binop_p_int<0x5D, "min">; } } -let isCommutable = 0 in { - defm MAX : sse12_fp_binop_rm<0x5F, "max", X86fmax>; - defm MIN : sse12_fp_binop_rm<0x5D, "min", X86fmin>; -} - /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the