X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86InstrSSE.td;h=e14cb10b744bf541494541061a45b384cb7ce820;hb=1011c13f15339632d2edd669dcb3ce22303a26a6;hp=b3d6066a8eaa476aa35d75e5fe34aca7ac001af5;hpb=33160cf37637691de97b16ad1b67e251fc6355d4;p=oota-llvm.git

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b3d6066a8ea..e14cb10b744 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -16,6 +16,8 @@
 class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
   InstrItinClass rr = arg_rr;
   InstrItinClass rm = arg_rm;
+  // InstrSchedModel info.
+  X86FoldableSchedWrite Sched = WriteFAdd;
 }

 class SizeItins<OpndItins arg_s, OpndItins arg_d> {
@@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
 }

 // scalar
+let Sched = WriteFAdd in {
 def SSE_ALU_F32S : OpndItins<
   IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
 >;
@@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins<
 def SSE_ALU_F64S : OpndItins<
   IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
 >;
+}

 def SSE_ALU_ITINS_S : SizeItins<
   SSE_ALU_F32S, SSE_ALU_F64S
 >;

+let Sched = WriteFMul in {
 def SSE_MUL_F32S : OpndItins<
   IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
 >;
@@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins<
 def SSE_MUL_F64S : OpndItins<
   IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
 >;
+}

 def SSE_MUL_ITINS_S : SizeItins<
   SSE_MUL_F32S, SSE_MUL_F64S
 >;

+let Sched = WriteFDiv in {
 def SSE_DIV_F32S : OpndItins<
   IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
 >;
@@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins<
 def SSE_DIV_F64S : OpndItins<
   IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
 >;
+}

 def SSE_DIV_ITINS_S : SizeItins<
   SSE_DIV_F32S, SSE_DIV_F64S
 >;

 // parallel
+let Sched = WriteFAdd in {
 def SSE_ALU_F32P : OpndItins<
   IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
 >;
@@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins<
 def SSE_ALU_F64P : OpndItins<
   IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
 >;
+}

 def SSE_ALU_ITINS_P : SizeItins<
   SSE_ALU_F32P, SSE_ALU_F64P
 >;

+let Sched = WriteFMul in {
 def SSE_MUL_F32P : OpndItins<
   IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
 >;
@@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins<
 def SSE_MUL_F64P : OpndItins<
   IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
 >;
+}

 def SSE_MUL_ITINS_P : SizeItins<
   SSE_MUL_F32P, SSE_MUL_F64P
 >;

+let Sched = WriteFDiv in {
 def SSE_DIV_F32P : OpndItins<
   IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
 >;
@@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins<
 def SSE_DIV_F64P : OpndItins<
   IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
 >;
+}

 def SSE_DIV_ITINS_P : SizeItins<
   SSE_DIV_F32P, SSE_DIV_F64P
@@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins<
   IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
 >;

+let Sched = WriteVecALU in {
 def SSE_INTALU_ITINS_P : OpndItins<
   IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
 >;
@@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins<
 def SSE_INTALUQ_ITINS_P : OpndItins<
   IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
 >;
+}

+let Sched = WriteVecIMul in
 def SSE_INTMUL_ITINS_P : OpndItins<
   IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
 >;
@@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-      [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
+      [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
+      Sched<[itins.Sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
-     [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
+     [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
+     Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }

 /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
@@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], itins.rr>; + RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm_Int : SI(!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, mem_cpat:$src2))], itins.rm>; + RC:$src1, mem_cpat:$src2))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed - SSE 1 & 2 packed instructions class @@ -189,14 +210,16 @@ multiclass sse12_fp_packed opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm : PI; + itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -209,12 +232,14 @@ multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, IIC_DEFAULT, d>; + pat_rr, NoItinerary, d>, + Sched<[WriteVecLogic]>; def rm : PI; + pat_rm, NoItinerary, d>, + Sched<[WriteVecLogicLd, ReadAfterLd]>; } //===----------------------------------------------------------------------===// @@ -345,7 +370,7 @@ let Predicates = [HasAVX] in { // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", @@ -362,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; } @@ -379,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>; // at the rename stage without using any execution unit, so SET0PSY // and SET0PDY can be used for vector int instructions without penalty let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, Predicates = [HasAVX] in { + isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in { def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8f32 immAllZerosV))]>; } @@ -417,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in @@ -436,93 +461,70 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // in terms of a copy, and just mentioned, we don't use movss/movsd for copies. 
//===----------------------------------------------------------------------===// -class sse12_move_rr : - SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm, - [(set VR128:$dst, (vt (OpNode VR128:$src1, - (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>; +multiclass sse12_move_rr { + def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, RC:$src2), + !strconcat(base_opc, asm_opr), + [(set VR128:$dst, (vt (OpNode VR128:$src1, + (scalar_to_vector RC:$src2))))], + IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; -// Loading from memory automatically zeroing upper bits. -class sse12_move_rm : - SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; - -// AVX -def VMOVSSrr : sse12_move_rr, XS, VEX_4V, - VEX_LIG; -def VMOVSDrr : sse12_move_rr, XD, VEX_4V, - VEX_LIG; - -// For the disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { - def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XS, VEX_4V, VEX_LIG; - def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XD, VEX_4V, VEX_LIG; + // For the disassembler + let isCodeGenOnly = 1, hasSideEffects = 0 in + def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), + (ins VR128:$src1, RC:$src2), + !strconcat(base_opc, asm_opr), + [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; } -let canFoldAsLoad = 1, isReMaterializable = 1 in { - def VMOVSSrm : sse12_move_rm, XS, VEX, - VEX_LIG; - let AddedComplexity = 20 in - def VMOVSDrm : sse12_move_rm, XD, VEX, - VEX_LIG; -} +multiclass sse12_move { + // AVX + defm V#NAME : sse12_move_rr, + VEX_4V, VEX_LIG; -def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - XS, VEX, VEX_LIG; -def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - XD, VEX, VEX_LIG; + def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + VEX, VEX_LIG, Sched<[WriteStore]>; + // SSE1 & 2 + let Constraints = "$src1 = $dst" in { + defm NAME : sse12_move_rr; + } -// SSE1 & 2 -let Constraints = "$src1 = $dst" in { - def MOVSSrr : sse12_move_rr, XS; - def MOVSDrr : sse12_move_rr, XD; + def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + Sched<[WriteStore]>; +} - // For the disassembler - let isCodeGenOnly = 1, hasSideEffects = 0 in { - def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", [], - IIC_SSE_MOV_S_RR>, XS; - def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", [], - IIC_SSE_MOV_S_RR>, XD; - } +// Loading from memory automatically zeroing upper bits. 
+multiclass sse12_move_rm { + def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))], + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>; + def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))], + IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>; } +defm MOVSS : sse12_move, XS; +defm MOVSD : sse12_move, XD; + let canFoldAsLoad = 1, isReMaterializable = 1 in { - def MOVSSrm : sse12_move_rm, XS; + defm MOVSS : sse12_move_rm, XS; let AddedComplexity = 20 in - def MOVSDrm : sse12_move_rm, XD; + defm MOVSD : sse12_move_rm, XD; } -def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; -def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; - // Patterns let Predicates = [HasAVX] in { let AddedComplexity = 15 in { @@ -769,11 +771,13 @@ multiclass sse12_mov_packed opc, RegisterClass RC, bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in def rr : PI; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>, + Sched<[WriteMove]>; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : PI; + [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>, + Sched<[WriteLoad]>; } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, @@ -814,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, TB, OpSize; +let SchedRW = [WriteStore] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -846,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>, VEX, VEX_L; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], @@ -904,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), (VMOVUPDYmr addr:$dst, VR256:$src)>; +let SchedRW = [WriteStore] in { def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -920,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -1033,7 +1041,7 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), addr:$dst), - (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG 
VR256:$src,sub_xmm)))>; + (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), addr:$dst), (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; @@ -1068,7 +1076,7 @@ let Predicates = [UseSSE1] in { // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -1085,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { let isCodeGenOnly = 1 in { def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", @@ -1119,14 +1127,16 @@ multiclass sse12_mov_hilo_packed_baseopc, SDNode psnode, SDNode pdnode, [(set VR128:$dst, (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], - itin, SSEPackedSingle>, TB; + itin, SSEPackedSingle>, TB, + Sched<[WriteShuffleLd, ReadAfterLd]>; def PDrm : PI, TB, OpSize; + itin, SSEPackedDouble>, TB, OpSize, + Sched<[WriteShuffleLd, ReadAfterLd]>; } @@ -1147,6 +1157,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1167,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // Shuffle with VMOVLPS @@ -1246,6 +1258,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. 
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1270,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // VMOVHPS patterns @@ -1320,14 +1334,14 @@ let AddedComplexity = 20 in { [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -1335,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; } let Predicates = [HasAVX] in { @@ -1376,22 +1390,27 @@ def SSE_CVT_PD : OpndItins< IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_PS : OpndItins< IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_Scalar : OpndItins< IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_32 : OpndItins< IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_64 : OpndItins< IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SD2SI : OpndItins< IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM >; @@ -1401,10 +1420,10 @@ multiclass sse12_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI; + itins.rr>, Sched<[itins.Sched]>; def rm : SI; + itins.rm>, Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_p opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1412,10 +1431,10 @@ multiclass sse12_cvt_p opc, RegisterClass SrcRC, RegisterClass DstRC, OpndItins itins> { let neverHasSideEffects = 1 in { def rr : I; + [], itins.rr, d>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : I; + [], itins.rm, d>, Sched<[itins.Sched.Folded]>; } } @@ -1423,11 +1442,13 @@ multiclass sse12_vcvt_avx opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { let neverHasSideEffects = 1 in { def rr : SI; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2F]>; let mayLoad = 1 in def rm : SI; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2FLd, ReadAfterLd]>; } // neverHasSideEffects = 1 } @@ -1558,10 +1579,12 @@ multiclass sse12_cvt_sint opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI; + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm : SI; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, @@ -1573,14 +1596,14 @@ multiclass sse12_cvt_sint_3addr opc, 
RegisterClass SrcRC, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, @@ -1725,13 +1748,15 @@ let neverHasSideEffects = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG; + IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; + XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, @@ -1740,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))], - IIC_SSE_CVT_Scalar_RR>; + IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>; def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround (loadf64 addr:$src)))], IIC_SSE_CVT_Scalar_RM>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, @@ -1767,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert scalar single to scalar double @@ -1783,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, - XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG; + XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, 
MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; + XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f64 (fextend FR32:$src)), @@ -1808,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))], IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[UseSSE2]>; + Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (extloadf32 addr:$src))], IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; // extload f32 -> f64. This matches load+fextend because we have a hack in // the isel (PreprocessForFPConvert) that can introduce loads after dag @@ -1830,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, 
Sched<[WriteCvtF2ILd]>; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; // Convert Packed Double FP to Packed DW Integers @@ -1891,7 +1926,7 @@ let Predicates = [HasAVX] in { def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, - VEX; + VEX, Sched<[WriteCvtF2I]>; // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", @@ -1899,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX; + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX, + Sched<[WriteCvtF2ILd]>; // YMM only def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L; + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L, + Sched<[WriteCvtF2I]>; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>, - VEX, VEX_L; + VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; } @@ -1919,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix @@ -1931,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, 
+ Sched<[WriteCvtF2ILd]>; def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), @@ -2006,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. @@ -2019,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; @@ -2045,12 +2083,13 @@ let Predicates = [HasAVX] in { def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, + Sched<[WriteCvtF2ILd]>; // Convert packed single to packed double let Predicates = [HasAVX] in { @@ -2058,32 +2097,32 @@ let Predicates = [HasAVX] in { def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX; + IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX; + IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set 
VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; } let Predicates = [UseSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB; + IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB; + IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>; } // Convert Packed DW Integers to Packed Double FP @@ -2091,30 +2130,33 @@ let Predicates = [HasAVX] in { let neverHasSideEffects = 1, mayLoad = 1 in def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", - []>, VEX; + []>, VEX, Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX; + (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX, + Sched<[WriteCvtI2F]>; def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtdq2_pd_256 - (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L; + (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L, + Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L; + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L, + Sched<[WriteCvtI2F]>; } let neverHasSideEffects = 1, mayLoad = 1 in def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>; def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>; // AVX 256-bit register conversion intrinsics let Predicates = [HasAVX] in { @@ -2131,7 +2173,7 @@ let Predicates = [HasAVX] in { def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>; // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", @@ -2140,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2psx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, 
VEX_L, Sched<[WriteCvtF2FLd]>; def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>; // AVX 256-bit register conversion intrinsics @@ -2217,22 +2259,24 @@ multiclass sse12_cmp_scalar; + itins.rr>, Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), (ld_frag addr:$src2), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RR>; + IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RM>; + IIC_SSE_ALU_F32S_RM>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -2265,12 +2309,14 @@ multiclass sse12_cmp_scalar_int; + itins.rr>, + Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, (load addr:$src), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Aliases to match intrinsics which expect XMM operand(s). 
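// Illustrative sketch (not part of this patch; EX_ITINS, EXADD* and the 0x58
// opcode are hypothetical) of the idiom every hunk in this patch repeats: an
// OpndItins bundle now carries a default X86FoldableSchedWrite, the register
// form attaches it via Sched<[...]>, and the load-folding form uses .Folded
// plus ReadAfterLd so the register operand may be read after the load issues:
//
//   let Sched = WriteFAdd in
//   def EX_ITINS : OpndItins<IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM>;
//
//   def EXADDrr : SI<0x58, MRMSrcReg, (outs FR32:$dst),
//                    (ins FR32:$src1, FR32:$src2),
//                    "exadd\t{$src2, $dst|$dst, $src2}", [], EX_ITINS.rr>,
//                 Sched<[EX_ITINS.Sched]>;
//   def EXADDrm : SI<0x58, MRMSrcMem, (outs FR32:$dst),
//                    (ins FR32:$src1, f32mem:$src2),
//                    "exadd\t{$src2, $dst|$dst, $src2}", [], EX_ITINS.rm>,
//                 Sched<[EX_ITINS.Sched.Folded, ReadAfterLd]>;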
@@ -2296,63 +2342,62 @@ let Constraints = "$src1 = $dst" in { // sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, ValueType vt, X86MemOperand x86memop, - PatFrag ld_frag, string OpcodeStr, Domain d> { - def rr: PI { + def rr: SI; - def rm: PI, + Sched<[WriteFAdd]>; + def rm: SI; + IIC_SSE_COMIS_RM>, + Sched<[WriteFAddLd, ReadAfterLd]>; } let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG; + "ucomiss">, TB, VEX, VEX_LIG; defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", SSEPackedDouble>, TB, OpSize, VEX, - VEX_LIG; + "ucomisd">, TB, OpSize, VEX, VEX_LIG; let Pattern = [] in { defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, - "comiss", SSEPackedSingle>, TB, VEX, - VEX_LIG; + "comiss">, TB, VEX, VEX_LIG; defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, - "comisd", SSEPackedDouble>, TB, OpSize, VEX, - VEX_LIG; + "comisd">, TB, OpSize, VEX, VEX_LIG; } defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss", SSEPackedSingle>, TB, VEX; + load, "ucomiss">, TB, VEX; defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX; + load, "ucomisd">, TB, OpSize, VEX; defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, - load, "comiss", SSEPackedSingle>, TB, VEX; + load, "comiss">, TB, VEX; defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, - load, "comisd", SSEPackedDouble>, TB, OpSize, VEX; + load, "comisd">, TB, OpSize, VEX; defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", SSEPackedSingle>, TB; + "ucomiss">, TB; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", SSEPackedDouble>, TB, OpSize; + "ucomisd">, TB, OpSize; let Pattern = [] in { defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, - "comiss", SSEPackedSingle>, TB; + "comiss">, TB; defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, - "comisd", SSEPackedDouble>, TB, OpSize; + "comisd">, TB, OpSize; } defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss", SSEPackedSingle>, TB; + load, "ucomiss">, TB; defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd", SSEPackedDouble>, TB, OpSize; + load, "ucomisd">, TB, OpSize; defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, - "comiss", SSEPackedSingle>, TB; + "comiss">, TB; defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, - "comisd", SSEPackedDouble>, TB, OpSize; + "comisd">, TB, OpSize; } // Defs = [EFLAGS] // sse12_cmp_packed - sse 1 & 2 compare packed instructions @@ -2362,20 +2407,23 @@ multiclass sse12_cmp_packed; + IIC_SSE_CMPP_RR, d>, + Sched<[WriteFAdd]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>; + IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. 
let neverHasSideEffects = 1 in { def rri_alt : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RR, d>; + asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>; + asm_alt, [], IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } } @@ -2451,12 +2499,14 @@ multiclass sse12_shuffle; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffle]>; } defm VSHUFPS : sse12_shuffle opc, SDNode OpNode, ValueType vt, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>; def rm : PI; + IIC_SSE_UNPCK, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2637,10 +2688,11 @@ multiclass sse12_extr_sign_mask { def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>; + [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, + Sched<[WriteVecLogic]>; def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], - IIC_SSE_MOVMSK, d>, REX_W; + IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>; } let Predicates = [HasAVX] in { @@ -2668,18 +2720,18 @@ let Predicates = [HasAVX] in { // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX; + SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX; + OpSize, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX, VEX_L; + SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>; def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX, VEX_L; + OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>; } defm MOVMSKPS : sse12_extr_sign_mask opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -2991,14 +3045,25 @@ let isCodeGenOnly = 1 in { /// /// And, we have a special variant form for a full-vector intrinsic form. 
-def SSE_SQRTP : OpndItins< - IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM +let Sched = WriteFSqrt in { +def SSE_SQRTPS : OpndItins< + IIC_SSE_SQRTPS_RR, IIC_SSE_SQRTPS_RM >; -def SSE_SQRTS : OpndItins< - IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM +def SSE_SQRTSS : OpndItins< + IIC_SSE_SQRTSS_RR, IIC_SSE_SQRTSS_RM >; +def SSE_SQRTPD : OpndItins< + IIC_SSE_SQRTPD_RR, IIC_SSE_SQRTPD_RM +>; + +def SSE_SQRTSD : OpndItins< + IIC_SSE_SQRTSD_RR, IIC_SSE_SQRTSD_RM +>; +} + +let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM >; @@ -3006,6 +3071,7 @@ def SSE_RCPP : OpndItins< def SSE_RCPS : OpndItins< IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM >; +} /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s opc, string OpcodeStr, @@ -3013,26 +3079,28 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3040,13 +3108,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; def SSr_Int : SSI; + [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SSm_Int : SSI; + [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. @@ -3055,26 +3125,28 @@ multiclass sse1_fp_unop_rw opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SSr : SSI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. 
It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3082,17 +3154,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; let Constraints = "$src1 = $dst" in { def SSr_Int : SSI; + [], itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1, hasSideEffects = 0 in def SSm_Int : SSI; + [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -3101,33 +3173,35 @@ multiclass sse1_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PSr : PSI, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm : PSI, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr : PSI, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm : PSI, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr : PSI; + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PSm : PSI; + [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. @@ -3136,36 +3210,36 @@ multiclass sse1_fp_unop_p_int opc, string OpcodeStr, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PSr_Int : PSI, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm_Int : PSI, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr_Int : PSI, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm_Int : PSI, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr_Int : PSI; + itins.rr>, Sched<[itins.Sched]>; def PSm_Int : PSI; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_s - SSE2 unops in scalar form. @@ -3174,37 +3248,42 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, let Predicates = [HasAVX], hasSideEffects = 0 in { def V#NAME#SDr : SDI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SDm : SDI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SDm_Int : SDI, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SDr : SDI; + [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>, + Sched<[itins.Sched]>; // See the comments in sse1_fp_unop_s for why this is OptForSize. def SDm : I, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>; def SDr_Int : SDI; + [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SDm_Int : SDI; + [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -3212,49 +3291,51 @@ multiclass sse2_fp_unop_p opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { let Predicates = [HasAVX] in { def V#NAME#PDr : PDI, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PDm : PDI, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PDYr : PDI, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PDYm : PDI, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PDr : PDI; + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PDm : PDI; + [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } // Square root. 
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss, - SSE_SQRTS>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>, + SSE_SQRTSS>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd, - SSE_SQRTS>, - sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTP>; + SSE_SQRTSD>, + sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>, +defm RSQRT : sse1_fp_unop_rw<0x52, "rsqrt", X86frsqrt, SSE_SQRTSS>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTPS>, sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, - int_x86_avx_rsqrt_ps_256, SSE_SQRTP>; + int_x86_avx_rsqrt_ps_256, SSE_SQRTPS>; defm RCP : sse1_fp_unop_rw<0x53, "rcp", X86frcp, SSE_RCPS>, sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, @@ -3329,52 +3410,48 @@ let Predicates = [UseSSE1] in { //===----------------------------------------------------------------------===// let AddedComplexity = 400 in { // Prefer non-temporal versions - def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2i64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; - - def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - let ExeDomain = SSEPackedInt in - def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4i64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; -} +let SchedRW = [WriteStore] in { +def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; +def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +let ExeDomain = SSEPackedInt in +def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2i64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + 
[(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +let ExeDomain = SSEPackedInt in +def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4i64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; -let AddedComplexity = 400 in { // Prefer non-temporal versions def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], @@ -3390,9 +3467,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVNT>; -def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; - // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", @@ -3404,14 +3478,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), [(nontemporalstore (i64 GR64:$src), addr:$dst)], IIC_SSE_MOVNT>, TB, Requires<[HasSSE2]>; -} +} // SchedRW = [WriteStore] + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; +} // AddedComplexity //===----------------------------------------------------------------------===// // SSE 1 & 2 - Prefetch and memory fence //===----------------------------------------------------------------------===// // Prefetch intrinsic. -let Predicates = [HasSSE1] in { +let Predicates = [HasSSE1], SchedRW = [WriteLoad] in { def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))], IIC_SSE_PREFETCH>, TB; @@ -3426,6 +3507,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), IIC_SSE_PREFETCH>, TB; } +// FIXME: How should these memory instructions be modeled? 
+let SchedRW = [WriteLoad] in { // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)], @@ -3445,6 +3528,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>, TB, Requires<[HasSSE2]>; +} // SchedRW def : Pat<(X86SFence), (SFENCE)>; def : Pat<(X86LFence), (LFENCE)>; @@ -3456,17 +3540,17 @@ def : Pat<(X86MFence), (MFENCE)>; def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)], - IIC_SSE_LDMXCSR>, VEX; + IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>; def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)], - IIC_SSE_STMXCSR>, VEX; + IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>; def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)], - IIC_SSE_LDMXCSR>; + IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>; def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)], - IIC_SSE_STMXCSR>; + IIC_SSE_STMXCSR>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // SSE2 - Move Aligned/Unaligned Packed Integer Instructions @@ -3474,7 +3558,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -3490,7 +3574,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), } // For Disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, @@ -3508,7 +3592,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, VEX; @@ -3525,7 +3609,7 @@ let Predicates = [HasAVX] in { } } -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, @@ -3544,6 +3628,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), } } +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -3562,9 +3647,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } +} // SchedRW let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set 
VR128:$dst, (alignedloadv2i64 addr:$src))*/], @@ -3576,7 +3662,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), XS, Requires<[UseSSE2]>; } -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], @@ -3604,6 +3690,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), // SSE2 - Packed Integer Arithmetic Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecIMul in def SSE_PMADD : OpndItins< IIC_SSE_PMADD, IIC_SSE_PMADD >; @@ -3622,14 +3709,15 @@ multiclass PDI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass PDI_binop_all_int opc, string OpcodeStr, Intrinsic IntId128, @@ -3663,20 +3751,22 @@ multiclass PDI_binop_rmi opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))], - itins.rr>; + itins.rr>, Sched<[WriteVecShift]>; def rm : PDI; + (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>, + Sched<[WriteVecShiftLd, ReadAfterLd]>; def ri : PDIi8; + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>, + Sched<[WriteVecShift]>; } /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types @@ -3691,14 +3781,16 @@ multiclass PDI_binop_rm2 opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>; + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + Sched<[itins.Sched]>; def rm : PDI; + (bitconvert (memop_frag addr:$src2)))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -3803,7 +3895,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def VPSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3849,7 +3941,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 256-bit logical shifts. def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), @@ -3895,7 +3987,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P>; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. 
def PSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3986,35 +4078,37 @@ multiclass sse2_pshuffle, VEX; + IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX2] in { def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, VEX_L; + IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), - !strconcat(!strconcat("v", OpcodeStr), + !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L, + Sched<[WriteShuffleLd]>; } let Predicates = [UseSSE2] in { @@ -4024,14 +4118,15 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>; + IIC_SSE_PSHUF>, Sched<[WriteShuffle]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, + Sched<[WriteShuffleLd]>; } } } // ExeDomain = SSEPackedInt @@ -4067,7 +4162,7 @@ multiclass sse2_unpack opc, string OpcodeStr, ValueType vt, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; def rm : PDI opc, string OpcodeStr, ValueType vt, [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 addr:$src2))))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse2_unpack_y opc, string OpcodeStr, ValueType vt, @@ -4084,12 +4180,14 @@ multiclass sse2_unpack_y opc, string OpcodeStr, ValueType vt, def Yrr : PDI; + [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, + Sched<[WriteShuffle]>; def Yrm : PDI; + (bc_frag (memopv4i64 addr:$src2))))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4166,7 +4264,8 @@ multiclass sse2_pinsrw { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>; + (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffle]>; def rmi : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), @@ -4175,7 +4274,8 @@ multiclass sse2_pinsrw { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), - imm:$src3))], 
IIC_SSE_PINSRW>; + imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } // Extract @@ -4184,12 +4284,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX; + imm:$src2))]>, TB, OpSize, VEX, + Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))], IIC_SSE_PEXTRW>; + imm:$src2))], IIC_SSE_PEXTRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert let Predicates = [HasAVX] in { @@ -4197,7 +4299,7 @@ let Predicates = [HasAVX] in { def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, TB, OpSize, VEX_4V; + []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst" in @@ -4209,7 +4311,7 @@ let Constraints = "$src1 = $dst" in // SSE2 - Packed Mask Creation //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -4237,7 +4339,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), // SSE2 - Conditional Store //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), @@ -4272,107 +4374,112 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), //===---------------------------------------------------------------------===// // Move Int Doubleword to Packed Double Int // -def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), +def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - VEX; -def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), + VEX, Sched<[WriteMove]>; +def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, - VEX; -def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + VEX, Sched<[WriteLoad]>; +def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, VEX; -def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; +def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; -def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), +def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector GR32:$src)))], 
IIC_SSE_MOVDQ>; -def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + Sched<[WriteMove]>; +def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>; -def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; +def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>; -def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; +def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; //===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar // -def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), +def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; -def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), +def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], IIC_SSE_MOVDQ>, - VEX; -def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), + VEX, Sched<[WriteLoad]>; +def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; -def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), +def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int // -def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), +def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; -def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, + Sched<[WriteMove]>; +def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - VEX; -def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), + VEX, Sched<[WriteLoad]>; +def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>; -def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, + Sched<[WriteMove]>; +def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), 
"movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int first element to Doubleword Int // -def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), - "vmov{d|q}\t{$src, $dst|$dst, $src}", +let SchedRW = [WriteMove] in { +def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, - TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>; + VEX; -def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), +def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; +} //SchedRW //===---------------------------------------------------------------------===// // Bitcast FR64 <-> GR64 @@ -4381,59 +4488,60 @@ let Predicates = [HasAVX] in def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, - VEX; -def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), + VEX, Sched<[WriteLoad]>; +def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVDQ>, VEX; -def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; +def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], - IIC_SSE_MOVDQ>; -def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; +def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVD_ToGP>; -def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; +def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Move Scalar Single to Double Int // -def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), +def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>, VEX; -def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), + IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>; +def VMOVSS2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; -def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs 
GR32:$dst), (ins FR32:$src), + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; +def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>; -def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; +def MOVSS2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Patterns and instructions to describe movd/movq to XMM register zero-extends // +let SchedRW = [WriteMove] in { let AddedComplexity = 15 in { -def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), +def VMOVZDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))))], IIC_SSE_MOVDQ>, VEX; -def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), +def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))))], @@ -4441,32 +4549,33 @@ def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), VEX, VEX_W; } let AddedComplexity = 15 in { -def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), +def MOVZDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))))], IIC_SSE_MOVDQ>; -def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), +def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))))], IIC_SSE_MOVDQ>; } +} // SchedRW -let AddedComplexity = 20 in { -def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), +let AddedComplexity = 20, SchedRW = [WriteLoad] in { +def VMOVZDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], IIC_SSE_MOVDQ>, VEX; -def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), +def MOVZDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], IIC_SSE_MOVDQ>; -} +} // AddedComplexity, SchedRW let Predicates = [HasAVX] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. 
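Two equivalent spellings of the scheduling annotation alternate through these hunks: a block-level "let SchedRW = [...] in" region for groups of definitions, and a per-instruction Sched<[...]> mixin appended to a single def's trailing class list. A sketch of both forms on hypothetical definitions, neither of which is part of the patch:

    // Block form: every def in the region gets the list.
    let SchedRW = [WriteMove] in
    def HMOVrr : I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
                   "hmov\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>;
    // Mixin form: the same effect, attached to one def.
    def HMOVrm : I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                   "hmov\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
                 Sched<[WriteLoad]>;

The two-address arithmetic multiclasses use a third spelling, Sched<[itins.Sched.Folded, ReadAfterLd]>, for their load-folding forms: the Folded variant of the write accounts for the extra load, and ReadAfterLd gives the register source some slack, since it is not read until the loaded operand is available.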
@@ -4515,6 +4624,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}", //===---------------------------------------------------------------------===// // Move Quadword Int to Packed Quadword Int // + +let SchedRW = [WriteLoad] in { def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4526,31 +4637,35 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (scalar_to_vector (loadi64 addr:$src))))], IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix +} // SchedRW //===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int // -def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), +let SchedRW = [WriteStore] in { +def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, VEX; -def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), +def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>; +} // SchedRW //===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. // -def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), +def VMOVLQ128mr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; -def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX, + Sched<[WriteStore]>; +def MOVLQ128mr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; let AddedComplexity = 20 in def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4559,7 +4674,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, VEX, Requires<[HasAVX]>; + XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>; let AddedComplexity = 20 in def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4568,7 +4683,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, Requires<[UseSSE2]>; + XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>; let Predicates = [HasAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), @@ -4598,6 +4713,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)), // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. 
// +let SchedRW = [WriteVecLogic] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4610,7 +4726,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, XS, Requires<[UseSSE2]>; +} // SchedRW +let SchedRW = [WriteVecLogicLd] in { let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4626,6 +4744,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; } +} // SchedRW let AddedComplexity = 20 in { let Predicates = [HasAVX] in { @@ -4643,26 +4762,30 @@ let AddedComplexity = 20 in { } // Instructions to match in the assembler -def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), +let SchedRW = [WriteMove] in { +def VMOVQs64rr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; -def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), +def VMOVQd64rr : VS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; // Recognize "movd" with GR64 destination, but encode as a "movq" -def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), +def VMOVQd64rr_alt : VS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; +} // SchedRW // Instructions for the disassembler // xr = XMM register // xm = mem64 +let SchedRW = [WriteMove] in { let Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; +} // SchedRW //===---------------------------------------------------------------------===// // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP @@ -4673,11 +4796,11 @@ multiclass sse3_replicate_sfp op, SDNode OpNode, string OpcodeStr, def rr : S3SI; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3SI; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4733,25 +4856,27 @@ multiclass sse3_replicate_dfp { let neverHasSideEffects = 1 in def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [], IIC_SSE_MOV_LH>; + [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (X86Movddup (scalar_to_vector (loadf64 addr:$src)))))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } // FIXME: Merge with above classe when there're patterns for the ymm version multiclass sse3_replicate_dfp_y { def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>; + [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>, + Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (X86Movddup - (scalar_to_vector (loadf64 
addr:$src)))))]>; + (scalar_to_vector (loadf64 addr:$src)))))]>, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4799,6 +4924,7 @@ let Predicates = [UseSSE3] in { // SSE3 - Move Unaligned Integer //===---------------------------------------------------------------------===// +let SchedRW = [WriteLoad] in { let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", @@ -4812,6 +4938,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))], IIC_SSE_LDDQU>; +} //===---------------------------------------------------------------------===// // SSE3 - Arithmetic @@ -4825,13 +4952,15 @@ multiclass sse3_addsub; + [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : I<0xD0, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4868,14 +4997,15 @@ multiclass S3D_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3DI; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { @@ -4883,14 +5013,15 @@ multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3I; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4939,7 +5070,7 @@ multiclass SS3I_unop_rm_int opc, string OpcodeStr, (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm128 : SS38I opc, string OpcodeStr, [(set VR128:$dst, (IntId128 (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, - OpSize; + OpSize, Sched<[WriteVecALULd]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. @@ -4957,16 +5088,27 @@ multiclass SS3I_unop_rm_int_y opc, string OpcodeStr, (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 VR256:$src))]>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm256 : SS38I, OpSize; + (bitconvert (memopv4i64 addr:$src))))]>, OpSize, + Sched<[WriteVecALULd]>; } +// Helper fragments to match sext vXi1 to vXiY. 
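Each fragment below stands for the per-element sign mask s = x >>s (w-1), all-ones exactly when the element is negative; for i8, where SSE has no arithmetic byte shift, pcmpgt 0, x yields the same mask. Naming these matters because of the branchless absolute-value identity |x| = (x + s) ^ s, which the patterns added further down recognize and select to pabsb, pabsw and pabsd. Worked through at w = 16 for x = -5: s = 0xFFFF, x + s = 0xFFFA (that is, -6), and 0xFFFA ^ 0xFFFF = 0x0005.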
+def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), + VR128:$src))>; +def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>; +def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>; +def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), + VR256:$src))>; +def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>; +def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>; + let Predicates = [HasAVX] in { defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", int_x86_ssse3_pabs_b_128>, VEX; @@ -4974,6 +5116,19 @@ let Predicates = [HasAVX] in { int_x86_ssse3_pabs_w_128>, VEX; defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", int_x86_ssse3_pabs_d_128>, VEX; + + def : Pat<(xor + (bc_v2i64 (v16i1sextv16i8)), + (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), + (VPABSBrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v8i1sextv8i16)), + (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), + (VPABSWrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v4i1sextv4i32)), + (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), + (VPABSDrr128 VR128:$src)>; } let Predicates = [HasAVX2] in { @@ -4983,6 +5138,19 @@ let Predicates = [HasAVX2] in { int_x86_avx2_pabs_w>, VEX, VEX_L; defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", int_x86_avx2_pabs_d>, VEX, VEX_L; + + def : Pat<(xor + (bc_v4i64 (v32i1sextv32i8)), + (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), + (VPABSBrr256 VR256:$src)>; + def : Pat<(xor + (bc_v4i64 (v16i1sextv16i16)), + (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), + (VPABSWrr256 VR256:$src)>; + def : Pat<(xor + (bc_v4i64 (v8i1sextv8i32)), + (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), + (VPABSDrr256 VR256:$src)>; } defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", @@ -4992,10 +5160,26 @@ defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", int_x86_ssse3_pabs_d_128>; +let Predicates = [HasSSSE3] in { + def : Pat<(xor + (bc_v2i64 (v16i1sextv16i8)), + (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), + (PABSBrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v8i1sextv8i16)), + (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), + (PABSWrr128 VR128:$src)>; + def : Pat<(xor + (bc_v2i64 (v4i1sextv4i32)), + (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), + (PABSDrr128 VR128:$src)>; +} + //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecALU in { def SSE_PHADDSUBD : OpndItins< IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM >; @@ -5005,12 +5189,16 @@ def SSE_PHADDSUBSW : OpndItins< def SSE_PHADDSUBW : OpndItins< IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM >; +} +let Sched = WriteShuffle in def SSE_PSHUFB : OpndItins< IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM >; +let Sched = WriteVecALU in def SSE_PSIGN : OpndItins< IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM >; +let Sched = WriteVecIMul in def SSE_PMULHRSW : OpndItins< IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW >; @@ -5027,7 +5215,7 @@ multiclass SS3I_binop_rm opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm : SS38I opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, 
$dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, - (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize; + (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. @@ -5049,7 +5238,7 @@ multiclass SS3I_binop_rm_int opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm128 : SS38I opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass SS3I_binop_rm_int_y opc, string OpcodeStr, @@ -5191,7 +5381,7 @@ defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// -multiclass ssse3_palign { +multiclass ssse3_palignr { let neverHasSideEffects = 1 in { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), @@ -5199,7 +5389,7 @@ multiclass ssse3_palign { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5207,63 +5397,63 @@ multiclass ssse3_palign { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } -multiclass ssse3_palign_y { +multiclass ssse3_palignr_y { let neverHasSideEffects = 1 in { def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } let Predicates = [HasAVX] in - defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; + defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V; let Predicates = [HasAVX2] in - defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L; + defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L; let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in - defm PALIGN : ssse3_palign<"palignr">; + defm PALIGN : ssse3_palignr<"palignr">; let Predicates = [HasAVX2] in { -def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v16i16 (X86PAlign VR256:$src1, 
VR256:$src2, (i8 imm:$imm))), +def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; -def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; } let Predicates = [HasAVX] in { -def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } let Predicates = [UseSSSE3] in { -def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; -def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), +def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; } @@ -5271,6 +5461,7 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))), // SSSE3 - Thread synchronization //===---------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, @@ -5284,13 +5475,14 @@ let Uses = [ECX, EAX] in def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, TB, Requires<[HasSSE3]>; +} // SchedRW -def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; -def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; +def : InstAlias<"mwait\t{%eax, %ecx|ECX, EAX}", (MWAITrr)>, Requires<[In32BitMode]>; +def : InstAlias<"mwait\t{%rax, %rcx|RCX, RAX}", (MWAITrr)>, Requires<[In64BitMode]>; -def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>, +def : InstAlias<"monitor\t{%eax, %ecx, %edx|EDX, ECX, EAX}", (MONITORrrr)>, Requires<[In32BitMode]>; -def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>, +def : InstAlias<"monitor\t{%rax, %rcx, %rdx|RDX, RCX, RAX}", (MONITORrrr)>, Requires<[In64BitMode]>; //===----------------------------------------------------------------------===// @@ -6703,7 +6895,7 @@ multiclass SS41I_quaternary_int_avx opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], - 
IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; def rm : Ii8 opc, string OpcodeStr, [(set RC:$dst, (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)), RC:$src3))], - IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; } let Predicates = [HasAVX] in { @@ -6824,17 +7016,17 @@ defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem, int_x86_sse41_pblendvb>; // Aliases with the implicit xmm0 argument -def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (BLENDVPDrr0 VR128:$dst, VR128:$src2)>; -def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>; -def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (BLENDVPSrr0 VR128:$dst, VR128:$src2)>; -def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>; -def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (PBLENDVBrr0 VR128:$dst, VR128:$src2)>; -def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}", +def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, XMM0}", (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>; let Predicates = [UseSSE41] in { @@ -8180,7 +8372,9 @@ multiclass avx2_gather opc, string OpcodeStr, RegisterClass RC256, []>, VEX_4VOp3, VEX_L; } -let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in { +let mayLoad = 1, Constraints + = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb" + in { defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W; defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W; defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
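The rewritten constraint string above does two jobs at once: it keeps $dst tied to $src1 and $mask_wb tied to $mask (a gather both reads and writes those registers), and it marks both results @earlyclobber so the register allocator never assigns them the same register as any other input, such as the vector index inside the memory operand. This matches the hardware rule that a VGATHER's destination, index and mask registers must be pairwise distinct. A reduced sketch of the syntax on a hypothetical single-result definition, not part of the patch:

    // Hypothetical: $dst stays tied to $src1, while @earlyclobber keeps it
    // from being coalesced with any other input operand.
    let Constraints = "@earlyclobber $dst, $src1 = $dst", mayLoad = 1 in
    def HGATHER : I<0x92, MRMSrcMem, (outs VR256:$dst),
                    (ins VR256:$src1, vx32mem:$src2),
                    "hgather\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;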