X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86SchedHaswell.td;h=73a32304302aaa405e6f2c8d886cbef6ca4cd445;hb=97a3cd4383a350b213cb285576dc015d46da8b3a;hp=38833de7c25013f289e9f8c63f0c81a7811cb426;hpb=0c5071f56198367ea2b29f8e34ddb9ecd1773985;p=oota-llvm.git diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 38833de7c25..73a32304302 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -53,12 +53,12 @@ def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>; def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>; def HWPort04 : ProcResGroup<[HWPort0, HWPort4]>; def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>; -def HWPort06 : ProcResGroup<[HWPort0, HWPort6]>; +def HWPort06 : ProcResGroup<[HWPort0, HWPort6]>; def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>; def HWPort16 : ProcResGroup<[HWPort1, HWPort6]>; -def HWPort56: ProcResGroup<[HWPort5, HWPort6]>; +def HWPort56 : ProcResGroup<[HWPort5, HWPort6]>; def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>; -def HWPort056: ProcResGroup<[HWPort0, HWPort5, HWPort6]>; +def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>; def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>; // 60 Entry Unified Scheduler @@ -129,6 +129,7 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // 10-14 cycles. defm : HWWriteResPair; +defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -269,16 +270,57 @@ def : WriteRes; //================ Exceptions ================// //-- Specific Scheduling Models --// + +// Starting with P0. def WriteP0 : SchedWriteRes<[HWPort0]>; -def WriteP1 : SchedWriteRes<[HWPort1]>; -def WriteP1_P23 : SchedWriteRes<[HWPort1, HWPort23]> { + +def WriteP0_P1_Lat4 : SchedWriteRes<[HWPort0, HWPort1]> { + let Latency = 4; let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> { - let Latency = 3; + +def WriteP0_P1_Lat4Ld : SchedWriteRes<[HWPort0, HWPort1, HWPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; } -def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> { - let Latency = 7; + +def WriteP01 : SchedWriteRes<[HWPort01]>; + +def Write2P01 : SchedWriteRes<[HWPort01]> { + let NumMicroOps = 2; +} +def Write3P01 : SchedWriteRes<[HWPort01]> { + let NumMicroOps = 3; +} + +def WriteP015 : SchedWriteRes<[HWPort015]>; + +def WriteP01_P5 : SchedWriteRes<[HWPort01, HWPort5]> { + let NumMicroOps = 2; +} +def WriteP06 : SchedWriteRes<[HWPort06]>; + +def Write2P06 : SchedWriteRes<[HWPort06]> { + let Latency = 1; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} + +def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} + +def WriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> { + let NumMicroOps = 2; +} + +def Write2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> { + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; } def Write2P0156_Lat2 : SchedWriteRes<[HWPort0156]> { @@ -295,31 +337,32 @@ def Write5P0156 : SchedWriteRes<[HWPort0156]> { let ResourceCycles = [5]; } -def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> { +def WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { let Latency = 1; - let ResourceCycles = [2, 1]; + let ResourceCycles = [1, 2, 1]; } -def WriteP01 : SchedWriteRes<[HWPort01]>; - -def Write2P01 : SchedWriteRes<[HWPort01]> { - let NumMicroOps = 2; +def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { + let Latency = 1; + let ResourceCycles = [2, 2, 1]; } -def Write3P01 : SchedWriteRes<[HWPort01]> { - let NumMicroOps = 3; + +def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { + let Latency = 1; + let ResourceCycles = [3, 2, 1]; } -def WriteP015 : SchedWriteRes<[HWPort015]>; +// Starting with P1. +def WriteP1 : SchedWriteRes<[HWPort1]>; -def WriteP01_P5 : SchedWriteRes<[HWPort01, HWPort5]> { +def WriteP1_P23 : SchedWriteRes<[HWPort1, HWPort23]> { let NumMicroOps = 2; } -def WriteP06 : SchedWriteRes<[HWPort06]>; - -def Write2P06 : SchedWriteRes<[HWPort06]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [2]; +def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> { + let Latency = 3; +} +def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> { + let Latency = 7; } def Write2P1 : SchedWriteRes<[HWPort1]> { @@ -335,36 +378,37 @@ def WriteP15Ld : SchedWriteRes<[HWPort15, HWPort23]> { let Latency = 4; } -def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [3]; -} - -def WriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> { +def WriteP1_P5_Lat4 : SchedWriteRes<[HWPort1, HWPort5]> { + let Latency = 4; let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { - let Latency = 1; - let ResourceCycles = [1, 2, 1]; +def WriteP1_P5_Lat4Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; } -def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { - let Latency = 1; - let ResourceCycles = [2, 2, 1]; +def WriteP1_P5_Lat6 : SchedWriteRes<[HWPort1, HWPort5]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def Write2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> { +def WriteP1_P5_Lat6Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { + let Latency = 10; let NumMicroOps = 3; - let ResourceCycles = [2, 1]; + let ResourceCycles = [1, 1, 1]; } -def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { +// Starting with P2. +def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> { let Latency = 1; - let ResourceCycles = [3, 2, 1]; + let ResourceCycles = [2, 1]; } +// Starting with P5. def WriteP5 : SchedWriteRes<[HWPort5]>; def WriteP5Ld : SchedWriteRes<[HWPort5, HWPort23]> { let Latency = 5; @@ -1536,4 +1580,568 @@ def : InstRW<[WritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)(Y?)rr")>; // PSLL,PSRL DQ. def : InstRW<[WriteP5], (instregex "(V?)PS(R|L)LDQ(Y?)ri")>; +//-- Other --// + +// EMMS. +def WriteEMMS : SchedWriteRes<[]> { + let Latency = 13; + let NumMicroOps = 31; +} +def : InstRW<[WriteEMMS], (instregex "MMX_EMMS")>; + +//=== Floating Point XMM and YMM Instructions ===// +//-- Move instructions --// + +// MOVMSKP S/D. +// r32 <- x. +def WriteMOVMSKPr : SchedWriteRes<[HWPort0]> { + let Latency = 3; +} +def : InstRW<[WriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)rr")>; + +// r32 <- y. +def WriteVMOVMSKPYr : SchedWriteRes<[HWPort0]> { + let Latency = 2; +} +def : InstRW<[WriteVMOVMSKPYr], (instregex "VMOVMSKP(S|D)Yrr")>; + +// VPERM2F128. +def : InstRW<[WriteFShuffle256], (instregex "VPERM2F128rr")>; +def : InstRW<[WriteFShuffle256Ld, ReadAfterLd], (instregex "VPERM2F128rm")>; + +// BLENDVP S/D. +def : InstRW<[WriteFVarBlend], (instregex "BLENDVP(S|D)rr0")>; +def : InstRW<[WriteFVarBlendLd, ReadAfterLd], (instregex "BLENDVP(S|D)rm0")>; + +// VBROADCASTF128. +def : InstRW<[WriteLoad], (instregex "VBROADCASTF128")>; + +// EXTRACTPS. +// r32,x,i. +def WriteEXTRACTPSr : SchedWriteRes<[HWPort0, HWPort5]> { + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>; + +// m32,x,i. +def WriteEXTRACTPSm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; +} +def : InstRW<[WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>; + +// VEXTRACTF128. +// x,y,i. +def : InstRW<[WriteFShuffle256], (instregex "VEXTRACTF128rr")>; + +// m128,y,i. +def WriteVEXTRACTF128m : SchedWriteRes<[HWPort23, HWPort4]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteVEXTRACTF128m], (instregex "VEXTRACTF128mr")>; + +// VINSERTF128. +// y,y,x,i. +def : InstRW<[WriteFShuffle256], (instregex "VINSERTF128rr")>; + +// y,y,m128,i. +def WriteVINSERTF128m : SchedWriteRes<[HWPort015, HWPort23]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteFShuffle256, ReadAfterLd], (instregex "VINSERTF128rm")>; + +// VMASKMOVP S/D. +// v,v,m. +def WriteVMASKMOVPrm : SchedWriteRes<[HWPort5, HWPort23]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteVMASKMOVPrm], (instregex "VMASKMOVP(S|D)(Y?)rm")>; + +// m128,x,x. +def WriteVMASKMOVPmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [1, 1, 1, 1]; +} +def : InstRW<[WriteVMASKMOVPmr], (instregex "VMASKMOVP(S|D)mr")>; + +// m256,y,y. +def WriteVMASKMOVPYmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [1, 1, 1, 1]; +} +def : InstRW<[WriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>; + +// VGATHERDPS. +// x. +def WriteVGATHERDPS128 : SchedWriteRes<[]> { + let NumMicroOps = 20; +} +def : InstRW<[WriteVGATHERDPS128, ReadAfterLd], (instregex "VGATHERDPSrm")>; + +// y. +def WriteVGATHERDPS256 : SchedWriteRes<[]> { + let NumMicroOps = 34; +} +def : InstRW<[WriteVGATHERDPS256, ReadAfterLd], (instregex "VGATHERDPSYrm")>; + +// VGATHERQPS. +// x. +def WriteVGATHERQPS128 : SchedWriteRes<[]> { + let NumMicroOps = 15; +} +def : InstRW<[WriteVGATHERQPS128, ReadAfterLd], (instregex "VGATHERQPSrm")>; + +// y. +def WriteVGATHERQPS256 : SchedWriteRes<[]> { + let NumMicroOps = 22; +} +def : InstRW<[WriteVGATHERQPS256, ReadAfterLd], (instregex "VGATHERQPSYrm")>; + +// VGATHERDPD. +// x. +def WriteVGATHERDPD128 : SchedWriteRes<[]> { + let NumMicroOps = 12; +} +def : InstRW<[WriteVGATHERDPD128, ReadAfterLd], (instregex "VGATHERDPDrm")>; + +// y. +def WriteVGATHERDPD256 : SchedWriteRes<[]> { + let NumMicroOps = 20; +} +def : InstRW<[WriteVGATHERDPD256, ReadAfterLd], (instregex "VGATHERDPDYrm")>; + +// VGATHERQPD. +// x. +def WriteVGATHERQPD128 : SchedWriteRes<[]> { + let NumMicroOps = 14; +} +def : InstRW<[WriteVGATHERQPD128, ReadAfterLd], (instregex "VGATHERQPDrm")>; + +// y. +def WriteVGATHERQPD256 : SchedWriteRes<[]> { + let NumMicroOps = 22; +} +def : InstRW<[WriteVGATHERQPD256, ReadAfterLd], (instregex "VGATHERQPDYrm")>; + +//-- Conversion instructions --// + +// CVTPD2PS. +// x,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVTPD2PSrr")>; + +// x,m128. +def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVTPD2PS(X?)rm")>; + +// x,y. +def WriteCVTPD2PSYrr : SchedWriteRes<[HWPort1, HWPort5]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteCVTPD2PSYrr], (instregex "(V?)CVTPD2PSYrr")>; + +// x,m256. +def WriteCVTPD2PSYrm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; +} +def : InstRW<[WriteCVTPD2PSYrm], (instregex "(V?)CVTPD2PSYrm")>; + +// CVTSD2SS. +// x,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V)?CVTSD2SSrr")>; + +// x,m64. +def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(Int_)?(V)?CVTSD2SSrm")>; + +// CVTPS2PD. +// x,x. +def WriteCVTPS2PDrr : SchedWriteRes<[HWPort0, HWPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteCVTPS2PDrr], (instregex "(V?)CVTPS2PDrr")>; + +// x,m64. +// y,m128. +def WriteCVTPS2PDrm : SchedWriteRes<[HWPort0, HWPort23]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteCVTPS2PDrm], (instregex "(V?)CVTPS2PD(Y?)rm")>; + +// y,x. +def WriteVCVTPS2PDYrr : SchedWriteRes<[HWPort0, HWPort5]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteVCVTPS2PDYrr], (instregex "VCVTPS2PDYrr")>; + +// CVTSS2SD. +// x,x. +def WriteCVTSS2SDrr : SchedWriteRes<[HWPort0, HWPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteCVTSS2SDrr], (instregex "(Int_)?(V?)CVTSS2SDrr")>; + +// x,m32. +def WriteCVTSS2SDrm : SchedWriteRes<[HWPort0, HWPort23]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteCVTSS2SDrm], (instregex "(Int_)?(V?)CVTSS2SDrm")>; + +// CVTDQ2PD. +// x,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "(V)?CVTDQ2PDrr")>; + +// y,x. +def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVTDQ2PDYrr")>; + +// CVT(T)PD2DQ. +// x,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVT(T?)PD2DQrr")>; +// x,m128. +def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVT(T?)PD2DQrm")>; +// x,y. +def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVT(T?)PD2DQYrr")>; +// x,m256. +def : InstRW<[WriteP1_P5_Lat6Ld], (instregex "VCVT(T?)PD2DQYrm")>; + +// CVT(T)PS2PI. +// mm,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PS2PIirr")>; + +// CVTPI2PD. +// x,mm. +def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PI2PDirr")>; + +// CVT(T)PD2PI. +// mm,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PD2PIirr")>; + +// CVSTSI2SS. +// x,r32. +def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>; + +// CVT(T)SS2SI. +// r32,x. +def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>; +// r32,m32. +def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>; + +// CVTSI2SD. +// x,r32/64. +def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVTSI2SS(64)?rr")>; + +// CVTSD2SI. +// r32/64 +def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rr")>; +// r32,m32. +def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rm")>; + +// VCVTPS2PH. +// x,v,i. +def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPS2PH(Y?)rr")>; +// m,v,i. +def : InstRW<[WriteP1_P5_Lat4Ld, WriteRMW], (instregex "VCVTPS2PH(Y?)mr")>; + +// VCVTPH2PS. +// v,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPH2PS(Y?)rr")>; + +//-- Arithmetic instructions --// + +// HADD, HSUB PS/PD +// x,x / v,v,v. +def WriteHADDSUBPr : SchedWriteRes<[HWPort1, HWPort5]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteHADDSUBPr], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rr")>; + +// x,m / v,v,m. +def WriteHADDSUBPm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1, 2, 1]; +} +def : InstRW<[WriteHADDSUBPm], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rm")>; + +// MULL SS/SD PS/PD. +// x,x / v,v,v. +def WriteMULr : SchedWriteRes<[HWPort01]> { + let Latency = 5; +} +def : InstRW<[WriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>; + +// x,m / v,v,m. +def WriteMULm : SchedWriteRes<[HWPort01, HWPort23]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteMULm], (instregex "(V?)MUL(P|S)(S|D)rm")>; + +// VDIVPS. +// y,y,y. +def WriteVDIVPSYrr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 19; // 18-21 cycles. + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteVDIVPSYrr], (instregex "VDIVPSYrr")>; + +// y,y,m256. +def WriteVDIVPSYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 23; // 18-21 + 4 cycles. + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; +} +def : InstRW<[WriteVDIVPSYrm, ReadAfterLd], (instregex "VDIVPSYrm")>; + +// VDIVPD. +// y,y,y. +def WriteVDIVPDYrr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 27; // 19-35 cycles. + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteVDIVPDYrr], (instregex "VDIVPDYrr")>; + +// y,y,m256. +def WriteVDIVPDYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 31; // 19-35 + 4 cycles. + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; +} +def : InstRW<[WriteVDIVPDYrm, ReadAfterLd], (instregex "VDIVPDYrm")>; + +// VRCPPS. +// y,y. +def WriteVRCPPSr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>; + +// y,m256. +def WriteVRCPPSm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; +} +def : InstRW<[WriteVRCPPSm], (instregex "VRCPPSYm(_Int)?")>; + +// ROUND SS/SD PS/PD. +// v,v,i. +def WriteROUNDr : SchedWriteRes<[HWPort1]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def : InstRW<[WriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>; + +// v,m,i. +def WriteROUNDm : SchedWriteRes<[HWPort1, HWPort23]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>; + +// DPPS. +// x,x,i / v,v,v,i. +def WriteDPPSr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; +} +def : InstRW<[WriteDPPSr], (instregex "(V?)DPPS(Y?)rri")>; + +// x,m,i / v,v,m,i. +def WriteDPPSm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23, HWPort6]> { + let Latency = 18; + let NumMicroOps = 6; + let ResourceCycles = [2, 1, 1, 1, 1]; +} +def : InstRW<[WriteDPPSm, ReadAfterLd], (instregex "(V?)DPPS(Y?)rmi")>; + +// DPPD. +// x,x,i. +def WriteDPPDr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; +} +def : InstRW<[WriteDPPDr], (instregex "(V?)DPPDrri")>; + +// x,m,i. +def WriteDPPDm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [1, 1, 1, 1]; +} +def : InstRW<[WriteDPPDm], (instregex "(V?)DPPDrmi")>; + +// VFMADD. +// v,v,v. +def WriteFMADDr : SchedWriteRes<[HWPort01]> { + let Latency = 5; + let NumMicroOps = 1; +} +def : InstRW<[WriteFMADDr], + (instregex + // 3p forms. + "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?", + // 3s forms. + "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)r", + // 4s/4s_int forms. + "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?", + // 4p forms. + "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>; + +// v,v,m. +def WriteFMADDm : SchedWriteRes<[HWPort01, HWPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteFMADDm], + (instregex + // 3p forms. + "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?", + // 3s forms. + "VF(N?)M(ADD|SUB)S(S|D)(r132|231|213)m", + // 4s/4s_int forms. + "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?", + // 4p forms. + "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>; + +//-- Math instructions --// + +// VSQRTPS. +// y,y. +def WriteVSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 19; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteVSQRTPSYr], (instregex "VSQRTPSYr")>; + +// y,m256. +def WriteVSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 23; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; +} +def : InstRW<[WriteVSQRTPSYm], (instregex "VSQRTPSYm")>; + +// VSQRTPD. +// y,y. +def WriteVSQRTPDYr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 28; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteVSQRTPDYr], (instregex "VSQRTPDYr")>; + +// y,m256. +def WriteVSQRTPDYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 32; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; +} +def : InstRW<[WriteVSQRTPDYm], (instregex "VSQRTPDYm")>; + +// RSQRT SS/PS. +// x,x. +def WriteRSQRTr : SchedWriteRes<[HWPort0]> { + let Latency = 5; +} +def : InstRW<[WriteRSQRTr], (instregex "(V?)RSQRT(SS|PS)r(_Int)?")>; + +// x,m128. +def WriteRSQRTm : SchedWriteRes<[HWPort0, HWPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteRSQRTm], (instregex "(V?)RSQRT(SS|PS)m(_Int)?")>; + +// RSQRTPS 256. +// y,y. +def WriteRSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>; + +// y,m256. +def WriteRSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; +} +def : InstRW<[WriteRSQRTPSYm], (instregex "VRSQRTPSYm(_Int)?")>; + +//-- Logic instructions --// + +// AND, ANDN, OR, XOR PS/PD. +// x,x / v,v,v. +def : InstRW<[WriteP5], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>; +// x,m / v,v,m. +def : InstRW<[WriteP5Ld, ReadAfterLd], + (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>; + +//-- Other instructions --// + +// VZEROUPPER. +def WriteVZEROUPPER : SchedWriteRes<[]> { + let NumMicroOps = 4; +} +def : InstRW<[WriteVZEROUPPER], (instregex "VZEROUPPER")>; + +// VZEROALL. +def WriteVZEROALL : SchedWriteRes<[]> { + let NumMicroOps = 12; +} +def : InstRW<[WriteVZEROALL], (instregex "VZEROALL")>; + +// LDMXCSR. +def WriteLDMXCSR : SchedWriteRes<[HWPort0, HWPort6, HWPort23]> { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; +} +def : InstRW<[WriteLDMXCSR], (instregex "(V)?LDMXCSR")>; + +// STMXCSR. +def WriteSTMXCSR : SchedWriteRes<[HWPort0, HWPort4, HWPort6, HWPort237]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1, 1, 1, 1]; +} +def : InstRW<[WriteSTMXCSR], (instregex "(V)?STMXCSR")>; + } // SchedModel