X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMScheduleA9.td;h=9a1d2227564649f65cc21162e71b6943bc854b8d;hb=47b167dd84902e2cdc4796d68ddcfa5f540a67cd;hp=382e6cc4cd7e5d6e52e81dbb02d6ec980cf5a88b;hpb=55097ff56765b9a1e41a7e676df764a8749bc81f;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 382e6cc4cd7..9a1d2227564 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1879,32 +1879,43 @@ def CortexA9Itineraries : ProcessorItineraries< // The following definitions describe the simpler per-operand machine model. // This works with MachineScheduler and will eventually replace itineraries. +class A9WriteLMOpsListType writes> { + list Writes = writes; + SchedMachineModel SchedModel = ?; +} // Cortex-A9 machine model for scheduling and other instruction cost heuristics. def CortexA9Model : SchedMachineModel { let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let MinLatency = 0; // Data dependencies are allowed within dispatch groups. + let MicroOpBufferSize = 56; // Based on available renamed registers. let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. - let ILPWindow = 10; // Don't reschedule small blocks to hide - // latency. Minimum latency requirements are already - // modeled strictly by reserving resources. let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; + + // FIXME: Many vector operations were never given an itinerary. We + // haven't mapped these to the new model either. + let CompleteModel = 0; } //===----------------------------------------------------------------------===// // Define each kind of processor resource and number available. +// +// The AGU unit has BufferSize=1 so that the latency between operations +// that use it are considered to stall other operations. +// +// The FP unit has BufferSize=0 so that it is a hard dispatch +// hazard. No instruction may be dispatched while the unit is reserved. let SchedModel = CortexA9Model in { def A9UnitALU : ProcResource<2>; def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } -def A9UnitAGU : ProcResource<1>; +def A9UnitAGU : ProcResource<1> { let BufferSize = 1; } def A9UnitLS : ProcResource<1>; -def A9UnitFP : ProcResource<1> { let Buffered = 0; } +def A9UnitFP : ProcResource<1> { let BufferSize = 0; } def A9UnitB : ProcResource<1>; //===----------------------------------------------------------------------===// @@ -1920,7 +1931,7 @@ def A9WriteI : SchedWriteRes<[A9UnitALU]>; def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } // Basic ALU. -def : WriteRes; +def A9WriteALU : SchedWriteRes<[A9UnitALU]>; // ALU with operand shifted by immediate. def : WriteRes { let Latency = 2; } // ALU with operand shifted by register. @@ -2014,7 +2025,7 @@ def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>; // Define a predicate to select the LDM based on number of memory addresses. def A9LMAdr#NumAddr#Pred : - SchedPredicate<"TII->getNumLDMAddresses(MI) == "#NumAddr>; + SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>; } // foreach NumAddr @@ -2057,48 +2068,30 @@ def A9WriteL#NumAddr#Hi : WriteSequence< //===----------------------------------------------------------------------===// // LDM: Load multiple into 32-bit integer registers. +def A9WriteLMOpsList : A9WriteLMOpsListType< + [A9WriteL1, A9WriteL1Hi, + A9WriteL2, A9WriteL2Hi, + A9WriteL3, A9WriteL3Hi, + A9WriteL4, A9WriteL4Hi, + A9WriteL5, A9WriteL5Hi, + A9WriteL6, A9WriteL6Hi, + A9WriteL7, A9WriteL7Hi, + A9WriteL8, A9WriteL8Hi]>; + // A9WriteLM variants expand into a pair of writes for each 64-bit // value loaded. When the number of registers is odd, the last // A9WriteLnHi is naturally ignored because the instruction has no // following def operands. These variants take no issue resource, so // they may need to be part of a WriteSequence that includes A9WriteIssue. def A9WriteLM : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, // For unknown LDMs, define the maximum number of writes, but only // make the first two consume resources. SchedVar; // 21-22 + def A9WriteLMfpPostRA : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, - SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, + SchedVar, // For unknown LDMs, define the maximum number of writes, but only - // make the first two consume resources. - SchedVar]>; //===----------------------------------------------------------------------===// -// Resources for other (non LDM/VLDM) Variants. +// Resources for other (non-LDM/VLDM) Variants. // These mov immediate writers are unconditionally expanded with // additive latency. @@ -2275,10 +2258,10 @@ def A9Read4 : SchedReadAdvance<3>; // This table follows the ARM Cortex-A9 Technical Reference Manuals, // mostly in order. -def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, +def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, IIC_iMVNi,IIC_iMVNsi, IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>; -def :ItinRW<[A9WriteI,ReadALU],[IIC_iMVNr]>; +def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>; def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>; def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>; @@ -2286,13 +2269,13 @@ def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>; def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>; def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>; -def :ItinRW<[WriteALU, ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>; -def :ItinRW<[WriteALU, ReadALU, ReadALU],[IIC_iALUr,IIC_iCMPr]>; +def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>; +def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>; def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>; -def :ItinRW<[WriteALUsi, ReadALU], [IIC_iALUsi]>; -def :ItinRW<[WriteALUsi, ReadDefault, ReadALU], [IIC_iALUsir]>; // RSB +def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>; +def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>; -def :ItinRW<[A9WriteALUsr, ReadALU], [IIC_iALUsr,IIC_iCMPsr]>; +def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>; // A9WriteHi ignored for MUL32. def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32, @@ -2424,6 +2407,7 @@ def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>; // ... // VHADD/VRHADD/VQADD/VTST/VADH/VRADH def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>; + // VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>; // VQNEG/VQABS @@ -2458,7 +2442,7 @@ def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>; def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>; // NEON permute -def :ItinRW<[A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>; +def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>; def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2], [IIC_VPERMQ3, IIC_VEXTQ]>; def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>; @@ -2482,9 +2466,64 @@ def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>; def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>; // Map SchedRWs that are identical for cortexa9 to existing resources. +def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; - +def : InstRW< [WriteALU], + (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr", + "BICrr")>; +def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>; +def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>; + + +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + +def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi", + "MOVCCsr")>; +def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>; +def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm", + "MOV_ga_dyn")>; +def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>; +def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>; + +def : InstRW< [WriteALU], (instregex "SEL")>; + +def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>; + +def : InstRW< [A9WriteM], + (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS", + "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>; +def : InstRW< [A9WriteM, A9WriteMHi], + (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL", + "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB", + "SMLALTT")>; +// FIXME: These instructions used to have NoItinerary. Just copied the one from above. +def : InstRW< [A9WriteM, A9WriteMHi], + (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX", + "SMLSLD", "SMLLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>; + +def : InstRW<[A9WriteM16, A9WriteM16Hi], + (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>; +def : InstRW<[A9WriteM16, A9WriteM16Hi], + (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>; + +def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>; +def : InstRW<[A9WriteLsi], (instregex "LDRrs")>; +def : InstRW<[A9WriteLb], + (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB", + "LDRH", "LDRSH", "LDRSB")>; +def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>; + +def : WriteRes { let Latency = 0; } + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : SchedAlias; +def : WriteRes { let Latency = 0; let NumMicroOps = 0; } } // SchedModel = CortexA9Model