From: David Goodwin Date: Wed, 12 Aug 2009 18:31:53 +0000 (+0000) Subject: Enhance the InstrStage object to enable the specification of an Itinerary with overla... X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=1a8f36e3ce5b9c230781b66600c81536128abfb5;p=oota-llvm.git Enhance the InstrStage object to enable the specification of an Itinerary with overlapping stages. The default is to maintain the current behavior that the "next" stage immediately follows the previous one. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78827 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h index 1d5af9a3484..237ca4ef2a6 100644 --- a/include/llvm/Target/TargetInstrItineraries.h +++ b/include/llvm/Target/TargetInstrItineraries.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file describes the structures used for instruction itineraries and -// states. This is used by schedulers to determine instruction states and +// stages. This is used by schedulers to determine instruction stages and // latencies. // //===----------------------------------------------------------------------===// @@ -16,17 +16,57 @@ #ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H #define LLVM_TARGET_TARGETINSTRITINERARIES_H +#include <algorithm> + namespace llvm { //===----------------------------------------------------------------------===// -/// Instruction stage - These values represent a step in the execution of an -/// instruction. The latency represents the number of discrete time slots -/// needed to complete the stage. Units represent the choice of functional -/// units that can be used to complete the stage. Eg. IntUnit1, IntUnit2. +/// Instruction stage - These values represent a non-pipelined step in +/// the execution of an instruction. Cycles represents the number of +/// discrete time slots needed to complete the stage. 
Units represent +/// the choice of functional units that can be used to complete the +/// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many +/// cycles should elapse from the start of this stage to the start of +/// the next stage in the itinerary. A value of -1 indicates that the +/// next stage should start immediately after the current one. +/// For example: +/// +/// { 1, x, -1 } +/// indicates that the stage occupies FU x for 1 cycle and that +/// the next stage starts immediately after this one. +/// +/// { 2, x|y, 1 } +/// indicates that the stage occupies either FU x or FU y for 2 +/// consecutive cycles and that the next stage starts one cycle +/// after this stage starts. That is, the stage requirements +/// overlap in time. +/// +/// { 1, x, 0 } +/// indicates that the stage occupies FU x for 1 cycle and that +/// the next stage starts in this same cycle. This can be used to +/// indicate that the instruction requires multiple stages at the +/// same time. /// struct InstrStage { - unsigned Cycles; ///< Length of stage in machine cycles - unsigned Units; ///< Choice of functional units + unsigned Cycles_; ///< Length of stage in machine cycles + unsigned Units_; ///< Choice of functional units + int NextCycles_; ///< Number of machine cycles to next stage + + /// getCycles - returns the number of cycles the stage is occupied + unsigned getCycles() const { + return Cycles_; + } + + /// getUnits - returns the choice of FUs + unsigned getUnits() const { + return Units_; + } + + /// getNextCycles - returns the number of cycles from the start of + /// this stage to the start of the next stage in the itinerary + unsigned getNextCycles() const { + return (NextCycles_ >= 0) ? (unsigned)NextCycles_ : Cycles_; + } }; @@ -84,13 +124,17 @@ struct InstrItineraryData { if (isEmpty()) return 1; - // Just sum the cycle count for each stage. 
The assumption is that all - // inputs are consumed at the start of the first stage and that all - // outputs are produced at the end of the last stage. - unsigned Latency = 0; + // Calculate the maximum completion time for any stage. The + // assumption is that all inputs are consumed at the start of the + // first stage and that all outputs are produced at the end of the + // latest completing last stage. + unsigned Latency = 0, StartCycle = 0; for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx); - IS != E; ++IS) - Latency += IS->Cycles; + IS != E; ++IS) { + Latency = std::max(Latency, StartCycle + IS->getCycles()); + StartCycle += IS->getNextCycles(); + } + return Latency; } }; diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index 38461c5a380..4940e8b82bd 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -23,14 +23,23 @@ class FuncUnit; //===----------------------------------------------------------------------===// -// Instruction stage - These values represent a step in the execution of an -// instruction. The latency represents the number of discrete time slots used -// need to complete the stage. Units represent the choice of functional units -// that can be used to complete the stage. Eg. IntUnit1, IntUnit2. +// Instruction stage - These values represent a non-pipelined step in +// the execution of an instruction. Cycles represents the number of +// discrete time slots needed to complete the stage. Units represent +// the choice of functional units that can be used to complete the +// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many +// cycles should elapse from the start of this stage to the start of +// the next stage in the itinerary. 
For example: // -class InstrStage<int cycles, list<FuncUnit> units> { +// A stage is specified in one of two ways: +// +// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles +// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit +// +class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> { int Cycles = cycles; // length of stage in machine cycles list<FuncUnit> Units = units; // choice of functional units + int TimeInc = timeinc; // cycles till start of next stage } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp index 48043f286cc..8bac08a4a05 100644 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ b/lib/CodeGen/ExactHazardRecognizer.cpp @@ -34,12 +34,12 @@ ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData // If the begin stage of an itinerary has 0 cycles and units, // then we have reached the end of the itineraries. const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); - if ((IS->Cycles == 0) && (IS->Units == 0)) + if ((IS->getCycles() == 0) && (IS->getUnits() == 0)) break; unsigned ItinDepth = 0; for (; IS != E; ++IS) - ItinDepth += std::max(1U, IS->Cycles); + ItinDepth += IS->getCycles(); ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); } @@ -89,27 +89,25 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU unsigned idx = SU->getInstr()->getDesc().getSchedClass(); for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); IS != E; ++IS) { - // If the stages cycles are 0, then we must have the FU free in - // the current cycle, but we don't advance the cycle time . - unsigned StageCycles = std::max(1U, IS->Cycles); - // We must find one of the stage's units free for every cycle the - // stage is occupied. 
- for (unsigned int i = 0; i < StageCycles; ++i) { - assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle); - unsigned freeUnits = IS->Units & ~Scoreboard[index]; + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; if (!freeUnits) { - DEBUG(errs() << "*** Hazard in cycle " << cycle << ", "); + DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(errs() << "SU(" << SU->NodeNum << "): "); DEBUG(SU->getInstr()->dump()); return Hazard; } - - if (IS->Cycles > 0) - ++cycle; } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); } return NoHazard; @@ -123,17 +121,15 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { unsigned idx = SU->getInstr()->getDesc().getSchedClass(); for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); IS != E; ++IS) { - // If the stages cycles are 0, then we must reserve the FU in the - // current cycle, but we don't advance the cycle time . - unsigned StageCycles = std::max(1U, IS->Cycles); - // We must reserve one of the stage's units for every cycle the - // stage is occupied. - for (unsigned int i = 0; i < StageCycles; ++i) { - assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!"); - - unsigned index = getFutureIndex(cycle); - unsigned freeUnits = IS->Units & ~Scoreboard[index]; + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. 
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < ScoreboardDepth) && + "Scoreboard depth exceeded!"); + + unsigned index = getFutureIndex(cycle + i); + unsigned freeUnits = IS->getUnits() & ~Scoreboard[index]; // reduce to a single unit unsigned freeUnit = 0; @@ -144,10 +140,10 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { assert(freeUnit && "No function unit available!"); Scoreboard[index] |= freeUnit; - - if (IS->Cycles > 0) - ++cycle; } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); } DEBUG(dumpScoreboard()); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 0985265e19a..8e5a01d4dda 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -615,7 +615,7 @@ let isReturn = 1, isTerminator = 1, mayLoad = 1 in []>; // On non-Darwin platforms R9 is callee-saved. -let isCall = 1, Itinerary = IIC_Br, +let isCall = 1, Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, @@ -652,7 +652,7 @@ let isCall = 1, Itinerary = IIC_Br, } // On Darwin R9 is call-clobbered. -let isCall = 1, Itinerary = IIC_Br, +let isCall = 1, Defs = [R0, R1, R2, R3, R9, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, @@ -685,7 +685,7 @@ let isCall = 1, Itinerary = IIC_Br, } } -let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in { +let isBranch = 1, isTerminator = 1 in { // B is "predicable" since it can be xformed into a Bcc. 
let isBarrier = 1 in { let isPredicable = 1 in @@ -1057,7 +1057,7 @@ defm BIC : AsI1_bin_irs<0b1110, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), - AddrMode1, Size4Bytes, IndexModeNone, DPFrm, NoItinerary, + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALU, "bfc", " $dst, $imm", "$src = $dst", [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>, Requires<[IsARM, HasV6T2]> { @@ -1084,16 +1084,16 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm), // let isCommutable = 1 in -def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, +def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, "mul", " $dst, $a, $b", [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "mla", " $dst, $a, $b, $c", + IIC_iMPY, "mla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "mls", " $dst, $a, $b, $c", + IIC_iMPY, "mls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>, Requires<[IsARM, HasV6T2]>; @@ -1101,32 +1101,32 @@ def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), let neverHasSideEffects = 1 in { let isCommutable = 1 in { def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "smull", " $ldst, $hdst, $a, $b", []>; def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "umull", " $ldst, $hdst, $a, $b", []>; } // Multiply + accumulate def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "smlal", " $ldst, $hdst, $a, $b", []>; def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), 
IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "umlal", " $ldst, $hdst, $a, $b", []>; def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), - (ins GPR:$a, GPR:$b), IIC_iALU, + (ins GPR:$a, GPR:$b), IIC_iMPY, "umaal", " $ldst, $hdst, $a, $b", []>, Requires<[IsARM, HasV6]>; } // neverHasSideEffects // Most significant word multiply def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, "smmul", " $dst, $a, $b", + IIC_iMPY, "smmul", " $dst, $a, $b", [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0001; @@ -1134,7 +1134,7 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), } def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "smmla", " $dst, $a, $b, $c", + IIC_iMPY, "smmla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0001; @@ -1142,7 +1142,7 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iALU, "smmls", " $dst, $a, $b, $c", + IIC_iMPY, "smmls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b1101; @@ -1150,7 +1150,7 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), multiclass AI_smul { def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1159,7 +1159,7 @@ multiclass AI_smul { } def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 
16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1168,7 +1168,7 @@ multiclass AI_smul { } def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1177,7 +1177,7 @@ multiclass AI_smul { } def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1186,7 +1186,7 @@ multiclass AI_smul { } def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1195,7 +1195,7 @@ multiclass AI_smul { } def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b", + IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1207,7 +1207,7 @@ multiclass AI_smul { multiclass AI_smla { def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16))))]>, @@ -1217,7 +1217,7 @@ multiclass AI_smla { } def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, 
(i32 16)))))]>, Requires<[IsARM, HasV5TE]> { @@ -1226,7 +1226,7 @@ multiclass AI_smla { } def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1235,7 +1235,7 @@ multiclass AI_smla { } def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { @@ -1244,7 +1244,7 @@ multiclass AI_smla { } def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1253,7 +1253,7 @@ multiclass AI_smla { } def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 0da798e15df..1a823319807 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -508,7 +508,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iALU, // multiply register let isCommutable = 1 in -def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU, +def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMPY, "mul", " $dst, $rhs", [(set tGPR:$dst, (mul 
tGPR:$lhs, tGPR:$rhs))]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0ef9cc03e08..5800a430477 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -808,80 +808,80 @@ def : T2Pat<(t2_so_imm_not:$src), // Multiply Instructions. // let isCommutable = 1 in -def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, "mul", " $dst, $a, $b", [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; -def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "mla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; -def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "mls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>; // Extra precision multiplies with low / high results let neverHasSideEffects = 1 in { let isCommutable = 1 in { -def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "smull", " $ldst, $hdst, $a, $b", []>; -def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "umull", " $ldst, $hdst, $a, $b", []>; } // Multiply + accumulate -def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "smlal", " $ldst, $hdst, $a, $b", []>; -def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "umlal", " $ldst, $hdst, $a, $b", []>; -def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), 
IIC_iALU, +def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY, "umaal", " $ldst, $hdst, $a, $b", []>; } // neverHasSideEffects // Most significant word multiply -def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, +def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, "smmul", " $dst, $a, $b", [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>; -def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "smmla", " $dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>; -def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, +def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY, "smmls", " $dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>; multiclass T2I_smul { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16)))]>; - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16))))]>; - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>; - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16))))]>; - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), 
IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16)))]>; - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, + def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16)))]>; @@ -889,33 +889,33 @@ multiclass T2I_smul { multiclass T2I_smla { - def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16))))]>; - def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16)))))]>; - def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16))))]>; - def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16)))))]>; - def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16))))]>; - def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, + def WT : T2I<(outs GPR:$dst), (ins GPR:$a, 
GPR:$b, GPR:$acc), IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16))))]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index 8a7b42eb729..a789d711479 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -16,34 +16,34 @@ def CortexA8Itineraries : ProcessorItineraries<[ // two fully-pipelined integer ALU pipelines InstrItinData]>, // one fully-pipelined integer Multiply pipeline - // function units are used in alpha order, so use FU_Pipe1 - // for the Multiple pipeline - InstrItinData]>, + // function units are reserved by the scheduler in reverse alpha order, + // so use FU_Pipe0 for the Multiple pipeline + InstrItinData]>, // loads have an extra cycle of latency, but are fully pipelined - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>, // fully-pipelined stores - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, // no delay slots, so the latency of a branch is unimportant InstrItinData]>, - // VFP ALU is not pipelined so stall all issues - // FIXME assume NFP pipeline and 7 cycle non-pipelined latency - InstrItinData]>, + // NFP ALU is not pipelined so stall all issues + InstrItinData, + InstrStage<7, [FU_Pipe1], 0>]>, // VFP MPY is not pipelined so stall all issues - // FIXME assume NFP pipeline and 7 cycle non-pipelined latency - InstrItinData]>, + InstrItinData, + InstrStage<7, [FU_Pipe1], 0>]>, // loads have an extra cycle of latency, but are fully pipelined - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + // use FU_Issue to 
enforce the 1 load/store per cycle limit + InstrItinData, InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>, - // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData, InstrStage<1, [FU_Pipe0, FU_Pipe1]>]> ]>; diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index 919ac664efa..4a0bacd19cf 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -215,7 +215,7 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData, // Next stage const Record *Stage = StageList[i]; - // Form string as ,{ cycles, u1 | u2 | ... | un } + // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc } int Cycles = Stage->getValueAsInt("Cycles"); ItinString += " { " + itostr(Cycles) + ", "; @@ -229,6 +229,9 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData, if (++j < M) ItinString += " | "; } + int TimeInc = Stage->getValueAsInt("TimeInc"); + ItinString += ", " + itostr(TimeInc); + // Close off stage ItinString += " }"; if (++i < N) ItinString += ", "; @@ -252,7 +255,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS, // Begin stages table OS << "static const llvm::InstrStage Stages[] = {\n" - " { 0, 0 }, // No itinerary\n"; + " { 0, 0, 0 }, // No itinerary\n"; unsigned StageCount = 1; unsigned ItinEnum = 1; @@ -289,7 +292,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS, // If new itinerary if (Find == 0) { - // Emit as { cycles, u1 | u2 | ... | un }, // index + // Emit as { cycles, u1 | u2 | ... | un, timeinc }, // index OS << ItinString << ", // " << ItinEnum << "\n"; // Record Itin class number. ItinMap[ItinString] = Find = StageCount; @@ -313,7 +316,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS, } // Closing stage - OS << " { 0, 0 } // End itinerary\n"; + OS << " { 0, 0, 0 } // End itinerary\n"; // End stages table OS << "};\n";