From: Andrew Trick Date: Fri, 24 Dec 2010 05:03:26 +0000 (+0000) Subject: Various bits of framework needed for precise machine-level selection X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=2da8bc8a5f7705ac131184cd247f48500da0d74e;p=oota-llvm.git Various bits of framework needed for precise machine-level selection DAG scheduling during isel. Most new functionality is currently guarded by -enable-sched-cycles and -enable-sched-hazard. Added InstrItineraryData::IssueWidth field, currently derived from ARM itineraries, but could be initialized differently on other targets. Added ScheduleHazardRecognizer::MaxLookAhead to indicate whether it is active, and if so how many cycles of state it holds. Added SchedulingPriorityQueue::HasReadyFilter to allowing gating entry into the scheduler's available queue. ScoreboardHazardRecognizer now accesses the ScheduleDAG in order to get information about it's SUnits, provides RecedeCycle for bottom-up scheduling, correctly computes scoreboard depth, tracks IssueCount, and considers potential stall cycles when checking for hazards. ScheduleDAGRRList now models machine cycles and hazards (under flags). It tracks MinAvailableCycle, drives the hazard recognizer and priority queue's ready filter, manages a new PendingQueue, properly accounts for stall cycles, etc. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122541 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h index 07d04288ce0..1ed2547ca6c 100644 --- a/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -47,6 +47,8 @@ namespace llvm { LatencyPriorityQueue() : Picker(this) { } + bool isBottomUp() const { return false; } + void initNodes(std::vector &sunits) { SUnits = &sunits; NumNodesSolelyBlocking.resize(SUnits->size(), 0); @@ -81,6 +83,8 @@ namespace llvm { virtual void remove(SUnit *SU); + virtual void dump(ScheduleDAG* DAG) const; + // ScheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. If so, that // single predecessor has a higher priority, since scheduling it will make diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 0f40f77b96e..69aec439204 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -326,6 +326,10 @@ namespace llvm { return Node; } + /// isInstr - Return true if this SUnit refers to a machine instruction as + /// opposed to an SDNode. + bool isInstr() const { return !Node; } + /// setInstr - Assign the instruction for the SUnit. /// This may be used during post-regalloc scheduling. void setInstr(MachineInstr *MI) { @@ -421,16 +425,27 @@ namespace llvm { /// class SchedulingPriorityQueue { unsigned CurCycle; + bool HasReadyFilter; public: - SchedulingPriorityQueue() : CurCycle(0) {} + SchedulingPriorityQueue(bool rf = false): + CurCycle(0), HasReadyFilter(rf) {} virtual ~SchedulingPriorityQueue() {} + virtual bool isBottomUp() const = 0; + virtual void initNodes(std::vector &SUnits) = 0; virtual void addNode(const SUnit *SU) = 0; virtual void updateNode(const SUnit *SU) = 0; virtual void releaseState() = 0; virtual bool empty() const = 0; + + bool hasReadyFilter() const { return HasReadyFilter; } + + virtual bool isReady(SUnit *U) const { + assert(!HasReadyFilter && "The ready filter must override isReady()"); + return true; + } virtual void push(SUnit *U) = 0; void push_all(const std::vector &Nodes) { @@ -443,6 +458,8 @@ namespace llvm { virtual void remove(SUnit *SU) = 0; + virtual void dump(ScheduleDAG *DAG) const {} + /// ScheduledNode - As each node is scheduled, this method is invoked. This /// allows the priority function to adjust the priority of related /// unscheduled nodes, for example. @@ -479,6 +496,13 @@ namespace llvm { virtual ~ScheduleDAG(); + /// getInstrDesc - Return the TargetInstrDesc of this SUnit. + /// Return NULL for SDNodes without a machine opcode. + const TargetInstrDesc *getInstrDesc(const SUnit *SU) const { + if (SU->isInstr()) return &SU->getInstr()->getDesc(); + return getNodeDesc(SU->getNode()); + } + /// viewGraph - Pop up a GraphViz/gv window with the ScheduleDAG rendered /// using 'dot'. /// @@ -542,6 +566,10 @@ namespace llvm { void EmitNoop(); void EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap); + + private: + // Return the TargetInstrDesc of this SDNode or NULL. + const TargetInstrDesc *getNodeDesc(const SDNode *Node) const; }; class SUnitIterator : public std::iteratorgetHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index bd5b2b898b2..60c24b71079 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -133,18 +133,12 @@ namespace { std::vector KillIndices; public: - SchedulePostRATDList(MachineFunction &MF, - const MachineLoopInfo &MLI, - const MachineDominatorTree &MDT, - ScheduleHazardRecognizer *HR, - AntiDepBreaker *ADB, - AliasAnalysis *aa) - : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), - HazardRec(HR), AntiDepBreak(ADB), AA(aa), - KillIndices(TRI->getNumRegs()) {} - - ~SchedulePostRATDList() { - } + SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + SmallVectorImpl &CriticalPathRCs); + + ~SchedulePostRATDList(); /// StartBlock - Initialize register live-range state for scheduling in /// this block. @@ -183,9 +177,34 @@ namespace { }; } +SchedulePostRATDList::SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + SmallVectorImpl &CriticalPathRCs) + : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), + KillIndices(TRI->getNumRegs()) +{ + const TargetMachine &TM = MF.getTarget(); + const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); + HazardRec = + TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); + AntiDepBreak = + ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) : + ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? + (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL)); +} + +SchedulePostRATDList::~SchedulePostRATDList() { + delete HazardRec; + delete AntiDepBreak; +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { - AA = &getAnalysis(); TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis(); + MachineDominatorTree &MDT = getAnalysis(); + AliasAnalysis *AA = &getAnalysis(); // Check for explicit enable/disable of post-ra scheduling. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; @@ -195,6 +214,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { return false; } else { // Check that post-RA scheduling is enabled for this target. + // This may upgrade the AntiDepMode. const TargetSubtarget &ST = Fn.getTarget().getSubtarget(); if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) return false; @@ -210,19 +230,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "PostRAScheduler\n"); - const MachineLoopInfo &MLI = getAnalysis(); - const MachineDominatorTree &MDT = getAnalysis(); - const TargetMachine &TM = Fn.getTarget(); - const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); - ScheduleHazardRecognizer *HR = - TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins); - AntiDepBreaker *ADB = - ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : - ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? - (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL)); - - SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA); + SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode, + CriticalPathRCs); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); @@ -270,9 +279,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.FixupKills(MBB); } - delete HR; - delete ADB; - return true; } @@ -617,13 +623,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { MinDepth = PendingQueue[i]->getDepth(); } - DEBUG(dbgs() << "\n*** Examining Available\n"; - LatencyPriorityQueue q = AvailableQueue; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(this); - }); + DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); SUnit *FoundSUnit = 0; bool HasNoopHazards = false; @@ -631,7 +631,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { SUnit *CurSUnit = AvailableQueue.pop(); ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index d2fbc0e28dc..02e398f7efc 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,6 +34,12 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) ScheduleDAG::~ScheduleDAG() {} +/// getInstrDesc helper to handle SDNodes. +const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { + if (!Node->isMachineOpcode()) return NULL; + return &TII->get(Node->getMachineOpcode()); +} + /// dump - dump the schedule. void ScheduleDAG::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index d78b5d3ecbd..b00e0cd0998 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sched-hazard" +#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" @@ -23,29 +23,48 @@ using namespace llvm; +#ifndef NDEBUG +const char *ScoreboardHazardRecognizer::DebugType = ""; +#endif + ScoreboardHazardRecognizer:: -ScoreboardHazardRecognizer(const InstrItineraryData *LItinData) : - ScheduleHazardRecognizer(), ItinData(LItinData) { +ScoreboardHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *SchedDAG, + const char *ParentDebugType) : + ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0), + IssueCount(0) { + +#ifndef NDEBUG + DebugType = ParentDebugType; +#endif + // Determine the maximum depth of any itinerary. This determines the // depth of the scoreboard. We always make the scoreboard at least 1 // cycle deep to avoid dealing with the boundary condition. unsigned ScoreboardDepth = 1; if (ItinData && !ItinData->isEmpty()) { + IssueWidth = ItinData->IssueWidth; + for (unsigned idx = 0; ; ++idx) { if (ItinData->isEndMarker(idx)) break; const InstrStage *IS = ItinData->beginStage(idx); const InstrStage *E = ItinData->endStage(idx); + unsigned CurCycle = 0; unsigned ItinDepth = 0; - for (; IS != E; ++IS) - ItinDepth += IS->getCycles(); + for (; IS != E; ++IS) { + unsigned StageDepth = CurCycle + IS->getCycles(); + if (ItinDepth < StageDepth) ItinDepth = StageDepth; + CurCycle += IS->getNextCycles(); + } // Find the next power-of-2 >= ItinDepth while (ItinDepth > ScoreboardDepth) { ScoreboardDepth *= 2; } } + MaxLookAhead = ScoreboardDepth; } ReservedScoreboard.reset(ScoreboardDepth); @@ -56,6 +75,7 @@ ScoreboardHazardRecognizer(const InstrItineraryData *LItinData) : } void ScoreboardHazardRecognizer::Reset() { + IssueCount = 0; RequiredScoreboard.reset(); ReservedScoreboard.reset(); } @@ -76,24 +96,46 @@ void ScoreboardHazardRecognizer::Scoreboard::dump() const { } } +bool ScoreboardHazardRecognizer::atIssueLimit() const { + if (IssueWidth == 0) + return false; + + return IssueCount == IssueWidth; +} + ScheduleHazardRecognizer::HazardType -ScoreboardHazardRecognizer::getHazardType(SUnit *SU) { +ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (!ItinData || ItinData->isEmpty()) return NoHazard; - unsigned cycle = 0; + // Note that stalls will be negative for bottom-up scheduling. + int cycle = Stalls; // Use the itinerary for the underlying instruction to check for // free FU's in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + if (TID == NULL) { + // Don't check hazards for non-machineinstr Nodes. + return NoHazard; + } + unsigned idx = TID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must find one of the stage's units free for every cycle the // stage is occupied. FIXME it would be more accurate to find the // same unit free in all the cycles. for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < RequiredScoreboard.getDepth()) && - "Scoreboard depth exceeded!"); + int StageCycle = cycle + (int)i; + if (StageCycle < 0) + continue; + + if (StageCycle >= (int)RequiredScoreboard.getDepth()) { + assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() && + "Scoreboard depth exceeded!"); + // This stage was stalled beyond pipeline depth, so cannot conflict. + break; + } unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { @@ -101,18 +143,18 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU) { assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones - freeUnits &= ~ReservedScoreboard[cycle + i]; + freeUnits &= ~ReservedScoreboard[StageCycle]; // FALLTHROUGH case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. - freeUnits &= ~RequiredScoreboard[cycle + i]; + freeUnits &= ~RequiredScoreboard[StageCycle]; break; } if (!freeUnits) { DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); - DEBUG(SU->getInstr()->dump()); + DEBUG(DAG->dumpNode(SU)); return Hazard; } } @@ -128,11 +170,15 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { if (!ItinData || ItinData->isEmpty()) return; + ++IssueCount; + unsigned cycle = 0; // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + assert(TID && "The scheduler must filter non-machineinstrs"); + unsigned idx = TID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the @@ -179,11 +225,13 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { } void ScoreboardHazardRecognizer::AdvanceCycle() { + IssueCount = 0; ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); } void ScoreboardHazardRecognizer::RecedeCycle() { + IssueCount = 0; ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0; ReservedScoreboard.recede(); RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 11df0963c3b..430283d5eff 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -63,11 +63,12 @@ private: public: ScheduleDAGList(MachineFunction &mf, - SchedulingPriorityQueue *availqueue, - ScheduleHazardRecognizer *HR) - : ScheduleDAGSDNodes(mf), - AvailableQueue(availqueue), HazardRec(HR) { - } + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + + const TargetMachine &tm = mf.getTarget(); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } ~ScheduleDAGList() { delete HazardRec; @@ -202,7 +203,7 @@ void ScheduleDAGList::ListScheduleTopDown() { SUnit *CurSUnit = AvailableQueue->pop(); ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; @@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler with a -/// new hazard recognizer. This scheduler takes ownership of the hazard -/// recognizer and deletes it when done. +/// createTDListDAGScheduler - This creates a top-down list scheduler. ScheduleDAGSDNodes * llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, - new LatencyPriorityQueue(), - IS->CreateTargetHazardRecognizer()); + return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8fc6eacbf66..c93ef4dd467 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -20,6 +20,7 @@ #include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -65,6 +66,11 @@ static RegisterScheduler "which tries to balance ILP and register pressure", createILPListDAGScheduler); +static cl::opt EnableSchedCycles( + "enable-sched-cycles", + cl::desc("Enable cycle-level precision during preRA scheduling"), + cl::init(false), cl::Hidden); + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -83,9 +89,21 @@ private: /// AvailableQueue - The priority queue to use for the available SUnits. SchedulingPriorityQueue *AvailableQueue; + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands becomes available, the instruction is + /// added to the AvailableQueue. + std::vector PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + /// CurCycle - The current scheduler state corresponds to this cycle. unsigned CurCycle; + /// MinAvailableCycle - Cycle of the soonest available instruction. + unsigned MinAvailableCycle; + /// LiveRegDefs - A set of physical registers and their definition /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. @@ -98,14 +116,22 @@ private: ScheduleDAGTopologicalSort Topo; public: - ScheduleDAGRRList(MachineFunction &mf, - bool isbottomup, bool needlatency, - SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), - AvailableQueue(availqueue), CurCycle(0), Topo(SUnits) { - } + ScheduleDAGRRList(MachineFunction &mf, bool needlatency, + SchedulingPriorityQueue *availqueue, + CodeGenOpt::Level OptLevel) + : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), + Topo(SUnits) { + + const TargetMachine &tm = mf.getTarget(); + if (EnableSchedCycles && OptLevel != CodeGenOpt::None) + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + else + HazardRec = new ScheduleHazardRecognizer(); + } ~ScheduleDAGRRList() { + delete HazardRec; delete AvailableQueue; } @@ -139,20 +165,31 @@ public: } private: + bool isReady(SUnit *SU) { + return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() || + AvailableQueue->isReady(SU); + } + void ReleasePred(SUnit *SU, const SDep *PredEdge); void ReleasePredecessors(SUnit *SU); void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); void ReleaseSuccessors(SUnit *SU); - void CapturePred(SDep *PredEdge); + void ReleasePending(); + void AdvanceToCycle(unsigned NextCycle); + void AdvancePastStalls(SUnit *SU); + void EmitNode(SUnit *SU); void ScheduleNodeBottomUp(SUnit*); + void CapturePred(SDep *PredEdge); void UnscheduleNodeBottomUp(SUnit*); - void BacktrackBottomUp(SUnit*, unsigned); + void RestoreHazardCheckerBottomUp(); + void BacktrackBottomUp(SUnit*, SUnit*); SUnit *CopyAndMoveSuccessors(SUnit*); void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, SmallVector&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); + SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); @@ -198,6 +235,7 @@ void ScheduleDAGRRList::Schedule() { << " '" << BB->getName() << "' **********\n"); CurCycle = 0; + MinAvailableCycle = EnableSchedCycles ? UINT_MAX : 0; NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegGens.resize(TRI->getNumRegs(), NULL); @@ -211,6 +249,8 @@ void ScheduleDAGRRList::Schedule() { AvailableQueue->initNodes(SUnits); + HazardRec->Reset(); + // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. if (isBottomUp) ListScheduleBottomUp(); @@ -249,7 +289,20 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { // to be scheduled. Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { PredSU->isAvailable = true; - AvailableQueue->push(PredSU); + + unsigned Height = PredSU->getHeight(); + if (Height < MinAvailableCycle) + MinAvailableCycle = Height; + + if (isReady(SU)) { + AvailableQueue->push(PredSU); + } + // CapturePred and others may have left the node in the pending queue, avoid + // adding it twice. + else if (!PredSU->isPending) { + PredSU->isPending = true; + PendingQueue.push_back(PredSU); + } } } @@ -292,6 +345,127 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } +/// Check to see if any of the pending instructions are ready to issue. If +/// so, add them to the available queue. +void ScheduleDAGRRList::ReleasePending() { + assert(!EnableSchedCycles && "requires --enable-sched-cycles" ); + + // If the available queue is empty, it is safe to reset MinAvailableCycle. + if (AvailableQueue->empty()) + MinAvailableCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + unsigned ReadyCycle = + isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + if (ReadyCycle < MinAvailableCycle) + MinAvailableCycle = ReadyCycle; + + if (PendingQueue[i]->isAvailable) { + if (!isReady(PendingQueue[i])) + continue; + AvailableQueue->push(PendingQueue[i]); + } + PendingQueue[i]->isPending = false; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { + if (NextCycle <= CurCycle) + return; + + AvailableQueue->setCurCycle(NextCycle); + if (HazardRec->getMaxLookAhead() == 0) { + // Bypass lots of virtual calls in case of long latency. + CurCycle = NextCycle; + } + else { + for (; CurCycle != NextCycle; ++CurCycle) { + if (isBottomUp) + HazardRec->RecedeCycle(); + else + HazardRec->AdvanceCycle(); + } + } + // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the + // available Q to release pending nodes at least once before popping. + ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { + if (!EnableSchedCycles) + return; + + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + + // Bump CurCycle to account for latency. We assume the latency of other + // available instructions may be hidden by the stall (not a full pipe stall). + // This updates the hazard recognizer's cycle before reserving resources for + // this instruction. + AdvanceToCycle(ReadyCycle); + + // Calls are scheduled in their preceding cycle, so don't conflict with + // hazards from instructions after the call. EmitNode will reset the + // scoreboard state before emitting the call. + if (isBottomUp && SU->isCall) + return; + + // FIXME: For resource conflicts in very long non-pipelined stages, we + // should probably skip ahead here to avoid useless scoreboard checks. + int Stalls = 0; + while (true) { + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + + if (HT == ScheduleHazardRecognizer::NoHazard) + break; + + ++Stalls; + } + AdvanceToCycle(CurCycle + Stalls); +} + +/// Record this SUnit in the HazardRecognizer. +/// Does not update CurCycle. +void ScheduleDAGRRList::EmitNode(SUnit *SU) { + switch (SU->getNode()->getOpcode()) { + default: + assert(SU->getNode()->isMachineOpcode() && + "This target-independent node should not be scheduled."); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: + case ISD::CopyToReg: + case ISD::CopyFromReg: + case ISD::EH_LABEL: + // Noops don't affect the scoreboard state. Copies are likely to be + // removed. + return; + case ISD::INLINEASM: + // For inline asm, clear the pipeline state. + HazardRec->Reset(); + return; + } + if (isBottomUp && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + + HazardRec->EmitInstruction(SU); + + if (!isBottomUp && SU->isCall) { + HazardRec->Reset(); + } +} + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. @@ -304,8 +478,15 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); #endif - // FIXME: Handle noop hazard. + // FIXME: Do not modify node height. It may interfere with + // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the + // node it's ready cycle can aid heuristics, and after scheduling it can + // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); + + // Reserve resources for the scheduled intruction. + EmitNode(SU); + Sequence.push_back(SU); AvailableQueue->ScheduledNode(SU); @@ -327,6 +508,15 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } SU->isScheduled = true; + + // Conditions under which the scheduler should eagerly advance the cycle: + // (1) No available instructions + // (2) All pipelines full, so available instructions must have hazards. + // + // If SchedCycles is disabled, count each inst as one cycle. + if (!EnableSchedCycles || + AvailableQueue->empty() || HazardRec->atIssueLimit()) + AdvanceToCycle(CurCycle + 1); } /// CapturePred - This does the opposite of ReleasePred. Since SU is being @@ -377,31 +567,69 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = I->getSUnit(); } } + if (SU->getHeight() < MinAvailableCycle) + MinAvailableCycle = SU->getHeight(); SU->setHeightDirty(); SU->isScheduled = false; SU->isAvailable = true; - AvailableQueue->push(SU); + if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) { + // Don't make available until backtracking is complete. + SU->isPending = true; + PendingQueue.push_back(SU); + } + else { + AvailableQueue->push(SU); + } AvailableQueue->UnscheduledNode(SU); } +/// After backtracking, the hazard checker needs to be restored to a state +/// corresponding the the current cycle. +void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { + HazardRec->Reset(); + + unsigned LookAhead = std::min((unsigned)Sequence.size(), + HazardRec->getMaxLookAhead()); + if (LookAhead == 0) + return; + + std::vector::const_iterator I = (Sequence.end() - LookAhead); + unsigned HazardCycle = (*I)->getHeight(); + for (std::vector::const_iterator E = Sequence.end(); I != E; ++I) { + SUnit *SU = *I; + for (; SU->getHeight() > HazardCycle; ++HazardCycle) { + HazardRec->RecedeCycle(); + } + EmitNode(SU); + } +} + /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in /// BTCycle in order to schedule a specific node. -void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle) { - SUnit *OldSU = NULL; - while (CurCycle > BtCycle) { - OldSU = Sequence.back(); +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { + SUnit *OldSU = Sequence.back(); + while (true) { Sequence.pop_back(); if (SU->isSucc(OldSU)) // Don't try to remove SU from AvailableQueue. SU->isAvailable = false; + // FIXME: use ready cycle instead of height + CurCycle = OldSU->getHeight(); UnscheduleNodeBottomUp(OldSU); - --CurCycle; AvailableQueue->setCurCycle(CurCycle); + if (OldSU == BtSU) + break; + OldSU = Sequence.back(); } assert(!SU->isSucc(OldSU) && "Something is wrong!"); + RestoreHazardCheckerBottomUp(); + + if (EnableSchedCycles) + ReleasePending(); + ++NumBacktracks; } @@ -741,7 +969,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { /// Return a node that can be scheduled in this cycle. Requirements: /// (1) Ready: latency has been satisfied -/// (2) No Hazards: resources are available (TBD) +/// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { SmallVector Interferences; @@ -777,24 +1005,26 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Try unscheduling up to the point where it's safe to schedule // this node. - unsigned LiveCycle = CurCycle; + SUnit *BtSU = NULL; + unsigned LiveCycle = UINT_MAX; for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { unsigned Reg = LRegs[j]; - unsigned LCycle = LiveRegGens[Reg]->getHeight(); - LiveCycle = std::min(LiveCycle, LCycle); + if (LiveRegGens[Reg]->getHeight() < LiveCycle) { + BtSU = LiveRegGens[Reg]; + LiveCycle = BtSU->getHeight(); + } } - SUnit *OldSU = Sequence[LiveCycle]; - if (!WillCreateCycle(TrySU, OldSU)) { - BacktrackBottomUp(TrySU, LiveCycle); + if (!WillCreateCycle(TrySU, BtSU)) { + BacktrackBottomUp(TrySU, BtSU); // Force the current node to be scheduled before the node that // requires the physical reg dep. - if (OldSU->isAvailable) { - OldSU->isAvailable = false; - if (!OldSU->isPending) - AvailableQueue->remove(OldSU); + if (BtSU->isAvailable) { + BtSU->isAvailable = false; + if (!BtSU->isPending) + AvailableQueue->remove(BtSU); } - AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, + AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); @@ -891,15 +1121,22 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { + DEBUG(dbgs() << "\n*** Examining Available\n"; + AvailableQueue->dump(this)); + // Pick the best node to schedule taking all constraints into // consideration. SUnit *SU = PickNodeToScheduleBottomUp(); - if (SU) - ScheduleNodeBottomUp(SU); + AdvancePastStalls(SU); - ++CurCycle; - AvailableQueue->setCurCycle(CurCycle); + ScheduleNodeBottomUp(SU); + + while (AvailableQueue->empty() && !PendingQueue.empty()) { + // Advance the cycle to free resources. Skip ahead to the next ready SU. + assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); + AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); + } } // Reverse the order if it is bottom up. @@ -967,7 +1204,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. void ScheduleDAGRRList::ListScheduleTopDown() { - unsigned CurCycle = 0; AvailableQueue->setCurCycle(CurCycle); // Release any successors of the special Entry node. @@ -1011,9 +1247,18 @@ namespace { template class RegReductionPriorityQueue; + struct queue_sort : public std::binary_function { + bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } + }; + /// bu_ls_rr_sort - Priority function for bottom up register pressure // reduction scheduler. - struct bu_ls_rr_sort : public std::binary_function { + struct bu_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + RegReductionPriorityQueue *SPQ; bu_ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} @@ -1023,8 +1268,13 @@ namespace { // td_ls_rr_sort - Priority function for top down register pressure reduction // scheduler. - struct td_ls_rr_sort : public std::binary_function { - RegReductionPriorityQueue *SPQ; + struct td_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = false, + HasReadyFilter = false + }; + + RegReductionPriorityQueue *SPQ; td_ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} @@ -1032,7 +1282,12 @@ namespace { }; // src_ls_rr_sort - Priority function for source order scheduler. - struct src_ls_rr_sort : public std::binary_function { + struct src_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + RegReductionPriorityQueue *SPQ; src_ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} @@ -1043,7 +1298,12 @@ namespace { }; // hybrid_ls_rr_sort - Priority function for hybrid scheduler. - struct hybrid_ls_rr_sort : public std::binary_function { + struct hybrid_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + RegReductionPriorityQueue *SPQ; hybrid_ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} @@ -1055,13 +1315,20 @@ namespace { // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) // scheduler. - struct ilp_ls_rr_sort : public std::binary_function { + struct ilp_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = true + }; + RegReductionPriorityQueue *SPQ; ilp_ls_rr_sort(RegReductionPriorityQueue *spq) : SPQ(spq) {} ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + bool isReady(SUnit *SU, unsigned CurCycle) const; + bool operator()(const SUnit* left, const SUnit* right) const; }; } // end anonymous namespace @@ -1098,6 +1365,19 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector &SUNumbers) { namespace { template class RegReductionPriorityQueue : public SchedulingPriorityQueue { + static SUnit *popFromQueue(std::vector &Q, SF &Picker) { + std::vector::iterator Best = Q.begin(); + for (std::vector::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; + } + std::vector Queue; SF Picker; unsigned CurQueueId; @@ -1130,7 +1410,8 @@ namespace { const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp), + : SchedulingPriorityQueue(SF::HasReadyFilter), Picker(this), + CurQueueId(0), TracksRegPressure(tracksrp), MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); @@ -1144,6 +1425,8 @@ namespace { } } + bool isBottomUp() const { return SF::IsBottomUp; } + void initNodes(std::vector &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. @@ -1205,6 +1488,10 @@ namespace { bool empty() const { return Queue.empty(); } + bool isReady(SUnit *U) const { + return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); + } + void push(SUnit *U) { assert(!U->NodeQueueId && "Node in the queue already"); U->NodeQueueId = ++CurQueueId; @@ -1212,16 +1499,9 @@ namespace { } SUnit *pop() { - if (empty()) return NULL; - std::vector::iterator Best = Queue.begin(); - for (std::vector::iterator I = llvm::next(Queue.begin()), - E = Queue.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Queue.end())) - std::swap(*Best, Queue.back()); - Queue.pop_back(); + if (Queue.empty()) return NULL; + + SUnit *V = popFromQueue(Queue, Picker); V->NodeQueueId = 0; return V; } @@ -1475,6 +1755,20 @@ namespace { } } + void dump(ScheduleDAG *DAG) const { + // Emulate pop() without clobbering NodeQueueIds. + std::vector DumpQueue = Queue; + SF DumpPicker = Picker; + while (!DumpQueue.empty()) { + SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + if (isBottomUp()) + dbgs() << "Height " << SU->getHeight() << ": "; + else + dbgs() << "Depth " << SU->getDepth() << ": "; + SU->dump(DAG); + } + } + protected: bool canClobber(const SUnit *SU, const SUnit *Op); void AddPseudoTwoAddrDeps(); @@ -1605,6 +1899,7 @@ static bool BURRSort(const SUnit *left, const SUnit *right, if (LScratch != RScratch) return LScratch > RScratch; + // Note: with a bottom-up ready filter, the height check may be redundant. if (left->getHeight() != right->getHeight()) return left->getHeight() > right->getHeight(); @@ -1696,6 +1991,12 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ return BURRSort(left, right, SPQ); } +// Schedule as many instructions in each cycle as possible. So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + return SU->getHeight() <= CurCycle; +} + bool ilp_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (left->isCall || right->isCall) @@ -2051,46 +2352,50 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { //===----------------------------------------------------------------------===// llvm::ScheduleDAGSDNodes * -llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); @@ -2098,13 +2403,15 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); @@ -2112,7 +2419,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { ILPBURRPriorityQueue *PQ = new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b7e90be8b3f..23ed19afbbd 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1201,10 +1201,6 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { return Ctor(this, OptLevel); } -ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { - return new ScheduleHazardRecognizer(); -} - //===----------------------------------------------------------------------===// // Helper functions used by the generated instruction selector. //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index baad14c5c76..28b973b16eb 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -414,8 +414,18 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, return false; } +// Default implementation of CreateTargetPreRAHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + // Dummy hazard recognizer allows all instructions to issue. + return new ScheduleHazardRecognizer(); +} + // Default implementation of CreateTargetPostRAHazardRecognizer. ScheduleHazardRecognizer *TargetInstrInfoImpl:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const { - return (ScheduleHazardRecognizer *)new ScoreboardHazardRecognizer(II); +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + return (ScheduleHazardRecognizer *) + new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8edc0e7f121..fd13485d1f1 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -41,6 +41,13 @@ static cl::opt EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); +// Other targets already have a hazard recognizer enabled by default, so this +// flag currently only affects ARM. It will be generalized when it becomes a +// disabled flag. +static cl::opt EnableHazardRecognizer( + "enable-sched-hazard", cl::Hidden, + cl::desc("Enable hazard detection during preRA scheduling"), + cl::init(false)); /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { @@ -85,12 +92,25 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) } } +// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl +// currently defaults to no prepass hazard recognizer. ScheduleHazardRecognizer *ARMBaseInstrInfo:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const { +CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + if (EnableHazardRecognizer) { + const InstrItineraryData *II = TM->getInstrItineraryData(); + return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); + } + return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); +} + +ScheduleHazardRecognizer *ARMBaseInstrInfo:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { if (Subtarget.isThumb2() || Subtarget.hasVFP2()) return (ScheduleHazardRecognizer *) - new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget); - return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II); + new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); + return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); } MachineInstr * diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index ca8b9a02405..0ea8a9600bf 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -211,7 +211,12 @@ public: const ARMSubtarget &getSubtarget() const { return Subtarget; } ScheduleHazardRecognizer * - CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const; + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + + ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const; // Branch analysis. virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index b8d385b6e3d..683a7cc1695 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -34,7 +34,9 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, } ScheduleHazardRecognizer::HazardType -ARMHazardRecognizer::getHazardType(SUnit *SU) { +ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead"); + MachineInstr *MI = SU->getInstr(); if (!MI->isDebugValue()) { @@ -61,19 +63,19 @@ ARMHazardRecognizer::getHazardType(SUnit *SU) { (TII.canCauseFpMLxStall(MI->getOpcode()) || hasRAWHazard(DefMI, MI, TRI))) { // Try to schedule another instruction for the next 4 cycles. - if (Stalls == 0) - Stalls = 4; + if (FpMLxStalls == 0) + FpMLxStalls = 4; return Hazard; } } } - return ScoreboardHazardRecognizer::getHazardType(SU); + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); } void ARMHazardRecognizer::Reset() { LastMI = 0; - Stalls = 0; + FpMLxStalls = 0; ITBlockSize = 0; ScoreboardHazardRecognizer::Reset(); } @@ -100,14 +102,14 @@ void ARMHazardRecognizer::EmitInstruction(SUnit *SU) { if (!MI->isDebugValue()) { LastMI = MI; - Stalls = 0; + FpMLxStalls = 0; } ScoreboardHazardRecognizer::EmitInstruction(SU); } void ARMHazardRecognizer::AdvanceCycle() { - if (Stalls && --Stalls == 0) + if (FpMLxStalls && --FpMLxStalls == 0) // Stalled for 4 cycles but still can't schedule any other instructions. LastMI = 0; ScoreboardHazardRecognizer::AdvanceCycle(); diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h index 9473bc52072..2bc218d8566 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.h +++ b/lib/Target/ARM/ARMHazardRecognizer.h @@ -29,7 +29,7 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer { const ARMSubtarget &STI; MachineInstr *LastMI; - unsigned Stalls; + unsigned FpMLxStalls; unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. MachineInstr *ITBlockMIs[4]; @@ -37,11 +37,12 @@ public: ARMHazardRecognizer(const InstrItineraryData *ItinData, const ARMBaseInstrInfo &tii, const ARMBaseRegisterInfo &tri, - const ARMSubtarget &sti) : - ScoreboardHazardRecognizer(ItinData), TII(tii), TRI(tri), STI(sti), - LastMI(0), ITBlockSize(0) {} + const ARMSubtarget &sti, + const ScheduleDAG *DAG) : + ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii), + TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {} - virtual HazardType getHazardType(SUnit *SU); + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void Reset(); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index b0057bd2093..6290e676758 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -140,6 +140,9 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, FSWithArch = FS; CPUString = ParseSubtargetFeatures(FSWithArch, CPUString); + // After parsing Itineraries, set ItinData.IssueWidth. + computeIssueWidth(); + // Thumb2 implies at least V6T2. if (ARMArchVersion >= V6T2) ThumbMode = Thumb2; @@ -224,6 +227,21 @@ unsigned ARMSubtarget::getMispredictionPenalty() const { return 10; } +void ARMSubtarget::computeIssueWidth() { + unsigned allStage1Units = 0; + for (const InstrItinerary *itin = InstrItins.Itineraries; + itin->FirstStage != ~0U; ++itin) { + const InstrStage *IS = InstrItins.Stages + itin->FirstStage; + allStage1Units |= IS->getUnits(); + } + InstrItins.IssueWidth = 0; + while (allStage1Units) { + ++InstrItins.IssueWidth; + // clear the lowest bit + allStage1Units ^= allStage1Units & ~(allStage1Units - 1); + } +} + bool ARMSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtarget::AntiDepBreakMode& Mode, diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index fdd394dce3d..8d911d010a4 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -156,6 +156,8 @@ protected: std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); + void computeIssueWidth(); + bool hasV4TOps() const { return ARMArchVersion >= V4T; } bool hasV5TOps() const { return ARMArchVersion >= V5T; } bool hasV5TEOps() const { return ARMArchVersion >= V5TE; } diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp index 9dbab1da990..403d7ef1fd9 100644 --- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp +++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp @@ -41,12 +41,14 @@ SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) : /// /// \return NoHazard ScheduleHazardRecognizer::HazardType -SPUHazardRecognizer::getHazardType(SUnit *SU) +SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Initial thoughts on how to do this, but this code cannot work unless the // function's prolog and epilog code are also being scheduled so that we can // accurately determine which pipeline is being scheduled. #if 0 + assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead"); + const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); ScheduleHazardRecognizer::HazardType retval = NoHazard; bool mustBeOdd = false; diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h index 3469292d57f..675632cc7f1 100644 --- a/lib/Target/CellSPU/SPUHazardRecognizers.h +++ b/lib/Target/CellSPU/SPUHazardRecognizers.h @@ -30,7 +30,7 @@ private: public: SPUHazardRecognizer(const TargetInstrInfo &TII); - virtual HazardType getHazardType(SUnit *SU); + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); virtual void EmitNoop(); diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index a3ad3436faa..24c23dd6978 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -301,14 +301,6 @@ namespace { return "Cell SPU DAG->DAG Pattern Instruction Selection"; } - /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for - /// this target when scheduling the DAG. - virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { - const TargetInstrInfo *II = TM.getInstrInfo(); - assert(II && "No InstrInfo?"); - return new SPUHazardRecognizer(*II); - } - private: SDValue getRC( MVT ); diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 26d6b4f25ef..e5048e6d4ea 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -16,6 +16,7 @@ #include "SPUInstrBuilder.h" #include "SPUTargetMachine.h" #include "SPUGenInstrInfo.inc" +#include "SPUHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -54,6 +55,16 @@ SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) RI(*TM.getSubtargetImpl(), *this) { /* NOP */ } +/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for +/// this target when scheduling the DAG. +ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer( + const TargetMachine *TM, + const ScheduleDAG *DAG) const { + const TargetInstrInfo *TII = TM->getInstrInfo(); + assert(TII && "No InstrInfo?"); + return new SPUHazardRecognizer(*TII); +} + unsigned SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index 191e55d0ca6..e5e91481419 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -32,6 +32,10 @@ namespace llvm { /// virtual const SPURegisterInfo &getRegisterInfo() const { return RI; } + ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 301e89cdcb3..0de5844d1c2 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -122,7 +122,9 @@ isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { /// instructions that wouldn't terminate the dispatch group that would cause a /// pipeline flush. ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: -getHazardType(SUnit *SU) { +getHazardType(SUnit *SU, int Stalls) { + assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); + const SDNode *Node = SU->getNode()->getGluedMachineNode(); bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index ca95f7be9c9..2f81f0f7c7f 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -48,7 +48,7 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { public: PPCHazardRecognizer970(const TargetInstrInfo &TII); - virtual HazardType getHazardType(SUnit *SU); + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 0d624d08cd2..664bfe7a9c0 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,7 +16,6 @@ #include "PPC.h" #include "PPCPredicates.h" #include "PPCTargetMachine.h" -#include "PPCHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -155,16 +154,6 @@ namespace { return "PowerPC DAG->DAG Pattern Instruction Selection"; } - /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for - /// this target when scheduling the DAG. - virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { - // Should use subtarget info to pick the right hazard recognizer. For - // now, always return a PPC970 recognizer. - const TargetInstrInfo *II = TM.getInstrInfo(); - assert(II && "No InstrInfo?"); - return new PPCHazardRecognizer970(*II); - } - // Include the pieces autogenerated from the target description. #include "PPCGenDAGISel.inc" diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 8093789127e..53b049135e2 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -17,6 +17,7 @@ #include "PPCPredicates.h" #include "PPCGenInstrInfo.inc" #include "PPCTargetMachine.h" +#include "PPCHazardRecognizers.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -39,6 +40,18 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} +/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for +/// this target when scheduling the DAG. +ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( + const TargetMachine *TM, + const ScheduleDAG *DAG) const { + // Should use subtarget info to pick the right hazard recognizer. For + // now, always return a PPC970 recognizer. + const TargetInstrInfo *TII = TM->getInstrInfo(); + assert(TII && "No InstrInfo?"); + return new PPCHazardRecognizer970(*TII); +} + unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 40835779976..b5249ae0376 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -82,6 +82,10 @@ public: /// virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; } + ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI,