From b4566a999970b514d7c6973d99e293a6625d3f70 Mon Sep 17 00:00:00 2001
From: Andrew Trick
Date: Wed, 22 Feb 2012 06:08:11 +0000
Subject: [PATCH] Initialize SUnits before DAG building.

Effect on SD scheduling and postRA scheduling: Printing the DAG will
display the nodes in top-down topological order. This matches the order
within the MBB and makes my life much easier in general.

Effect on misched: We don't need to track virtual register uses at all.
This is awesome. I also intend to rely on the SUnit ID as a topo-sort
index. So if A < B then we cannot have an edge B -> A.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151135 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/ScheduleDAG.h     |   1 +
 lib/CodeGen/CriticalAntiDepBreaker.cpp |   2 +
 lib/CodeGen/LatencyPriorityQueue.cpp   |   2 +-
 lib/CodeGen/MachineScheduler.cpp       |  13 +--
 lib/CodeGen/ScheduleDAGInstrs.cpp      | 144 ++++++++++++++-----------
 lib/CodeGen/ScheduleDAGInstrs.h        |  27 +++--
 6 files changed, 114 insertions(+), 75 deletions(-)

diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 8e75948d7f6..72eadd9698c 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -231,6 +231,7 @@ namespace llvm {
   public:
     SUnit *OrigNode;                    // If not this, the node from which
                                         // this node was cloned.
+                                        // (SD scheduling only)

     // Preds/Succs - The SUnits before/after us in the graph.
     SmallVector<SDep, 4> Preds;  // All sunit predecessors.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 5c325396dbe..0751229f4e0 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -427,6 +427,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,

   // Keep a map of the MachineInstr*'s back to the SUnit representing them.
   // This is used for updating debug information.
+  //
+  // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
   DenseMap<MachineInstr*, const SUnit*> MISUnitMap;

   // Find the node at the bottom of the critical path.
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 0eb009ddac2..0578229432d 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -46,7 +46,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {

   // Finally, just to provide a stable ordering, use the node number as a
   // deciding factor.
-  return LHSNum < RHSNum;
+  return RHSNum < LHSNum;
 }
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 6034c9082c4..8a485e0fdef 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -149,7 +149,8 @@ protected:
   MachineScheduler *Pass;
 public:
   ScheduleTopDownLive(MachineScheduler *P):
-    ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false), Pass(P) {}
+    ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS),
+    Pass(P) {}

   /// ScheduleDAGInstrs callback.
   void Schedule();
@@ -310,7 +311,8 @@ class DefaultMachineScheduler : public ScheduleDAGInstrs {
   MachineScheduler *Pass;
 public:
   DefaultMachineScheduler(MachineScheduler *P):
-    ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false), Pass(P) {}
+    ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS),
+    Pass(P) {}

   /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
   /// time to do some work.
@@ -348,15 +350,14 @@ void DefaultMachineScheduler::Schedule() {

 #ifndef NDEBUG
 namespace {
-// Nodes with a higher number have lower priority. This way we attempt to
+// Nodes with a higher number have higher priority. This way we attempt to
 // schedule the latest instructions earliest.
 //
 // TODO: Relies on the property of the BuildSchedGraph that results in SUnits
-// being ordered in sequence bottom-up. This will be formalized, probably be
-// constructing SUnits in a prepass.
+// being ordered in sequence top-down.
 struct ShuffleSUnitOrder {
   bool operator()(SUnit *A, SUnit *B) const {
-    return A->NodeNum > B->NodeNum;
+    return A->NodeNum < B->NodeNum;
   }
 };

diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 879b65f9d08..a0992c1c464 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Operator.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -34,11 +35,14 @@ using namespace llvm;
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineLoopInfo &mli,
                                      const MachineDominatorTree &mdt,
-                                     bool IsPostRAFlag)
+                                     bool IsPostRAFlag,
+                                     LiveIntervals *lis)
   : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
     InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag),
-    UnitLatencies(false), Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
+    LIS(lis), UnitLatencies(false),
+    Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
     LoopRegs(MLI, MDT), FirstDbgValue(0) {
+  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
   DbgValues.clear();
   assert(!(IsPostRA && MF.getRegInfo().getNumVirtRegs()) &&
          "Virtual registers must be removed prior to PostRA scheduling");
@@ -357,8 +361,6 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
   const MachineInstr *MI = SU->getInstr();
   unsigned Reg = MI->getOperand(OperIdx).getReg();

-  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
-
   // Add output dependence to the next nearest def of this vreg.
   //
   // Unless this definition is dead, the output dependence should be
@@ -366,60 +368,94 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
   // uses. We're conservative for now until we have a way to guarantee the uses
   // are not eliminated sometime during scheduling. The output dependence edge
   // is also useful if output latency exceeds def-use latency.
-  SUnit *DefSU = VRegDefs[Reg];
+  SUnit *&DefSU = VRegDefs[Reg];
   if (DefSU && DefSU != SU && DefSU != &ExitSU) {
     unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
                                                 DefSU->getInstr());
     DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
   }
-  VRegDefs[Reg] = SU;
+  DefSU = SU;
+}

-  // Add data dependence to any uses of this vreg before the next nearest def.
-  //
-  // TODO: Handle ExitSU properly.
-  //
-  // TODO: Data dependence could be handled more efficiently at the use-side.
-  std::vector<SUnit*> &UseList = VRegUses[Reg];
-  for (std::vector<SUnit*>::const_iterator UI = UseList.begin(),
-         UE = UseList.end(); UI != UE; ++UI) {
-    SUnit *UseSU = *UI;
-    if (UseSU == SU) continue;
-
-    // TODO: Handle "special" address latencies cleanly.
-    const SDep& dep = SDep(SU, SDep::Data, SU->Latency, Reg);
-    if (!UnitLatencies) {
-      // Adjust the dependence latency using operand def/use information, then
-      // allow the target to perform its own adjustments.
-      ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
-      ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+/// addVRegUseDeps - Add a register data dependency if the instruction that
+/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
+/// register antidependency from this SUnit to instructions that occur later in
+/// the same scheduling region if they write the virtual register.
+///
+/// TODO: Handle ExitSU "uses" properly.
+void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
+  MachineInstr *MI = SU->getInstr();
+  unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+  // Lookup this operand's reaching definition.
+  assert(LIS && "vreg dependencies requires LiveIntervals");
+  SlotIndex UseIdx = LIS->getSlotIndexes()->getInstructionIndex(MI);
+  LiveInterval *LI = &LIS->getInterval(Reg);
+  VNInfo *VNI = LI->getVNInfoAt(UseIdx);
+  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
+  if (Def) {
+    SUnit *DefSU = getSUnit(Def);
+    if (DefSU) {
+      // The reaching Def lives within this scheduling region.
+      // Create a data dependence.
+      //
+      // TODO: Handle "special" address latencies cleanly.
+      const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+      if (!UnitLatencies) {
+        // Adjust the dependence latency using operand def/use information, then
+        // allow the target to perform its own adjustments.
+        ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+        const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+        ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
+      }
+      SU->addPred(dep);
     }
-    UseSU->addPred(dep);
   }
-  UseList.clear();
+
+  // Add antidependence to the following def of the vreg it uses.
+  DenseMap<unsigned, SUnit*>::const_iterator I = VRegDefs.find(Reg);
+  if (I != VRegDefs.end()) {
+    SUnit *DefSU = I->second;
+    if (DefSU != SU)
+      DefSU->addPred(SDep(SU, SDep::Anti, 0, Reg));
+  }
 }

-/// addVRegUseDeps - Add register antidependencies from this SUnit to
-/// instructions that occur later in the same scheduling region if they
-/// write the virtual register referenced at OperIdx.
-void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
-  unsigned Reg = SU->getInstr()->getOperand(OperIdx).getReg();
+/// Create an SUnit for each real instruction, numbered in top-down topological
+/// order. The instruction order A < B implies that no edge exists from B to A.
+///
+/// Map each real instruction to its SUnit.
+///
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler
+/// may hang onto SUnit pointers. We may relax this in the future by using SUnit
+/// IDs instead of pointers.
+void ScheduleDAGInstrs::initSUnits() {
+  // We'll be allocating one SUnit for each real instruction in the region,
+  // which is contained within a basic block.
+  SUnits.reserve(BB->size());

-  // Add antidependence to the following def of the vreg it uses.
-  SUnit *DefSU = VRegDefs[Reg];
-  if (DefSU && DefSU != SU)
-    DefSU->addPred(SDep(SU, SDep::Anti, 0, Reg));
+  for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) {
+    MachineInstr *MI = I;
+    if (MI->isDebugValue())
+      continue;

-  // Add this SUnit to the use list of the vreg it uses.
-  //
-  // TODO: pinch the DAG before we see too many uses to avoid quadratic
-  // behavior. Limiting the scheduling window can accomplish the same thing.
-  VRegUses[Reg].push_back(SU);
+    SUnit *SU = NewSUnit(MI);
+    MISUnitMap[MI] = SU;
+
+    SU->isCall = MI->isCall();
+    SU->isCommutable = MI->isCommutable();
+
+    // Assign the Latency field of SU using target-provided information.
+    if (UnitLatencies)
+      SU->Latency = 1;
+    else
+      ComputeLatency(SU);
+  }
 }

 void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
-  // We'll be allocating one SUnit for each instruction, plus one for
-  // the region exit node.
-  SUnits.reserve(BB->size());
+  // Create an SUnit for each real instruction.
+  initSUnits();

   // We build scheduling units by walking a block's instruction list from bottom
   // to top.
@@ -447,14 +483,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
     assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
   }

-  // Reinitialize the large VReg vectors, while reusing the memory.
-  //
-  // Note: this can be an expensive part of DAG building. We may want to be more
-  // clever. Reevaluate after VRegUses goes away.
-  assert(VRegDefs.size() == 0 && VRegUses.size() == 0 &&
-         "Only BuildSchedGraph should access VRegDefs/Uses");
-  VRegDefs.resize(MF.getRegInfo().getNumVirtRegs());
-  VRegUses.resize(MF.getRegInfo().getNumVirtRegs());
+  assert(VRegDefs.size() == 0 && "Only BuildSchedGraph may access VRegDefs");

   // Walk the list of instructions, from bottom moving up.
   MachineInstr *PrevMI = NULL;
@@ -473,16 +502,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
     assert(!MI->isTerminator() && !MI->isLabel() &&
            "Cannot schedule terminators or labels!");

-    // Create the SUnit for this MI.
-    SUnit *SU = NewSUnit(MI);
-    SU->isCall = MI->isCall();
-    SU->isCommutable = MI->isCommutable();

-    // Assign the Latency field of SU using target-provided information.
-    if (UnitLatencies)
-      SU->Latency = 1;
-    else
-      ComputeLatency(SU);
+    SUnit *SU = MISUnitMap[MI];
+    assert(SU && "No SUnit mapped to this MI");

     // Add register-based dependencies (data, anti, and output).
     for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
@@ -657,8 +679,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
       Uses[i].clear();
   }
   VRegDefs.clear();
-  VRegUses.clear();
   PendingLoads.clear();
+  MISUnitMap.clear();
 }

 void ScheduleDAGInstrs::FinishBlock() {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index 17183644cc3..9a209fff4c4 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -27,6 +27,7 @@
 namespace llvm {
   class MachineLoopInfo;
   class MachineDominatorTree;
+  class LiveIntervals;

   /// LoopDependencies - This class analyzes loop-oriented register
   /// dependencies, which are used to guide scheduling decisions.
@@ -108,6 +109,11 @@ namespace llvm {
     /// isPostRA flag indicates vregs cannot be present.
     bool IsPostRA;

+    /// Live Intervals provides reaching defs in preRA scheduling.
+    LiveIntervals *LIS;
+
+    DenseMap<MachineInstr*, SUnit*> MISUnitMap;
+
     /// UnitLatencies (misnamed) flag avoids computing def-use latencies, using
     /// the def-side latency only.
     bool UnitLatencies;
@@ -119,12 +125,9 @@ namespace llvm {
     std::vector<std::vector<SUnit *> > Defs;
     std::vector<std::vector<SUnit *> > Uses;

-    // Virtual register Defs and Uses.
-    //
-    // TODO: Eliminate VRegUses by creating SUnits in a prepass and looking up
-    // the live range's reaching def.
-    IndexedMap<SUnit*, VirtReg2IndexFunctor> VRegDefs;
-    IndexedMap<std::vector<SUnit*>, VirtReg2IndexFunctor> VRegUses;
+    // Track the last instruction in this region defining each virtual register.
+    // FIXME: turn this into a sparse set with constant time clear().
+    DenseMap<unsigned, SUnit*> VRegDefs;

     /// PendingLoads - Remember where unknown loads are after the most recent
     /// unknown store, as we iterate. As with Defs and Uses, this is here
@@ -152,7 +155,8 @@ namespace llvm {
     explicit ScheduleDAGInstrs(MachineFunction &mf,
                                const MachineLoopInfo &mli,
                                const MachineDominatorTree &mdt,
-                               bool IsPostRAFlag);
+                               bool IsPostRAFlag,
+                               LiveIntervals *LIS = 0);

     virtual ~ScheduleDAGInstrs() {}

@@ -169,6 +173,7 @@ namespace llvm {
       return &SUnits.back();
     }

+
     /// Run - perform scheduling.
     ///
     void Run(MachineBasicBlock *bb,
@@ -219,6 +224,14 @@ namespace llvm {
     virtual std::string getGraphNodeLabel(const SUnit *SU) const;

   protected:
+    SUnit *getSUnit(MachineInstr *MI) const {
+      DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
+      if (I == MISUnitMap.end())
+        return 0;
+      return I->second;
+    }
+
+    void initSUnits();
     void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
     void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
     void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
-- 
2.34.1
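
The misched note above leans on one invariant: after initSUnits(), the SUnit ID doubles as a topo-sort index, so if A < B there can be no edge B -> A. The stand-alone sketch below illustrates that invariant with simplified stand-in types (MiniSUnit, isTopoOrdered); it is not LLVM's SUnit/SDep API, just a model of the numbering property a scheduler could assert after DAG building.

// Hypothetical, simplified model of the invariant described above: node IDs
// are assigned in top-down block order, so every dependence edge must point
// from a lower-numbered node to a higher-numbered one. Not LLVM code.
#include <cassert>
#include <vector>

struct MiniSUnit {
  unsigned NodeNum;                // top-down topological index
  std::vector<unsigned> Succs;     // NodeNums of dependent (later) nodes
};

// Returns true iff no edge B -> A exists with A < B, i.e. the node numbering
// is a valid topological order of the dependence graph.
static bool isTopoOrdered(const std::vector<MiniSUnit> &DAG) {
  for (const MiniSUnit &SU : DAG)
    for (unsigned SuccNum : SU.Succs)
      if (SuccNum <= SU.NodeNum)
        return false;
  return true;
}

int main() {
  // Three "instructions" in block order; all edges go forward.
  std::vector<MiniSUnit> DAG = {{0, {1, 2}}, {1, {2}}, {2, {}}};
  assert(isTopoOrdered(DAG));
  return 0;
}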
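
The same idea drives initSUnits() and MISUnitMap: build one node per real instruction in a prepass, record an instruction-to-node map, and look nodes up during the later bottom-up dependence walk instead of creating them there. A minimal sketch of that pattern, using illustrative stand-ins (Instr, Node, MIMap) rather than the real MachineInstr/SUnit/DenseMap types:

// Illustrative stand-ins only; the real code uses MachineInstr*, SUnit, and
// DenseMap inside ScheduleDAGInstrs.
#include <cstdio>
#include <unordered_map>
#include <vector>

struct Instr { int Opcode; };
struct Node  { unsigned NodeNum; const Instr *MI; };

int main() {
  std::vector<Instr> Block = {{10}, {11}, {12}};

  // Prepass: create nodes in top-down order and map each instruction to its node.
  std::vector<Node> Nodes;
  Nodes.reserve(Block.size());                       // node pointers stay stable
  std::unordered_map<const Instr *, Node *> MIMap;
  for (const Instr &I : Block) {
    Nodes.push_back({static_cast<unsigned>(Nodes.size()), &I});
    MIMap[&I] = &Nodes.back();
  }

  // Later walk: look nodes up instead of creating them on the fly.
  for (const Instr &I : Block) {
    Node *N = MIMap.at(&I);
    std::printf("instr opcode %d -> node %u\n", I.Opcode, N->NodeNum);
  }
  return 0;
}

Reserving the node vector up front mirrors the patch's own note that the SUnits vector cannot be resized once the scheduler holds SUnit pointers.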