From: Evan Cheng Date: Fri, 5 Oct 2007 01:39:18 +0000 (+0000) Subject: If a node that defines a physical register that is expensive to copy. The X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=f10c973797cf79da802f9b0118543cbd50954c9c;p=oota-llvm.git If a node defines a physical register that is expensive to copy, the scheduler will try a number of tricks in order to avoid generating the copies. This may not be possible if the node produces a chain value that prevents movement. In that case, try unfolding the load from the node first to allow it to be moved / cloned. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@42625 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 912437d4406..5dbdd605bc7 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -115,7 +115,7 @@ namespace llvm { short NumSuccsLeft; // # of succs not scheduled. bool isTwoAddress : 1; // Is a two-address instruction. bool isCommutable : 1; // Is a commutable instruction. - bool hasPhysRegDefs : 1; // Has physreg defs that are being used. + bool hasPhysRegDefs : 1; // Has physreg defs that are being used. bool isPending : 1; // True once pending. bool isAvailable : 1; // True once available. bool isScheduled : 1; // True once scheduled. @@ -297,6 +297,10 @@ namespace llvm { /// together nodes with a single SUnit. void BuildSchedUnits(); + /// ComputeLatency - Compute node latency. + /// + void ComputeLatency(SUnit *SU); + /// CalculateDepths, CalculateHeights - Calculate node depth / height. 
/// void CalculateDepths(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp index bd5c5983327..b616b7e4825 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAG.cpp @@ -68,6 +68,7 @@ SUnit *ScheduleDAG::Clone(SUnit *Old) { return SU; } + /// BuildSchedUnits - Build SUnits from the selection dag that we are input. /// This SUnit graph is similar to the SelectionDAG, but represents flagged /// together nodes with a single SUnit. @@ -77,8 +78,6 @@ void ScheduleDAG::BuildSchedUnits() { // invalidated. SUnits.reserve(std::distance(DAG.allnodes_begin(), DAG.allnodes_end())); - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(), E = DAG.allnodes_end(); NI != E; ++NI) { if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. @@ -131,32 +130,8 @@ void ScheduleDAG::BuildSchedUnits() { // Update the SUnit NodeSUnit->Node = N; SUnitMap[N].push_back(NodeSUnit); - - // Compute the latency for the node. We use the sum of the latencies for - // all nodes flagged together into this SUnit. - if (InstrItins.isEmpty()) { - // No latency information. - NodeSUnit->Latency = 1; - } else { - NodeSUnit->Latency = 0; - if (N->isTargetOpcode()) { - unsigned SchedClass = TII->getSchedClass(N->getTargetOpcode()); - InstrStage *S = InstrItins.begin(SchedClass); - InstrStage *E = InstrItins.end(SchedClass); - for (; S != E; ++S) - NodeSUnit->Latency += S->Cycles; - } - for (unsigned i = 0, e = NodeSUnit->FlaggedNodes.size(); i != e; ++i) { - SDNode *FNode = NodeSUnit->FlaggedNodes[i]; - if (FNode->isTargetOpcode()) { - unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode()); - InstrStage *S = InstrItins.begin(SchedClass); - InstrStage *E = InstrItins.end(SchedClass); - for (; S != E; ++S) - NodeSUnit->Latency += S->Cycles; - } - } - } + + ComputeLatency(NodeSUnit); } // Pass 2: add the preds, succs, etc. 
@@ -214,6 +189,36 @@ void ScheduleDAG::BuildSchedUnits() { return; } +void ScheduleDAG::ComputeLatency(SUnit *SU) { + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + + // Compute the latency for the node. We use the sum of the latencies for + // all nodes flagged together into this SUnit. + if (InstrItins.isEmpty()) { + // No latency information. + SU->Latency = 1; + } else { + SU->Latency = 0; + if (SU->Node->isTargetOpcode()) { + unsigned SchedClass = TII->getSchedClass(SU->Node->getTargetOpcode()); + InstrStage *S = InstrItins.begin(SchedClass); + InstrStage *E = InstrItins.end(SchedClass); + for (; S != E; ++S) + SU->Latency += S->Cycles; + } + for (unsigned i = 0, e = SU->FlaggedNodes.size(); i != e; ++i) { + SDNode *FNode = SU->FlaggedNodes[i]; + if (FNode->isTargetOpcode()) { + unsigned SchedClass = TII->getSchedClass(FNode->getTargetOpcode()); + InstrStage *S = InstrItins.begin(SchedClass); + InstrStage *E = InstrItins.end(SchedClass); + for (; S != E; ++S) + SU->Latency += S->Cycles; + } + } + } +} + void ScheduleDAG::CalculateDepths() { std::vector > WorkList; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index ff751a94441..0575b41d1f5 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -34,6 +34,7 @@ using namespace llvm; STATISTIC(NumBacktracks, "Number of times scheduler backtraced"); +STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); STATISTIC(NumCCCopies, "Number of cross class copies"); @@ -385,32 +386,145 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle, ++NumBacktracks; } -/// isSafeToCopy - True if the SUnit for the given SDNode can safely cloned, -/// i.e. the node does not produce a flag, it does not read a flag and it does -/// not have an incoming chain. 
-static bool isSafeToCopy(SDNode *N) { +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->FlaggedNodes.size()) + return NULL; + + SDNode *N = SU->Node; if (!N) - return true; + return NULL; + SUnit *NewSU; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) if (N->getValueType(i) == MVT::Flag) - return false; + return NULL; + bool TryUnfold = false; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDOperand &Op = N->getOperand(i); MVT::ValueType VT = Op.Val->getValueType(Op.ResNo); - if (VT == MVT::Other || VT == MVT::Flag) - return false; + if (VT == MVT::Flag) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; } - return true; -} + if (TryUnfold) { + SmallVector NewNodes; + if (!MRI->unfoldMemoryOperand(DAG, N, NewNodes)) + return NULL; + + DOUT << "Unfolding SU # " << SU->NodeNum << "\n"; + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + std::vector Deleted; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->Node->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, i), + SDOperand(N, i), Deleted); + DAG.ReplaceAllUsesOfValueWith(SDOperand(SU->Node, OldNumVals-1), + SDOperand(LoadNode, 1), Deleted); + + SUnit *LoadSU = NewSUnit(LoadNode); + SUnit *NewSU = NewSUnit(N); + SUnitMap[LoadNode].push_back(LoadSU); + SUnitMap[N].push_back(NewSU); + const TargetInstrDescriptor *TID = &TII->get(LoadNode->getTargetOpcode()); + for (unsigned i = 0; i != TID->numOperands; ++i) { + if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) { + LoadSU->isTwoAddress = true; + break; + } + } + if (TID->Flags & M_COMMUTABLE) + LoadSU->isCommutable = true; -/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled -/// successors to the newly 
created node. -SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { - DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + TID = &TII->get(N->getTargetOpcode()); + for (unsigned i = 0; i != TID->numOperands; ++i) { + if (TID->getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID->Flags & M_COMMUTABLE) + NewSU->isCommutable = true; + + // FIXME: Calculate height / depth and propagate the changes? + LoadSU->Depth = NewSU->Depth = SU->Depth; + LoadSU->Height = NewSU->Height = SU->Height; + ComputeLatency(LoadSU); + ComputeLatency(NewSU); + + SUnit *ChainPred = NULL; + SmallVector ChainSuccs; + SmallVector LoadPreds; + SmallVector NodePreds; + SmallVector NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl) + ChainPred = I->Dep; + else if (I->Dep->Node && I->Dep->Node->isOperand(LoadNode)) + LoadPreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false)); + else + NodePreds.push_back(SDep(I->Dep, I->Reg, I->Cost, false, false)); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl) + ChainSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost, + I->isCtrl, I->isSpecial)); + else + NodeSuccs.push_back(SDep(I->Dep, I->Reg, I->Cost, + I->isCtrl, I->isSpecial)); + } - SUnit *NewSU = Clone(SU); + SU->removePred(ChainPred, true, false); + LoadSU->addPred(ChainPred, true, false); + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + SDep *Pred = &LoadPreds[i]; + SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial); + LoadSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial, + Pred->Reg, Pred->Cost); + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + SDep *Pred = &NodePreds[i]; + SU->removePred(Pred->Dep, Pred->isCtrl, Pred->isSpecial); + NewSU->addPred(Pred->Dep, Pred->isCtrl, Pred->isSpecial, + Pred->Reg, Pred->Cost); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { 
+ SDep *Succ = &NodeSuccs[i]; + Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial); + Succ->Dep->addPred(NewSU, Succ->isCtrl, Succ->isSpecial, + Succ->Reg, Succ->Cost); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep *Succ = &ChainSuccs[i]; + Succ->Dep->removePred(SU, Succ->isCtrl, Succ->isSpecial); + Succ->Dep->addPred(LoadSU, Succ->isCtrl, Succ->isSpecial, + Succ->Reg, Succ->Cost); + } + NewSU->addPred(LoadSU, false, false); + + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } else + SU = NewSU; + } + + DOUT << "Duplicating SU # " << SU->NodeNum << "\n"; + NewSU = Clone(SU); // New SUnit has the exact same predecessors. for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); @@ -452,6 +566,7 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVector &Copies) { + abort(); SUnit *CopyFromSU = NewSUnit(NULL); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; @@ -640,10 +755,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - SUnit *NewDef; - if (isSafeToCopy(LRDef->Node)) - NewDef = CopyAndMoveSuccessors(LRDef); - else { + SUnit *NewDef = CopyAndMoveSuccessors(LRDef); + if (!NewDef) { // Issue expensive cross register class copies. MVT::ValueType VT = getPhysicalRegisterVT(LRDef->Node, Reg, TII); const TargetRegisterClass *RC =