X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FCodeGen%2FStrongPHIElimination.cpp;h=b337c5393343b0cd11bcd0b41f2841c6eb2ceb1f;hp=b04a87929094b7b23c643fa2099b53b9ccb25022;hb=d04a8d4b33ff316ca4cf961e06c9e312eff8e64f;hpb=d723f722b2457dd847ece84f9cfa7cfae33f9bb0 diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index b04a8792909..b337c539334 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -1,4 +1,4 @@ -//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===// +//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===// // // The LLVM Compiler Infrastructure // @@ -7,922 +7,819 @@ // //===----------------------------------------------------------------------===// // -// This pass eliminates machine instruction PHI nodes by inserting copy -// instructions, using an intelligent copy-folding technique based on -// dominator information. This is technique is derived from: +// This pass eliminates PHI instructions by aggressively coalescing the copies +// that would be inserted by a naive algorithm and only inserting the copies +// that are necessary. The coalescing technique initially assumes that all +// registers appearing in a PHI instruction do not interfere. It then eliminates +// proven interferences, using dominators to only perform a linear number of +// interference tests instead of the quadratic number of interference tests +// that this would naively require. This is a technique derived from: // // Budimlic, et al. Fast copy coalescing and live-range identification. // In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language // Design and Implementation (Berlin, Germany, June 17 - 19, 2002). // PLDI '02. ACM, New York, NY, 25-32. -// DOI= http://doi.acm.org/10.1145/512529.512534 +// +// The original implementation constructs a data structure they call a dominance +// forest for this purpose. The dominance forest was shown to be unnecessary, +// as it is possible to emulate the creation and traversal of a dominance forest +// by directly using the dominator tree, rather than actually constructing the +// dominance forest. This technique is explained in: +// +// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code +// Quality and Efficiency, +// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code +// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009). +// CGO '09. IEEE, Washington, DC, 114-125. +// +// Careful implementation allows for all of the dominator forest interference +// checks to be performed at once in a single depth-first traversal of the +// dominator tree, which is what is implemented here. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "strongphielim" #include "llvm/CodeGen/Passes.h" +#include "PHIEliminationUtils.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" using namespace llvm; namespace { - struct VISIBILITY_HIDDEN StrongPHIElimination : public MachineFunctionPass { + class StrongPHIElimination : public MachineFunctionPass { + public: static char ID; // Pass identification, replacement for typeid - StrongPHIElimination() : MachineFunctionPass((intptr_t)&ID) {} - - // Waiting stores, for each MBB, the set of copies that need to - // be inserted into that MBB - DenseMap > Waiting; - - // Stacks holds the renaming stack for each register - std::map > Stacks; - - // Registers in UsedByAnother are PHI nodes that are themselves - // used as operands to another another PHI node - std::set UsedByAnother; - - // RenameSets are the is a map from a PHI-defined register - // to the input registers to be coalesced along with the - // predecessor block for those input registers. - std::map > RenameSets; - - // PhiValueNumber holds the ID numbers of the VNs for each phi that we're - // eliminating, indexed by the register defined by that phi. - std::map PhiValueNumber; - - // Store the DFS-in number of each block - DenseMap preorder; - - // Store the DFS-out number of each block - DenseMap maxpreorder; - - bool runOnMachineFunction(MachineFunction &Fn); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - - // TODO: Actually make this true. - AU.addPreserved(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual void releaseMemory() { - preorder.clear(); - maxpreorder.clear(); - - Waiting.clear(); - Stacks.clear(); - UsedByAnother.clear(); - RenameSets.clear(); + StrongPHIElimination() : MachineFunctionPass(ID) { + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); } + virtual void getAnalysisUsage(AnalysisUsage&) const; + bool runOnMachineFunction(MachineFunction&); + private: - - /// DomForestNode - Represents a node in the "dominator forest". This is - /// a forest in which the nodes represent registers and the edges - /// represent a dominance relation in the block defining those registers. - struct DomForestNode { - private: - // Store references to our children - std::vector children; - // The register we represent - unsigned reg; - - // Add another node as our child - void addChild(DomForestNode* DFN) { children.push_back(DFN); } - - public: - typedef std::vector::iterator iterator; - - // Create a DomForestNode by providing the register it represents, and - // the node to be its parent. The virtual root node has register 0 - // and a null parent. - DomForestNode(unsigned r, DomForestNode* parent) : reg(r) { - if (parent) - parent->addChild(this); - } - - ~DomForestNode() { - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; - } - - /// getReg - Return the regiser that this node represents - inline unsigned getReg() { return reg; } - - // Provide iterator access to our children - inline DomForestNode::iterator begin() { return children.begin(); } - inline DomForestNode::iterator end() { return children.end(); } + /// This struct represents a single node in the union-find data structure + /// representing the variable congruence classes. There is one difference + /// from a normal union-find data structure. We steal two bits from the parent + /// pointer . One of these bits is used to represent whether the register + /// itself has been isolated, and the other is used to represent whether the + /// PHI with that register as its destination has been isolated. + /// + /// Note that this leads to the strange situation where the leader of a + /// congruence class may no longer logically be a member, due to being + /// isolated. + struct Node { + enum Flags { + kRegisterIsolatedFlag = 1, + kPHIIsolatedFlag = 2 + }; + Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); } + + Node *getLeader(); + + PointerIntPair parent; + unsigned value; + unsigned rank; }; - - void computeDFS(MachineFunction& MF); - void processBlock(MachineBasicBlock* MBB); - - std::vector computeDomForest( - std::map& instrs, - MachineRegisterInfo& MRI); - void processPHIUnion(MachineInstr* Inst, - std::map& PHIUnion, - std::vector& DF, - std::vector >& locals); - void ScheduleCopies(MachineBasicBlock* MBB, std::set& pushed); - void InsertCopies(MachineBasicBlock* MBB, - SmallPtrSet& v); - void mergeLiveIntervals(unsigned primary, unsigned secondary, - MachineBasicBlock* pred); + + /// Add a register in a new congruence class containing only itself. + void addReg(unsigned); + + /// Join the congruence classes of two registers. This function is biased + /// towards the left argument, i.e. after + /// + /// addReg(r2); + /// unionRegs(r1, r2); + /// + /// the leader of the unioned congruence class is the same as the leader of + /// r1's congruence class prior to the union. This is actually relied upon + /// in the copy insertion code. + void unionRegs(unsigned, unsigned); + + /// Get the color of a register. The color is 0 if the register has been + /// isolated. + unsigned getRegColor(unsigned); + + // Isolate a register. + void isolateReg(unsigned); + + /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been + /// isolated. Otherwise, it is the original color of its destination and + /// all of its operands (before they were isolated, if they were). + unsigned getPHIColor(MachineInstr*); + + /// Isolate a PHI. + void isolatePHI(MachineInstr*); + + /// Traverses a basic block, splitting any interferences found between + /// registers in the same congruence class. It takes two DenseMaps as + /// arguments that it also updates: CurrentDominatingParent, which maps + /// a color to the register in that congruence class whose definition was + /// most recently seen, and ImmediateDominatingParent, which maps a register + /// to the register in the same congruence class that most immediately + /// dominates it. + /// + /// This function assumes that it is being called in a depth-first traversal + /// of the dominator tree. + void SplitInterferencesForBasicBlock( + MachineBasicBlock&, + DenseMap &CurrentDominatingParent, + DenseMap &ImmediateDominatingParent); + + // Lowers a PHI instruction, inserting copies of the source and destination + // registers as necessary. + void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*); + + // Merges the live interval of Reg into NewReg and renames Reg to NewReg + // everywhere that Reg appears. Requires Reg and NewReg to have non- + // overlapping lifetimes. + void MergeLIsAndRename(unsigned Reg, unsigned NewReg); + + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + MachineDominatorTree *DT; + LiveIntervals *LI; + + BumpPtrAllocator Allocator; + + DenseMap RegNodeMap; + + // Maps a basic block to a list of its defs of registers that appear as PHI + // sources. + DenseMap > PHISrcDefs; + + // Maps a color to a pair of a MachineInstr* and a virtual register, which + // is the operand of that PHI corresponding to the current basic block. + DenseMap > CurrentPHIForColor; + + // FIXME: Can these two data structures be combined? Would a std::multimap + // be any better? + + // Stores pairs of predecessor basic blocks and the source registers of + // inserted copy instructions. + typedef DenseSet > SrcCopySet; + SrcCopySet InsertedSrcCopySet; + + // Maps pairs of predecessor basic blocks and colors to their defining copy + // instructions. + typedef DenseMap, MachineInstr*> + SrcCopyMap; + SrcCopyMap InsertedSrcCopyMap; + + // Maps inserted destination copy registers to their defining copy + // instructions. + typedef DenseMap DestCopyMap; + DestCopyMap InsertedDestCopies; }; -} -char StrongPHIElimination::ID = 0; -static RegisterPass -X("strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently"); - -const PassInfo *const llvm::StrongPHIEliminationID = &X; - -/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree -/// of the given MachineFunction. These numbers are then used in other parts -/// of the PHI elimination process. -void StrongPHIElimination::computeDFS(MachineFunction& MF) { - SmallPtrSet frontier; - SmallPtrSet visited; - - unsigned time = 0; - - MachineDominatorTree& DT = getAnalysis(); - - MachineDomTreeNode* node = DT.getRootNode(); - - std::vector worklist; - worklist.push_back(node); - - while (!worklist.empty()) { - MachineDomTreeNode* currNode = worklist.back(); - - if (!frontier.count(currNode)) { - frontier.insert(currNode); - ++time; - preorder.insert(std::make_pair(currNode->getBlock(), time)); + struct MIIndexCompare { + MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { } + + bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { + return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS); } - - bool inserted = false; - for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end(); - I != E; ++I) - if (!frontier.count(*I) && !visited.count(*I)) { - worklist.push_back(*I); - inserted = true; - break; - } - - if (!inserted) { - frontier.erase(currNode); - visited.insert(currNode); - maxpreorder.insert(std::make_pair(currNode->getBlock(), time)); - - worklist.pop_back(); + + LiveIntervals *LI; + }; +} // namespace + +STATISTIC(NumPHIsLowered, "Number of PHIs lowered"); +STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted"); +STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted"); + +char StrongPHIElimination::ID = 0; +INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination", + "Eliminate PHI nodes for register allocation, intelligently", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination", + "Eliminate PHI nodes for register allocation, intelligently", false, false) + +char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; + +void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { + // FIXME: This only needs to check from the first terminator, as only the + // first terminator can use a virtual register. + for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) { + assert (RI != MBB->rend()); + MachineInstr *MI = &*RI; + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + MachineOperand &MO = *OI; + if (MO.isReg() && MO.isUse() && MO.getReg() == Reg) + return &MO; } } } -namespace { +bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { + MRI = &MF.getRegInfo(); + TII = MF.getTarget().getInstrInfo(); + DT = &getAnalysis(); + LI = &getAnalysis(); -/// PreorderSorter - a helper class that is used to sort registers -/// according to the preorder number of their defining blocks -class PreorderSorter { -private: - DenseMap& preorder; - MachineRegisterInfo& MRI; - -public: - PreorderSorter(DenseMap& p, - MachineRegisterInfo& M) : preorder(p), MRI(M) { } - - bool operator()(unsigned A, unsigned B) { - if (A == B) - return false; - - MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent(); - MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent(); - - if (preorder[ABlock] < preorder[BBlock]) - return true; - else if (preorder[ABlock] > preorder[BBlock]) - return false; - - return false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + unsigned DestReg = BBI->getOperand(0).getReg(); + addReg(DestReg); + PHISrcDefs[I].push_back(BBI); + + for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { + MachineOperand &SrcMO = BBI->getOperand(i); + unsigned SrcReg = SrcMO.getReg(); + addReg(SrcReg); + unionRegs(DestReg, SrcReg); + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI) + PHISrcDefs[DefMI->getParent()].push_back(DefMI); + } + } } -}; -} + // Perform a depth-first traversal of the dominator tree, splitting + // interferences amongst PHI-congruence classes. + DenseMap CurrentDominatingParent; + DenseMap ImmediateDominatingParent; + for (df_iterator DI = df_begin(DT->getRootNode()), + DE = df_end(DT->getRootNode()); DI != DE; ++DI) { + SplitInterferencesForBasicBlock(*DI->getBlock(), + CurrentDominatingParent, + ImmediateDominatingParent); + } -/// computeDomForest - compute the subforest of the DomTree corresponding -/// to the defining blocks of the registers in question -std::vector -StrongPHIElimination::computeDomForest( - std::map& regs, - MachineRegisterInfo& MRI) { - // Begin by creating a virtual root node, since the actual results - // may well be a forest. Assume this node has maximum DFS-out number. - DomForestNode* VirtualRoot = new DomForestNode(0, 0); - maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL)); - - // Populate a worklist with the registers - std::vector worklist; - worklist.reserve(regs.size()); - for (std::map::iterator I = regs.begin(), - E = regs.end(); I != E; ++I) - worklist.push_back(I->first); - - // Sort the registers by the DFS-in number of their defining block - PreorderSorter PS(preorder, MRI); - std::sort(worklist.begin(), worklist.end(), PS); - - // Create a "current parent" stack, and put the virtual root on top of it - DomForestNode* CurrentParent = VirtualRoot; - std::vector stack; - stack.push_back(VirtualRoot); - - // Iterate over all the registers in the previously computed order - for (std::vector::iterator I = worklist.begin(), E = worklist.end(); + // Insert copies for all PHI source and destination registers. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()]; - MachineBasicBlock* parentBlock = CurrentParent->getReg() ? - MRI.getVRegDef(CurrentParent->getReg())->getParent() : - 0; - - // If the DFS-in number of the register is greater than the DFS-out number - // of the current parent, repeatedly pop the parent stack until it isn't. - while (pre > maxpreorder[parentBlock]) { - stack.pop_back(); - CurrentParent = stack.back(); - - parentBlock = CurrentParent->getReg() ? - MRI.getVRegDef(CurrentParent->getReg())->getParent() : - 0; + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + InsertCopiesForPHI(BBI, I); } - - // Now that we've found the appropriate parent, create a DomForestNode for - // this register and attach it to the forest - DomForestNode* child = new DomForestNode(*I, CurrentParent); - - // Push this new node on the "current parent" stack - stack.push_back(child); - CurrentParent = child; } - - // Return a vector containing the children of the virtual root node - std::vector ret; - ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end()); - return ret; -} -/// isLiveIn - helper method that determines, from a regno, if a register -/// is live into a block -static bool isLiveIn(unsigned r, MachineBasicBlock* MBB, - LiveIntervals& LI) { - LiveInterval& I = LI.getOrCreateInterval(r); - unsigned idx = LI.getMBBStartIdx(MBB); - return I.liveBeforeAndAt(idx); -} + // FIXME: Preserve the equivalence classes during copy insertion and use + // the preversed equivalence classes instead of recomputing them. + RegNodeMap.clear(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + unsigned DestReg = BBI->getOperand(0).getReg(); + addReg(DestReg); -/// isLiveOut - help method that determines, from a regno, if a register is -/// live out of a block. -static bool isLiveOut(unsigned r, MachineBasicBlock* MBB, - LiveIntervals& LI) { - for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), - E = MBB->succ_end(); PI != E; ++PI) { - if (isLiveIn(r, *PI, LI)) - return true; + for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { + unsigned SrcReg = BBI->getOperand(i).getReg(); + addReg(SrcReg); + unionRegs(DestReg, SrcReg); + } + } } - - return false; -} -/// interferes - checks for local interferences by scanning a block. The only -/// trick parameter is 'mode' which tells it the relationship of the two -/// registers. 0 - defined in the same block, 1 - first properly dominates -/// second, 2 - second properly dominates first -static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan, - LiveIntervals& LV, unsigned mode) { - MachineInstr* def = 0; - MachineInstr* kill = 0; - - // The code is still in SSA form at this point, so there is only one - // definition per VReg. Thus we can safely use MRI->getVRegDef(). - const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo(); - - bool interference = false; - - // Wallk the block, checking for interferences - for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end(); - MBI != MBE; ++MBI) { - MachineInstr* curr = MBI; - - // Same defining block... - if (mode == 0) { - if (curr == MRI->getVRegDef(a)) { - // If we find our first definition, save it - if (!def) { - def = curr; - // If there's already an unkilled definition, then - // this is an interference - } else if (!kill) { - interference = true; - break; - // If there's a definition followed by a KillInst, then - // they can't interfere - } else { - interference = false; - break; - } - // Symmetric with the above - } else if (curr == MRI->getVRegDef(b)) { - if (!def) { - def = curr; - } else if (!kill) { - interference = true; - break; - } else { - interference = false; - break; - } - // Store KillInsts if they match up with the definition - } else if (curr->killsRegister(a)) { - if (def == MRI->getVRegDef(a)) { - kill = curr; - } else if (curr->killsRegister(b)) { - if (def == MRI->getVRegDef(b)) { - kill = curr; - } - } - } - // First properly dominates second... - } else if (mode == 1) { - if (curr == MRI->getVRegDef(b)) { - // Definition of second without kill of first is an interference - if (!kill) { - interference = true; - break; - // Definition after a kill is a non-interference - } else { - interference = false; - break; - } - // Save KillInsts of First - } else if (curr->killsRegister(a)) { - kill = curr; + DenseMap RegRenamingMap; + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); + while (BBI != BBE && BBI->isPHI()) { + MachineInstr *PHI = BBI; + + assert(PHI->getNumOperands() > 0); + + unsigned SrcReg = PHI->getOperand(1).getReg(); + unsigned SrcColor = getRegColor(SrcReg); + unsigned NewReg = RegRenamingMap[SrcColor]; + if (!NewReg) { + NewReg = SrcReg; + RegRenamingMap[SrcColor] = SrcReg; } - // Symmetric with the above - } else if (mode == 2) { - if (curr == MRI->getVRegDef(a)) { - if (!kill) { - interference = true; - break; - } else { - interference = false; - break; - } - } else if (curr->killsRegister(b)) { - kill = curr; + MergeLIsAndRename(SrcReg, NewReg); + + unsigned DestReg = PHI->getOperand(0).getReg(); + if (!InsertedDestCopies.count(DestReg)) + MergeLIsAndRename(DestReg, NewReg); + + for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) { + unsigned SrcReg = PHI->getOperand(i).getReg(); + MergeLIsAndRename(SrcReg, NewReg); } + + ++BBI; + LI->RemoveMachineInstrFromMaps(PHI); + PHI->eraseFromParent(); + Changed = true; } } - - return interference; -} -/// processBlock - Determine how to break up PHIs in the current block. Each -/// PHI is broken up by some combination of renaming its operands and inserting -/// copies. This method is responsible for determining which operands receive -/// which treatment. -void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) { - LiveIntervals& LI = getAnalysis(); - MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); - - // Holds names that have been added to a set in any PHI within this block - // before the current one. - std::set ProcessedNames; - - // Iterate over all the PHI nodes in this block - MachineBasicBlock::iterator P = MBB->begin(); - while (P != MBB->end() && P->getOpcode() == TargetInstrInfo::PHI) { - unsigned DestReg = P->getOperand(0).getReg(); - - // Don't both doing PHI elimination for dead PHI's. - if (P->registerDefIsDead(DestReg)) { - ++P; - continue; - } + // Due to the insertion of copies to split live ranges, the live intervals are + // guaranteed to not overlap, except in one case: an original PHI source and a + // PHI destination copy. In this case, they have the same value and thus don't + // truly intersect, so we merge them into the value live at that point. + // FIXME: Is there some better way we can handle this? + for (DestCopyMap::iterator I = InsertedDestCopies.begin(), + E = InsertedDestCopies.end(); I != E; ++I) { + unsigned DestReg = I->first; + unsigned DestColor = getRegColor(DestReg); + unsigned NewReg = RegRenamingMap[DestColor]; - LiveInterval& PI = LI.getOrCreateInterval(DestReg); - unsigned pIdx = LI.getDefIndex(LI.getInstructionIndex(P)); - VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno; - PhiValueNumber.insert(std::make_pair(DestReg, PVN->id)); - - // PHIUnion is the set of incoming registers to the PHI node that - // are going to be renames rather than having copies inserted. This set - // is refinded over the course of this function. UnionedBlocks is the set - // of corresponding MBBs. - std::map PHIUnion; - SmallPtrSet UnionedBlocks; - - // Iterate over the operands of the PHI node - for (int i = P->getNumOperands() - 1; i >= 2; i-=2) { - unsigned SrcReg = P->getOperand(i-1).getReg(); - - // Check for trivial interferences via liveness information, allowing us - // to avoid extra work later. Any registers that interfere cannot both - // be in the renaming set, so choose one and add copies for it instead. - // The conditions are: - // 1) if the operand is live into the PHI node's block OR - // 2) if the PHI node is live out of the operand's defining block OR - // 3) if the operand is itself a PHI node and the original PHI is - // live into the operand's defining block OR - // 4) if the operand is already being renamed for another PHI node - // in this block OR - // 5) if any two operands are defined in the same block, insert copies - // for one of them - if (isLiveIn(SrcReg, P->getParent(), LI) || - isLiveOut(P->getOperand(0).getReg(), - MRI.getVRegDef(SrcReg)->getParent(), LI) || - ( MRI.getVRegDef(SrcReg)->getOpcode() == TargetInstrInfo::PHI && - isLiveIn(P->getOperand(0).getReg(), - MRI.getVRegDef(SrcReg)->getParent(), LI) ) || - ProcessedNames.count(SrcReg) || - UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) { - - // Add a copy for the selected register - MachineBasicBlock* From = P->getOperand(i).getMBB(); - Waiting[From].insert(std::make_pair(SrcReg, DestReg)); - UsedByAnother.insert(SrcReg); - } else { - // Otherwise, add it to the renaming set - PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB())); - UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent()); - } + LiveInterval &DestLI = LI->getInterval(DestReg); + LiveInterval &NewLI = LI->getInterval(NewReg); + + assert(DestLI.ranges.size() == 1 + && "PHI destination copy's live interval should be a single live " + "range from the beginning of the BB to the copy instruction."); + LiveRange *DestLR = DestLI.begin(); + VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start); + if (!NewVNI) { + NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator()); + MachineInstr *CopyInstr = I->second; + CopyInstr->getOperand(1).setIsKill(true); } - - // Compute the dominator forest for the renaming set. This is a forest - // where the nodes are the registers and the edges represent dominance - // relations between the defining blocks of the registers - std::vector DF = - computeDomForest(PHIUnion, MRI); - - // Walk DomForest to resolve interferences at an inter-block level. This - // will remove registers from the renaming set (and insert copies for them) - // if interferences are found. - std::vector > localInterferences; - processPHIUnion(P, PHIUnion, DF, localInterferences); - - // If one of the inputs is defined in the same block as the current PHI - // then we need to check for a local interference between that input and - // the PHI. - for (std::map::iterator I = PHIUnion.begin(), - E = PHIUnion.end(); I != E; ++I) - if (MRI.getVRegDef(I->first)->getParent() == P->getParent()) - localInterferences.push_back(std::make_pair(I->first, - P->getOperand(0).getReg())); - - // The dominator forest walk may have returned some register pairs whose - // interference cannot be determined from dominator analysis. We now - // examine these pairs for local interferences. - for (std::vector >::iterator I = - localInterferences.begin(), E = localInterferences.end(); I != E; ++I) { - std::pair p = *I; - - MachineDominatorTree& MDT = getAnalysis(); - - // Determine the block we need to scan and the relationship between - // the two registers - MachineBasicBlock* scan = 0; - unsigned mode = 0; - if (MRI.getVRegDef(p.first)->getParent() == - MRI.getVRegDef(p.second)->getParent()) { - scan = MRI.getVRegDef(p.first)->getParent(); - mode = 0; // Same block - } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(), - MRI.getVRegDef(p.second)->getParent())) { - scan = MRI.getVRegDef(p.second)->getParent(); - mode = 1; // First dominates second - } else { - scan = MRI.getVRegDef(p.first)->getParent(); - mode = 2; // Second dominates first - } - - // If there's an interference, we need to insert copies - if (interferes(p.first, p.second, scan, LI, mode)) { - // Insert copies for First - for (int i = P->getNumOperands() - 1; i >= 2; i-=2) { - if (P->getOperand(i-1).getReg() == p.first) { - unsigned SrcReg = p.first; - MachineBasicBlock* From = P->getOperand(i).getMBB(); - - Waiting[From].insert(std::make_pair(SrcReg, - P->getOperand(0).getReg())); - UsedByAnother.insert(SrcReg); - - PHIUnion.erase(SrcReg); - } - } + + LiveRange NewLR(DestLR->start, DestLR->end, NewVNI); + NewLI.addRange(NewLR); + + LI->removeInterval(DestReg); + MRI->replaceRegWith(DestReg, NewReg); + } + + // Adjust the live intervals of all PHI source registers to handle the case + // where the PHIs in successor blocks were the only later uses of the source + // register. + for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(), + E = InsertedSrcCopySet.end(); I != E; ++I) { + MachineBasicBlock *MBB = I->first; + unsigned SrcReg = I->second; + if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)]) + SrcReg = RenamedRegister; + + LiveInterval &SrcLI = LI->getInterval(SrcReg); + + bool isLiveOut = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) { + isLiveOut = true; + break; } } - - // Add the renaming set for this PHI node to our overall renaming information - RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion)); - - // Remember which registers are already renamed, so that we don't try to - // rename them for another PHI node in this block - for (std::map::iterator I = PHIUnion.begin(), - E = PHIUnion.end(); I != E; ++I) - ProcessedNames.insert(I->first); - - ++P; + + if (isLiveOut) + continue; + + MachineOperand *LastUse = findLastUse(MBB, SrcReg); + assert(LastUse); + SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); + SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB)); + LastUse->setIsKill(true); } + + Allocator.Reset(); + RegNodeMap.clear(); + PHISrcDefs.clear(); + InsertedSrcCopySet.clear(); + InsertedSrcCopyMap.clear(); + InsertedDestCopies.clear(); + + return Changed; } -/// processPHIUnion - Take a set of candidate registers to be coalesced when -/// decomposing the PHI instruction. Use the DominanceForest to remove the ones -/// that are known to interfere, and flag others that need to be checked for -/// local interferences. -void StrongPHIElimination::processPHIUnion(MachineInstr* Inst, - std::map& PHIUnion, - std::vector& DF, - std::vector >& locals) { - - std::vector worklist(DF.begin(), DF.end()); - SmallPtrSet visited; - - // Code is still in SSA form, so we can use MRI::getVRegDef() - MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo(); - - LiveIntervals& LI = getAnalysis(); - unsigned DestReg = Inst->getOperand(0).getReg(); - - // DF walk on the DomForest - while (!worklist.empty()) { - DomForestNode* DFNode = worklist.back(); - - visited.insert(DFNode); - - bool inserted = false; - for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end(); - CI != CE; ++CI) { - DomForestNode* child = *CI; - - // If the current node is live-out of the defining block of one of its - // children, insert a copy for it. NOTE: The paper actually calls for - // a more elaborate heuristic for determining whether to insert copies - // for the child or the parent. In the interest of simplicity, we're - // just always choosing the parent. - if (isLiveOut(DFNode->getReg(), - MRI.getVRegDef(child->getReg())->getParent(), LI)) { - // Insert copies for parent - for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) { - if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) { - unsigned SrcReg = DFNode->getReg(); - MachineBasicBlock* From = Inst->getOperand(i).getMBB(); - - Waiting[From].insert(std::make_pair(SrcReg, DestReg)); - UsedByAnother.insert(SrcReg); - - PHIUnion.erase(SrcReg); - } - } - - // If a node is live-in to the defining block of one of its children, but - // not live-out, then we need to scan that block for local interferences. - } else if (isLiveIn(DFNode->getReg(), - MRI.getVRegDef(child->getReg())->getParent(), LI) || - MRI.getVRegDef(DFNode->getReg())->getParent() == - MRI.getVRegDef(child->getReg())->getParent()) { - // Add (p, c) to possible local interferences - locals.push_back(std::make_pair(DFNode->getReg(), child->getReg())); - } - - if (!visited.count(child)) { - worklist.push_back(child); - inserted = true; - } - } - - if (!inserted) worklist.pop_back(); +void StrongPHIElimination::addReg(unsigned Reg) { + Node *&N = RegNodeMap[Reg]; + if (!N) + N = new (Allocator) Node(Reg); +} + +StrongPHIElimination::Node* +StrongPHIElimination::Node::getLeader() { + Node *N = this; + Node *Parent = parent.getPointer(); + Node *Grandparent = Parent->parent.getPointer(); + + while (Parent != Grandparent) { + N->parent.setPointer(Grandparent); + N = Grandparent; + Parent = Parent->parent.getPointer(); + Grandparent = Parent->parent.getPointer(); } + + return Parent; } -/// ScheduleCopies - Insert copies into predecessor blocks, scheduling -/// them properly so as to avoid the 'lost copy' and the 'virtual swap' -/// problems. -/// -/// Based on "Practical Improvements to the Construction and Destruction -/// of Static Single Assignment Form" by Briggs, et al. -void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, - std::set& pushed) { - // FIXME: This function needs to update LiveIntervals - std::map& copy_set= Waiting[MBB]; - - std::map worklist; - std::map map; - - // Setup worklist of initial copies - for (std::map::iterator I = copy_set.begin(), - E = copy_set.end(); I != E; ) { - map.insert(std::make_pair(I->first, I->first)); - map.insert(std::make_pair(I->second, I->second)); - - if (!UsedByAnother.count(I->second)) { - worklist.insert(*I); - - // Avoid iterator invalidation - unsigned first = I->first; - ++I; - copy_set.erase(first); - } else { - ++I; - } +unsigned StrongPHIElimination::getRegColor(unsigned Reg) { + DenseMap::iterator RI = RegNodeMap.find(Reg); + if (RI == RegNodeMap.end()) + return 0; + Node *Node = RI->second; + if (Node->parent.getInt() & Node::kRegisterIsolatedFlag) + return 0; + return Node->getLeader()->value; +} + +void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) { + Node *Node1 = RegNodeMap[Reg1]->getLeader(); + Node *Node2 = RegNodeMap[Reg2]->getLeader(); + + if (Node1->rank > Node2->rank) { + Node2->parent.setPointer(Node1->getLeader()); + } else if (Node1->rank < Node2->rank) { + Node1->parent.setPointer(Node2->getLeader()); + } else if (Node1 != Node2) { + Node2->parent.setPointer(Node1->getLeader()); + Node1->rank++; } - - LiveIntervals& LI = getAnalysis(); - MachineFunction* MF = MBB->getParent(); - MachineRegisterInfo& MRI = MF->getRegInfo(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - - SmallVector, 4> InsertedPHIDests; - - // Iterate over the worklist, inserting copies - while (!worklist.empty() || !copy_set.empty()) { - while (!worklist.empty()) { - std::pair curr = *worklist.begin(); - worklist.erase(curr.first); - - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first); - - if (isLiveOut(curr.second, MBB, LI)) { - // Create a temporary - unsigned t = MF->getRegInfo().createVirtualRegister(RC); - - // Insert copy from curr.second to a temporary at - // the Phi defining curr.second - MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second); - TII->copyRegToReg(*PI->getParent(), PI, t, - curr.second, RC, RC); - - // Push temporary on Stacks - Stacks[curr.second].push_back(t); - - // Insert curr.second in pushed - pushed.insert(curr.second); - } - - // Insert copy from map[curr.first] to curr.second - TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, - map[curr.first], RC, RC); - map[curr.first] = curr.second; - - // Push this copy onto InsertedPHICopies so we can - // update LiveIntervals with it. - MachineBasicBlock::iterator MI = MBB->getFirstTerminator(); - InsertedPHIDests.push_back(std::make_pair(curr.second, --MI)); - - // If curr.first is a destination in copy_set... - for (std::map::iterator I = copy_set.begin(), - E = copy_set.end(); I != E; ) - if (curr.first == I->second) { - std::pair temp = *I; - - // Avoid iterator invalidation - ++I; - copy_set.erase(temp.first); - worklist.insert(temp); - - break; - } else { - ++I; - } - } - - if (!copy_set.empty()) { - std::pair curr = *copy_set.begin(); - copy_set.erase(curr.first); - - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first); - - // Insert a copy from dest to a new temporary t at the end of b - unsigned t = MF->getRegInfo().createVirtualRegister(RC); - TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t, - curr.second, RC, RC); - map[curr.second] = t; - - worklist.insert(curr); - } +} + +void StrongPHIElimination::isolateReg(unsigned Reg) { + Node *Node = RegNodeMap[Reg]; + Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag); +} + +unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) { + assert(PHI->isPHI()); + + unsigned DestReg = PHI->getOperand(0).getReg(); + Node *DestNode = RegNodeMap[DestReg]; + if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag) + return 0; + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg()); + if (SrcColor) + return SrcColor; } - - // Renumber the instructions so that we can perform the index computations - // needed to create new live intervals. - LI.computeNumbering(); - - // For copies that we inserted at the ends of predecessors, we construct - // live intervals. This is pretty easy, since we know that the destination - // register cannot have be in live at that point previously. We just have - // to make sure that, for registers that serve as inputs to more than one - // PHI, we don't create multiple overlapping live intervals. - std::set RegHandled; - for (SmallVector, 4>::iterator I = - InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) - if (RegHandled.insert(I->first).second) - LI.addLiveRangeToEndOfBlock(I->first, I->second); + return 0; } -/// InsertCopies - insert copies into MBB and all of its successors -void StrongPHIElimination::InsertCopies(MachineBasicBlock* MBB, - SmallPtrSet& visited) { - visited.insert(MBB); - - std::set pushed; - - // Rewrite register uses from Stacks - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) - for (unsigned i = 0; i < I->getNumOperands(); ++i) - if (I->getOperand(i).isRegister() && - Stacks[I->getOperand(i).getReg()].size()) { - I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back()); - } - - // Schedule the copies for this block - ScheduleCopies(MBB, pushed); - - // Recur to our successors - for (GraphTraits::ChildIteratorType I = - GraphTraits::child_begin(MBB), E = - GraphTraits::child_end(MBB); I != E; ++I) - if (!visited.count(*I)) - InsertCopies(*I, visited); - - // As we exit this block, pop the names we pushed while processing it - for (std::set::iterator I = pushed.begin(), - E = pushed.end(); I != E; ++I) - Stacks[*I].pop_back(); +void StrongPHIElimination::isolatePHI(MachineInstr *PHI) { + assert(PHI->isPHI()); + Node *Node = RegNodeMap[PHI->getOperand(0).getReg()]; + Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag); } -void StrongPHIElimination::mergeLiveIntervals(unsigned primary, - unsigned secondary, - MachineBasicBlock* pred) { - - LiveIntervals& LI = getAnalysis(); - LiveInterval& LHS = LI.getOrCreateInterval(primary); - LiveInterval& RHS = LI.getOrCreateInterval(secondary); - - LI.computeNumbering(); - const LiveRange* RangeMergingIn = - RHS.getLiveRangeContaining(LI.getMBBEndIdx(pred)); - VNInfo* RHSVN = RangeMergingIn->valno; - VNInfo* NewVN = LHS.getNextValue(RangeMergingIn->valno->def, - RangeMergingIn->valno->copy, - LI.getVNInfoAllocator()); - - // If we discover that a live range was defined by a two-addr - // instruction, we need to merge over the input as well, even if - // it has a different VNInfo. - SmallPtrSet MergedVNs; - MergedVNs.insert(RHSVN); - - DenseMap VNMap; - VNMap.insert(std::make_pair(RangeMergingIn->valno, NewVN)); - - // Find all VNs that are the inputs to two-address instructiosn - // chaining upwards from the VN we're trying to merge. - bool addedVN = true; - while (addedVN) { - addedVN = false; - unsigned defIndex = RHSVN->def; - - if (defIndex != ~0U) { - MachineInstr* instr = LI.getInstructionFromIndex(defIndex); - - for (unsigned i = 0; i < instr->getNumOperands(); ++i) { - if (instr->getOperand(i).isReg() && - instr->getOperand(i).getReg() == secondary) - if (instr->isRegReDefinedByTwoAddr(secondary, i)) { - RHSVN = RHS.getLiveRangeContaining(defIndex-1)->valno; - addedVN = true; - - VNInfo* NextVN = LHS.getNextValue(RHSVN->def, - RHSVN->copy, - LI.getVNInfoAllocator()); - VNMap.insert(std::make_pair(RHSVN, NextVN)); - - break; - } +/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any +/// interferences found between registers in the same congruence class. It +/// takes two DenseMaps as arguments that it also updates: +/// +/// 1) CurrentDominatingParent, which maps a color to the register in that +/// congruence class whose definition was most recently seen. +/// +/// 2) ImmediateDominatingParent, which maps a register to the register in the +/// same congruence class that most immediately dominates it. +/// +/// This function assumes that it is being called in a depth-first traversal +/// of the dominator tree. +/// +/// The algorithm used here is a generalization of the dominance-based SSA test +/// for two variables. If there are variables a_1, ..., a_n such that +/// +/// def(a_1) dom ... dom def(a_n), +/// +/// then we can test for an interference between any two a_i by only using O(n) +/// interference tests between pairs of variables. If i < j and a_i and a_j +/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). +/// Thus, in order to test for an interference involving a_i, we need only check +/// for a potential interference with a_i+1. +/// +/// This method can be generalized to arbitrary sets of variables by performing +/// a depth-first traversal of the dominator tree. As we traverse down a branch +/// of the dominator tree, we keep track of the current dominating variable and +/// only perform an interference test with that variable. However, when we go to +/// another branch of the dominator tree, the definition of the current dominating +/// variable may no longer dominate the current block. In order to correct this, +/// we need to use a stack of past choices of the current dominating variable +/// and pop from this stack until we find a variable whose definition actually +/// dominates the current block. +/// +/// There will be one push on this stack for each variable that has become the +/// current dominating variable, so instead of using an explicit stack we can +/// simply associate the previous choice for a current dominating variable with +/// the new choice. This works better in our implementation, where we test for +/// interference in multiple distinct sets at once. +void +StrongPHIElimination::SplitInterferencesForBasicBlock( + MachineBasicBlock &MBB, + DenseMap &CurrentDominatingParent, + DenseMap &ImmediateDominatingParent) { + // Sort defs by their order in the original basic block, as the code below + // assumes that it is processing definitions in dominance order. + std::vector &DefInstrs = PHISrcDefs[&MBB]; + std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI)); + + for (std::vector::const_iterator BBI = DefInstrs.begin(), + BBE = DefInstrs.end(); BBI != BBE; ++BBI) { + for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(), + E = (*BBI)->operands_end(); I != E; ++I) { + const MachineOperand &MO = *I; + + // FIXME: This would be faster if it were possible to bail out of checking + // an instruction's operands after the explicit defs, but this is incorrect + // for variadic instructions, which may appear before register allocation + // in the future. + if (!MO.isReg() || !MO.isDef()) + continue; + + unsigned DestReg = MO.getReg(); + if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // If the virtual register being defined is not used in any PHI or has + // already been isolated, then there are no more interferences to check. + unsigned DestColor = getRegColor(DestReg); + if (!DestColor) + continue; + + // The input to this pass sometimes is not in SSA form in every basic + // block, as some virtual registers have redefinitions. We could eliminate + // this by fixing the passes that generate the non-SSA code, or we could + // handle it here by tracking defining machine instructions rather than + // virtual registers. For now, we just handle the situation conservatively + // in a way that will possibly lead to false interferences. + unsigned &CurrentParent = CurrentDominatingParent[DestColor]; + unsigned NewParent = CurrentParent; + if (NewParent == DestReg) + continue; + + // Pop registers from the stack represented by ImmediateDominatingParent + // until we find a parent that dominates the current instruction. + while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI) + || !getRegColor(NewParent))) + NewParent = ImmediateDominatingParent[NewParent]; + + // If NewParent is nonzero, then its definition dominates the current + // instruction, so it is only necessary to check for the liveness of + // NewParent in order to check for an interference. + if (NewParent + && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) { + // If there is an interference, always isolate the new register. This + // could be improved by using a heuristic that decides which of the two + // registers to isolate. + isolateReg(DestReg); + CurrentParent = NewParent; + } else { + // If there is no interference, update ImmediateDominatingParent and set + // the CurrentDominatingParent for this color to the current register. + ImmediateDominatingParent[DestReg] = NewParent; + CurrentParent = DestReg; } } } - - // Merge VNs from RHS into LHS using the mapping we computed above. - for (DenseMap::iterator VI = VNMap.begin(), - VE = VNMap.end(); VI != VE; ++VI) { - LHS.MergeValueInAsValue(RHS, VI->first, VI->second); - RHS.removeValNo(VI->first); + + // We now walk the PHIs in successor blocks and check for interferences. This + // is necessary because the use of a PHI's operands are logically contained in + // the predecessor block. The def of a PHI's destination register is processed + // along with the other defs in a basic block. + + CurrentPHIForColor.clear(); + + for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + MachineInstr *PHI = BBI; + + // If a PHI is already isolated, either by being isolated directly or + // having all of its operands isolated, ignore it. + unsigned Color = getPHIColor(PHI); + if (!Color) + continue; + + // Find the index of the PHI operand that corresponds to this basic block. + unsigned PredIndex; + for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { + if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) + break; + } + assert(PredIndex < PHI->getNumOperands()); + unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); + + // Pop registers from the stack represented by ImmediateDominatingParent + // until we find a parent that dominates the current instruction. + unsigned &CurrentParent = CurrentDominatingParent[Color]; + unsigned NewParent = CurrentParent; + while (NewParent + && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) + || !getRegColor(NewParent))) + NewParent = ImmediateDominatingParent[NewParent]; + CurrentParent = NewParent; + + // If there is an interference with a register, always isolate the + // register rather than the PHI. It is also possible to isolate the + // PHI, but that introduces copies for all of the registers involved + // in that PHI. + if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) + && NewParent != PredOperandReg) + isolateReg(NewParent); + + std::pair + &CurrentPHI = CurrentPHIForColor[Color]; + + // If two PHIs have the same operand from every shared predecessor, then + // they don't actually interfere. Otherwise, isolate the current PHI. This + // could possibly be improved, e.g. we could isolate the PHI with the + // fewest operands. + if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) + isolatePHI(PHI); + else + CurrentPHI = std::make_pair(PHI, PredOperandReg); + } } - - if (RHS.begin() == RHS.end()) - LI.removeInterval(RHS.reg); } -bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { - LiveIntervals& LI = getAnalysis(); - - // Compute DFS numbers of each block - computeDFS(Fn); - - // Determine which phi node operands need copies - for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - if (!I->empty() && - I->begin()->getOpcode() == TargetInstrInfo::PHI) - processBlock(I); - - // Insert copies - // FIXME: This process should probably preserve LiveIntervals - SmallPtrSet visited; - InsertCopies(Fn.begin(), visited); - - // Perform renaming - typedef std::map > - RenameSetType; - for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end(); - I != E; ++I) - for (std::map::iterator SI = - I->second.begin(), SE = I->second.end(); SI != SE; ++SI) { - mergeLiveIntervals(I->first, SI->first, SI->second); - Fn.getRegInfo().replaceRegWith(SI->first, I->first); +void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, + MachineBasicBlock *MBB) { + assert(PHI->isPHI()); + ++NumPHIsLowered; + unsigned PHIColor = getPHIColor(PHI); + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + MachineOperand &SrcMO = PHI->getOperand(i); + + // If a source is defined by an implicit def, there is no need to insert a + // copy in the predecessor. + if (SrcMO.isUndef()) + continue; + + unsigned SrcReg = SrcMO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + "Machine PHI Operands must all be virtual registers!"); + + MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB(); + unsigned SrcColor = getRegColor(SrcReg); + + // If neither the PHI nor the operand were isolated, then we only need to + // set the phi-kill flag on the VNInfo at this PHI. + if (PHIColor && SrcColor == PHIColor) { + LiveInterval &SrcInterval = LI->getInterval(SrcReg); + SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); + VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); + (void)SrcVNI; + assert(SrcVNI); + continue; } - - // FIXME: Insert last-minute copies - - // Remove PHIs - std::vector phis; - for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end(); - BI != BE; ++BI) - if (BI->getOpcode() == TargetInstrInfo::PHI) - phis.push_back(BI); - } - - for (std::vector::iterator I = phis.begin(), E = phis.end(); - I != E; ) { - MachineInstr* PInstr = *(I++); - - // Trim live intervals of input registers. They are no longer live into - // this block. - for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) { - unsigned reg = PInstr->getOperand(i).getReg(); - MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB(); - LiveInterval& InputI = LI.getInterval(reg); - if (MBB != PInstr->getParent() && - InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent()))) - InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()), - LI.getInstructionIndex(PInstr), - true); + + unsigned CopyReg = 0; + if (PHIColor) { + SrcCopyMap::const_iterator I + = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); + CopyReg + = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0; } - - // If this is a dead PHI node, then remove it from LiveIntervals. - unsigned DestReg = PInstr->getOperand(0).getReg(); - LiveInterval& PI = LI.getInterval(DestReg); - if (PInstr->registerDefIsDead(DestReg)) { - if (PI.containsOneValue()) { - LI.removeInterval(DestReg); + + if (!CopyReg) { + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + CopyReg = MRI->createVirtualRegister(RC); + + MachineBasicBlock::iterator + CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); + unsigned SrcSubReg = SrcMO.getSubReg(); + MachineInstr *CopyInstr = BuildMI(*PredBB, + CopyInsertPoint, + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + CopyReg).addReg(SrcReg, 0, SrcSubReg); + LI->InsertMachineInstrInMaps(CopyInstr); + ++NumSrcCopiesInserted; + + // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for + // the newly added range. + LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); + InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); + + addReg(CopyReg); + if (PHIColor) { + unionRegs(PHIColor, CopyReg); + assert(getRegColor(CopyReg) != CopyReg); } else { - unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); - PI.removeRange(*PI.getLiveRangeContaining(idx), true); + PHIColor = CopyReg; + assert(getRegColor(CopyReg) == CopyReg); } - } else { - // If the PHI is not dead, then the valno defined by the PHI - // now has an unknown def. - unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr)); - const LiveRange* PLR = PI.getLiveRangeContaining(idx); - PLR->valno->def = ~0U; - LiveRange R (LI.getMBBStartIdx(PInstr->getParent()), - PLR->start, PLR->valno); - PI.addRange(R); + + // Insert into map if not already there. + InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor), + CopyInstr)); } - - LI.RemoveMachineInstrFromMaps(PInstr); - PInstr->eraseFromParent(); + + SrcMO.setReg(CopyReg); + + // If SrcReg is not live beyond the PHI, trim its interval so that it is no + // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are + // processed later, but this is still correct to do at this point because we + // never rely on LiveIntervals being correct while inserting copies. + // FIXME: Should this just count uses at PHIs like the normal PHIElimination + // pass does? + LiveInterval &SrcLI = LI->getInterval(SrcReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); + if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) + SrcLI.removeRange(MBBStartIndex, PHIIndex, true); } - - LI.computeNumbering(); - - return true; + + unsigned DestReg = PHI->getOperand(0).getReg(); + unsigned DestColor = getRegColor(DestReg); + + if (PHIColor && DestColor == PHIColor) { + LiveInterval &DestLI = LI->getInterval(DestReg); + + // Set the phi-def flag for the VN at this PHI. + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); + assert(DestVNI); + + // Prior to PHI elimination, the live ranges of PHIs begin at their defining + // instruction. After PHI elimination, PHI instructions are replaced by VNs + // with the phi-def flag set, and the live ranges of these VNs start at the + // beginning of the basic block. + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + DestVNI->def = MBBStartIndex; + DestLI.addRange(LiveRange(MBBStartIndex, + PHIIndex.getRegSlot(), + DestVNI)); + return; + } + + const TargetRegisterClass *RC = MRI->getRegClass(DestReg); + unsigned CopyReg = MRI->createVirtualRegister(RC); + + MachineInstr *CopyInstr = BuildMI(*MBB, + MBB->SkipPHIsAndLabels(MBB->begin()), + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + DestReg).addReg(CopyReg); + LI->InsertMachineInstrInMaps(CopyInstr); + PHI->getOperand(0).setReg(CopyReg); + ++NumDestCopiesInserted; + + // Add the region from the beginning of MBB to the copy instruction to + // CopyReg's live interval, and give the VNInfo the phidef flag. + LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); + VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, + LI->getVNInfoAllocator()); + CopyLI.addRange(LiveRange(MBBStartIndex, + DestCopyIndex.getRegSlot(), + CopyVNI)); + + // Adjust DestReg's live interval to adjust for its new definition at + // CopyInstr. + LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot()); + + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); + assert(DestVNI); + DestVNI->def = DestCopyIndex.getRegSlot(); + + InsertedDestCopies[CopyReg] = CopyInstr; +} + +void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { + if (Reg == NewReg) + return; + + LiveInterval &OldLI = LI->getInterval(Reg); + LiveInterval &NewLI = LI->getInterval(NewReg); + + // Merge the live ranges of the two registers. + DenseMap VNMap; + for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); + LRI != LRE; ++LRI) { + LiveRange OldLR = *LRI; + VNInfo *OldVN = OldLR.valno; + + VNInfo *&NewVN = VNMap[OldVN]; + if (!NewVN) { + NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); + VNMap[OldVN] = NewVN; + } + + LiveRange LR(OldLR.start, OldLR.end, NewVN); + NewLI.addRange(LR); + } + + // Remove the LiveInterval for the register being renamed and replace all + // of its defs and uses with the new register. + LI->removeInterval(Reg); + MRI->replaceRegWith(Reg, NewReg); }