X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FUtils%2FPromoteMemoryToRegister.cpp;h=f6ad3b8b9e8355b4426ea8d8b4532880cb6366cb;hb=6a02bbcdae9996a984ccb2497cb89719a69413ef;hp=d00f870fdfa2b765527a6ea024d32315b56d61e0;hpb=9fc5cdf77c812aaa80419036de27576d45894d0d;p=oota-llvm.git diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index d00f870fdfa..f6ad3b8b9e8 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -9,32 +9,46 @@ // // This file promotes memory references to be register references. It promotes // alloca instructions which only have loads and stores as uses. An alloca is -// transformed by using dominator frontiers to place PHI nodes, then traversing -// the function in depth-first order to rewrite loads and stores as appropriate. -// This is just the standard SSA construction algorithm to construct "pruned" -// SSA form. +// transformed by using iterated dominator frontiers to place PHI nodes, then +// traversing the function in depth-first order to rewrite loads and stores as +// appropriate. +// +// The algorithm used here is based on: +// +// Sreedhar and Gao. A linear time algorithm for placing phi-nodes. +// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of +// Programming Languages +// POPL '95. ACM, New York, NY, 62-73. +// +// It has been modified to not explicitly use the DJ graph data structure and to +// directly compute pruned SSA using per-variable liveness information. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Constants.h" +#include "llvm/DebugInfo.h" #include "llvm/DerivedTypes.h" +#include "llvm/DIBuilder.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Metadata.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/DebugInfo.h" -#include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" #include +#include using namespace llvm; STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); @@ -53,7 +67,8 @@ struct DenseMapInfo > { return EltTy(reinterpret_cast(-2), 0U); } static unsigned getHashValue(const std::pair &Val) { - return DenseMapInfo::getHashValue(Val.first) + Val.second*2; + using llvm::hash_value; + return static_cast(hash_value(Val)); } static bool isEqual(const EltTy &LHS, const EltTy &RHS) { return LHS == RHS; @@ -73,13 +88,33 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { UI != UE; ++UI) { // Loop over all of the uses of the alloca const User *U = *UI; if (const LoadInst *LI = dyn_cast(U)) { + // Note that atomic loads can be transformed; atomic semantics do + // not have any meaning for a local alloca. if (LI->isVolatile()) return false; } else if (const StoreInst *SI = dyn_cast(U)) { if (SI->getOperand(0) == AI) return false; // Don't allow a store OF the AI, only INTO the AI. + // Note that atomic stores can be transformed; atomic semantics do + // not have any meaning for a local alloca. if (SI->isVolatile()) return false; + } else if (const IntrinsicInst *II = dyn_cast(U)) { + if (II->getIntrinsicID() != Intrinsic::lifetime_start && + II->getIntrinsicID() != Intrinsic::lifetime_end) + return false; + } else if (const BitCastInst *BCI = dyn_cast(U)) { + if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!onlyUsedByLifetimeMarkers(BCI)) + return false; + } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) + return false; + if (!GEPI->hasAllZeroIndices()) + return false; + if (!onlyUsedByLifetimeMarkers(GEPI)) + return false; } else { return false; } @@ -88,18 +123,6 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { return true; } -/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the -/// alloca 'V', if any. -static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) { - if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1)) - for (Value::use_iterator UI = DebugNode->use_begin(), - E = DebugNode->use_end(); UI != E; ++UI) - if (DbgDeclareInst *DDI = dyn_cast(*UI)) - return DDI; - - return 0; -} - namespace { struct AllocaInfo; @@ -179,8 +202,7 @@ namespace { /// std::vector Allocas; DominatorTree &DT; - DominanceFrontier &DF; - DIFactory *DIF; + DIBuilder *DIB; /// AST - An AliasSetTracker object to update. If null, don't update it. /// @@ -188,11 +210,11 @@ namespace { /// AllocaLookup - Reverse mapping of Allocas. /// - std::map AllocaLookup; + DenseMap AllocaLookup; /// NewPhiNodes - The PhiNodes we're adding. /// - DenseMap, PHINode*> NewPhiNodes; + DenseMap, PHINode*> NewPhiNodes; /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas /// it corresponds to. @@ -217,14 +239,17 @@ namespace { /// non-determinstic behavior. DenseMap BBNumbers; + /// DomLevels - Maps DomTreeNodes to their level in the dominator tree. + DenseMap DomLevels; + /// BBNumPreds - Lazily compute the number of predecessors a block has. DenseMap BBNumPreds; public: PromoteMem2Reg(const std::vector &A, DominatorTree &dt, - DominanceFrontier &df, AliasSetTracker *ast) - : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {} + AliasSetTracker *ast) + : Allocas(A), DT(dt), DIB(0), AST(ast) {} ~PromoteMem2Reg() { - delete DIF; + delete DIB; } void run(); @@ -259,8 +284,6 @@ namespace { LargeBlockInfo &LBI); void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI); - void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI); - void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, @@ -269,8 +292,8 @@ namespace { }; struct AllocaInfo { - std::vector DefiningBlocks; - std::vector UsingBlocks; + SmallVector DefiningBlocks; + SmallVector UsingBlocks; StoreInst *OnlyStore; BasicBlock *OnlyBlock; @@ -325,11 +348,44 @@ namespace { DbgDeclare = FindAllocaDbgDeclare(AI); } }; + + typedef std::pair DomTreeNodePair; + + struct DomTreeNodeCompare { + bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) { + return LHS.second < RHS.second; + } + }; } // end of anonymous namespace +static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. + + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE;) { + Instruction *I = cast(*UI); + ++UI; + if (isa(I) || isa(I)) + continue; + + if (!I->getType()->isVoidTy()) { + // The only users of this bitcast/GEP instruction are lifetime intrinsics. + // Follow the use/def chain to erase them now instead of leaving it for + // dead code elimination later. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE;) { + Instruction *Inst = cast(*UI); + ++UI; + Inst->eraseFromParent(); + } + } + I->eraseFromParent(); + } +} void PromoteMem2Reg::run() { - Function &F = *DF.getRoot()->getParent(); + Function &F = *DT.getRoot()->getParent(); if (AST) PointerAllocaValues.resize(Allocas.size()); AllocaDbgDeclares.resize(Allocas.size()); @@ -345,6 +401,8 @@ void PromoteMem2Reg::run() { assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); + removeLifetimeIntrinsicUsers(AI); + if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. if (AST) AST->deleteValue(AI); @@ -367,9 +425,12 @@ void PromoteMem2Reg::run() { // Finally, after the scan, check to see if the store is all that is left. if (Info.UsingBlocks.empty()) { - // Record debuginfo for the store and remove the declaration's debuginfo. + // Record debuginfo for the store and remove the declaration's + // debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore); + if (!DIB) + DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); DDI->eraseFromParent(); } // Remove the (now dead) store and alloca. @@ -401,8 +462,11 @@ void PromoteMem2Reg::run() { while (!AI->use_empty()) { StoreInst *SI = cast(AI->use_back()); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = Info.DbgDeclare) - ConvertDebugDeclareToDebugValue(DDI, SI); + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } SI->eraseFromParent(); LBI.deleteValue(SI); } @@ -422,7 +486,26 @@ void PromoteMem2Reg::run() { continue; } } - + + // If we haven't computed dominator tree levels, do so now. + if (DomLevels.empty()) { + SmallVector Worklist; + + DomTreeNode *Root = DT.getRootNode(); + DomLevels[Root] = 0; + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + unsigned ChildLevel = DomLevels[Node] + 1; + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); + CI != CE; ++CI) { + DomLevels[*CI] = ChildLevel; + Worklist.push_back(*CI); + } + } + } + // If we haven't computed a numbering for the BB's in the function, do so // now. if (BBNumbers.empty()) { @@ -484,9 +567,8 @@ void PromoteMem2Reg::run() { Instruction *A = Allocas[i]; // If there are any uses of the alloca instructions left, they must be in - // sections of dead code that were not processed on the dominance frontier. - // Just delete the users now. - // + // unreachable basic blocks that were not processed by walking the dominator + // tree. Just delete the users now. if (!A->use_empty()) A->replaceAllUsesWith(UndefValue::get(A->getType())); if (AST) AST->deleteValue(A); @@ -506,12 +588,12 @@ void PromoteMem2Reg::run() { while (EliminatedAPHI) { EliminatedAPHI = false; - for (DenseMap, PHINode*>::iterator I = + for (DenseMap, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, 0, &DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); @@ -530,7 +612,7 @@ void PromoteMem2Reg::run() { // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. // - for (DenseMap, PHINode*>::iterator I = + for (DenseMap, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { // We want to do this once per basic block. As such, only process a block // when we find the PHI that is the first entry in the block. @@ -663,7 +745,6 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, /// avoiding insertion of dead phi nodes. void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, AllocaInfo &Info) { - // Unique the set of defining blocks for efficient lookup. SmallPtrSet DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); @@ -673,46 +754,78 @@ void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, SmallPtrSet LiveInBlocks; ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); - // Compute the locations where PhiNodes need to be inserted. Look at the - // dominance frontier of EACH basic-block we have a write in. - unsigned CurrentVersion = 0; - std::vector > DFBlocks; - while (!Info.DefiningBlocks.empty()) { - BasicBlock *BB = Info.DefiningBlocks.back(); - Info.DefiningBlocks.pop_back(); - - // Look up the DF for this write, add it to defining blocks. - DominanceFrontier::const_iterator it = DF.find(BB); - if (it == DF.end()) continue; - - const DominanceFrontier::DomSetType &S = it->second; - - // In theory we don't need the indirection through the DFBlocks vector. - // In practice, the order of calling QueuePhiNode would depend on the - // (unspecified) ordering of basic blocks in the dominance frontier, - // which would give PHI nodes non-determinstic subscripts. Fix this by - // processing blocks in order of the occurance in the function. - for (DominanceFrontier::DomSetType::const_iterator P = S.begin(), - PE = S.end(); P != PE; ++P) { - // If the frontier block is not in the live-in set for the alloca, don't - // bother processing it. - if (!LiveInBlocks.count(*P)) - continue; - - DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P)); - } - - // Sort by which the block ordering in the function. - if (DFBlocks.size() > 1) - std::sort(DFBlocks.begin(), DFBlocks.end()); - - for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) { - BasicBlock *BB = DFBlocks[i].second; - if (QueuePhiNode(BB, AllocaNum, CurrentVersion)) - Info.DefiningBlocks.push_back(BB); + // Use a priority queue keyed on dominator tree level so that inserted nodes + // are handled from the bottom of the dominator tree upwards. + typedef std::priority_queue, + DomTreeNodeCompare> IDFPriorityQueue; + IDFPriorityQueue PQ; + + for (SmallPtrSet::const_iterator I = DefBlocks.begin(), + E = DefBlocks.end(); I != E; ++I) { + if (DomTreeNode *Node = DT.getNode(*I)) + PQ.push(std::make_pair(Node, DomLevels[Node])); + } + + SmallVector, 32> DFBlocks; + SmallPtrSet Visited; + SmallVector Worklist; + while (!PQ.empty()) { + DomTreeNodePair RootPair = PQ.top(); + PQ.pop(); + DomTreeNode *Root = RootPair.first; + unsigned RootLevel = RootPair.second; + + // Walk all dominator tree children of Root, inspecting their CFG edges with + // targets elsewhere on the dominator tree. Only targets whose level is at + // most Root's level are added to the iterated dominance frontier of the + // definition set. + + Worklist.clear(); + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + BasicBlock *BB = Node->getBlock(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; + ++SI) { + DomTreeNode *SuccNode = DT.getNode(*SI); + + // Quickly skip all CFG edges that are also dominator tree edges instead + // of catching them below. + if (SuccNode->getIDom() == Node) + continue; + + unsigned SuccLevel = DomLevels[SuccNode]; + if (SuccLevel > RootLevel) + continue; + + if (!Visited.insert(SuccNode)) + continue; + + BasicBlock *SuccBB = SuccNode->getBlock(); + if (!LiveInBlocks.count(SuccBB)) + continue; + + DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB)); + if (!DefBlocks.count(SuccBB)) + PQ.push(std::make_pair(SuccNode, SuccLevel)); + } + + for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE; + ++CI) { + if (!Visited.count(*CI)) + Worklist.push_back(*CI); + } } - DFBlocks.clear(); } + + if (DFBlocks.size() > 1) + std::sort(DFBlocks.begin(), DFBlocks.end()); + + unsigned CurrentVersion = 0; + for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) + QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion); } /// RewriteSingleStoreAlloca - If there is only a single store to this value, @@ -873,47 +986,24 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, } } -// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value -// that has an associated llvm.dbg.decl intrinsic. -void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, - StoreInst *SI) { - DIVariable DIVar(DDI->getVariable()); - if (!DIVar.Verify()) - return; - - if (!DIF) - DIF = new DIFactory(*SI->getParent()->getParent()->getParent()); - Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0, - DIVar, SI); - - // Propagate any debug metadata from the store onto the dbg.value. - DebugLoc SIDL = SI->getDebugLoc(); - if (!SIDL.isUnknown()) - DbgVal->setDebugLoc(SIDL); - // Otherwise propagate debug metadata from dbg.declare. - else - DbgVal->setDebugLoc(DDI->getDebugLoc()); -} - // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific // Alloca returns true if there wasn't already a phi-node for that variable // bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, unsigned &Version) { // Look up the basic-block in question. - PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; + PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; // If the BB already has a phi node added for the i'th alloca then we're done! if (PN) return false; // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. - PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), Allocas[AllocaNo]->getName() + "." + Twine(Version++), BB->begin()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; - PN->reserveOperandSpace(getNumPreds(BB)); if (AST && PN->getType()->isPointerTy()) AST->copyValue(PointerAllocaValues[AllocaNo], PN); @@ -982,7 +1072,7 @@ NextIteration: AllocaInst *Src = dyn_cast(LI->getPointerOperand()); if (!Src) continue; - std::map::iterator AI = AllocaLookup.find(Src); + DenseMap::iterator AI = AllocaLookup.find(Src); if (AI == AllocaLookup.end()) continue; Value *V = IncomingVals[AI->second]; @@ -998,15 +1088,18 @@ NextIteration: AllocaInst *Dest = dyn_cast(SI->getPointerOperand()); if (!Dest) continue; - std::map::iterator ai = AllocaLookup.find(Dest); + DenseMap::iterator ai = AllocaLookup.find(Dest); if (ai == AllocaLookup.end()) continue; // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) - ConvertDebugDeclareToDebugValue(DDI, SI); + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } BB->getInstList().erase(SI); } } @@ -1032,18 +1125,17 @@ NextIteration: } /// PromoteMemToReg - Promote the specified list of alloca instructions into -/// scalar registers, inserting PHI nodes as appropriate. This function makes -/// use of DominanceFrontier information. This function does not modify the CFG -/// of the function at all. All allocas must be from the same function. +/// scalar registers, inserting PHI nodes as appropriate. This function does +/// not modify the CFG of the function at all. All allocas must be from the +/// same function. /// /// If AST is specified, the specified tracker is updated to reflect changes /// made to the IR. /// void llvm::PromoteMemToReg(const std::vector &Allocas, - DominatorTree &DT, DominanceFrontier &DF, - AliasSetTracker *AST) { + DominatorTree &DT, AliasSetTracker *AST) { // If there is nothing to do, bail out... if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, DF, AST).run(); + PromoteMem2Reg(Allocas, DT, AST).run(); }