diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index fe4c03a7518..e7be98506b9 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1,4 +1,4 @@
-//===- GVN.cpp - Eliminate redundant values and loads ------------===//
+//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -10,7 +10,7 @@
 // This pass performs global value numbering to eliminate fully redundant
 // instructions.  It also performs simple dead load elimination.
 //
-// Note that this pass does the value numbering itself, it does not use the
+// Note that this pass does the value numbering itself; it does not use the
 // ValueNumbering analysis passes.
 //
 //===----------------------------------------------------------------------===//
@@ -21,10 +21,12 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Value.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -35,17 +37,21 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <cstdio>
 using namespace llvm;
 
-STATISTIC(NumGVNInstr, "Number of instructions deleted");
-STATISTIC(NumGVNLoad, "Number of loads deleted");
-STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
+STATISTIC(NumGVNInstr,  "Number of instructions deleted");
+STATISTIC(NumGVNLoad,   "Number of loads deleted");
+STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
 STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumPRELoad,   "Number of loads PRE'd");
 
 static cl::opt<bool> EnablePRE("enable-pre",
                                cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
 
 //===----------------------------------------------------------------------===//
 //                         ValueTable Class
 //===----------------------------------------------------------------------===//
@@ -56,7 +62,8 @@ static cl::opt<bool> EnablePRE("enable-pre",
 /// two values.
 namespace {
   struct VISIBILITY_HIDDEN Expression {
-    enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM,
+    enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,
+                            UDIV, SDIV, FDIV, UREM, SREM,
                             FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,
                             ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
                             ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
@@ -107,30 +114,7 @@ namespace {
     }
 
     bool operator!=(const Expression &other) const {
-      if (opcode != other.opcode)
-        return true;
-      else if (opcode == EMPTY || opcode == TOMBSTONE)
-        return false;
-      else if (type != other.type)
-        return true;
-      else if (function != other.function)
-        return true;
-      else if (firstVN != other.firstVN)
-        return true;
-      else if (secondVN != other.secondVN)
-        return true;
-      else if (thirdVN != other.thirdVN)
-        return true;
-      else {
-        if (varargs.size() != other.varargs.size())
-          return true;
-
-        for (size_t i = 0; i < varargs.size(); ++i)
-          if (varargs[i] != other.varargs[i])
-            return true;
-
-        return false;
-      }
+      return !(*this == other);
     }
   };
 
@@ -166,9 +150,11 @@ namespace {
       void erase(Value* v);
       unsigned size();
       void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
+      AliasAnalysis *getAliasAnalysis() const { return AA; }
       void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
       void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
+      void verifyRemoved(const Value *) const;
   };
 }
 
@@ -216,10 +202,13 @@ template <> struct DenseMapInfo<Expression> {
 Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
   switch(BO->getOpcode()) {
   default: // THIS SHOULD NEVER HAPPEN
-    assert(0 && "Binary operator with unknown opcode?");
+    llvm_unreachable("Binary operator with unknown opcode?");
   case Instruction::Add:  return Expression::ADD;
+  case Instruction::FAdd: return Expression::FADD;
   case Instruction::Sub:  return Expression::SUB;
+  case Instruction::FSub: return Expression::FSUB;
   case Instruction::Mul:  return Expression::MUL;
+  case Instruction::FMul: return Expression::FMUL;
   case Instruction::UDiv: return Expression::UDIV;
   case Instruction::SDiv: return Expression::SDIV;
   case Instruction::FDiv: return Expression::FDIV;
@@ -236,10 +225,10 @@ Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {
 }
 
 Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
-  if (isa<ICmpInst>(C) || isa<VICmpInst>(C)) {
+  if (isa<ICmpInst>(C)) {
     switch (C->getPredicate()) {
     default:  // THIS SHOULD NEVER HAPPEN
-      assert(0 && "Comparison with unknown predicate?");
+      llvm_unreachable("Comparison with unknown predicate?");
     case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
     case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
     case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
@@ -251,32 +240,32 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
     case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
     case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
     }
-  }
-  assert((isa<FCmpInst>(C) || isa<VFCmpInst>(C)) && "Unknown compare");
-  switch (C->getPredicate()) {
-  default: // THIS SHOULD NEVER HAPPEN
-    assert(0 && "Comparison with unknown predicate?");
-  case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
-  case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
-  case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
-  case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
-  case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
-  case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
-  case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
-  case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
-  case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
-  case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
-  case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
-  case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
-  case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
-  case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
+  } else {
+    switch (C->getPredicate()) {
+    default: // THIS SHOULD NEVER HAPPEN
+      llvm_unreachable("Comparison with unknown predicate?");
+    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
+    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
+    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
+    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
+    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
+    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
+    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
+    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
+    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
+    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
+    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
+    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
+    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
+    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
+    }
   }
 }
 
 Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {
   switch(C->getOpcode()) {
   default: // THIS SHOULD NEVER HAPPEN
-    assert(0 && "Cast operator with unknown opcode?");
+    llvm_unreachable("Cast operator with unknown opcode?");
   case Instruction::Trunc: return Expression::TRUNC;
   case Instruction::ZExt:  return Expression::ZEXT;
   case Instruction::SExt:  return Expression::SEXT;
@@ -458,68 +447,64 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
 
       MemDepResult local_dep = MD->getDependency(C);
 
-      if (local_dep.isNone()) {
+      if (!local_dep.isDef() && !local_dep.isNonLocal()) {
         valueNumbering.insert(std::make_pair(V, nextValueNumber));
         return nextValueNumber++;
-      } else if (Instruction *LocalDepInst = local_dep.getInst()) {
-        // FIXME: INDENT PROPERLY!
-        if (!isa<CallInst>(LocalDepInst)) {
-          valueNumbering.insert(std::make_pair(V, nextValueNumber));
-          return nextValueNumber++;
-        }
-
-        CallInst* local_cdep = cast<CallInst>(LocalDepInst);
+      }
+
+      if (local_dep.isDef()) {
+        CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
 
-        // FIXME: INDENT PROPERLY.
-        if (local_cdep->getCalledFunction() != C->getCalledFunction() ||
-            local_cdep->getNumOperands() != C->getNumOperands()) {
-          valueNumbering.insert(std::make_pair(V, nextValueNumber));
-          return nextValueNumber++;
-        } else if (!C->getCalledFunction()) {
+        if (local_cdep->getNumOperands() != C->getNumOperands()) {
           valueNumbering.insert(std::make_pair(V, nextValueNumber));
           return nextValueNumber++;
-        } else {
-          for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-            uint32_t c_vn = lookup_or_add(C->getOperand(i));
-            uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
-            if (c_vn != cd_vn) {
-              valueNumbering.insert(std::make_pair(V, nextValueNumber));
-              return nextValueNumber++;
-            }
+        }
+
+        for (unsigned i = 1; i < C->getNumOperands(); ++i) {
+          uint32_t c_vn = lookup_or_add(C->getOperand(i));
+          uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+          if (c_vn != cd_vn) {
+            valueNumbering.insert(std::make_pair(V, nextValueNumber));
+            return nextValueNumber++;
           }
-
-          uint32_t v = lookup_or_add(local_cdep);
-          valueNumbering.insert(std::make_pair(V, v));
-          return v;
         }
-      }
-
-      SmallVector<std::pair<BasicBlock*, MemDepResult>, 32> deps;
-      MD->getNonLocalDependency(C, deps);
+
+        uint32_t v = lookup_or_add(local_cdep);
+        valueNumbering.insert(std::make_pair(V, v));
+        return v;
+      }
+
+      // Non-local case.
+      const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
+        MD->getNonLocalCallDependency(CallSite(C));
+      // FIXME: call/call dependencies for readonly calls should return def, not
+      // clobber!  Move the checking logic to MemDep!
       CallInst* cdep = 0;
 
-      for (SmallVector<std::pair<BasicBlock*, MemDepResult>, 32>
-             ::iterator I = deps.begin(), E = deps.end(); I != E; ++I) {
-        if (I->second.isNone()) {
-          valueNumbering.insert(std::make_pair(V, nextValueNumber));
+      // Check to see if we have a single dominating call instruction that is
+      // identical to C.
+      for (unsigned i = 0, e = deps.size(); i != e; ++i) {
+        const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i];
+        // Ignore non-local dependencies.
+        if (I->second.isNonLocal())
+          continue;
 
-          return nextValueNumber++;
-        } else if (Instruction *NonLocalDepInst = I->second.getInst()) {
-          // FIXME: INDENT PROPERLY
-          // FIXME: All duplicated with non-local case.
-          if (cdep == 0 && DT->properlyDominates(I->first, C->getParent())) {
-            if (CallInst* CD = dyn_cast<CallInst>(NonLocalDepInst))
-              cdep = CD;
-            else {
-              valueNumbering.insert(std::make_pair(V, nextValueNumber));
-              return nextValueNumber++;
-            }
-          } else {
-            valueNumbering.insert(std::make_pair(V, nextValueNumber));
-            return nextValueNumber++;
-          }
+        // We don't handle non-dependencies.  If we already have a call, reject
+        // instruction dependencies.
+        if (I->second.isClobber() || cdep != 0) {
+          cdep = 0;
+          break;
         }
+
+        CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
+        // FIXME: All duplicated with non-local case.
+        if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
+          cdep = NonLocalDepCall;
+          continue;
+        }
+
+        cdep = 0;
+        break;
       }
 
       if (!cdep) {
@@ -527,34 +512,23 @@ uint32_t ValueTable::lookup_or_add(Value* V) {
         return nextValueNumber++;
       }
 
-      // FIXME: THIS ISN'T SAFE: CONSIDER:
-      //    X = strlen(str)
-      //    if (C)
-      //      str[0] = 1;
-      //    Y = strlen(str)
-      // This doesn't guarantee all-paths availability!
-      if (cdep->getCalledFunction() != C->getCalledFunction() ||
-          cdep->getNumOperands() != C->getNumOperands()) {
-        valueNumbering.insert(std::make_pair(V, nextValueNumber));
-        return nextValueNumber++;
-      } else if (!C->getCalledFunction()) {
+      if (cdep->getNumOperands() != C->getNumOperands()) {
         valueNumbering.insert(std::make_pair(V, nextValueNumber));
         return nextValueNumber++;
-      } else {
-        for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-          uint32_t c_vn = lookup_or_add(C->getOperand(i));
-          uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
-          if (c_vn != cd_vn) {
-            valueNumbering.insert(std::make_pair(V, nextValueNumber));
-            return nextValueNumber++;
-          }
+      }
+      for (unsigned i = 1; i < C->getNumOperands(); ++i) {
+        uint32_t c_vn = lookup_or_add(C->getOperand(i));
+        uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+        if (c_vn != cd_vn) {
+          valueNumbering.insert(std::make_pair(V, nextValueNumber));
+          return nextValueNumber++;
         }
-
-        uint32_t v = lookup_or_add(cdep);
-        valueNumbering.insert(std::make_pair(V, v));
-        return v;
       }
+
+      uint32_t v = lookup_or_add(cdep);
+      valueNumbering.insert(std::make_pair(V, v));
+      return v;
+
     } else {
       valueNumbering.insert(std::make_pair(V, nextValueNumber));
       return nextValueNumber++;
@@ -689,8 +663,17 @@ void ValueTable::erase(Value* V) {
   valueNumbering.erase(V);
 }
 
+/// verifyRemoved - Verify that the value is removed from all internal data
+/// structures.
+void ValueTable::verifyRemoved(const Value *V) const {
+  for (DenseMap<Value*, uint32_t>::iterator
+         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
+    assert(I->first != V && "Inst still occurs in value numbering map!");
+  }
+}
+
 //===----------------------------------------------------------------------===//
-//                       GVN Pass
+//                                GVN Pass
 //===----------------------------------------------------------------------===//
 
 namespace {
@@ -711,6 +694,9 @@ namespace {
     GVN() : FunctionPass(&ID) { }
 
   private:
+    MemoryDependenceAnalysis *MD;
+    DominatorTree *DT;
+
     ValueTable VN;
     DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
 
@@ -731,15 +717,13 @@ namespace {
     // Helper functions
    // FIXME: eliminate or document these better
     bool processLoad(LoadInst* L,
-                     DenseMap<Value*, LoadInst*> &lastLoad,
                      SmallVectorImpl<Instruction*> &toErase);
     bool processInstruction(Instruction* I,
-                            DenseMap<Value*, LoadInst*>& lastSeenLoad,
                             SmallVectorImpl<Instruction*> &toErase);
     bool processNonLocalLoad(LoadInst* L,
                              SmallVectorImpl<Instruction*> &toErase);
-    bool processBlock(DomTreeNode* DTN);
-    Value *GetValueForBlock(BasicBlock *BB, LoadInst* orig,
+    bool processBlock(BasicBlock* BB);
+    Value *GetValueForBlock(BasicBlock *BB, Instruction* orig,
                             DenseMap<BasicBlock*, Value*> &Phis,
                             bool top_level = false);
     void dump(DenseMap<uint32_t, Value*>& d);
@@ -749,7 +733,9 @@ namespace {
     bool performPRE(Function& F);
     Value* lookupNumber(BasicBlock* BB, uint32_t num);
     bool mergeBlockIntoPredecessor(BasicBlock* BB);
+    Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno);
     void cleanupGlobalSets();
+    void verifyRemoved(const Instruction *I) const;
   };
 
   char GVN::ID = 0;
@@ -772,16 +758,14 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
 }
 
 Value* GVN::CollapsePhi(PHINode* p) {
-  DominatorTree &DT = getAnalysis<DominatorTree>();
   Value* constVal = p->hasConstantValue();
-
   if (!constVal) return 0;
 
   Instruction* inst = dyn_cast<Instruction>(constVal);
   if (!inst)
     return constVal;
 
-  if (DT.dominates(inst, p))
+  if (DT->dominates(inst, p))
     if (isSafeReplacement(p, inst))
       return inst;
 
   return 0;
@@ -802,7 +786,7 @@ bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) {
 
 /// GetValueForBlock - Get the value to use within the specified basic block.
 /// available values are in Phis.
-Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig,
+Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig,
                              DenseMap<BasicBlock*, Value*> &Phis,
                              bool top_level) {
@@ -812,24 +796,31 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig,
 
   // If the block is unreachable, just return undef, since this path
   // can't actually occur at runtime.
-  if (!getAnalysis<DominatorTree>().isReachableFromEntry(BB))
-    return Phis[BB] = UndefValue::get(orig->getType());
+  if (!DT->isReachableFromEntry(BB))
+    return Phis[BB] = Context->getUndef(orig->getType());
 
-  BasicBlock* singlePred = BB->getSinglePredecessor();
-  if (singlePred) {
-    Value *ret = GetValueForBlock(singlePred, orig, Phis);
+  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+    Value *ret = GetValueForBlock(Pred, orig, Phis);
     Phis[BB] = ret;
     return ret;
   }
+
+  // Get the number of predecessors of this block so we can reserve space later.
+  // If there is already a PHI in it, use the #preds from it, otherwise count.
+  // Getting it from the PHI is constant time.
+  unsigned NumPreds;
+  if (PHINode *ExistingPN = dyn_cast<PHINode>(BB->begin()))
+    NumPreds = ExistingPN->getNumIncomingValues();
+  else
+    NumPreds = std::distance(pred_begin(BB), pred_end(BB));
 
   // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so
   // now, then get values to fill in the incoming values for the PHI.
   PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle",
                                 BB->begin());
-  PN->reserveOperandSpace(std::distance(pred_begin(BB), pred_end(BB)));
+  PN->reserveOperandSpace(NumPreds);
 
-  if (Phis.count(BB) == 0)
-    Phis.insert(std::make_pair(BB, PN));
+  Phis.insert(std::make_pair(BB, PN));
 
   // Fill in the incoming values for the block.
   for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
@@ -837,197 +828,457 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, LoadInst* orig,
     PN->addIncoming(val, *PI);
   }
 
-  AliasAnalysis& AA = getAnalysis<AliasAnalysis>();
-  AA.copyValue(orig, PN);
+  VN.getAliasAnalysis()->copyValue(orig, PN);
 
   // Attempt to collapse PHI nodes that are trivially redundant
   Value* v = CollapsePhi(PN);
   if (!v) {
     // Cache our phi construction results
-    phiMap[orig->getPointerOperand()].insert(PN);
+    if (LoadInst* L = dyn_cast<LoadInst>(orig))
+      phiMap[L->getPointerOperand()].insert(PN);
+    else
+      phiMap[orig].insert(PN);
+
     return PN;
   }
 
-  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-
-  MD.removeInstruction(PN);
   PN->replaceAllUsesWith(v);
+  if (isa<PointerType>(v->getType()))
+    MD->invalidateCachedPointerInfo(v);
 
   for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(),
        E = Phis.end(); I != E; ++I)
     if (I->second == PN)
      I->second = v;
 
+  DEBUG(cerr << "GVN removed: " << *PN);
+  MD->removeInstruction(PN);
   PN->eraseFromParent();
+  DEBUG(verifyRemoved(PN));
 
   Phis[BB] = v;
   return v;
 }
 
+/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
+/// we're analyzing is fully available in the specified block.  As we go, keep
+/// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
+/// map is actually a tri-state map with the following values:
+///   0) we know the block *is not* fully available.
+///   1) we know the block *is* fully available.
+///   2) we do not know whether the block is fully available or not, but we are
+///      currently speculating that it will be.
+///   3) we are speculating for this block and have used that to speculate for
+///      other blocks.
+static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
+                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
+  // Optimistically assume that the block is fully available and check to see
+  // if we already know about this block in one lookup.
+  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
+    FullyAvailableBlocks.insert(std::make_pair(BB, 2));
+
+  // If the entry already existed for this block, return the precomputed value.
+  if (!IV.second) {
+    // If this is a speculative "available" value, mark it as being used for
+    // speculation of other blocks.
+    if (IV.first->second == 2)
+      IV.first->second = 3;
+    return IV.first->second != 0;
+  }
+
+  // Otherwise, see if it is fully available in all predecessors.
+  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+  // If this block has no predecessors, it isn't live-in here.
+  if (PI == PE)
+    goto SpeculationFailure;
+
+  for (; PI != PE; ++PI)
+    // If the value isn't fully available in one of our predecessors, then it
+    // isn't fully available in this block either.  Undo our previous
+    // optimistic assumption and bail out.
+    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
+      goto SpeculationFailure;
+
+  return true;
+
+// SpeculationFailure - If we get here, we found out that this is not, after
+// all, a fully-available block.  We have a problem if we speculated on this and
+// used the speculation to mark other blocks as available.
+SpeculationFailure:
+  char &BBVal = FullyAvailableBlocks[BB];
+
+  // If we didn't speculate on this, just return with it set to false.
+  if (BBVal == 2) {
+    BBVal = 0;
+    return false;
+  }
+
+  // If we did speculate on this value, we could have blocks set to 1 that are
+  // incorrect.  Walk the (transitive) successors of this block and mark them as
+  // 0 if set to one.
+  SmallVector<BasicBlock*, 32> BBWorklist;
+  BBWorklist.push_back(BB);
+
+  while (!BBWorklist.empty()) {
+    BasicBlock *Entry = BBWorklist.pop_back_val();
+    // Note that this sets blocks to 0 (unavailable) if they happen to not
+    // already be in FullyAvailableBlocks.  This is safe.
+    char &EntryVal = FullyAvailableBlocks[Entry];
+    if (EntryVal == 0) continue;  // Already unavailable.
+
+    // Mark as unavailable.
+    EntryVal = 0;
+
+    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
+      BBWorklist.push_back(*I);
+  }
+
+  return false;
+}
+
 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
 /// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst* L,
+bool GVN::processNonLocalLoad(LoadInst *LI,
                               SmallVectorImpl<Instruction*> &toErase) {
-  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-
-  // Find the non-local dependencies of the load
-  SmallVector<std::pair<BasicBlock*, MemDepResult>, 32> deps;
-  MD.getNonLocalDependency(L, deps);
+  // Find the non-local dependencies of the load.
+  SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
+  MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
+                                   Deps);
+  //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI);
 
   // If we had to process more than one hundred blocks to find the
   // dependencies, this load isn't worth worrying about.  Optimizing
   // it will be too expensive.
-  if (deps.size() > 100)
+  if (Deps.size() > 100)
     return false;
+
+  // If we had a phi translation failure, we'll have a single entry which is a
+  // clobber in the current block.  Reject this early.
+  if (Deps.size() == 1 && Deps[0].second.isClobber()) {
+    DEBUG(
+      DOUT << "GVN: non-local load ";
+      WriteAsOperand(*DOUT.stream(), LI);
+      DOUT << " is clobbered by " << *Deps[0].second.getInst();
+    );
+    return false;
+  }
 
-  BasicBlock *EntryBlock = &L->getParent()->getParent()->getEntryBlock();
-
-  DenseMap<BasicBlock*, Value*> repl;
+  // Filter out useless results (non-locals, etc).  Keep track of the blocks
+  // where we have a value available in repl, also keep track of whether we see
+  // dependencies that produce an unknown value for the load (such as a call
+  // that could potentially clobber the load).
+  SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock;
+  SmallVector<BasicBlock*, 16> UnavailableBlocks;
 
-  // Filter out useless results (non-locals, etc)
-  for (SmallVector<std::pair<BasicBlock*, MemDepResult>, 32>::iterator
-         I = deps.begin(), E = deps.end(); I != E; ++I) {
-    if (I->second.isNone()) {
-      repl[I->first] = UndefValue::get(L->getType());
+  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+    BasicBlock *DepBB = Deps[i].first;
+    MemDepResult DepInfo = Deps[i].second;
+
+    if (DepInfo.isClobber()) {
+      UnavailableBlocks.push_back(DepBB);
       continue;
     }
-
-    if (I->second.isNonLocal()) {
-      // If this is a non-local dependency in the entry block, then we depend on
-      // the value live-in at the start of the function.  We could insert a load
-      // in the entry block to get this, but for now we'll just bail out.
-      // FIXME: Consider emitting a load in the entry block to catch this case!
-      if (I->first == EntryBlock)
-        return false;
+
+    Instruction *DepInst = DepInfo.getInst();
+
+    // Loading the allocation -> undef.
+    if (isa<AllocationInst>(DepInst)) {
+      ValuesPerBlock.push_back(std::make_pair(DepBB,
+                               Context->getUndef(LI->getType())));
       continue;
     }
 
-    if (StoreInst* S = dyn_cast<StoreInst>(I->second.getInst())) {
-      if (S->getPointerOperand() != L->getPointerOperand())
-        return false;
-      repl[I->first] = S->getOperand(0);
-    } else if (LoadInst* LD = dyn_cast<LoadInst>(I->second.getInst())) {
-      if (LD->getPointerOperand() != L->getPointerOperand())
-        return false;
-      repl[I->first] = LD;
+    if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) {
+      // Reject loads and stores that are to the same address but are of
+      // different types.
+      // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because
+      // of bitfield access, it would be interesting to optimize for it at some
+      // point.
+      if (S->getOperand(0)->getType() != LI->getType()) {
+        UnavailableBlocks.push_back(DepBB);
+        continue;
+      }
+
+      ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0)));
+
+    } else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) {
+      if (LD->getType() != LI->getType()) {
+        UnavailableBlocks.push_back(DepBB);
+        continue;
+      }
+      ValuesPerBlock.push_back(std::make_pair(DepBB, LD));
     } else {
-      return false;
+      UnavailableBlocks.push_back(DepBB);
+      continue;
     }
   }
 
-  // Use cached PHI construction information from previous runs
-  SmallPtrSet<Instruction*, 4>& p = phiMap[L->getPointerOperand()];
-  for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
-       I != E; ++I) {
-    if ((*I)->getParent() == L->getParent()) {
-      MD.removeInstruction(L);
-      L->replaceAllUsesWith(*I);
-      toErase.push_back(L);
-      NumGVNLoad++;
-      return true;
+  // If we have no predecessors that produce a known value for this load, exit
+  // early.
+  if (ValuesPerBlock.empty()) return false;
+
+  // If all of the instructions we depend on produce a known value for this
+  // load, then it is fully redundant and we can use PHI insertion to compute
+  // its value.  Insert PHIs and remove the fully redundant value now.
+  if (UnavailableBlocks.empty()) {
+    // Use cached PHI construction information from previous runs
+    SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
+    // FIXME: What does phiMap do? Are we positive it isn't getting invalidated?
+    for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
+         I != E; ++I) {
+      if ((*I)->getParent() == LI->getParent()) {
+        DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI);
+        LI->replaceAllUsesWith(*I);
+        if (isa<PointerType>((*I)->getType()))
+          MD->invalidateCachedPointerInfo(*I);
+        toErase.push_back(LI);
+        NumGVNLoad++;
+        return true;
+      }
+
+      ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
     }
-    repl.insert(std::make_pair((*I)->getParent(), *I));
+
+    DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI);
+
+    DenseMap<BasicBlock*, Value*> BlockReplValues;
+    BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
+    // Perform PHI construction.
+    Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
+    LI->replaceAllUsesWith(v);
+
+    if (isa<PHINode>(v))
+      v->takeName(LI);
+    if (isa<PointerType>(v->getType()))
+      MD->invalidateCachedPointerInfo(v);
+    toErase.push_back(LI);
+    NumGVNLoad++;
+    return true;
   }
 
-  // Perform PHI construction
-  SmallPtrSet<BasicBlock*, 4> visited;
-  Value* v = GetValueForBlock(L->getParent(), L, repl, true);
+  if (!EnablePRE || !EnableLoadPRE)
+    return false;
+
+  // Okay, we have *some* definitions of the value.  This means that the value
+  // is available in some of our (transitive) predecessors.  Let's think about
+  // doing PRE of this load.  This will involve inserting a new load into the
+  // predecessor when it's not available.  We could do this in general, but
+  // prefer to not increase code size.  As such, we only do this when we know
+  // that we only have to insert *one* load (which means we're basically moving
+  // the load, not inserting a new one).
+
+  SmallPtrSet<BasicBlock*, 4> Blockers;
+  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+    Blockers.insert(UnavailableBlocks[i]);
+
+  // Let's find the first basic block with more than one predecessor.  Walk
+  // backwards through predecessors if needed.
+  BasicBlock *LoadBB = LI->getParent();
+  BasicBlock *TmpBB = LoadBB;
+
+  bool isSinglePred = false;
+  bool allSingleSucc = true;
+  while (TmpBB->getSinglePredecessor()) {
+    isSinglePred = true;
+    TmpBB = TmpBB->getSinglePredecessor();
+    if (!TmpBB) // If we haven't found any, bail now.
+      return false;
+    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
+      return false;
+    if (Blockers.count(TmpBB))
+      return false;
+    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
+      allSingleSucc = false;
+  }
+
+  assert(TmpBB);
+  LoadBB = TmpBB;
+
+  // If we have a repl set with LI itself in it, this means we have a loop where
+  // at least one of the values is LI.  Since this means that we won't be able
+  // to eliminate LI even if we insert uses in the other predecessors, we will
+  // end up increasing code size.  Reject this by scanning for LI.
+  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+    if (ValuesPerBlock[i].second == LI)
+      return false;
+
+  if (isSinglePred) {
+    bool isHot = false;
+    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+      if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].second))
+        // "Hot" Instruction is in some loop (because it dominates its dep.
+        // instruction).
+        if (DT->dominates(LI, I)) {
+          isHot = true;
+          break;
+        }
+
+    // We are interested only in "hot" instructions. We don't want to do any
+    // mis-optimizations here.
+    if (!isHot)
+      return false;
+  }
+
+  // Okay, we have some hope :).  Check to see if the loaded value is fully
+  // available in all but one predecessor.
+  // FIXME: If we could restructure the CFG, we could make a common pred with
+  // all the preds that don't have an available LI and insert a new load into
+  // that one block.
+  BasicBlock *UnavailablePred = 0;
+
+  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
+  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
+    FullyAvailableBlocks[ValuesPerBlock[i].first] = true;
+  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
+    FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+
+  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
+       PI != E; ++PI) {
+    if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
+      continue;
+
+    // If this load is not available in multiple predecessors, reject it.
+    if (UnavailablePred && UnavailablePred != *PI)
+      return false;
+    UnavailablePred = *PI;
+  }
+
+  assert(UnavailablePred != 0 &&
+         "Fully available value should be eliminated above!");
 
-  MD.removeInstruction(L);
-  L->replaceAllUsesWith(v);
-  toErase.push_back(L);
-  NumGVNLoad++;
+  // If the loaded pointer is PHI node defined in this block, do PHI translation
+  // to get its value in the predecessor.
+  Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred);
+
+  // Make sure the value is live in the predecessor.  If it was defined by a
+  // non-PHI instruction in this block, we don't know how to recompute it above.
+  if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr))
+    if (!DT->dominates(LPInst->getParent(), UnavailablePred)) {
+      DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: "
+                 << *LPInst << *LI << "\n");
+      return false;
+    }
+
+  // We don't currently handle critical edges :(
+  if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) {
+    DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
+               << UnavailablePred->getName() << "': " << *LI);
+    return false;
+  }
+
+  // Make sure it is valid to move this load here.  We have to watch out for:
+  //  @1 = getelementptr (i8* p, ...
+  //  test p and branch if == 0
+  //  load @1
+  // It is valid to have the getelementptr before the test, even if p can be 0,
+  // as getelementptr only does address arithmetic.
+  // If we are not pushing the value through any multiple-successor blocks
+  // we do not have this case.  Otherwise, check that the load is safe to
+  // put anywhere; this can be improved, but should be conservatively safe.
+  if (!allSingleSucc &&
+      !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator()))
+    return false;
+
+  // Okay, we can eliminate this load by inserting a reload in the predecessor
+  // and using PHI construction to get the value in the other predecessors, do
+  // it.
+  DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI);
+
+  Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+                                LI->getAlignment(),
+                                UnavailablePred->getTerminator());
+
+  SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()];
+  for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end();
+       I != E; ++I)
+    ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I));
+
+  DenseMap<BasicBlock*, Value*> BlockReplValues;
+  BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());
+  BlockReplValues[UnavailablePred] = NewLoad;
+
+  // Perform PHI construction.
+  Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true);
+  LI->replaceAllUsesWith(v);
+  if (isa<PHINode>(v))
+    v->takeName(LI);
+  if (isa<PointerType>(v->getType()))
+    MD->invalidateCachedPointerInfo(v);
+  toErase.push_back(LI);
+  NumPRELoad++;
   return true;
 }
 
 /// processLoad - Attempt to eliminate a load, first by eliminating it
 /// locally, and then attempting non-local elimination if that fails.
-bool GVN::processLoad(LoadInst *L, DenseMap<Value*, LoadInst*> &lastLoad,
-                      SmallVectorImpl<Instruction*> &toErase) {
-  if (L->isVolatile()) {
-    lastLoad[L->getPointerOperand()] = L;
+bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
+  if (L->isVolatile())
     return false;
-  }
 
   Value* pointer = L->getPointerOperand();
-  LoadInst*& last = lastLoad[pointer];
 
   // ... to a pointer that has been loaded from before...
-  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-  bool removedNonLocal = false;
-  MemDepResult dep = MD.getDependency(L);
-  if (dep.isNonLocal() &&
-      L->getParent() != &L->getParent()->getParent()->getEntryBlock()) {
-    removedNonLocal = processNonLocalLoad(L, toErase);
+  MemDepResult dep = MD->getDependency(L);
+
+  // If the value isn't available, don't do anything!
+  if (dep.isClobber()) {
+    DEBUG(
+      // fast print dep, using operator<< on instruction would be too slow
+      DOUT << "GVN: load ";
+      WriteAsOperand(*DOUT.stream(), L);
+      Instruction *I = dep.getInst();
+      DOUT << " is clobbered by " << *I;
+    );
+    return false;
+  }
+
+  // If it is defined in another block, try harder.
+  if (dep.isNonLocal())
+    return processNonLocalLoad(L, toErase);
+
+  Instruction *DepInst = dep.getInst();
+  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
+    // Only forward substitute stores to loads of the same type.
+    // FIXME: Could do better!
+    if (DepSI->getPointerOperand()->getType() != pointer->getType())
+      return false;
 
-    if (!removedNonLocal)
-      last = L;
+    // Remove it!
+    L->replaceAllUsesWith(DepSI->getOperand(0));
+    if (isa<PointerType>(DepSI->getOperand(0)->getType()))
+      MD->invalidateCachedPointerInfo(DepSI->getOperand(0));
+    toErase.push_back(L);
+    NumGVNLoad++;
+    return true;
+  }
+
+  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
+    // Only forward substitute stores to loads of the same type.
+    // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian.
+    if (DepLI->getType() != L->getType())
+      return false;
 
-    return removedNonLocal;
+    // Remove it!
+    L->replaceAllUsesWith(DepLI);
+    if (isa<PointerType>(DepLI->getType()))
+      MD->invalidateCachedPointerInfo(DepLI);
+    toErase.push_back(L);
+    NumGVNLoad++;
+    return true;
   }
-
-  bool deletedLoad = false;
-
-  // Walk up the dependency chain until we either find
-  // a dependency we can use, or we can't walk any further
-  while (Instruction *DepInst = dep.getInst()) {
-    // ... that depends on a store ...
-    if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) {
-      if (S->getPointerOperand() == pointer) {
-        // Remove it!
-        MD.removeInstruction(L);
-
-        L->replaceAllUsesWith(S->getOperand(0));
-        toErase.push_back(L);
-        deletedLoad = true;
-        NumGVNLoad++;
-      }
-
-      // Whether we removed it or not, we can't
-      // go any further
-      break;
-    } else if (!isa<LoadInst>(DepInst)) {
-      // Only want to handle loads below.
-      break;
-    } else if (!last) {
-      // If we don't depend on a store, and we haven't
-      // been loaded before, bail.
-      break;
-    } else if (DepInst == last) {
-      // Remove it!
-      MD.removeInstruction(L);
-
-      L->replaceAllUsesWith(last);
-      toErase.push_back(L);
-      deletedLoad = true;
-      NumGVNLoad++;
-      break;
-    } else {
-      dep = MD.getDependencyFrom(L, DepInst, DepInst->getParent());
-    }
-  }
 
   // If this load really doesn't depend on anything, then we must be loading an
   // undef value.  This can happen when loading for a fresh allocation with no
   // intervening stores, for example.
-  if (dep.isNone()) {
-    // If this load depends directly on an allocation, there isn't
-    // anything stored there; therefore, we can optimize this load
-    // to undef.
-    MD.removeInstruction(L);
-    L->replaceAllUsesWith(UndefValue::get(L->getType()));
+  if (isa<AllocationInst>(DepInst)) {
+    L->replaceAllUsesWith(Context->getUndef(L->getType()));
     toErase.push_back(L);
-    deletedLoad = true;
     NumGVNLoad++;
+    return true;
  }
 
-  if (!deletedLoad)
-    last = L;
-
-  return deletedLoad;
+  return false;
 }
 
 Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) {
@@ -1048,13 +1299,66 @@ Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) {
   return 0;
 }
 
+/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination
+/// by inheritance from the dominator fails, see if we can perform phi
+/// construction to eliminate the redundancy.
+Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) {
+  BasicBlock* BaseBlock = orig->getParent();
+
+  SmallPtrSet<BasicBlock*, 4> Visited;
+  SmallVector<BasicBlock*, 8> Stack;
+  Stack.push_back(BaseBlock);
+
+  DenseMap<BasicBlock*, Value*> Results;
+
+  // Walk backwards through our predecessors, looking for instances of the
+  // value number we're looking for.  Instances are recorded in the Results
+  // map, which is then used to perform phi construction.
+  while (!Stack.empty()) {
+    BasicBlock* Current = Stack.back();
+    Stack.pop_back();
+
+    // If we've walked all the way to a proper dominator, then give up.  Cases
+    // where the instance is in the dominator will have been caught by the fast
+    // path, and any cases that require phi construction further than this are
+    // probably not worth it anyway.  Note that this is a SIGNIFICANT compile
+    // time improvement.
+    if (DT->properlyDominates(Current, orig->getParent())) return 0;
+
+    DenseMap<BasicBlock*, ValueNumberScope*>::iterator LA =
+                                                      localAvail.find(Current);
+    if (LA == localAvail.end()) return 0;
+    DenseMap<uint32_t, Value*>::iterator V = LA->second->table.find(valno);
+
+    if (V != LA->second->table.end()) {
+      // Found an instance, record it.
+      Results.insert(std::make_pair(Current, V->second));
+      continue;
+    }
+
+    // If we reach the beginning of the function, then give up.
+    if (pred_begin(Current) == pred_end(Current))
+      return 0;
+
+    for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current);
+         PI != PE; ++PI)
+      if (Visited.insert(*PI))
+        Stack.push_back(*PI);
+  }
+
+  // If we didn't find instances, give up.  Otherwise, perform phi construction.
+  if (Results.size() == 0)
+    return 0;
+  else
+    return GetValueForBlock(BaseBlock, orig, Results, true);
+}
+
 /// processInstruction - When calculating availability, handle an instruction
 /// by inserting it into the appropriate sets
 bool GVN::processInstruction(Instruction *I,
-                             DenseMap<Value*, LoadInst*> &lastSeenLoad,
                              SmallVectorImpl<Instruction*> &toErase) {
   if (LoadInst* L = dyn_cast<LoadInst>(I)) {
-    bool changed = processLoad(L, lastSeenLoad, toErase);
+    bool changed = processLoad(L, toErase);
 
     if (!changed) {
       unsigned num = VN.lookup_or_add(L);
@@ -1067,9 +1371,28 @@ bool GVN::processInstruction(Instruction *I,
   uint32_t nextNum = VN.getNextUnusedValueNumber();
   unsigned num = VN.lookup_or_add(I);
 
+  if (BranchInst* BI = dyn_cast<BranchInst>(I)) {
+    localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
+
+    if (!BI->isConditional() || isa<ConstantInt>(BI->getCondition()))
+      return false;
+
+    Value* branchCond = BI->getCondition();
+    uint32_t condVN = VN.lookup_or_add(branchCond);
+
+    BasicBlock* trueSucc = BI->getSuccessor(0);
+    BasicBlock* falseSucc = BI->getSuccessor(1);
+
+    if (trueSucc->getSinglePredecessor())
+      localAvail[trueSucc]->table[condVN] = Context->getConstantIntTrue();
+    if (falseSucc->getSinglePredecessor())
+      localAvail[falseSucc]->table[condVN] = Context->getConstantIntFalse();
+
+    return false;
+
   // Allocations are always uniquely numbered, so we can save time and memory
-  // by fast failing them.
-  if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) {
+  // by fast failing them.
+  } else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) {
     localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
     return false;
   }
@@ -1081,10 +1404,13 @@ bool GVN::processInstruction(Instruction *I,
   if (constVal) {
     for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end();
          PI != PE; ++PI)
-      if (PI->second.count(p))
-        PI->second.erase(p);
+      PI->second.erase(p);
 
     p->replaceAllUsesWith(constVal);
+    if (isa<PointerType>(constVal->getType()))
+      MD->invalidateCachedPointerInfo(constVal);
+    VN.erase(p);
+
     toErase.push_back(p);
   } else {
     localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
@@ -1096,16 +1422,28 @@ bool GVN::processInstruction(Instruction *I,
   } else if (num == nextNum) {
     localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
 
-  // Perform value-number based elimination
+  // Perform fast-path value-number based elimination of values inherited from
+  // dominators.
   } else if (Value* repl = lookupNumber(I->getParent(), num)) {
     // Remove it!
-    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-    MD.removeInstruction(I);
-
     VN.erase(I);
     I->replaceAllUsesWith(repl);
+    if (isa<PointerType>(repl->getType()))
+      MD->invalidateCachedPointerInfo(repl);
     toErase.push_back(I);
     return true;
+
+#if 0
+  // Perform slow-path value-number based elimination with phi construction.
+  } else if (Value* repl = AttemptRedundancyElimination(I, num)) {
+    // Remove it!
+    VN.erase(I);
+    I->replaceAllUsesWith(repl);
+    if (isa<PointerType>(repl->getType()))
+      MD->invalidateCachedPointerInfo(repl);
+    toErase.push_back(I);
+    return true;
+#endif
   } else {
     localAvail[I->getParent()]->table.insert(std::make_pair(num, I));
   }
@@ -1113,13 +1451,13 @@
   return false;
 }
 
-// GVN::runOnFunction - This is the main transformation entry point for a
-// function.
-//
+/// runOnFunction - This is the main transformation entry point for a function.
 bool GVN::runOnFunction(Function& F) {
+  MD = &getAnalysis<MemoryDependenceAnalysis>();
+  DT = &getAnalysis<DominatorTree>();
   VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
-  VN.setMemDep(&getAnalysis<MemoryDependenceAnalysis>());
-  VN.setDomTree(&getAnalysis<DominatorTree>());
+  VN.setMemDep(MD);
+  VN.setDomTree(DT);
 
   bool changed = false;
   bool shouldContinue = true;
@@ -1135,9 +1473,13 @@ bool GVN::runOnFunction(Function& F) {
     changed |= removedBlock;
   }
 
+  unsigned Iteration = 0;
+
   while (shouldContinue) {
+    DEBUG(cerr << "GVN iteration: " << Iteration << "\n");
     shouldContinue = iterateOnFunction(F);
    changed |= shouldContinue;
+    ++Iteration;
   }
 
   if (EnablePRE) {
@@ -1147,6 +1489,10 @@ bool GVN::runOnFunction(Function& F) {
       changed |= PREChanged;
     }
  }
+  // FIXME: Should perform GVN again after PRE does something.  PRE can move
+  // computations into blocks where they become fully redundant.  Note that
+  // we can't do this until PRE's critical edge splitting updates memdep.
+  // Actually, when this happens, we should just fully integrate PRE into GVN.
 
   cleanupGlobalSets();
 
@@ -1154,22 +1500,15 @@
 }
 
-bool GVN::processBlock(DomTreeNode* DTN) {
-  BasicBlock* BB = DTN->getBlock();
-
+bool GVN::processBlock(BasicBlock* BB) {
+  // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
+  // incrementing BI before processing an instruction).
   SmallVector<Instruction*, 8> toErase;
-  DenseMap<Value*, LoadInst*> lastSeenLoad;
   bool changed_function = false;
 
-  if (DTN->getIDom())
-    localAvail[BB] =
-      new ValueNumberScope(localAvail[DTN->getIDom()->getBlock()]);
-  else
-    localAvail[BB] = new ValueNumberScope(0);
-
   for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
-    changed_function |= processInstruction(BI, lastSeenLoad, toErase);
+    changed_function |= processInstruction(BI, toErase);
     if (toErase.empty()) {
       ++BI;
       continue;
    }
@@ -1184,15 +1523,18 @@
       --BI;
 
     for (SmallVector<Instruction*, 8>::iterator I = toErase.begin(),
-         E = toErase.end(); I != E; ++I)
+         E = toErase.end(); I != E; ++I) {
+      DEBUG(cerr << "GVN removed: " << **I);
+      MD->removeInstruction(*I);
       (*I)->eraseFromParent();
+      DEBUG(verifyRemoved(*I));
+    }
+    toErase.clear();
 
     if (AtStart)
       BI = BB->begin();
     else
      ++BI;
-
-    toErase.clear();
   }
 
   return changed_function;
 }
 
@@ -1201,8 +1543,9 @@
 /// performPRE - Perform a purely local form of PRE that looks for diamond
 /// control flow patterns and attempts to perform simple PRE at the join point.
 bool GVN::performPRE(Function& F) {
-  bool changed = false;
+  bool Changed = false;
   SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
+  DenseMap<BasicBlock*, Value*> predMap;
   for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
        DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
     BasicBlock* CurrentBlock = *DI;
@@ -1212,14 +1555,15 @@
     for (BasicBlock::iterator BI = CurrentBlock->begin(),
          BE = CurrentBlock->end(); BI != BE; ) {
-      if (isa<AllocationInst>(BI) || isa<TerminatorInst>(BI) ||
-          isa<PHINode>(BI) || BI->mayReadFromMemory() ||
-          BI->mayWriteToMemory()) {
-        BI++;
+      Instruction *CurInst = BI++;
+
+      if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) ||
+          isa<PHINode>(CurInst) || (CurInst->getType() == Type::VoidTy) ||
+          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
+          isa<DbgInfoIntrinsic>(CurInst))
         continue;
-      }
-
-      uint32_t valno = VN.lookup(BI);
+
+      uint32_t valno = VN.lookup(CurInst);
 
       // Look for the predecessors for PRE opportunities.  We're
       // only trying to solve the basic diamond case, where
@@ -1230,7 +1574,8 @@
       unsigned numWith = 0;
      unsigned numWithout = 0;
       BasicBlock* PREPred = 0;
-      DenseMap<BasicBlock*, Value*> predMap;
+      predMap.clear();
+
       for (pred_iterator PI = pred_begin(CurrentBlock),
            PE = pred_end(CurrentBlock); PI != PE; ++PI) {
         // We're not interested in PRE where the block is its
@@ -1249,7 +1594,7 @@
         if (predV == localAvail[*PI]->table.end()) {
           PREPred = *PI;
           numWithout++;
-        } else if (predV->second == BI) {
+        } else if (predV->second == CurInst) {
          numWithout = 2;
         } else {
           predMap[*PI] = predV->second;
@@ -1259,10 +1604,8 @@
 
       // Don't do PRE when it might increase code size, i.e. when
       // we would need to insert instructions in more than one pred.
-      if (numWithout != 1 || numWith == 0) {
-        BI++;
+      if (numWithout != 1 || numWith == 0)
         continue;
-      }
 
       // We can't do PRE safely on a critical edge, so instead we schedule
       // the edge to be split and perform the PRE the next time we iterate
@@ -1277,8 +1620,6 @@
       if (isCriticalEdge(PREPred->getTerminator(), succNum)) {
         toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum));
-        changed = true;
-        BI++;
         continue;
       }
 
@@ -1287,19 +1628,18 @@
       // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
       // in this loop.
-      Instruction* PREInstr = BI->clone();
+      Instruction* PREInstr = CurInst->clone(*Context);
       bool success = true;
-      for (unsigned i = 0; i < BI->getNumOperands(); ++i) {
-        Value* op = BI->getOperand(i);
-        if (isa<Argument>(op) || isa<Constant>(op) || isa<PHINode>(op))
-          PREInstr->setOperand(i, op);
-        else {
-          Value* V = lookupNumber(PREPred, VN.lookup(op));
-          if (!V) {
-            success = false;
-            break;
-          } else
-            PREInstr->setOperand(i, V);
+      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
+        Value *Op = PREInstr->getOperand(i);
+        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<PHINode>(Op))
+          continue;
+
+        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
+          PREInstr->setOperand(i, V);
+        } else {
+          success = false;
+          break;
        }
       }
 
@@ -1308,12 +1648,12 @@
      // are not value numbered precisely.
       if (!success) {
         delete PREInstr;
-        BI++;
+        DEBUG(verifyRemoved(PREInstr));
         continue;
       }
 
       PREInstr->insertBefore(PREPred->getTerminator());
-      PREInstr->setName(BI->getName() + ".pre");
+      PREInstr->setName(CurInst->getName() + ".pre");
       predMap[PREPred] = PREInstr;
       VN.add(PREInstr, valno);
       NumGVNPRE++;
 
       // Update the availability map to include the new instruction.
       localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr));
 
       // Create a PHI to make the value available in this block.
-      PHINode* Phi = PHINode::Create(BI->getType(),
-                                     BI->getName() + ".pre-phi",
+      PHINode* Phi = PHINode::Create(CurInst->getType(),
+                                     CurInst->getName() + ".pre-phi",
                                      CurrentBlock->begin());
       for (pred_iterator PI = pred_begin(CurrentBlock),
            PE = pred_end(CurrentBlock); PI != PE; ++PI)
@@ -1332,14 +1672,16 @@
 
       VN.add(Phi, valno);
       localAvail[CurrentBlock]->table[valno] = Phi;
 
-      BI->replaceAllUsesWith(Phi);
-      VN.erase(BI);
+      CurInst->replaceAllUsesWith(Phi);
+      if (isa<PointerType>(Phi->getType()))
+        MD->invalidateCachedPointerInfo(Phi);
+      VN.erase(CurInst);
 
-      Instruction* erase = BI;
-      BI++;
-      erase->eraseFromParent();
-
-      changed = true;
+      DEBUG(cerr << "GVN PRE removed: " << *CurInst);
+      MD->removeInstruction(CurInst);
+      CurInst->eraseFromParent();
+      DEBUG(verifyRemoved(CurInst));
+      Changed = true;
     }
   }
 
@@ -1347,21 +1689,36 @@ bool GVN::performPRE(Function& F) {
          I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
     SplitCriticalEdge(I->first, I->second, this);
 
-  return changed || toSplit.size();
+  return Changed || toSplit.size();
 }
 
-// iterateOnFunction - Executes one iteration of GVN
+/// iterateOnFunction - Executes one iteration of GVN
 bool GVN::iterateOnFunction(Function &F) {
-  DominatorTree &DT = getAnalysis<DominatorTree>();
-
   cleanupGlobalSets();
 
+  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
+       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+    if (DI->getIDom())
+      localAvail[DI->getBlock()] =
+                  new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
+    else
+      localAvail[DI->getBlock()] = new ValueNumberScope(0);
+  }
+
   // Top-down walk of the dominator tree
   bool changed = false;
-  for (df_iterator<DomTreeNode*> DI = df_begin(DT.getRootNode()),
-       DE = df_end(DT.getRootNode()); DI != DE; ++DI)
-    changed |= processBlock(*DI);
-
+#if 0
+  // Needed for value numbering with phi construction to work.
+  ReversePostOrderTraversal<Function*> RPOT(&F);
+  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
+       RE = RPOT.end(); RI != RE; ++RI)
+    changed |= processBlock(*RI);
+#else
+  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
+       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    changed |= processBlock(DI->getBlock());
+#endif
+
   return changed;
 }
 
@@ -1374,3 +1731,37 @@ void GVN::cleanupGlobalSets() {
     delete I->second;
   localAvail.clear();
 }
+
+/// verifyRemoved - Verify that the specified instruction does not occur in our
+/// internal data structures.
+void GVN::verifyRemoved(const Instruction *Inst) const {
+  VN.verifyRemoved(Inst);
+
+  // Walk through the PHI map to make sure the instruction isn't hiding in there
+  // somewhere.
+  for (PhiMapType::iterator
+         I = phiMap.begin(), E = phiMap.end(); I != E; ++I) {
+    assert(I->first != Inst && "Inst is still a key in PHI map!");
+
+    for (SmallPtrSet<Instruction*, 4>::iterator
+           II = I->second.begin(), IE = I->second.end(); II != IE; ++II) {
+      assert(*II != Inst && "Inst is still a value in PHI map!");
+    }
+  }
+
+  // Walk through the value number scope to make sure the instruction isn't
+  // ferreted away in it.
+  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+         I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
+    const ValueNumberScope *VNS = I->second;
+
+    while (VNS) {
+      for (DenseMap<uint32_t, Value*>::iterator
+             II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
+        assert(II->second != Inst && "Inst still in value numbering scope!");
+      }
+
+      VNS = VNS->parent;
+    }
+  }
+}
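
For readers who want to exercise the tri-state availability analysis this patch introduces (IsValueFullyAvailableInBlock) outside of LLVM, the sketch below reimplements the same optimistic walk over a toy CFG. Block, isFullyAvailable, and the pre-seeded State map are hypothetical stand-ins for BasicBlock, IsValueFullyAvailableInBlock, and FullyAvailableBlocks; this is a minimal, self-contained illustration of the algorithm under those naming assumptions, not LLVM code.

#include <cstddef>
#include <cstdio>
#include <map>
#include <vector>

// Hypothetical stand-in for a CFG node (LLVM's BasicBlock).
struct Block {
  std::vector<Block*> preds;
  std::vector<Block*> succs;
};

// Same tri-state scheme as the patch: 0 = known unavailable, 1 = known
// available, 2 = speculating, 3 = speculation that other blocks relied on.
static bool isFullyAvailable(Block *BB, std::map<Block*, char> &State) {
  // Optimistically assume the block is available; one lookup tells us
  // whether we already knew something about it.
  std::pair<std::map<Block*, char>::iterator, bool> IV =
      State.insert(std::make_pair(BB, (char)2));
  if (!IV.second) {
    if (IV.first->second == 2)
      IV.first->second = 3; // our speculation is now load-bearing
    return IV.first->second != 0;
  }

  // Fully available here iff fully available in every predecessor; a block
  // with no predecessors cannot have the value live-in.
  bool Failed = BB->preds.empty();
  if (!Failed)
    for (std::size_t i = 0; i != BB->preds.size(); ++i)
      if (!isFullyAvailable(BB->preds[i], State)) { Failed = true; break; }
  if (!Failed)
    return true;

  // Speculation failure. If nothing was derived from our guess, just flip it.
  char &BBVal = State[BB];
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // Otherwise other blocks may have been marked available because of us;
  // conservatively flood the transitive successors back to 0, as the patch
  // does in its SpeculationFailure worklist loop.
  std::vector<Block*> Worklist(1, BB);
  while (!Worklist.empty()) {
    Block *Entry = Worklist.back();
    Worklist.pop_back();
    char &EntryVal = State[Entry];
    if (EntryVal == 0) continue; // already unavailable
    EntryVal = 0;
    for (std::size_t i = 0; i != Entry->succs.size(); ++i)
      Worklist.push_back(Entry->succs[i]);
  }
  return false;
}

int main() {
  // Diamond CFG A -> {B, C} -> D with the value known available in B and C,
  // mirroring how the patch pre-seeds FullyAvailableBlocks from
  // ValuesPerBlock/UnavailableBlocks before querying the loading block's
  // predecessors.
  Block A, B, C, D;
  A.succs.push_back(&B); A.succs.push_back(&C);
  B.preds.push_back(&A); C.preds.push_back(&A);
  B.succs.push_back(&D); C.succs.push_back(&D);
  D.preds.push_back(&B); D.preds.push_back(&C);

  std::map<Block*, char> State;
  State[&B] = 1;
  State[&C] = 1;
  std::printf("fully available in D: %d\n",
              (int)isFullyAvailable(&D, State)); // prints 1
  return 0;
}

The recursion terminates on cyclic graphs because a back-edge hits the entry inserted at the top of the call (state 2 or 3) and returns immediately instead of recursing, which is the same property the patch relies on.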