X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FGVN.cpp;h=e7be98506b99f53e9a2092487a39536db7400615;hp=aca49ba3c096a86d14137b65aec92a745370e39d;hb=c23197a26f34f559ea9797de51e187087c039c42;hpb=75f02ee771bae2d6682f873895cd003bac60ba7e diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index aca49ba3c09..e7be98506b9 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -10,7 +10,7 @@ // This pass performs global value numbering to eliminate fully redundant // instructions. It also performs simple dead load elimination. // -// Note that this pass does the value numbering itself, it does not use the +// Note that this pass does the value numbering itself; it does not use the // ValueNumbering analysis passes. // //===----------------------------------------------------------------------===// @@ -21,7 +21,8 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -36,7 +37,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; @@ -48,7 +51,7 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd"); static cl::opt EnablePRE("enable-pre", cl::init(true), cl::Hidden); -cl::opt EnableLoadPRE("enable-load-pre"/*, cl::init(true)*/); +static cl::opt EnableLoadPRE("enable-load-pre", cl::init(true)); //===----------------------------------------------------------------------===// // ValueTable Class @@ -59,7 +62,8 @@ cl::opt EnableLoadPRE("enable-load-pre"/*, cl::init(true)*/); /// two values. namespace { struct VISIBILITY_HIDDEN Expression { - enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, + enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL, + UDIV, SDIV, FDIV, UREM, SREM, FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, @@ -198,10 +202,13 @@ template <> struct DenseMapInfo { Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { switch(BO->getOpcode()) { default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Binary operator with unknown opcode?"); + llvm_unreachable("Binary operator with unknown opcode?"); case Instruction::Add: return Expression::ADD; + case Instruction::FAdd: return Expression::FADD; case Instruction::Sub: return Expression::SUB; + case Instruction::FSub: return Expression::FSUB; case Instruction::Mul: return Expression::MUL; + case Instruction::FMul: return Expression::FMUL; case Instruction::UDiv: return Expression::UDIV; case Instruction::SDiv: return Expression::SDIV; case Instruction::FDiv: return Expression::FDIV; @@ -218,10 +225,10 @@ Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { } Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { - if (isa(C) || isa(C)) { + if (isa(C)) { switch (C->getPredicate()) { default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Comparison with unknown predicate?"); + llvm_unreachable("Comparison with unknown predicate?"); case ICmpInst::ICMP_EQ: return Expression::ICMPEQ; case ICmpInst::ICMP_NE: return Expression::ICMPNE; case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; @@ -233,32 +240,32 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { case ICmpInst::ICMP_SLT: return Expression::ICMPSLT; case ICmpInst::ICMP_SLE: return Expression::ICMPSLE; } - } - assert((isa(C) || isa(C)) && "Unknown compare"); - switch (C->getPredicate()) { - default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Comparison with unknown predicate?"); - case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; - case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; - case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; - case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; - case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; - case FCmpInst::FCMP_ONE: return Expression::FCMPONE; - case FCmpInst::FCMP_ORD: return Expression::FCMPORD; - case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; - case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; - case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; - case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; - case FCmpInst::FCMP_ULT: return Expression::FCMPULT; - case FCmpInst::FCMP_ULE: return Expression::FCMPULE; - case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; + } else { + switch (C->getPredicate()) { + default: // THIS SHOULD NEVER HAPPEN + llvm_unreachable("Comparison with unknown predicate?"); + case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; + case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; + case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; + case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; + case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; + case FCmpInst::FCMP_ONE: return Expression::FCMPONE; + case FCmpInst::FCMP_ORD: return Expression::FCMPORD; + case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; + case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; + case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; + case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; + case FCmpInst::FCMP_ULT: return Expression::FCMPULT; + case FCmpInst::FCMP_ULE: return Expression::FCMPULE; + case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; + } } } Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { switch(C->getOpcode()) { default: // THIS SHOULD NEVER HAPPEN - assert(0 && "Cast operator with unknown opcode?"); + llvm_unreachable("Cast operator with unknown opcode?"); case Instruction::Trunc: return Expression::TRUNC; case Instruction::ZExt: return Expression::ZEXT; case Instruction::SExt: return Expression::SEXT; @@ -666,7 +673,7 @@ void ValueTable::verifyRemoved(const Value *V) const { } //===----------------------------------------------------------------------===// -// GVN Pass +// GVN Pass //===----------------------------------------------------------------------===// namespace { @@ -790,7 +797,7 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, // If the block is unreachable, just return undef, since this path // can't actually occur at runtime. if (!DT->isReachableFromEntry(BB)) - return Phis[BB] = UndefValue::get(orig->getType()); + return Phis[BB] = Context->getUndef(orig->getType()); if (BasicBlock *Pred = BB->getSinglePredecessor()) { Value *ret = GetValueForBlock(Pred, orig, Phis); @@ -948,8 +955,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].second.isClobber()) + if (Deps.size() == 1 && Deps[0].second.isClobber()) { + DEBUG( + DOUT << "GVN: non-local load "; + WriteAsOperand(*DOUT.stream(), LI); + DOUT << " is clobbered by " << *Deps[0].second.getInst(); + ); return false; + } // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see @@ -972,7 +985,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Loading the allocation -> undef. if (isa(DepInst)) { ValuesPerBlock.push_back(std::make_pair(DepBB, - UndefValue::get(LI->getType()))); + Context->getUndef(LI->getType()))); continue; } @@ -1035,7 +1048,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); LI->replaceAllUsesWith(v); - if (!isa(v)) + if (isa(v)) v->takeName(LI); if (isa(v->getType())) MD->invalidateCachedPointerInfo(v); @@ -1055,11 +1068,32 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // that we only have to insert *one* load (which means we're basically moving // the load, not inserting a new one). - // Everything we do here is based on local predecessors of LI's block. If it - // only has one predecessor, bail now. + SmallPtrSet Blockers; + for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) + Blockers.insert(UnavailableBlocks[i]); + + // Lets find first basic block with more than one predecessor. Walk backwards + // through predecessors if needed. BasicBlock *LoadBB = LI->getParent(); - if (LoadBB->getSinglePredecessor()) - return false; + BasicBlock *TmpBB = LoadBB; + + bool isSinglePred = false; + bool allSingleSucc = true; + while (TmpBB->getSinglePredecessor()) { + isSinglePred = true; + TmpBB = TmpBB->getSinglePredecessor(); + if (!TmpBB) // If haven't found any, bail now. + return false; + if (TmpBB == LoadBB) // Infinite (unreachable) loop. + return false; + if (Blockers.count(TmpBB)) + return false; + if (TmpBB->getTerminator()->getNumSuccessors() != 1) + allSingleSucc = false; + } + + assert(TmpBB); + LoadBB = TmpBB; // If we have a repl set with LI itself in it, this means we have a loop where // at least one of the values is LI. Since this means that we won't be able @@ -1069,6 +1103,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI, if (ValuesPerBlock[i].second == LI) return false; + if (isSinglePred) { + bool isHot = false; + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) + if (Instruction *I = dyn_cast(ValuesPerBlock[i].second)) + // "Hot" Instruction is in some loop (because it dominates its dep. + // instruction). + if (DT->dominates(LI, I)) { + isHot = true; + break; + } + + // We are interested only in "hot" instructions. We don't want to do any + // mis-optimizations here. + if (!isHot) + return false; + } + // Okay, we have some hope :). Check to see if the loaded value is fully // available in all but one predecessor. // FIXME: If we could restructure the CFG, we could make a common pred with @@ -1115,7 +1166,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, << UnavailablePred->getName() << "': " << *LI); return false; } - + + // Make sure it is valid to move this load here. We have to watch out for: + // @1 = getelementptr (i8* p, ... + // test p and branch if == 0 + // load @1 + // It is valid to have the getelementptr before the test, even if p can be 0, + // as getelementptr only does address arithmetic. + // If we are not pushing the value through any multiple-successor blocks + // we do not have this case. Otherwise, check that the load is safe to + // put anywhere; this can be improved, but should be conservatively safe. + if (!allSingleSucc && + !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) + return false; + // Okay, we can eliminate this load by inserting a reload in the predecessor // and using PHI construction to get the value in the other predecessors, do // it. @@ -1125,6 +1189,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI, LI->getAlignment(), UnavailablePred->getTerminator()); + SmallPtrSet &p = phiMap[LI->getPointerOperand()]; + for (SmallPtrSet::iterator I = p.begin(), E = p.end(); + I != E; ++I) + ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); + DenseMap BlockReplValues; BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); BlockReplValues[UnavailablePred] = NewLoad; @@ -1132,7 +1201,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Perform PHI construction. Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); LI->replaceAllUsesWith(v); - if (!isa(v)) + if (isa(v)) v->takeName(LI); if (isa(v->getType())) MD->invalidateCachedPointerInfo(v); @@ -1153,8 +1222,16 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { MemDepResult dep = MD->getDependency(L); // If the value isn't available, don't do anything! - if (dep.isClobber()) + if (dep.isClobber()) { + DEBUG( + // fast print dep, using operator<< on instruction would be too slow + DOUT << "GVN: load "; + WriteAsOperand(*DOUT.stream(), L); + Instruction *I = dep.getInst(); + DOUT << " is clobbered by " << *I; + ); return false; + } // If it is defined in another block, try harder. if (dep.isNonLocal()) @@ -1195,7 +1272,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. if (isa(DepInst)) { - L->replaceAllUsesWith(UndefValue::get(L->getType())); + L->replaceAllUsesWith(Context->getUndef(L->getType())); toErase.push_back(L); NumGVNLoad++; return true; @@ -1251,7 +1328,7 @@ Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { DenseMap::iterator LA = localAvail.find(Current); if (LA == localAvail.end()) return 0; - DenseMap::iterator V = LA->second->table.find(valno); + DenseMap::iterator V = LA->second->table.find(valno); if (V != LA->second->table.end()) { // Found an instance, record it. @@ -1294,9 +1371,28 @@ bool GVN::processInstruction(Instruction *I, uint32_t nextNum = VN.getNextUnusedValueNumber(); unsigned num = VN.lookup_or_add(I); + if (BranchInst* BI = dyn_cast(I)) { + localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + + if (!BI->isConditional() || isa(BI->getCondition())) + return false; + + Value* branchCond = BI->getCondition(); + uint32_t condVN = VN.lookup_or_add(branchCond); + + BasicBlock* trueSucc = BI->getSuccessor(0); + BasicBlock* falseSucc = BI->getSuccessor(1); + + if (trueSucc->getSinglePredecessor()) + localAvail[trueSucc]->table[condVN] = Context->getConstantIntTrue(); + if (falseSucc->getSinglePredecessor()) + localAvail[falseSucc]->table[condVN] = Context->getConstantIntFalse(); + + return false; + // Allocations are always uniquely numbered, so we can save time and memory - // by fast failing them. - if (isa(I) || isa(I)) { + // by fast failing them. + } else if (isa(I) || isa(I)) { localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); return false; } @@ -1313,6 +1409,8 @@ bool GVN::processInstruction(Instruction *I, p->replaceAllUsesWith(constVal); if (isa(constVal->getType())) MD->invalidateCachedPointerInfo(constVal); + VN.erase(p); + toErase.push_back(p); } else { localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); @@ -1353,9 +1451,7 @@ bool GVN::processInstruction(Instruction *I, return false; } -// GVN::runOnFunction - This is the main transformation entry point for a -// function. -// +/// runOnFunction - This is the main transformation entry point for a function. bool GVN::runOnFunction(Function& F) { MD = &getAnalysis(); DT = &getAnalysis(); @@ -1405,18 +1501,11 @@ bool GVN::runOnFunction(Function& F) { bool GVN::processBlock(BasicBlock* BB) { - DomTreeNode* DTN = DT->getNode(BB); // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and // incrementing BI before processing an instruction). SmallVector toErase; bool changed_function = false; - if (DTN->getIDom()) - localAvail[BB] = - new ValueNumberScope(localAvail[DTN->getIDom()->getBlock()]); - else - localAvail[BB] = new ValueNumberScope(0); - for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { changed_function |= processInstruction(BI, toErase); @@ -1467,12 +1556,13 @@ bool GVN::performPRE(Function& F) { for (BasicBlock::iterator BI = CurrentBlock->begin(), BE = CurrentBlock->end(); BI != BE; ) { Instruction *CurInst = BI++; - + if (isa(CurInst) || isa(CurInst) || - isa(CurInst) || CurInst->mayReadFromMemory() || - CurInst->mayWriteToMemory()) + isa(CurInst) || (CurInst->getType() == Type::VoidTy) || + CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || + isa(CurInst)) continue; - + uint32_t valno = VN.lookup(CurInst); // Look for the predecessors for PRE opportunities. We're @@ -1538,7 +1628,7 @@ bool GVN::performPRE(Function& F) { // will be available in the predecessor by the time we need them. Any // that weren't original present will have been instantiated earlier // in this loop. - Instruction* PREInstr = CurInst->clone(); + Instruction* PREInstr = CurInst->clone(*Context); bool success = true; for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { Value *Op = PREInstr->getOperand(i); @@ -1602,10 +1692,19 @@ bool GVN::performPRE(Function& F) { return Changed || toSplit.size(); } -// iterateOnFunction - Executes one iteration of GVN +/// iterateOnFunction - Executes one iteration of GVN bool GVN::iterateOnFunction(Function &F) { cleanupGlobalSets(); + for (df_iterator DI = df_begin(DT->getRootNode()), + DE = df_end(DT->getRootNode()); DI != DE; ++DI) { + if (DI->getIDom()) + localAvail[DI->getBlock()] = + new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]); + else + localAvail[DI->getBlock()] = new ValueNumberScope(0); + } + // Top-down walk of the dominator tree bool changed = false; #if 0 @@ -1635,18 +1734,34 @@ void GVN::cleanupGlobalSets() { /// verifyRemoved - Verify that the specified instruction does not occur in our /// internal data structures. -void GVN::verifyRemoved(const Instruction *I) const { - VN.verifyRemoved(I); +void GVN::verifyRemoved(const Instruction *Inst) const { + VN.verifyRemoved(Inst); // Walk through the PHI map to make sure the instruction isn't hiding in there // somewhere. for (PhiMapType::iterator - II = phiMap.begin(), IE = phiMap.end(); II != IE; ++II) { - assert(II->first != I && "Inst is still a key in PHI map!"); + I = phiMap.begin(), E = phiMap.end(); I != E; ++I) { + assert(I->first != Inst && "Inst is still a key in PHI map!"); for (SmallPtrSet::iterator - SI = II->second.begin(), SE = II->second.end(); SI != SE; ++SI) { - assert(*SI != I && "Inst is still a value in PHI map!"); + II = I->second.begin(), IE = I->second.end(); II != IE; ++II) { + assert(*II != Inst && "Inst is still a value in PHI map!"); + } + } + + // Walk through the value number scope to make sure the instruction isn't + // ferreted away in it. + for (DenseMap::iterator + I = localAvail.begin(), E = localAvail.end(); I != E; ++I) { + const ValueNumberScope *VNS = I->second; + + while (VNS) { + for (DenseMap::iterator + II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) { + assert(II->second != Inst && "Inst still in value numbering scope!"); + } + + VNS = VNS->parent; } } }