X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FGVN.cpp;h=a2d210a36f2a737d7aec3b5a497a6ed6e6bd64f3;hb=19ad784dacc10247d47d0928f6222390c60fbb4b;hp=bdb571dddbe3ac415fd2011d5bc6fdd84556c4c3;hpb=b3056faa5554ded7ac1ac5865d10ef5839fb77d3;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index bdb571dddbe..a2d210a36f2 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -23,6 +23,7 @@ #include "llvm/Function.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Operator.h" #include "llvm/Value.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -32,12 +33,15 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MallocHelper.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -61,17 +65,17 @@ static cl::opt EnableLoadPRE("enable-load-pre", cl::init(true)); /// as an efficient mechanism to determine the expression-wise equivalence of /// two values. namespace { - struct VISIBILITY_HIDDEN Expression { + struct Expression { enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL, UDIV, SDIV, FDIV, UREM, SREM, - FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, - ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, - ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, - FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, - FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, + FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, + ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, + ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, + FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, + FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, - FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, + FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, EMPTY, TOMBSTONE }; @@ -81,11 +85,11 @@ namespace { uint32_t secondVN; uint32_t thirdVN; SmallVector varargs; - Value* function; - + Value *function; + Expression() { } Expression(ExpressionOpcode o) : opcode(o) { } - + bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; @@ -104,30 +108,30 @@ namespace { else { if (varargs.size() != other.varargs.size()) return false; - + for (size_t i = 0; i < varargs.size(); ++i) if (varargs[i] != other.varargs[i]) return false; - + return true; } } - + bool operator!=(const Expression &other) const { return !(*this == other); } }; - - class VISIBILITY_HIDDEN ValueTable { + + class ValueTable { private: DenseMap valueNumbering; DenseMap expressionNumbering; AliasAnalysis* AA; MemoryDependenceAnalysis* MD; DominatorTree* DT; - + uint32_t nextValueNumber; - + Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); Expression::ExpressionOpcode getOpcode(CmpInst* C); Expression::ExpressionOpcode getOpcode(CastInst* C); @@ -143,11 +147,11 @@ namespace { Expression create_expression(Constant* C); public: ValueTable() : nextValueNumber(1) { } - uint32_t lookup_or_add(Value* V); - uint32_t 
lookup(Value* V) const; - void add(Value* V, uint32_t num); + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + void add(Value *V, uint32_t num); void clear(); - void erase(Value* v); + void erase(Value *v); unsigned size(); void setAliasAnalysis(AliasAnalysis* A) { AA = A; } AliasAnalysis *getAliasAnalysis() const { return AA; } @@ -163,30 +167,30 @@ template <> struct DenseMapInfo { static inline Expression getEmptyKey() { return Expression(Expression::EMPTY); } - + static inline Expression getTombstoneKey() { return Expression(Expression::TOMBSTONE); } - + static unsigned getHashValue(const Expression e) { unsigned hash = e.opcode; - + hash = e.firstVN + hash * 37; hash = e.secondVN + hash * 37; hash = e.thirdVN + hash * 37; - + hash = ((unsigned)((uintptr_t)e.type >> 4) ^ (unsigned)((uintptr_t)e.type >> 9)) + hash * 37; - + for (SmallVector::const_iterator I = e.varargs.begin(), E = e.varargs.end(); I != E; ++I) hash = *I + hash * 37; - + hash = ((unsigned)((uintptr_t)e.function >> 4) ^ (unsigned)((uintptr_t)e.function >> 9)) + hash * 37; - + return hash; } static bool isEqual(const Expression &LHS, const Expression &RHS) { @@ -283,126 +287,126 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { Expression ValueTable::create_expression(CallInst* C) { Expression e; - + e.type = C->getType(); e.firstVN = 0; e.secondVN = 0; e.thirdVN = 0; e.function = C->getCalledFunction(); e.opcode = Expression::CALL; - + for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); I != E; ++I) e.varargs.push_back(lookup_or_add(*I)); - + return e; } Expression ValueTable::create_expression(BinaryOperator* BO) { Expression e; - + e.firstVN = lookup_or_add(BO->getOperand(0)); e.secondVN = lookup_or_add(BO->getOperand(1)); e.thirdVN = 0; e.function = 0; e.type = BO->getType(); e.opcode = getOpcode(BO); - + return e; } Expression ValueTable::create_expression(CmpInst* C) { Expression e; - + e.firstVN = lookup_or_add(C->getOperand(0)); e.secondVN = lookup_or_add(C->getOperand(1)); e.thirdVN = 0; e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); - + return e; } Expression ValueTable::create_expression(CastInst* C) { Expression e; - + e.firstVN = lookup_or_add(C->getOperand(0)); e.secondVN = 0; e.thirdVN = 0; e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); - + return e; } Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression e; - + e.firstVN = lookup_or_add(S->getOperand(0)); e.secondVN = lookup_or_add(S->getOperand(1)); e.thirdVN = lookup_or_add(S->getOperand(2)); e.function = 0; e.type = S->getType(); e.opcode = Expression::SHUFFLE; - + return e; } Expression ValueTable::create_expression(ExtractElementInst* E) { Expression e; - + e.firstVN = lookup_or_add(E->getOperand(0)); e.secondVN = lookup_or_add(E->getOperand(1)); e.thirdVN = 0; e.function = 0; e.type = E->getType(); e.opcode = Expression::EXTRACT; - + return e; } Expression ValueTable::create_expression(InsertElementInst* I) { Expression e; - + e.firstVN = lookup_or_add(I->getOperand(0)); e.secondVN = lookup_or_add(I->getOperand(1)); e.thirdVN = lookup_or_add(I->getOperand(2)); e.function = 0; e.type = I->getType(); e.opcode = Expression::INSERT; - + return e; } Expression ValueTable::create_expression(SelectInst* I) { Expression e; - + e.firstVN = lookup_or_add(I->getCondition()); e.secondVN = lookup_or_add(I->getTrueValue()); e.thirdVN = lookup_or_add(I->getFalseValue()); e.function = 0; e.type = I->getType(); e.opcode = Expression::SELECT; - + return e; } 
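The create_expression helpers and the DenseMapInfo hash above implement expression-based value numbering: an instruction is reduced to an opcode, a type, and the value numbers of its operands, so two instructions that perform the same operation on already-equivalent operands map to the same Expression key and receive the same number. A minimal standalone sketch of that idea follows (simplified key type and invented helper names, not LLVM's ValueTable):

#include <cassert>
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>

// Hypothetical stand-in for ValueTable::lookup_or_add: an expression is an
// opcode plus the value numbers of its operands; structurally identical
// expressions share one number.
struct SimpleValueTable {
  using Key = std::tuple<std::string, std::vector<uint32_t>>;
  std::map<Key, uint32_t> expressionNumbering;
  uint32_t nextValueNumber = 1;

  uint32_t numberExpression(const std::string &Opcode,
                            const std::vector<uint32_t> &OperandVNs) {
    Key K{Opcode, OperandVNs};
    auto It = expressionNumbering.find(K);
    if (It != expressionNumbering.end())
      return It->second;                                  // seen before: reuse
    return expressionNumbering[K] = nextValueNumber++;    // otherwise mint one
  }
};

int main() {
  SimpleValueTable VT;
  uint32_t A = VT.numberExpression("arg", {0});   // pretend %a gets number 1
  uint32_t B = VT.numberExpression("arg", {1});   // pretend %b gets number 2
  uint32_t X = VT.numberExpression("add", {A, B});
  uint32_t Y = VT.numberExpression("add", {A, B});
  assert(X == Y && "structurally identical adds share a value number");
  std::cout << X << " == " << Y << "\n";
}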
Expression ValueTable::create_expression(GetElementPtrInst* G) { Expression e; - + e.firstVN = lookup_or_add(G->getPointerOperand()); e.secondVN = 0; e.thirdVN = 0; e.function = 0; e.type = G->getType(); e.opcode = Expression::GEP; - + for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); I != E; ++I) e.varargs.push_back(lookup_or_add(*I)); - + return e; } @@ -411,21 +415,21 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) { //===----------------------------------------------------------------------===// /// add - Insert a value into the table with a specified value number. -void ValueTable::add(Value* V, uint32_t num) { +void ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); } /// lookup_or_add - Returns the value number for the specified value, assigning /// it a new number if it did not have one before. -uint32_t ValueTable::lookup_or_add(Value* V) { +uint32_t ValueTable::lookup_or_add(Value *V) { DenseMap::iterator VI = valueNumbering.find(V); if (VI != valueNumbering.end()) return VI->second; - + if (CallInst* C = dyn_cast(V)) { if (AA->doesNotAccessMemory(C)) { Expression e = create_expression(C); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -433,20 +437,20 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (AA->onlyReadsMemory(C)) { Expression e = create_expression(C); - + if (expressionNumbering.find(e) == expressionNumbering.end()) { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } - + MemDepResult local_dep = MD->getDependency(C); - + if (!local_dep.isDef() && !local_dep.isNonLocal()) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; @@ -454,12 +458,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { if (local_dep.isDef()) { CallInst* local_cdep = cast(local_dep.getInst()); - + if (local_cdep->getNumOperands() != C->getNumOperands()) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } - + for (unsigned i = 1; i < C->getNumOperands(); ++i) { uint32_t c_vn = lookup_or_add(C->getOperand(i)); uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); @@ -468,19 +472,19 @@ uint32_t ValueTable::lookup_or_add(Value* V) { return nextValueNumber++; } } - + uint32_t v = lookup_or_add(local_cdep); valueNumbering.insert(std::make_pair(V, v)); return v; } // Non-local case. - const MemoryDependenceAnalysis::NonLocalDepInfo &deps = + const MemoryDependenceAnalysis::NonLocalDepInfo &deps = MD->getNonLocalCallDependency(CallSite(C)); // FIXME: call/call dependencies for readonly calls should return def, not // clobber! Move the checking logic to MemDep! CallInst* cdep = 0; - + // Check to see if we have a single dominating call instruction that is // identical to C. for (unsigned i = 0, e = deps.size(); i != e; ++i) { @@ -495,23 +499,23 @@ uint32_t ValueTable::lookup_or_add(Value* V) { cdep = 0; break; } - + CallInst *NonLocalDepCall = dyn_cast(I->second.getInst()); // FIXME: All duplicated with non-local case. 
if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ cdep = NonLocalDepCall; continue; } - + cdep = 0; break; } - + if (!cdep) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } - + if (cdep->getNumOperands() != C->getNumOperands()) { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; @@ -524,18 +528,18 @@ uint32_t ValueTable::lookup_or_add(Value* V) { return nextValueNumber++; } } - + uint32_t v = lookup_or_add(cdep); valueNumbering.insert(std::make_pair(V, v)); return v; - + } else { valueNumbering.insert(std::make_pair(V, nextValueNumber)); return nextValueNumber++; } } else if (BinaryOperator* BO = dyn_cast(V)) { Expression e = create_expression(BO); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -543,12 +547,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (CmpInst* C = dyn_cast(V)) { Expression e = create_expression(C); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -556,12 +560,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (ShuffleVectorInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -569,12 +573,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (ExtractElementInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -582,12 +586,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (InsertElementInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -595,12 +599,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (SelectInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -608,12 +612,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (CastInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + 
DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -621,12 +625,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else if (GetElementPtrInst* U = dyn_cast(V)) { Expression e = create_expression(U); - + DenseMap::iterator EI = expressionNumbering.find(e); if (EI != expressionNumbering.end()) { valueNumbering.insert(std::make_pair(V, EI->second)); @@ -634,7 +638,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) { } else { expressionNumbering.insert(std::make_pair(e, nextValueNumber)); valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + return nextValueNumber++; } } else { @@ -645,7 +649,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) { /// lookup - Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t ValueTable::lookup(Value* V) const { +uint32_t ValueTable::lookup(Value *V) const { DenseMap::iterator VI = valueNumbering.find(V); assert(VI != valueNumbering.end() && "Value not numbered?"); return VI->second; @@ -659,7 +663,7 @@ void ValueTable::clear() { } /// erase - Remove a value from the value numbering -void ValueTable::erase(Value* V) { +void ValueTable::erase(Value *V) { valueNumbering.erase(V); } @@ -677,17 +681,17 @@ void ValueTable::verifyRemoved(const Value *V) const { //===----------------------------------------------------------------------===// namespace { - struct VISIBILITY_HIDDEN ValueNumberScope { + struct ValueNumberScope { ValueNumberScope* parent; DenseMap table; - + ValueNumberScope(ValueNumberScope* p) : parent(p) { } }; } namespace { - class VISIBILITY_HIDDEN GVN : public FunctionPass { + class GVN : public FunctionPass { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid @@ -699,45 +703,43 @@ namespace { ValueTable VN; DenseMap localAvail; - + typedef DenseMap > PhiMapType; PhiMapType phiMap; - - + + // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); - + AU.addPreserved(); AU.addPreserved(); } - + // Helper fuctions // FIXME: eliminate or document these better bool processLoad(LoadInst* L, SmallVectorImpl &toErase); - bool processInstruction(Instruction* I, + bool processInstruction(Instruction *I, SmallVectorImpl &toErase); bool processNonLocalLoad(LoadInst* L, SmallVectorImpl &toErase); - bool processBlock(BasicBlock* BB); - Value *GetValueForBlock(BasicBlock *BB, Instruction* orig, + bool processBlock(BasicBlock *BB); + Value *GetValueForBlock(BasicBlock *BB, Instruction *orig, DenseMap &Phis, bool top_level = false); void dump(DenseMap& d); bool iterateOnFunction(Function &F); - Value* CollapsePhi(PHINode* p); - bool isSafeReplacement(PHINode* p, Instruction* inst); + Value *CollapsePhi(PHINode* p); bool performPRE(Function& F); - Value* lookupNumber(BasicBlock* BB, uint32_t num); - bool mergeBlockIntoPredecessor(BasicBlock* BB); - Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno); + Value *lookupNumber(BasicBlock *BB, uint32_t num); + Value *AttemptRedundancyElimination(Instruction *orig, unsigned valno); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; }; - + char GVN::ID = 0; } @@ -757,50 +759,50 @@ void 
GVN::dump(DenseMap& d) { printf("}\n"); } -Value* GVN::CollapsePhi(PHINode* p) { - Value* constVal = p->hasConstantValue(); - if (!constVal) return 0; - - Instruction* inst = dyn_cast(constVal); - if (!inst) - return constVal; - - if (DT->dominates(inst, p)) - if (isSafeReplacement(p, inst)) - return inst; - return 0; -} - -bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) { +static bool isSafeReplacement(PHINode* p, Instruction *inst) { if (!isa(inst)) return true; - + for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); UI != E; ++UI) if (PHINode* use_phi = dyn_cast(UI)) if (use_phi->getParent() == inst->getParent()) return false; - + return true; } +Value *GVN::CollapsePhi(PHINode *PN) { + Value *ConstVal = PN->hasConstantValue(DT); + if (!ConstVal) return 0; + + Instruction *Inst = dyn_cast(ConstVal); + if (!Inst) + return ConstVal; + + if (DT->dominates(Inst, PN)) + if (isSafeReplacement(PN, Inst)) + return Inst; + return 0; +} + /// GetValueForBlock - Get the value to use within the specified basic block. /// available values are in Phis. -Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, +Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction *Orig, DenseMap &Phis, - bool top_level) { - + bool TopLevel) { + // If we have already computed this value, return the previously computed val. DenseMap::iterator V = Phis.find(BB); - if (V != Phis.end() && !top_level) return V->second; - + if (V != Phis.end() && !TopLevel) return V->second; + // If the block is unreachable, just return undef, since this path // can't actually occur at runtime. if (!DT->isReachableFromEntry(BB)) - return Phis[BB] = Context->getUndef(orig->getType()); - + return Phis[BB] = UndefValue::get(Orig->getType()); + if (BasicBlock *Pred = BB->getSinglePredecessor()) { - Value *ret = GetValueForBlock(Pred, orig, Phis); + Value *ret = GetValueForBlock(Pred, Orig, Phis); Phis[BB] = ret; return ret; } @@ -813,35 +815,35 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, NumPreds = ExistingPN->getNumIncomingValues(); else NumPreds = std::distance(pred_begin(BB), pred_end(BB)); - + // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so // now, then get values to fill in the incoming values for the PHI. - PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle", + PHINode *PN = PHINode::Create(Orig->getType(), Orig->getName()+".rle", BB->begin()); PN->reserveOperandSpace(NumPreds); - + Phis.insert(std::make_pair(BB, PN)); - + // Fill in the incoming values for the block. 
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - Value* val = GetValueForBlock(*PI, orig, Phis); + Value *val = GetValueForBlock(*PI, Orig, Phis); PN->addIncoming(val, *PI); } - - VN.getAliasAnalysis()->copyValue(orig, PN); - + + VN.getAliasAnalysis()->copyValue(Orig, PN); + // Attempt to collapse PHI nodes that are trivially redundant - Value* v = CollapsePhi(PN); + Value *v = CollapsePhi(PN); if (!v) { // Cache our phi construction results - if (LoadInst* L = dyn_cast(orig)) + if (LoadInst* L = dyn_cast(Orig)) phiMap[L->getPointerOperand()].insert(PN); else - phiMap[orig].insert(PN); - + phiMap[Orig].insert(PN); + return PN; } - + PN->replaceAllUsesWith(v); if (isa(v->getType())) MD->invalidateCachedPointerInfo(v); @@ -851,7 +853,7 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, if (I->second == PN) I->second = v; - DEBUG(cerr << "GVN removed: " << *PN); + DEBUG(errs() << "GVN removed: " << *PN << '\n'); MD->removeInstruction(PN); PN->eraseFromParent(); DEBUG(verifyRemoved(PN)); @@ -870,11 +872,11 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, /// currently speculating that it will be. /// 3) we are speculating for this block and have used that to speculate for /// other blocks. -static bool IsValueFullyAvailableInBlock(BasicBlock *BB, +static bool IsValueFullyAvailableInBlock(BasicBlock *BB, DenseMap &FullyAvailableBlocks) { // Optimistically assume that the block is fully available and check to see // if we already know about this block in one lookup. - std::pair::iterator, char> IV = + std::pair::iterator, char> IV = FullyAvailableBlocks.insert(std::make_pair(BB, 2)); // If the entry already existed for this block, return the precomputed value. @@ -885,29 +887,29 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB, IV.first->second = 3; return IV.first->second != 0; } - + // Otherwise, see if it is fully available in all predecessors. pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - + // If this block has no predecessors, it isn't live-in here. if (PI == PE) goto SpeculationFailure; - + for (; PI != PE; ++PI) // If the value isn't fully available in one of our predecessors, then it // isn't fully available in this block either. Undo our previous // optimistic assumption and bail out. if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) goto SpeculationFailure; - + return true; - + // SpeculationFailure - If we get here, we found out that this is not, after // all, a fully-available block. We have a problem if we speculated on this and // used the speculation to mark other blocks as available. SpeculationFailure: char &BBVal = FullyAvailableBlocks[BB]; - + // If we didn't speculate on this, just return with it set to false. if (BBVal == 2) { BBVal = 0; @@ -919,7 +921,7 @@ SpeculationFailure: // 0 if set to one. SmallVector BBWorklist; BBWorklist.push_back(BB); - + while (!BBWorklist.empty()) { BasicBlock *Entry = BBWorklist.pop_back_val(); // Note that this sets blocks to 0 (unavailable) if they happen to not @@ -929,24 +931,357 @@ SpeculationFailure: // Mark as unavailable. EntryVal = 0; - + for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) BBWorklist.push_back(*I); } - + return false; } + +/// CanCoerceMustAliasedValueToLoad - Return true if +/// CoerceAvailableValueToLoadType will succeed. 
+static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, + const Type *LoadTy, + const TargetData &TD) { + // If the loaded or stored value is an first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (isa(LoadTy) || isa(LoadTy) || + isa(StoredVal->getType()) || + isa(StoredVal->getType())) + return false; + + // The store has to be at least as big as the load. + if (TD.getTypeSizeInBits(StoredVal->getType()) < + TD.getTypeSizeInBits(LoadTy)) + return false; + + return true; +} + + +/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and +/// then a load from a must-aliased pointer of a different type, try to coerce +/// the stored value. LoadedTy is the type of the load we want to replace and +/// InsertPt is the place to insert new instructions. +/// +/// If we can't do it, return null. +static Value *CoerceAvailableValueToLoadType(Value *StoredVal, + const Type *LoadedTy, + Instruction *InsertPt, + const TargetData &TD) { + if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) + return 0; + + const Type *StoredValTy = StoredVal->getType(); + + uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); + + // If the store and reload are the same size, we can always reuse it. + if (StoreSize == LoadSize) { + if (isa(StoredValTy) && isa(LoadedTy)) { + // Pointer to Pointer -> use bitcast. + return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); + } + + // Convert source pointers to integers, which can be bitcast. + if (isa(StoredValTy)) { + StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); + StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); + } + + const Type *TypeToCastTo = LoadedTy; + if (isa(TypeToCastTo)) + TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); + + if (StoredValTy != TypeToCastTo) + StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); + + // Cast to pointer if the load needs a pointer type. + if (isa(LoadedTy)) + StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); + + return StoredVal; + } + + // If the loaded value is smaller than the available value, then we can + // extract out a piece from it. If the available value is too small, then we + // can't do anything. + assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); + + // Convert source pointers to integers, which can be manipulated. + if (isa(StoredValTy)) { + StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); + StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); + } + + // Convert vectors and fp to integer, which can be manipulated. + if (!isa(StoredValTy)) { + StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); + StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); + } + + // If this is a big-endian system, we need to shift the value down to the low + // bits so that a truncate will work. + if (TD.isBigEndian()) { + Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize); + StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt); + } + + // Truncate the integer to the right size now. + const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); + StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); + + if (LoadedTy == NewIntTy) + return StoredVal; + + // If the result is a pointer, inttoptr. 
+ if (isa(LoadedTy)) + return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); + + // Otherwise, bitcast. + return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); +} + +/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can +/// be expressed as a base pointer plus a constant offset. Return the base and +/// offset to the caller. +static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const TargetData &TD) { + Operator *PtrOp = dyn_cast(Ptr); + if (PtrOp == 0) return Ptr; + + // Just look through bitcasts. + if (PtrOp->getOpcode() == Instruction::BitCast) + return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); + + // If this is a GEP with constant indices, we can look through it. + GEPOperator *GEP = dyn_cast(PtrOp); + if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; + ++I, ++GTI) { + ConstantInt *OpC = cast(*I); + if (OpC->isZero()) continue; + + // Handle a struct and array indices which add their offset to the pointer. + if (const StructType *STy = dyn_cast(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + } else { + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += OpC->getSExtValue()*Size; + } + } + + // Re-sign extend from the pointer size if needed to get overflow edge cases + // right. + unsigned PtrSize = TD.getPointerSizeInBits(); + if (PtrSize < 64) + Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); + + return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); +} + + +/// AnalyzeLoadFromClobberingStore - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. This means +/// that the store *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. This returns -1 if we have to +/// give up, or a byte number in the stored value of the piece that feeds the +/// load. +static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, + const TargetData &TD) { + // If the loaded or stored value is an first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (isa(L->getType()) || isa(L->getType()) || + isa(DepSI->getOperand(0)->getType()) || + isa(DepSI->getOperand(0)->getType())) + return -1; + + int64_t StoreOffset = 0, LoadOffset = 0; + Value *StoreBase = + GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD); + Value *LoadBase = + GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD); + if (StoreBase != LoadBase) + return -1; + + // If the load and store are to the exact same address, they should have been + // a must alias. AA must have gotten confused. + // FIXME: Study to see if/when this happens. 
+ if (LoadOffset == StoreOffset) { +#if 0 + errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" + << "Base = " << *StoreBase << "\n" + << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" + << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" + << "Load Ptr = " << *L->getPointerOperand() << "\n" + << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; + errs() << "'" << L->getParent()->getParent()->getName() << "'" + << *L->getParent(); +#endif + return -1; + } + + // If the load and store don't overlap at all, the store doesn't provide + // anything to the load. In this case, they really don't alias at all, AA + // must have gotten confused. + // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then + // remove this check, as it is duplicated with what we have below. + uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); + uint64_t LoadSize = TD.getTypeSizeInBits(L->getType()); + + if ((StoreSize & 7) | (LoadSize & 7)) + return -1; + StoreSize >>= 3; // Convert to bytes. + LoadSize >>= 3; + + + bool isAAFailure = false; + if (StoreOffset < LoadOffset) { + isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; + } else { + isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset; + } + if (isAAFailure) { +#if 0 + errs() << "STORE LOAD DEP WITH COMMON BASE:\n" + << "Base = " << *StoreBase << "\n" + << "Store Ptr = " << *DepSI->getPointerOperand() << "\n" + << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" + << "Load Ptr = " << *L->getPointerOperand() << "\n" + << "Load Offs = " << LoadOffset << " - " << *L << "\n\n"; + errs() << "'" << L->getParent()->getParent()->getName() << "'" + << *L->getParent(); +#endif + return -1; + } + + // If the Load isn't completely contained within the stored bits, we don't + // have all the bits to feed it. We could do something crazy in the future + // (issue a smaller load then merge the bits in) but this seems unlikely to be + // valuable. + if (StoreOffset > LoadOffset || + StoreOffset+StoreSize < LoadOffset+LoadSize) + return -1; + + // Okay, we can do this transformation. Return the number of bytes into the + // store that the load is. + return LoadOffset-StoreOffset; +} + + +/// GetStoreValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. This means +/// that the store *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. +static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, + const Type *LoadTy, + Instruction *InsertPt, const TargetData &TD){ + LLVMContext &Ctx = SrcVal->getType()->getContext(); + + uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + + + // Compute which bits of the stored value are being used by the load. Convert + // to an integer type to start with. + if (isa(SrcVal->getType())) + SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt); + if (!isa(SrcVal->getType())) + SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8), + "tmp", InsertPt); + + // Shift the bits to the least significant depending on endianness. 
+ unsigned ShiftAmt; + if (TD.isLittleEndian()) { + ShiftAmt = Offset*8; + } else { + ShiftAmt = (StoreSize-LoadSize-Offset)*8; + } + + if (ShiftAmt) + SrcVal = BinaryOperator::CreateLShr(SrcVal, + ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt); + + if (LoadSize != StoreSize) + SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8), + "tmp", InsertPt); + + return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); +} + +struct AvailableValueInBlock { + /// BB - The basic block in question. + BasicBlock *BB; + /// V - The value that is live out of the block. + Value *V; + /// Offset - The byte offset in V that is interesting for the load query. + unsigned Offset; + + static AvailableValueInBlock get(BasicBlock *BB, Value *V, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.V = V; + Res.Offset = Offset; + return Res; + } +}; + +/// GetAvailableBlockValues - Given the ValuesPerBlock list, convert all of the +/// available values to values of the expected LoadTy in their blocks and insert +/// the new values into BlockReplValues. +static void +GetAvailableBlockValues(DenseMap &BlockReplValues, + const SmallVector &ValuesPerBlock, + const Type *LoadTy, + const TargetData *TD) { + + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { + BasicBlock *BB = ValuesPerBlock[i].BB; + Value *AvailableVal = ValuesPerBlock[i].V; + unsigned Offset = ValuesPerBlock[i].Offset; + + Value *&BlockEntry = BlockReplValues[BB]; + if (BlockEntry) continue; + + if (AvailableVal->getType() != LoadTy) { + assert(TD && "Need target data to handle type mismatch case"); + AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, + BB->getTerminator(), *TD); + + if (Offset) { + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" + << *ValuesPerBlock[i].V << '\n' + << *AvailableVal << '\n' << "\n\n\n"); + } + + + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" + << *ValuesPerBlock[i].V << '\n' + << *AvailableVal << '\n' << "\n\n\n"); + } + BlockEntry = AvailableVal; + } +} + /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. bool GVN::processNonLocalLoad(LoadInst *LI, SmallVectorImpl &toErase) { // Find the non-local dependencies of the load. - SmallVector Deps; + SmallVector Deps; MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), Deps); - //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI); - + //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " + // << Deps.size() << *LI << '\n'); + // If we had to process more than one hundred blocks to find the // dependencies, this load isn't worth worrying about. Optimizing // it will be too expensive. @@ -957,67 +1292,104 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // clobber in the current block. Reject this early. if (Deps.size() == 1 && Deps[0].second.isClobber()) { DEBUG( - DOUT << "GVN: non-local load "; - WriteAsOperand(*DOUT.stream(), LI); - DOUT << " is clobbered by " << *Deps[0].second.getInst(); + errs() << "GVN: non-local load "; + WriteAsOperand(errs(), LI); + errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n'; ); return false; } - + // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see // dependencies that produce an unknown value for the load (such as a call // that could potentially clobber the load). 
- SmallVector, 16> ValuesPerBlock; + SmallVector ValuesPerBlock; SmallVector UnavailableBlocks; + + const TargetData *TD = 0; for (unsigned i = 0, e = Deps.size(); i != e; ++i) { BasicBlock *DepBB = Deps[i].first; MemDepResult DepInfo = Deps[i].second; - + if (DepInfo.isClobber()) { + // If the dependence is to a store that writes to a superset of the bits + // read by the load, we can extract the bits we need for the load from the + // stored value. + if (StoreInst *DepSI = dyn_cast(DepInfo.getInst())) { + if (TD == 0) + TD = getAnalysisIfAvailable(); + if (TD) { + int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD); + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + DepSI->getOperand(0), + Offset)); + continue; + } + } + } + + // FIXME: Handle memset/memcpy. UnavailableBlocks.push_back(DepBB); continue; } - + Instruction *DepInst = DepInfo.getInst(); - + // Loading the allocation -> undef. - if (isa(DepInst)) { - ValuesPerBlock.push_back(std::make_pair(DepBB, - Context->getUndef(LI->getType()))); + if (isa(DepInst) || isMalloc(DepInst)) { + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + UndefValue::get(LI->getType()))); continue; } - - if (StoreInst* S = dyn_cast(DepInst)) { - // Reject loads and stores that are to the same address but are of - // different types. - // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because - // of bitfield access, it would be interesting to optimize for it at some - // point. + + if (StoreInst *S = dyn_cast(DepInst)) { + // Reject loads and stores that are to the same address but are of + // different types if we have to. if (S->getOperand(0)->getType() != LI->getType()) { - UnavailableBlocks.push_back(DepBB); - continue; + if (TD == 0) + TD = getAnalysisIfAvailable(); + + // If the stored value is larger or equal to the loaded value, we can + // reuse it. + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0), + LI->getType(), *TD)) { + UnavailableBlocks.push_back(DepBB); + continue; + } } - - ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0))); - - } else if (LoadInst* LD = dyn_cast(DepInst)) { + + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + S->getOperand(0))); + continue; + } + + if (LoadInst *LD = dyn_cast(DepInst)) { + // If the types mismatch and we can't handle it, reject reuse of the load. if (LD->getType() != LI->getType()) { - UnavailableBlocks.push_back(DepBB); - continue; + if (TD == 0) + TD = getAnalysisIfAvailable(); + + // If the stored value is larger or equal to the loaded value, we can + // reuse it. + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ + UnavailableBlocks.push_back(DepBB); + continue; + } } - ValuesPerBlock.push_back(std::make_pair(DepBB, LD)); - } else { - UnavailableBlocks.push_back(DepBB); + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); continue; } + + UnavailableBlocks.push_back(DepBB); + continue; } - + // If we have no predecessors that produce a known value for this load, exit // early. if (ValuesPerBlock.empty()) return false; - + // If all of the instructions we depend on produce a known value for this // load, then it is fully redundant and we can use PHI insertion to compute // its value. Insert PHIs and remove the fully redundant value now. 
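The AnalyzeLoadFromClobberingStore/GetStoreValueForLoad pair used above first proves that the load's byte range is contained in the stored value and then shifts and truncates the stored bits to synthesize the loaded value; on a little-endian target the shift amount is Offset*8 bits, on a big-endian target (StoreSize-LoadSize-Offset)*8. A rough host-arithmetic model of that extraction follows (illustration only; the pass itself emits lshr/trunc instructions rather than computing on host integers):

#include <cassert>
#include <cstdint>
#include <iostream>

// Model of GetStoreValueForLoad's shift-and-truncate on a stored value that
// fits in 64 bits. Sizes and offset are in bytes.
uint64_t extractLoadedBits(uint64_t StoredBits, unsigned StoreSize,
                           unsigned LoadSize, unsigned Offset,
                           bool LittleEndian) {
  assert(Offset + LoadSize <= StoreSize && "load must be covered by the store");
  unsigned ShiftAmt = LittleEndian
                          ? Offset * 8                          // low bytes first
                          : (StoreSize - LoadSize - Offset) * 8; // high bytes first
  uint64_t Val = StoredBits >> ShiftAmt;
  if (LoadSize < 8)
    Val &= (UINT64_C(1) << (LoadSize * 8)) - 1;                 // truncate
  return Val;
}

int main() {
  // store i32 0x11223344 to %P, then load i8 from %P plus one byte:
  // a little-endian target sees 0x33, a big-endian target sees 0x22.
  std::cout << std::hex << extractLoadedBits(0x11223344, 4, 1, 1, true) << " "
            << extractLoadedBits(0x11223344, 4, 1, 1, false) << "\n";
}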
@@ -1028,7 +1400,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, for (SmallPtrSet::iterator I = p.begin(), E = p.end(); I != E; ++I) { if ((*I)->getParent() == LI->getParent()) { - DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI); + DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD #1: " << *LI << '\n'); LI->replaceAllUsesWith(*I); if (isa((*I)->getType())) MD->invalidateCachedPointerInfo(*I); @@ -1036,27 +1408,30 @@ bool GVN::processNonLocalLoad(LoadInst *LI, NumGVNLoad++; return true; } - - ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); + + ValuesPerBlock.push_back(AvailableValueInBlock::get((*I)->getParent(), + *I)); } - - DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI); - + + DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); + + // Convert the block information to a map, and insert coersions as needed. DenseMap BlockReplValues; - BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); - // Perform PHI construction. - Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); - LI->replaceAllUsesWith(v); + GetAvailableBlockValues(BlockReplValues, ValuesPerBlock, LI->getType(), TD); - if (isa(v)) - v->takeName(LI); - if (isa(v->getType())) - MD->invalidateCachedPointerInfo(v); + // Perform PHI construction. + Value *V = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); + LI->replaceAllUsesWith(V); + + if (isa(V)) + V->takeName(LI); + if (isa(V->getType())) + MD->invalidateCachedPointerInfo(V); toErase.push_back(LI); NumGVNLoad++; return true; } - + if (!EnablePRE || !EnableLoadPRE) return false; @@ -1067,7 +1442,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // prefer to not increase code size. As such, we only do this when we know // that we only have to insert *one* load (which means we're basically moving // the load, not inserting a new one). - + SmallPtrSet Blockers; for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) Blockers.insert(UnavailableBlocks[i]); @@ -1091,28 +1466,28 @@ bool GVN::processNonLocalLoad(LoadInst *LI, if (TmpBB->getTerminator()->getNumSuccessors() != 1) allSingleSucc = false; } - + assert(TmpBB); LoadBB = TmpBB; - + // If we have a repl set with LI itself in it, this means we have a loop where // at least one of the values is LI. Since this means that we won't be able // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].second == LI) + if (ValuesPerBlock[i].V == LI) return false; - + if (isSinglePred) { bool isHot = false; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (Instruction *I = dyn_cast(ValuesPerBlock[i].second)) - // "Hot" Instruction is in some loop (because it dominates its dep. - // instruction). - if (DT->dominates(LI, I)) { - isHot = true; - break; - } + if (Instruction *I = dyn_cast(ValuesPerBlock[i].V)) + // "Hot" Instruction is in some loop (because it dominates its dep. + // instruction). + if (DT->dominates(LI, I)) { + isHot = true; + break; + } // We are interested only in "hot" instructions. We don't want to do any // mis-optimizations here. 
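The availability check in the next hunk only allows load PRE when the value is fully available in every predecessor of the load's block except exactly one, so the compensating load is inserted on a single edge and no other path grows. A simplified stand-in for that decision is sketched below (invented types, not the LLVM API):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical availability summary: (predecessor name, value fully available?).
// PRE is worthwhile only if exactly one predecessor lacks the value; that is
// where the compensating load would be inserted.
const std::string *findSinglePREPred(
    const std::vector<std::pair<std::string, bool>> &Preds) {
  const std::string *Unavailable = nullptr;
  for (const auto &P : Preds) {
    if (P.second)
      continue;                 // value already available on this edge
    if (Unavailable)
      return nullptr;           // more than one missing pred: reject PRE
    Unavailable = &P.first;
  }
  return Unavailable;           // null if fully available everywhere
}

int main() {
  std::vector<std::pair<std::string, bool>> Preds = {{"if.then", true},
                                                     {"if.else", false}};
  if (const std::string *P = findSinglePREPred(Preds))
    std::cout << "insert reload at end of " << *P << "\n";
}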
@@ -1129,7 +1504,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, DenseMap FullyAvailableBlocks; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - FullyAvailableBlocks[ValuesPerBlock[i].first] = true; + FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) FullyAvailableBlocks[UnavailableBlocks[i]] = false; @@ -1137,33 +1512,33 @@ bool GVN::processNonLocalLoad(LoadInst *LI, PI != E; ++PI) { if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) continue; - + // If this load is not available in multiple predecessors, reject it. if (UnavailablePred && UnavailablePred != *PI) return false; UnavailablePred = *PI; } - + assert(UnavailablePred != 0 && "Fully available value should be eliminated above!"); - + // If the loaded pointer is PHI node defined in this block, do PHI translation // to get its value in the predecessor. Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred); - + // Make sure the value is live in the predecessor. If it was defined by a // non-PHI instruction in this block, we don't know how to recompute it above. if (Instruction *LPInst = dyn_cast(LoadPtr)) if (!DT->dominates(LPInst->getParent(), UnavailablePred)) { - DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " - << *LPInst << *LI << "\n"); + DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " + << *LPInst << '\n' << *LI << "\n"); return false; } - + // We don't currently handle critical edges :( if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { - DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" - << UnavailablePred->getName() << "': " << *LI); + DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" + << UnavailablePred->getName() << "': " << *LI << '\n'); return false; } @@ -1183,28 +1558,28 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Okay, we can eliminate this load by inserting a reload in the predecessor // and using PHI construction to get the value in the other predecessors, do // it. - DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI); - + DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); + Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, LI->getAlignment(), UnavailablePred->getTerminator()); - + SmallPtrSet &p = phiMap[LI->getPointerOperand()]; for (SmallPtrSet::iterator I = p.begin(), E = p.end(); I != E; ++I) - ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); - + ValuesPerBlock.push_back(AvailableValueInBlock::get((*I)->getParent(), *I)); + DenseMap BlockReplValues; - BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); + GetAvailableBlockValues(BlockReplValues, ValuesPerBlock, LI->getType(), TD); BlockReplValues[UnavailablePred] = NewLoad; - + // Perform PHI construction. - Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); - LI->replaceAllUsesWith(v); - if (isa(v)) - v->takeName(LI); - if (isa(v->getType())) - MD->invalidateCachedPointerInfo(v); + Value *V = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); + LI->replaceAllUsesWith(V); + if (isa(V)) + V->takeName(LI); + if (isa(V->getType())) + MD->invalidateCachedPointerInfo(V); toErase.push_back(LI); NumPRELoad++; return true; @@ -1215,64 +1590,119 @@ bool GVN::processNonLocalLoad(LoadInst *LI, bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { if (L->isVolatile()) return false; - - Value* pointer = L->getPointerOperand(); // ... 
to a pointer that has been loaded from before... - MemDepResult dep = MD->getDependency(L); - + MemDepResult Dep = MD->getDependency(L); + // If the value isn't available, don't do anything! - if (dep.isClobber()) { + if (Dep.isClobber()) { + // FIXME: We should handle memset/memcpy/memmove as dependent instructions + // to forward the value if available. + //if (isa(Dep.getInst())) + //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n"; + + // Check to see if we have something like this: + // store i32 123, i32* %P + // %A = bitcast i32* %P to i8* + // %B = gep i8* %A, i32 1 + // %C = load i8* %B + // + // We could do that by recognizing if the clobber instructions are obviously + // a common base + constant offset, and if the previous store (or memset) + // completely covers this load. This sort of thing can happen in bitfield + // access code. + if (StoreInst *DepSI = dyn_cast(Dep.getInst())) + if (const TargetData *TD = getAnalysisIfAvailable()) { + int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD); + if (Offset != -1) { + Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, + L->getType(), L, *TD); + DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n' + << *AvailVal << '\n' << *L << "\n\n\n"); + + // Replace the load! + L->replaceAllUsesWith(AvailVal); + if (isa(AvailVal->getType())) + MD->invalidateCachedPointerInfo(AvailVal); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + } + DEBUG( // fast print dep, using operator<< on instruction would be too slow - DOUT << "GVN: load "; - WriteAsOperand(*DOUT.stream(), L); - Instruction *I = dep.getInst(); - DOUT << " is clobbered by " << *I; + errs() << "GVN: load "; + WriteAsOperand(errs(), L); + Instruction *I = Dep.getInst(); + errs() << " is clobbered by " << *I << '\n'; ); return false; } // If it is defined in another block, try harder. - if (dep.isNonLocal()) + if (Dep.isNonLocal()) return processNonLocalLoad(L, toErase); - Instruction *DepInst = dep.getInst(); + Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast(DepInst)) { - // Only forward substitute stores to loads of the same type. - // FIXME: Could do better! - if (DepSI->getPointerOperand()->getType() != pointer->getType()) - return false; + Value *StoredVal = DepSI->getOperand(0); + // The store and load are to a must-aliased pointer, but they may not + // actually have the same type. See if we know how to reuse the stored + // value (depending on its type). + const TargetData *TD = 0; + if (StoredVal->getType() != L->getType() && + (TD = getAnalysisIfAvailable())) { + StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), + L, *TD); + if (StoredVal == 0) + return false; + + DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + << '\n' << *L << "\n\n\n"); + } + // Remove it! - L->replaceAllUsesWith(DepSI->getOperand(0)); - if (isa(DepSI->getOperand(0)->getType())) - MD->invalidateCachedPointerInfo(DepSI->getOperand(0)); + L->replaceAllUsesWith(StoredVal); + if (isa(StoredVal->getType())) + MD->invalidateCachedPointerInfo(StoredVal); toErase.push_back(L); NumGVNLoad++; return true; } if (LoadInst *DepLI = dyn_cast(DepInst)) { - // Only forward substitute stores to loads of the same type. - // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian. - if (DepLI->getType() != L->getType()) - return false; + Value *AvailableVal = DepLI; + + // The loads are of a must-aliased pointer, but they may not actually have + // the same type. 
See if we know how to reuse the previously loaded value + // (depending on its type). + const TargetData *TD = 0; + if (DepLI->getType() != L->getType() && + (TD = getAnalysisIfAvailable())) { + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + if (AvailableVal == 0) + return false; + + DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + << "\n" << *L << "\n\n\n"); + } // Remove it! - L->replaceAllUsesWith(DepLI); + L->replaceAllUsesWith(AvailableVal); if (isa(DepLI->getType())) MD->invalidateCachedPointerInfo(DepLI); toErase.push_back(L); NumGVNLoad++; return true; } - + // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. - if (isa(DepInst)) { - L->replaceAllUsesWith(Context->getUndef(L->getType())); + if (isa(DepInst) || isMalloc(DepInst)) { + L->replaceAllUsesWith(UndefValue::get(L->getType())); toErase.push_back(L); NumGVNLoad++; return true; @@ -1281,71 +1711,69 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { return false; } -Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) { +Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { DenseMap::iterator I = localAvail.find(BB); if (I == localAvail.end()) return 0; - - ValueNumberScope* locals = I->second; - - while (locals) { - DenseMap::iterator I = locals->table.find(num); - if (I != locals->table.end()) + + ValueNumberScope *Locals = I->second; + while (Locals) { + DenseMap::iterator I = Locals->table.find(num); + if (I != Locals->table.end()) return I->second; - else - locals = locals->parent; + Locals = Locals->parent; } - + return 0; } /// AttemptRedundancyElimination - If the "fast path" of redundancy elimination -/// by inheritance from the dominator fails, see if we can perform phi +/// by inheritance from the dominator fails, see if we can perform phi /// construction to eliminate the redundancy. -Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { - BasicBlock* BaseBlock = orig->getParent(); - +Value *GVN::AttemptRedundancyElimination(Instruction *orig, unsigned valno) { + BasicBlock *BaseBlock = orig->getParent(); + SmallPtrSet Visited; SmallVector Stack; Stack.push_back(BaseBlock); - + DenseMap Results; - + // Walk backwards through our predecessors, looking for instances of the // value number we're looking for. Instances are recorded in the Results // map, which is then used to perform phi construction. while (!Stack.empty()) { - BasicBlock* Current = Stack.back(); + BasicBlock *Current = Stack.back(); Stack.pop_back(); - + // If we've walked all the way to a proper dominator, then give up. Cases // where the instance is in the dominator will have been caught by the fast // path, and any cases that require phi construction further than this are // probably not worth it anyways. Note that this is a SIGNIFICANT compile // time improvement. if (DT->properlyDominates(Current, orig->getParent())) return 0; - + DenseMap::iterator LA = localAvail.find(Current); if (LA == localAvail.end()) return 0; DenseMap::iterator V = LA->second->table.find(valno); - + if (V != LA->second->table.end()) { // Found an instance, record it. Results.insert(std::make_pair(Current, V->second)); continue; } - + // If we reach the beginning of the function, then give up. 
if (pred_begin(Current) == pred_end(Current)) return 0; - + for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current); PI != PE; ++PI) if (Visited.insert(*PI)) Stack.push_back(*PI); } - + // If we didn't find instances, give up. Otherwise, perform phi construction. if (Results.size() == 0) return 0; @@ -1357,74 +1785,76 @@ Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I, SmallVectorImpl &toErase) { - if (LoadInst* L = dyn_cast(I)) { - bool changed = processLoad(L, toErase); - - if (!changed) { - unsigned num = VN.lookup_or_add(L); - localAvail[I->getParent()]->table.insert(std::make_pair(num, L)); + if (LoadInst *LI = dyn_cast(I)) { + bool Changed = processLoad(LI, toErase); + + if (!Changed) { + unsigned Num = VN.lookup_or_add(LI); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI)); } - - return changed; + + return Changed; } - - uint32_t nextNum = VN.getNextUnusedValueNumber(); - unsigned num = VN.lookup_or_add(I); - - if (BranchInst* BI = dyn_cast(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); - + + uint32_t NextNum = VN.getNextUnusedValueNumber(); + unsigned Num = VN.lookup_or_add(I); + + if (BranchInst *BI = dyn_cast(I)) { + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + if (!BI->isConditional() || isa(BI->getCondition())) return false; - - Value* branchCond = BI->getCondition(); - uint32_t condVN = VN.lookup_or_add(branchCond); - - BasicBlock* trueSucc = BI->getSuccessor(0); - BasicBlock* falseSucc = BI->getSuccessor(1); - - if (trueSucc->getSinglePredecessor()) - localAvail[trueSucc]->table[condVN] = Context->getTrue(); - if (falseSucc->getSinglePredecessor()) - localAvail[falseSucc]->table[condVN] = Context->getFalse(); + + Value *BranchCond = BI->getCondition(); + uint32_t CondVN = VN.lookup_or_add(BranchCond); + + BasicBlock *TrueSucc = BI->getSuccessor(0); + BasicBlock *FalseSucc = BI->getSuccessor(1); + + if (TrueSucc->getSinglePredecessor()) + localAvail[TrueSucc]->table[CondVN] = + ConstantInt::getTrue(TrueSucc->getContext()); + if (FalseSucc->getSinglePredecessor()) + localAvail[FalseSucc]->table[CondVN] = + ConstantInt::getFalse(TrueSucc->getContext()); return false; - + // Allocations are always uniquely numbered, so we can save time and memory - // by fast failing them. - } else if (isa(I) || isa(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + // by fast failing them. + } else if (isa(I) || isMalloc(I) || isa(I)) { + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); return false; } - + // Collapse PHI nodes if (PHINode* p = dyn_cast(I)) { - Value* constVal = CollapsePhi(p); - + Value *constVal = CollapsePhi(p); + if (constVal) { for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end(); PI != PE; ++PI) PI->second.erase(p); - + p->replaceAllUsesWith(constVal); if (isa(constVal->getType())) MD->invalidateCachedPointerInfo(constVal); VN.erase(p); - + toErase.push_back(p); } else { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); } - + // If the number we were assigned was a brand new VN, then we don't // need to do a lookup to see if the number already exists // somewhere in the domtree: it can't! 
- } else if (num == nextNum) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); - + } else if (Num == NextNum) { + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + // Perform fast-path value-number based elimination of values inherited from // dominators. - } else if (Value* repl = lookupNumber(I->getParent(), num)) { + } else if (Value *repl = lookupNumber(I->getParent(), Num)) { // Remove it! VN.erase(I); I->replaceAllUsesWith(repl); @@ -1435,7 +1865,7 @@ bool GVN::processInstruction(Instruction *I, #if 0 // Perform slow-pathvalue-number based elimination with phi construction. - } else if (Value* repl = AttemptRedundancyElimination(I, num)) { + } else if (Value *repl = AttemptRedundancyElimination(I, Num)) { // Remove it! VN.erase(I); I->replaceAllUsesWith(repl); @@ -1445,9 +1875,9 @@ bool GVN::processInstruction(Instruction *I, return true; #endif } else { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); } - + return false; } @@ -1458,35 +1888,35 @@ bool GVN::runOnFunction(Function& F) { VN.setAliasAnalysis(&getAnalysis()); VN.setMemDep(MD); VN.setDomTree(DT); - - bool changed = false; - bool shouldContinue = true; - + + bool Changed = false; + bool ShouldContinue = true; + // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { - BasicBlock* BB = FI; + BasicBlock *BB = FI; ++FI; bool removedBlock = MergeBlockIntoPredecessor(BB, this); if (removedBlock) NumGVNBlocks++; - - changed |= removedBlock; + + Changed |= removedBlock; } - + unsigned Iteration = 0; - - while (shouldContinue) { - DEBUG(cerr << "GVN iteration: " << Iteration << "\n"); - shouldContinue = iterateOnFunction(F); - changed |= shouldContinue; + + while (ShouldContinue) { + DEBUG(errs() << "GVN iteration: " << Iteration << "\n"); + ShouldContinue = iterateOnFunction(F); + Changed |= ShouldContinue; ++Iteration; } - + if (EnablePRE) { bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); - changed |= PREChanged; + Changed |= PREChanged; } } // FIXME: Should perform GVN again after PRE does something. PRE can move @@ -1496,27 +1926,27 @@ bool GVN::runOnFunction(Function& F) { cleanupGlobalSets(); - return changed; + return Changed; } -bool GVN::processBlock(BasicBlock* BB) { +bool GVN::processBlock(BasicBlock *BB) { // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and // incrementing BI before processing an instruction). SmallVector toErase; - bool changed_function = false; - + bool ChangedFunction = false; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - changed_function |= processInstruction(BI, toErase); + ChangedFunction |= processInstruction(BI, toErase); if (toErase.empty()) { ++BI; continue; } - + // If we need some instructions deleted, do it now. NumGVNInstr += toErase.size(); - + // Avoid iterator invalidation. 
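The block that follows deletes the instructions collected in toErase while a forward iterator over the basic block is still live, which is only safe if the iterator is first parked on an element that is not about to disappear. A toy version of the same pattern, with std::list<int> standing in for the instruction list (the names here are illustrative, not LLVM API):

  #include <list>
  #include <vector>

  // Erase the values listed in 'toErase' from 'insts' without invalidating
  // the caller's iterator 'it'.  Same dance as the AtStart/--BI/++BI code
  // below: park the iterator one element earlier (or remember it was at
  // begin()), do the erasing, then step forward again.  Assumes the parked
  // element itself is never one of the victims.
  static void eraseWithoutInvalidating(std::list<int> &insts,
                                       std::list<int>::iterator &it,
                                       const std::vector<int> &toErase) {
    bool atStart = (it == insts.begin());
    if (!atStart)
      --it;                                 // park on the previous element
    for (int victim : toErase)              // stands in for eraseFromParent()
      insts.remove(victim);
    if (atStart)
      it = insts.begin();
    else
      ++it;                                 // resume on the next survivor
  }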
bool AtStart = BI == BB->begin(); if (!AtStart) @@ -1524,7 +1954,7 @@ bool GVN::processBlock(BasicBlock* BB) { for (SmallVector::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) { - DEBUG(cerr << "GVN removed: " << **I); + DEBUG(errs() << "GVN removed: " << **I << '\n'); MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); @@ -1536,8 +1966,8 @@ bool GVN::processBlock(BasicBlock* BB) { else ++BI; } - - return changed_function; + + return ChangedFunction; } /// performPRE - Perform a purely local form of PRE that looks for diamond @@ -1548,32 +1978,33 @@ bool GVN::performPRE(Function& F) { DenseMap predMap; for (df_iterator DI = df_begin(&F.getEntryBlock()), DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { - BasicBlock* CurrentBlock = *DI; - + BasicBlock *CurrentBlock = *DI; + // Nothing to PRE in the entry block. if (CurrentBlock == &F.getEntryBlock()) continue; - + for (BasicBlock::iterator BI = CurrentBlock->begin(), BE = CurrentBlock->end(); BI != BE; ) { Instruction *CurInst = BI++; - if (isa(CurInst) || isa(CurInst) || - isa(CurInst) || (CurInst->getType() == Type::VoidTy) || + if (isa(CurInst) || isMalloc(CurInst) || + isa(CurInst) || isa(CurInst) || + (CurInst->getType() == Type::getVoidTy(F.getContext())) || CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || isa(CurInst)) continue; - uint32_t valno = VN.lookup(CurInst); - + uint32_t ValNo = VN.lookup(CurInst); + // Look for the predecessors for PRE opportunities. We're // only trying to solve the basic diamond case, where // a value is computed in the successor and one predecessor, // but not the other. We also explicitly disallow cases // where the successor is its own predecessor, because they're // more complicated to get right. - unsigned numWith = 0; - unsigned numWithout = 0; - BasicBlock* PREPred = 0; + unsigned NumWith = 0; + unsigned NumWithout = 0; + BasicBlock *PREPred = 0; predMap.clear(); for (pred_iterator PI = pred_begin(CurrentBlock), @@ -1582,59 +2013,59 @@ bool GVN::performPRE(Function& F) { // own predecessor, on in blocks with predecessors // that are not reachable. if (*PI == CurrentBlock) { - numWithout = 2; + NumWithout = 2; break; } else if (!localAvail.count(*PI)) { - numWithout = 2; + NumWithout = 2; break; } - - DenseMap::iterator predV = - localAvail[*PI]->table.find(valno); + + DenseMap::iterator predV = + localAvail[*PI]->table.find(ValNo); if (predV == localAvail[*PI]->table.end()) { PREPred = *PI; - numWithout++; + NumWithout++; } else if (predV->second == CurInst) { - numWithout = 2; + NumWithout = 2; } else { predMap[*PI] = predV->second; - numWith++; + NumWith++; } } - + // Don't do PRE when it might increase code size, i.e. when // we would need to insert instructions in more than one pred. - if (numWithout != 1 || numWith == 0) + if (NumWithout != 1 || NumWith == 0) continue; - + // We can't do PRE safely on a critical edge, so instead we schedule // the edge to be split and perform the PRE the next time we iterate // on the function. 
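The check just below bails out on critical edges and queues them in toSplit. A critical edge is one whose source block has several successors and whose destination has several predecessors; inserting the hoisted instruction at either end of such an edge would make it execute on paths that should not see it, so the edge is split first and PRE retries on the next iteration. A self-contained sketch of the predicate, using a toy Block type rather than llvm::BasicBlock:

  #include <vector>

  struct Block {
    std::vector<Block*> succs;           // successor blocks
    std::vector<Block*> preds;           // predecessor blocks
  };

  // An edge src -> dst is critical when src has more than one successor and
  // dst has more than one predecessor.  Code placed at the end of src would
  // also run on paths that bypass dst, and code placed at the top of dst
  // would cover its other incoming edges, so PRE schedules such edges for
  // splitting instead of hoisting across them.
  static bool isCriticalEdge(const Block *src, const Block *dst) {
    return src->succs.size() > 1 && dst->preds.size() > 1;
  }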
- unsigned succNum = 0; + unsigned SuccNum = 0; for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors(); i != e; ++i) if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) { - succNum = i; + SuccNum = i; break; } - - if (isCriticalEdge(PREPred->getTerminator(), succNum)) { - toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum)); + + if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { + toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); continue; } - + // Instantiate the expression the in predecessor that lacked it. // Because we are going top-down through the block, all value numbers // will be available in the predecessor by the time we need them. Any // that weren't original present will have been instantiated earlier // in this loop. - Instruction* PREInstr = CurInst->clone(*Context); + Instruction *PREInstr = CurInst->clone(CurInst->getContext()); bool success = true; for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { Value *Op = PREInstr->getOperand(i); if (isa(Op) || isa(Op) || isa(Op)) continue; - + if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) { PREInstr->setOperand(i, V); } else { @@ -1642,25 +2073,25 @@ bool GVN::performPRE(Function& F) { break; } } - + // Fail out if we encounter an operand that is not available in - // the PRE predecessor. This is typically because of loads which + // the PRE predecessor. This is typically because of loads which // are not value numbered precisely. if (!success) { delete PREInstr; DEBUG(verifyRemoved(PREInstr)); continue; } - + PREInstr->insertBefore(PREPred->getTerminator()); PREInstr->setName(CurInst->getName() + ".pre"); predMap[PREPred] = PREInstr; - VN.add(PREInstr, valno); + VN.add(PREInstr, ValNo); NumGVNPRE++; - + // Update the availability map to include the new instruction. - localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr)); - + localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr)); + // Create a PHI to make the value available in this block. PHINode* Phi = PHINode::Create(CurInst->getType(), CurInst->getName() + ".pre-phi", @@ -1668,27 +2099,27 @@ bool GVN::performPRE(Function& F) { for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); PI != PE; ++PI) Phi->addIncoming(predMap[*PI], *PI); - - VN.add(Phi, valno); - localAvail[CurrentBlock]->table[valno] = Phi; - + + VN.add(Phi, ValNo); + localAvail[CurrentBlock]->table[ValNo] = Phi; + CurInst->replaceAllUsesWith(Phi); if (isa(Phi->getType())) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); - - DEBUG(cerr << "GVN PRE removed: " << *CurInst); + + DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n'); MD->removeInstruction(CurInst); CurInst->eraseFromParent(); DEBUG(verifyRemoved(CurInst)); Changed = true; } } - + for (SmallVector, 4>::iterator I = toSplit.begin(), E = toSplit.end(); I != E; ++I) SplitCriticalEdge(I->first, I->second, this); - + return Changed || toSplit.size(); } @@ -1706,20 +2137,20 @@ bool GVN::iterateOnFunction(Function &F) { } // Top-down walk of the dominator tree - bool changed = false; + bool Changed = false; #if 0 // Needed for value numbering with phi construction to work. 
   ReversePostOrderTraversal<Function*> RPOT(&F);
   for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
        RE = RPOT.end(); RI != RE; ++RI)
-    changed |= processBlock(*RI);
+    Changed |= processBlock(*RI);
 #else
   for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
        DE = df_end(DT->getRootNode()); DI != DE; ++DI)
-    changed |= processBlock(DI->getBlock());
+    Changed |= processBlock(DI->getBlock());
 #endif

-  return changed;
+  return Changed;
 }

 void GVN::cleanupGlobalSets() {