X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FGVN.cpp;h=164730c3ca06d0f61bef011cc8ea6ade7e28d92c;hb=8c0c99016b4348bf9cc294a0f2dd60a219d4506c;hp=56bc816c4f49fb21d503ac6657db6453102e6a92;hpb=3e8b6631e67e01e4960a7ba4668a50c596607473;p=oota-llvm.git

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 56bc816c4f4..164730c3ca0 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -20,9 +20,11 @@
 #include "llvm/BasicBlock.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/Function.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
 #include "llvm/Value.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
@@ -30,17 +32,23 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include <cstdio>
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 using namespace llvm;
 
 STATISTIC(NumGVNInstr,  "Number of instructions deleted");
@@ -52,6 +60,7 @@ STATISTIC(NumPRELoad,   "Number of loads PRE'd");
 
 static cl::opt<bool> EnablePRE("enable-pre", cl::init(true), cl::Hidden);
 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
 //===----------------------------------------------------------------------===//
 //                         ValueTable Class
 //===----------------------------------------------------------------------===//
@@ -62,30 +71,53 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
 /// two values.
namespace { struct Expression { - enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL, - UDIV, SDIV, FDIV, UREM, SREM, - FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, - ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, - ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, - FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, - FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, - FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, - SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, - FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT, - PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT, - EMPTY, TOMBSTONE }; + enum ExpressionOpcode { + ADD = Instruction::Add, + FADD = Instruction::FAdd, + SUB = Instruction::Sub, + FSUB = Instruction::FSub, + MUL = Instruction::Mul, + FMUL = Instruction::FMul, + UDIV = Instruction::UDiv, + SDIV = Instruction::SDiv, + FDIV = Instruction::FDiv, + UREM = Instruction::URem, + SREM = Instruction::SRem, + FREM = Instruction::FRem, + SHL = Instruction::Shl, + LSHR = Instruction::LShr, + ASHR = Instruction::AShr, + AND = Instruction::And, + OR = Instruction::Or, + XOR = Instruction::Xor, + TRUNC = Instruction::Trunc, + ZEXT = Instruction::ZExt, + SEXT = Instruction::SExt, + FPTOUI = Instruction::FPToUI, + FPTOSI = Instruction::FPToSI, + UITOFP = Instruction::UIToFP, + SITOFP = Instruction::SIToFP, + FPTRUNC = Instruction::FPTrunc, + FPEXT = Instruction::FPExt, + PTRTOINT = Instruction::PtrToInt, + INTTOPTR = Instruction::IntToPtr, + BITCAST = Instruction::BitCast, + ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, + ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, + FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, + FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE, + FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT, + SHUFFLE, SELECT, GEP, CALL, CONSTANT, + INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE }; ExpressionOpcode opcode; const Type* type; - uint32_t firstVN; - uint32_t secondVN; - uint32_t thirdVN; SmallVector varargs; - Value* function; - + Value *function; + Expression() { } Expression(ExpressionOpcode o) : opcode(o) { } - + bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; @@ -95,29 +127,23 @@ namespace { return false; else if (function != other.function) return false; - else if (firstVN != other.firstVN) - return false; - else if (secondVN != other.secondVN) - return false; - else if (thirdVN != other.thirdVN) - return false; else { if (varargs.size() != other.varargs.size()) return false; - + for (size_t i = 0; i < varargs.size(); ++i) if (varargs[i] != other.varargs[i]) return false; - + return true; } } - + bool operator!=(const Expression &other) const { return !(*this == other); } }; - + class ValueTable { private: DenseMap valueNumbering; @@ -125,12 +151,10 @@ namespace { AliasAnalysis* AA; MemoryDependenceAnalysis* MD; DominatorTree* DT; - + uint32_t nextValueNumber; - - Expression::ExpressionOpcode getOpcode(BinaryOperator* BO); + Expression::ExpressionOpcode getOpcode(CmpInst* C); - Expression::ExpressionOpcode getOpcode(CastInst* C); Expression create_expression(BinaryOperator* BO); Expression create_expression(CmpInst* C); Expression create_expression(ShuffleVectorInst* V); @@ -141,13 +165,17 @@ namespace { Expression create_expression(GetElementPtrInst* G); Expression create_expression(CallInst* C); Expression create_expression(Constant* C); + Expression create_expression(ExtractValueInst* C); + Expression create_expression(InsertValueInst* C); + + uint32_t lookup_or_add_call(CallInst* C); public: ValueTable() : nextValueNumber(1) { } - uint32_t lookup_or_add(Value* 
V); - uint32_t lookup(Value* V) const; - void add(Value* V, uint32_t num); + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + void add(Value *V, uint32_t num); void clear(); - void erase(Value* v); + void erase(Value *v); unsigned size(); void setAliasAnalysis(AliasAnalysis* A) { AA = A; } AliasAnalysis *getAliasAnalysis() const { return AA; } @@ -163,66 +191,40 @@ template <> struct DenseMapInfo { static inline Expression getEmptyKey() { return Expression(Expression::EMPTY); } - + static inline Expression getTombstoneKey() { return Expression(Expression::TOMBSTONE); } - + static unsigned getHashValue(const Expression e) { unsigned hash = e.opcode; - - hash = e.firstVN + hash * 37; - hash = e.secondVN + hash * 37; - hash = e.thirdVN + hash * 37; - + hash = ((unsigned)((uintptr_t)e.type >> 4) ^ - (unsigned)((uintptr_t)e.type >> 9)) + - hash * 37; - + (unsigned)((uintptr_t)e.type >> 9)); + for (SmallVector::const_iterator I = e.varargs.begin(), E = e.varargs.end(); I != E; ++I) hash = *I + hash * 37; - + hash = ((unsigned)((uintptr_t)e.function >> 4) ^ (unsigned)((uintptr_t)e.function >> 9)) + hash * 37; - + return hash; } static bool isEqual(const Expression &LHS, const Expression &RHS) { return LHS == RHS; } - static bool isPod() { return true; } }; + +template <> +struct isPodLike { static const bool value = true; }; + } //===----------------------------------------------------------------------===// // ValueTable Internal Functions //===----------------------------------------------------------------------===// -Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { - switch(BO->getOpcode()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Binary operator with unknown opcode?"); - case Instruction::Add: return Expression::ADD; - case Instruction::FAdd: return Expression::FADD; - case Instruction::Sub: return Expression::SUB; - case Instruction::FSub: return Expression::FSUB; - case Instruction::Mul: return Expression::MUL; - case Instruction::FMul: return Expression::FMUL; - case Instruction::UDiv: return Expression::UDIV; - case Instruction::SDiv: return Expression::SDIV; - case Instruction::FDiv: return Expression::FDIV; - case Instruction::URem: return Expression::UREM; - case Instruction::SRem: return Expression::SREM; - case Instruction::FRem: return Expression::FREM; - case Instruction::Shl: return Expression::SHL; - case Instruction::LShr: return Expression::LSHR; - case Instruction::AShr: return Expression::ASHR; - case Instruction::And: return Expression::AND; - case Instruction::Or: return Expression::OR; - case Instruction::Xor: return Expression::XOR; - } -} Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { if (isa(C)) { @@ -262,147 +264,146 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { } } -Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) { - switch(C->getOpcode()) { - default: // THIS SHOULD NEVER HAPPEN - llvm_unreachable("Cast operator with unknown opcode?"); - case Instruction::Trunc: return Expression::TRUNC; - case Instruction::ZExt: return Expression::ZEXT; - case Instruction::SExt: return Expression::SEXT; - case Instruction::FPToUI: return Expression::FPTOUI; - case Instruction::FPToSI: return Expression::FPTOSI; - case Instruction::UIToFP: return Expression::UITOFP; - case Instruction::SIToFP: return Expression::SITOFP; - case Instruction::FPTrunc: return Expression::FPTRUNC; - case Instruction::FPExt: return Expression::FPEXT; - case Instruction::PtrToInt: 
return Expression::PTRTOINT; - case Instruction::IntToPtr: return Expression::INTTOPTR; - case Instruction::BitCast: return Expression::BITCAST; - } -} - Expression ValueTable::create_expression(CallInst* C) { Expression e; - + e.type = C->getType(); - e.firstVN = 0; - e.secondVN = 0; - e.thirdVN = 0; e.function = C->getCalledFunction(); e.opcode = Expression::CALL; - + for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end(); I != E; ++I) e.varargs.push_back(lookup_or_add(*I)); - + return e; } Expression ValueTable::create_expression(BinaryOperator* BO) { Expression e; - - e.firstVN = lookup_or_add(BO->getOperand(0)); - e.secondVN = lookup_or_add(BO->getOperand(1)); - e.thirdVN = 0; + e.varargs.push_back(lookup_or_add(BO->getOperand(0))); + e.varargs.push_back(lookup_or_add(BO->getOperand(1))); e.function = 0; e.type = BO->getType(); - e.opcode = getOpcode(BO); - + e.opcode = static_cast(BO->getOpcode()); + return e; } Expression ValueTable::create_expression(CmpInst* C) { Expression e; - - e.firstVN = lookup_or_add(C->getOperand(0)); - e.secondVN = lookup_or_add(C->getOperand(1)); - e.thirdVN = 0; + + e.varargs.push_back(lookup_or_add(C->getOperand(0))); + e.varargs.push_back(lookup_or_add(C->getOperand(1))); e.function = 0; e.type = C->getType(); e.opcode = getOpcode(C); - + return e; } Expression ValueTable::create_expression(CastInst* C) { Expression e; - - e.firstVN = lookup_or_add(C->getOperand(0)); - e.secondVN = 0; - e.thirdVN = 0; + + e.varargs.push_back(lookup_or_add(C->getOperand(0))); e.function = 0; e.type = C->getType(); - e.opcode = getOpcode(C); - + e.opcode = static_cast(C->getOpcode()); + return e; } Expression ValueTable::create_expression(ShuffleVectorInst* S) { Expression e; - - e.firstVN = lookup_or_add(S->getOperand(0)); - e.secondVN = lookup_or_add(S->getOperand(1)); - e.thirdVN = lookup_or_add(S->getOperand(2)); + + e.varargs.push_back(lookup_or_add(S->getOperand(0))); + e.varargs.push_back(lookup_or_add(S->getOperand(1))); + e.varargs.push_back(lookup_or_add(S->getOperand(2))); e.function = 0; e.type = S->getType(); e.opcode = Expression::SHUFFLE; - + return e; } Expression ValueTable::create_expression(ExtractElementInst* E) { Expression e; - - e.firstVN = lookup_or_add(E->getOperand(0)); - e.secondVN = lookup_or_add(E->getOperand(1)); - e.thirdVN = 0; + + e.varargs.push_back(lookup_or_add(E->getOperand(0))); + e.varargs.push_back(lookup_or_add(E->getOperand(1))); e.function = 0; e.type = E->getType(); e.opcode = Expression::EXTRACT; - + return e; } Expression ValueTable::create_expression(InsertElementInst* I) { Expression e; - - e.firstVN = lookup_or_add(I->getOperand(0)); - e.secondVN = lookup_or_add(I->getOperand(1)); - e.thirdVN = lookup_or_add(I->getOperand(2)); + + e.varargs.push_back(lookup_or_add(I->getOperand(0))); + e.varargs.push_back(lookup_or_add(I->getOperand(1))); + e.varargs.push_back(lookup_or_add(I->getOperand(2))); e.function = 0; e.type = I->getType(); e.opcode = Expression::INSERT; - + return e; } Expression ValueTable::create_expression(SelectInst* I) { Expression e; - - e.firstVN = lookup_or_add(I->getCondition()); - e.secondVN = lookup_or_add(I->getTrueValue()); - e.thirdVN = lookup_or_add(I->getFalseValue()); + + e.varargs.push_back(lookup_or_add(I->getCondition())); + e.varargs.push_back(lookup_or_add(I->getTrueValue())); + e.varargs.push_back(lookup_or_add(I->getFalseValue())); e.function = 0; e.type = I->getType(); e.opcode = Expression::SELECT; - + return e; } Expression ValueTable::create_expression(GetElementPtrInst* G) { 
Expression e; - - e.firstVN = lookup_or_add(G->getPointerOperand()); - e.secondVN = 0; - e.thirdVN = 0; + + e.varargs.push_back(lookup_or_add(G->getPointerOperand())); e.function = 0; e.type = G->getType(); e.opcode = Expression::GEP; - + for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end(); I != E; ++I) e.varargs.push_back(lookup_or_add(*I)); - + + return e; +} + +Expression ValueTable::create_expression(ExtractValueInst* E) { + Expression e; + + e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); + for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + e.function = 0; + e.type = E->getType(); + e.opcode = Expression::EXTRACTVALUE; + + return e; +} + +Expression ValueTable::create_expression(InsertValueInst* E) { + Expression e; + + e.varargs.push_back(lookup_or_add(E->getAggregateOperand())); + e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand())); + for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + e.function = 0; + e.type = E->getType(); + e.opcode = Expression::INSERTVALUE; + return e; } @@ -411,242 +412,212 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) { //===----------------------------------------------------------------------===// /// add - Insert a value into the table with a specified value number. -void ValueTable::add(Value* V, uint32_t num) { +void ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); } -/// lookup_or_add - Returns the value number for the specified value, assigning -/// it a new number if it did not have one before. -uint32_t ValueTable::lookup_or_add(Value* V) { - DenseMap::iterator VI = valueNumbering.find(V); - if (VI != valueNumbering.end()) - return VI->second; - - if (CallInst* C = dyn_cast(V)) { - if (AA->doesNotAccessMemory(C)) { - Expression e = create_expression(C); - - DenseMap::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; - } - } else if (AA->onlyReadsMemory(C)) { - Expression e = create_expression(C); - - if (expressionNumbering.find(e) == expressionNumbering.end()) { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - MemDepResult local_dep = MD->getDependency(C); - - if (!local_dep.isDef() && !local_dep.isNonLocal()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } +uint32_t ValueTable::lookup_or_add_call(CallInst* C) { + if (AA->doesNotAccessMemory(C)) { + Expression exp = create_expression(C); + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } else if (AA->onlyReadsMemory(C)) { + Expression exp = create_expression(C); + uint32_t& e = expressionNumbering[exp]; + if (!e) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } + if (!MD) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; + } - if (local_dep.isDef()) { - CallInst* local_cdep = cast(local_dep.getInst()); - - if (local_cdep->getNumOperands() != C->getNumOperands()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); 
- return nextValueNumber++; - } - - for (unsigned i = 1; i < C->getNumOperands(); ++i) { - uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); - if (c_vn != cd_vn) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - } - - uint32_t v = lookup_or_add(local_cdep); - valueNumbering.insert(std::make_pair(V, v)); - return v; - } + MemDepResult local_dep = MD->getDependency(C); - // Non-local case. - const MemoryDependenceAnalysis::NonLocalDepInfo &deps = - MD->getNonLocalCallDependency(CallSite(C)); - // FIXME: call/call dependencies for readonly calls should return def, not - // clobber! Move the checking logic to MemDep! - CallInst* cdep = 0; - - // Check to see if we have a single dominating call instruction that is - // identical to C. - for (unsigned i = 0, e = deps.size(); i != e; ++i) { - const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i]; - // Ignore non-local dependencies. - if (I->second.isNonLocal()) - continue; + if (!local_dep.isDef() && !local_dep.isNonLocal()) { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } - // We don't handle non-depedencies. If we already have a call, reject - // instruction dependencies. - if (I->second.isClobber() || cdep != 0) { - cdep = 0; - break; - } - - CallInst *NonLocalDepCall = dyn_cast(I->second.getInst()); - // FIXME: All duplicated with non-local case. - if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){ - cdep = NonLocalDepCall; - continue; - } - - cdep = 0; - break; - } - - if (!cdep) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - - if (cdep->getNumOperands() != C->getNumOperands()) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + if (local_dep.isDef()) { + CallInst* local_cdep = cast(local_dep.getInst()); + + if (local_cdep->getNumOperands() != C->getNumOperands()) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } + for (unsigned i = 1; i < C->getNumOperands(); ++i) { uint32_t c_vn = lookup_or_add(C->getOperand(i)); - uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i)); if (c_vn != cd_vn) { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } } - - uint32_t v = lookup_or_add(cdep); - valueNumbering.insert(std::make_pair(V, v)); + + uint32_t v = lookup_or_add(local_cdep); + valueNumbering[C] = v; return v; - - } else { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - return nextValueNumber++; - } - } else if (BinaryOperator* BO = dyn_cast(V)) { - Expression e = create_expression(BO); - - DenseMap::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; - } - } else if (CmpInst* C = dyn_cast(V)) { - Expression e = create_expression(C); - - DenseMap::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; - } - } else if 
(ShuffleVectorInst* U = dyn_cast(V)) { - Expression e = create_expression(U); - - DenseMap::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; - } - } else if (ExtractElementInst* U = dyn_cast(V)) { - Expression e = create_expression(U); - - DenseMap::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; } - } else if (InsertElementInst* U = dyn_cast(V)) { - Expression e = create_expression(U); - - DenseMap::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; + + // Non-local case. + const MemoryDependenceAnalysis::NonLocalDepInfo &deps = + MD->getNonLocalCallDependency(CallSite(C)); + // FIXME: call/call dependencies for readonly calls should return def, not + // clobber! Move the checking logic to MemDep! + CallInst* cdep = 0; + + // Check to see if we have a single dominating call instruction that is + // identical to C. + for (unsigned i = 0, e = deps.size(); i != e; ++i) { + const NonLocalDepEntry *I = &deps[i]; + // Ignore non-local dependencies. + if (I->getResult().isNonLocal()) + continue; + + // We don't handle non-depedencies. If we already have a call, reject + // instruction dependencies. + if (I->getResult().isClobber() || cdep != 0) { + cdep = 0; + break; + } + + CallInst *NonLocalDepCall = dyn_cast(I->getResult().getInst()); + // FIXME: All duplicated with non-local case. 
+ if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){ + cdep = NonLocalDepCall; + continue; + } + + cdep = 0; + break; } - } else if (SelectInst* U = dyn_cast(V)) { - Expression e = create_expression(U); - - DenseMap::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + + if (!cdep) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - } else if (CastInst* U = dyn_cast(V)) { - Expression e = create_expression(U); - - DenseMap::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - + + if (cdep->getNumOperands() != C->getNumOperands()) { + valueNumbering[C] = nextValueNumber; return nextValueNumber++; } - } else if (GetElementPtrInst* U = dyn_cast(V)) { - Expression e = create_expression(U); - - DenseMap::iterator EI = expressionNumbering.find(e); - if (EI != expressionNumbering.end()) { - valueNumbering.insert(std::make_pair(V, EI->second)); - return EI->second; - } else { - expressionNumbering.insert(std::make_pair(e, nextValueNumber)); - valueNumbering.insert(std::make_pair(V, nextValueNumber)); - - return nextValueNumber++; + for (unsigned i = 1; i < C->getNumOperands(); ++i) { + uint32_t c_vn = lookup_or_add(C->getOperand(i)); + uint32_t cd_vn = lookup_or_add(cdep->getOperand(i)); + if (c_vn != cd_vn) { + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } } + + uint32_t v = lookup_or_add(cdep); + valueNumbering[C] = v; + return v; + } else { - valueNumbering.insert(std::make_pair(V, nextValueNumber)); + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; + } +} + +/// lookup_or_add - Returns the value number for the specified value, assigning +/// it a new number if it did not have one before. 
+uint32_t ValueTable::lookup_or_add(Value *V) {
+  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+  if (VI != valueNumbering.end())
+    return VI->second;
+
+  if (!isa<Instruction>(V)) {
+    valueNumbering[V] = nextValueNumber;
     return nextValueNumber++;
   }
+
+  Instruction* I = cast<Instruction>(V);
+  Expression exp;
+  switch (I->getOpcode()) {
+    case Instruction::Call:
+      return lookup_or_add_call(cast<CallInst>(I));
+    case Instruction::Add:
+    case Instruction::FAdd:
+    case Instruction::Sub:
+    case Instruction::FSub:
+    case Instruction::Mul:
+    case Instruction::FMul:
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::FDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+    case Instruction::FRem:
+    case Instruction::Shl:
+    case Instruction::LShr:
+    case Instruction::AShr:
+    case Instruction::And:
+    case Instruction::Or :
+    case Instruction::Xor:
+      exp = create_expression(cast<BinaryOperator>(I));
+      break;
+    case Instruction::ICmp:
+    case Instruction::FCmp:
+      exp = create_expression(cast<CmpInst>(I));
+      break;
+    case Instruction::Trunc:
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::UIToFP:
+    case Instruction::SIToFP:
+    case Instruction::FPTrunc:
+    case Instruction::FPExt:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::BitCast:
+      exp = create_expression(cast<CastInst>(I));
+      break;
+    case Instruction::Select:
+      exp = create_expression(cast<SelectInst>(I));
+      break;
+    case Instruction::ExtractElement:
+      exp = create_expression(cast<ExtractElementInst>(I));
+      break;
+    case Instruction::InsertElement:
+      exp = create_expression(cast<InsertElementInst>(I));
+      break;
+    case Instruction::ShuffleVector:
+      exp = create_expression(cast<ShuffleVectorInst>(I));
+      break;
+    case Instruction::ExtractValue:
+      exp = create_expression(cast<ExtractValueInst>(I));
+      break;
+    case Instruction::InsertValue:
+      exp = create_expression(cast<InsertValueInst>(I));
+      break;
+    case Instruction::GetElementPtr:
+      exp = create_expression(cast<GetElementPtrInst>(I));
+      break;
+    default:
+      valueNumbering[V] = nextValueNumber;
+      return nextValueNumber++;
+  }
+
+  uint32_t& e = expressionNumbering[exp];
+  if (!e) e = nextValueNumber++;
+  valueNumbering[V] = e;
+  return e;
 }
 
 /// lookup - Returns the value number of the specified value. Fails if
 /// the value has not yet been numbered.
-uint32_t ValueTable::lookup(Value* V) const {
-  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+uint32_t ValueTable::lookup(Value *V) const {
+  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
   assert(VI != valueNumbering.end() && "Value not numbered?");
   return VI->second;
 }
@@ -659,14 +630,14 @@ void ValueTable::clear() {
 }
 
 /// erase - Remove a value from the value numbering
-void ValueTable::erase(Value* V) {
+void ValueTable::erase(Value *V) {
   valueNumbering.erase(V);
 }
 
 /// verifyRemoved - Verify that the value is removed from all internal data
 /// structures.
void ValueTable::verifyRemoved(const Value *V) const { - for (DenseMap::iterator + for (DenseMap::const_iterator I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) { assert(I->first != V && "Inst still occurs in value numbering map!"); } @@ -680,7 +651,7 @@ namespace { struct ValueNumberScope { ValueNumberScope* parent; DenseMap table; - + ValueNumberScope(ValueNumberScope* p) : parent(p) { } }; } @@ -691,171 +662,97 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - GVN() : FunctionPass(&ID) { } + explicit GVN(bool nopre = false, bool noloads = false) + : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { } private: + bool NoPRE; + bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; ValueTable VN; DenseMap localAvail; - - typedef DenseMap > PhiMapType; - PhiMapType phiMap; - - + + // List of critical edges to be split between iterations. + SmallVector, 4> toSplit; + // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); - AU.addRequired(); + if (!NoLoads) + AU.addRequired(); AU.addRequired(); - + AU.addPreserved(); AU.addPreserved(); } - + // Helper fuctions // FIXME: eliminate or document these better bool processLoad(LoadInst* L, SmallVectorImpl &toErase); - bool processInstruction(Instruction* I, + bool processInstruction(Instruction *I, SmallVectorImpl &toErase); bool processNonLocalLoad(LoadInst* L, SmallVectorImpl &toErase); - bool processBlock(BasicBlock* BB); - Value *GetValueForBlock(BasicBlock *BB, Instruction* orig, - DenseMap &Phis, - bool top_level = false); + bool processBlock(BasicBlock *BB); void dump(DenseMap& d); bool iterateOnFunction(Function &F); - Value* CollapsePhi(PHINode* p); + Value *CollapsePhi(PHINode* p); bool performPRE(Function& F); - Value* lookupNumber(BasicBlock* BB, uint32_t num); - Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno); + Value *lookupNumber(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; + bool splitCriticalEdges(); }; - + char GVN::ID = 0; } // createGVNPass - The public interface to this file... -FunctionPass *llvm::createGVNPass() { return new GVN(); } +FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) { + return new GVN(NoPRE, NoLoads); +} static RegisterPass X("gvn", "Global Value Numbering"); void GVN::dump(DenseMap& d) { - printf("{\n"); + errs() << "{\n"; for (DenseMap::iterator I = d.begin(), E = d.end(); I != E; ++I) { - printf("%d\n", I->first); + errs() << I->first << "\n"; I->second->dump(); } - printf("}\n"); + errs() << "}\n"; } -static bool isSafeReplacement(PHINode* p, Instruction* inst) { +static bool isSafeReplacement(PHINode* p, Instruction *inst) { if (!isa(inst)) return true; - + for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end(); UI != E; ++UI) if (PHINode* use_phi = dyn_cast(UI)) if (use_phi->getParent() == inst->getParent()) return false; - + return true; } -Value* GVN::CollapsePhi(PHINode* p) { - Value* constVal = p->hasConstantValue(); - if (!constVal) return 0; - - Instruction* inst = dyn_cast(constVal); - if (!inst) - return constVal; - - if (DT->dominates(inst, p)) - if (isSafeReplacement(p, inst)) - return inst; - return 0; -} +Value *GVN::CollapsePhi(PHINode *PN) { + Value *ConstVal = PN->hasConstantValue(DT); + if (!ConstVal) return 0; -/// GetValueForBlock - Get the value to use within the specified basic block. 
-/// available values are in Phis. -Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, - DenseMap &Phis, - bool top_level) { - - // If we have already computed this value, return the previously computed val. - DenseMap::iterator V = Phis.find(BB); - if (V != Phis.end() && !top_level) return V->second; - - // If the block is unreachable, just return undef, since this path - // can't actually occur at runtime. - if (!DT->isReachableFromEntry(BB)) - return Phis[BB] = UndefValue::get(orig->getType()); - - if (BasicBlock *Pred = BB->getSinglePredecessor()) { - Value *ret = GetValueForBlock(Pred, orig, Phis); - Phis[BB] = ret; - return ret; - } + Instruction *Inst = dyn_cast(ConstVal); + if (!Inst) + return ConstVal; - // Get the number of predecessors of this block so we can reserve space later. - // If there is already a PHI in it, use the #preds from it, otherwise count. - // Getting it from the PHI is constant time. - unsigned NumPreds; - if (PHINode *ExistingPN = dyn_cast(BB->begin())) - NumPreds = ExistingPN->getNumIncomingValues(); - else - NumPreds = std::distance(pred_begin(BB), pred_end(BB)); - - // Otherwise, the idom is the loop, so we need to insert a PHI node. Do so - // now, then get values to fill in the incoming values for the PHI. - PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle", - BB->begin()); - PN->reserveOperandSpace(NumPreds); - - Phis.insert(std::make_pair(BB, PN)); - - // Fill in the incoming values for the block. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { - Value* val = GetValueForBlock(*PI, orig, Phis); - PN->addIncoming(val, *PI); - } - - VN.getAliasAnalysis()->copyValue(orig, PN); - - // Attempt to collapse PHI nodes that are trivially redundant - Value* v = CollapsePhi(PN); - if (!v) { - // Cache our phi construction results - if (LoadInst* L = dyn_cast(orig)) - phiMap[L->getPointerOperand()].insert(PN); - else - phiMap[orig].insert(PN); - - return PN; - } - - PN->replaceAllUsesWith(v); - if (isa(v->getType())) - MD->invalidateCachedPointerInfo(v); - - for (DenseMap::iterator I = Phis.begin(), - E = Phis.end(); I != E; ++I) - if (I->second == PN) - I->second = v; - - DEBUG(errs() << "GVN removed: " << *PN << '\n'); - MD->removeInstruction(PN); - PN->eraseFromParent(); - DEBUG(verifyRemoved(PN)); - - Phis[BB] = v; - return v; + if (DT->dominates(Inst, PN)) + if (isSafeReplacement(PN, Inst)) + return Inst; + return 0; } /// IsValueFullyAvailableInBlock - Return true if we can prove that the value @@ -868,11 +765,11 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, /// currently speculating that it will be. /// 3) we are speculating for this block and have used that to speculate for /// other blocks. -static bool IsValueFullyAvailableInBlock(BasicBlock *BB, +static bool IsValueFullyAvailableInBlock(BasicBlock *BB, DenseMap &FullyAvailableBlocks) { // Optimistically assume that the block is fully available and check to see // if we already know about this block in one lookup. - std::pair::iterator, char> IV = + std::pair::iterator, char> IV = FullyAvailableBlocks.insert(std::make_pair(BB, 2)); // If the entry already existed for this block, return the precomputed value. @@ -883,29 +780,29 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB, IV.first->second = 3; return IV.first->second != 0; } - + // Otherwise, see if it is fully available in all predecessors. pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - + // If this block has no predecessors, it isn't live-in here. 
if (PI == PE) goto SpeculationFailure; - + for (; PI != PE; ++PI) // If the value isn't fully available in one of our predecessors, then it // isn't fully available in this block either. Undo our previous // optimistic assumption and bail out. if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) goto SpeculationFailure; - + return true; - + // SpeculationFailure - If we get here, we found out that this is not, after // all, a fully-available block. We have a problem if we speculated on this and // used the speculation to mark other blocks as available. SpeculationFailure: char &BBVal = FullyAvailableBlocks[BB]; - + // If we didn't speculate on this, just return with it set to false. if (BBVal == 2) { BBVal = 0; @@ -917,8 +814,8 @@ SpeculationFailure: // 0 if set to one. SmallVector BBWorklist; BBWorklist.push_back(BB); - - while (!BBWorklist.empty()) { + + do { BasicBlock *Entry = BBWorklist.pop_back_val(); // Note that this sets blocks to 0 (unavailable) if they happen to not // already be in FullyAvailableBlocks. This is safe. @@ -927,11 +824,519 @@ SpeculationFailure: // Mark as unavailable. EntryVal = 0; - + for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I) BBWorklist.push_back(*I); + } while (!BBWorklist.empty()); + + return false; +} + + +/// CanCoerceMustAliasedValueToLoad - Return true if +/// CoerceAvailableValueToLoadType will succeed. +static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, + const Type *LoadTy, + const TargetData &TD) { + // If the loaded or stored value is an first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (LoadTy->isStructTy() || LoadTy->isArrayTy() || + StoredVal->getType()->isStructTy() || + StoredVal->getType()->isArrayTy()) + return false; + + // The store has to be at least as big as the load. + if (TD.getTypeSizeInBits(StoredVal->getType()) < + TD.getTypeSizeInBits(LoadTy)) + return false; + + return true; +} + + +/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and +/// then a load from a must-aliased pointer of a different type, try to coerce +/// the stored value. LoadedTy is the type of the load we want to replace and +/// InsertPt is the place to insert new instructions. +/// +/// If we can't do it, return null. +static Value *CoerceAvailableValueToLoadType(Value *StoredVal, + const Type *LoadedTy, + Instruction *InsertPt, + const TargetData &TD) { + if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) + return 0; + + const Type *StoredValTy = StoredVal->getType(); + + uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); + + // If the store and reload are the same size, we can always reuse it. + if (StoreSize == LoadSize) { + if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) { + // Pointer to Pointer -> use bitcast. + return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); + } + + // Convert source pointers to integers, which can be bitcast. + if (StoredValTy->isPointerTy()) { + StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); + StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); + } + + const Type *TypeToCastTo = LoadedTy; + if (TypeToCastTo->isPointerTy()) + TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); + + if (StoredValTy != TypeToCastTo) + StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); + + // Cast to pointer if the load needs a pointer type. 
+ if (LoadedTy->isPointerTy()) + StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); + + return StoredVal; + } + + // If the loaded value is smaller than the available value, then we can + // extract out a piece from it. If the available value is too small, then we + // can't do anything. + assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); + + // Convert source pointers to integers, which can be manipulated. + if (StoredValTy->isPointerTy()) { + StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); + StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); + } + + // Convert vectors and fp to integer, which can be manipulated. + if (!StoredValTy->isIntegerTy()) { + StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); + StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); + } + + // If this is a big-endian system, we need to shift the value down to the low + // bits so that a truncate will work. + if (TD.isBigEndian()) { + Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize); + StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt); + } + + // Truncate the integer to the right size now. + const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); + StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); + + if (LoadedTy == NewIntTy) + return StoredVal; + + // If the result is a pointer, inttoptr. + if (LoadedTy->isPointerTy()) + return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); + + // Otherwise, bitcast. + return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); +} + +/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can +/// be expressed as a base pointer plus a constant offset. Return the base and +/// offset to the caller. +static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, + const TargetData &TD) { + Operator *PtrOp = dyn_cast(Ptr); + if (PtrOp == 0) return Ptr; + + // Just look through bitcasts. + if (PtrOp->getOpcode() == Instruction::BitCast) + return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); + + // If this is a GEP with constant indices, we can look through it. + GEPOperator *GEP = dyn_cast(PtrOp); + if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; + ++I, ++GTI) { + ConstantInt *OpC = cast(*I); + if (OpC->isZero()) continue; + + // Handle a struct and array indices which add their offset to the pointer. + if (const StructType *STy = dyn_cast(*GTI)) { + Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + } else { + uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); + Offset += OpC->getSExtValue()*Size; + } + } + + // Re-sign extend from the pointer size if needed to get overflow edge cases + // right. + unsigned PtrSize = TD.getPointerSizeInBits(); + if (PtrSize < 64) + Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); + + return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); +} + + +/// AnalyzeLoadFromClobberingWrite - This function is called when we have a +/// memdep query of a load that ends up being a clobbering memory write (store, +/// memset, memcpy, memmove). This means that the write *may* provide bits used +/// by the load but we can't be sure because the pointers don't mustalias. 
+/// +/// Check this case to see if there is anything more we can do before we give +/// up. This returns -1 if we have to give up, or a byte number in the stored +/// value of the piece that feeds the load. +static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, + Value *WritePtr, + uint64_t WriteSizeInBits, + const TargetData &TD) { + // If the loaded or stored value is an first class array or struct, don't try + // to transform them. We need to be able to bitcast to integer. + if (LoadTy->isStructTy() || LoadTy->isArrayTy()) + return -1; + + int64_t StoreOffset = 0, LoadOffset = 0; + Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD); + Value *LoadBase = + GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD); + if (StoreBase != LoadBase) + return -1; + + // If the load and store are to the exact same address, they should have been + // a must alias. AA must have gotten confused. + // FIXME: Study to see if/when this happens. + if (LoadOffset == StoreOffset) { +#if 0 + dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" + << "Base = " << *StoreBase << "\n" + << "Store Ptr = " << *WritePtr << "\n" + << "Store Offs = " << StoreOffset << "\n" + << "Load Ptr = " << *LoadPtr << "\n"; + abort(); +#endif + return -1; + } + + // If the load and store don't overlap at all, the store doesn't provide + // anything to the load. In this case, they really don't alias at all, AA + // must have gotten confused. + // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then + // remove this check, as it is duplicated with what we have below. + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy); + + if ((WriteSizeInBits & 7) | (LoadSize & 7)) + return -1; + uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes. + LoadSize >>= 3; + + + bool isAAFailure = false; + if (StoreOffset < LoadOffset) { + isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; + } else { + isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset; + } + if (isAAFailure) { +#if 0 + dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n" + << "Base = " << *StoreBase << "\n" + << "Store Ptr = " << *WritePtr << "\n" + << "Store Offs = " << StoreOffset << "\n" + << "Load Ptr = " << *LoadPtr << "\n"; + abort(); +#endif + return -1; + } + + // If the Load isn't completely contained within the stored bits, we don't + // have all the bits to feed it. We could do something crazy in the future + // (issue a smaller load then merge the bits in) but this seems unlikely to be + // valuable. + if (StoreOffset > LoadOffset || + StoreOffset+StoreSize < LoadOffset+LoadSize) + return -1; + + // Okay, we can do this transformation. Return the number of bytes into the + // store that the load is. + return LoadOffset-StoreOffset; +} + +/// AnalyzeLoadFromClobberingStore - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. +static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, + StoreInst *DepSI, + const TargetData &TD) { + // Cannot handle reading from store of first-class aggregate yet. 
+ if (DepSI->getOperand(0)->getType()->isStructTy() || + DepSI->getOperand(0)->getType()->isArrayTy()) + return -1; + + Value *StorePtr = DepSI->getPointerOperand(); + uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, + StorePtr, StoreSize, TD); +} + +static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, + MemIntrinsic *MI, + const TargetData &TD) { + // If the mem operation is a non-constant size, we can't handle it. + ConstantInt *SizeCst = dyn_cast(MI->getLength()); + if (SizeCst == 0) return -1; + uint64_t MemSizeInBits = SizeCst->getZExtValue()*8; + + // If this is memset, we just need to see if the offset is valid in the size + // of the memset.. + if (MI->getIntrinsicID() == Intrinsic::memset) + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(), + MemSizeInBits, TD); + + // If we have a memcpy/memmove, the only case we can handle is if this is a + // copy from constant memory. In that case, we can read directly from the + // constant memory. + MemTransferInst *MTI = cast(MI); + + Constant *Src = dyn_cast(MTI->getSource()); + if (Src == 0) return -1; + + GlobalVariable *GV = dyn_cast(Src->getUnderlyingObject()); + if (GV == 0 || !GV->isConstant()) return -1; + + // See if the access is within the bounds of the transfer. + int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, + MI->getDest(), MemSizeInBits, TD); + if (Offset == -1) + return Offset; + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = ConstantExpr::getBitCast(Src, + llvm::Type::getInt8PtrTy(Src->getContext())); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + if (ConstantFoldLoadFromConstPtr(Src, &TD)) + return Offset; + return -1; +} + + +/// GetStoreValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store. This means +/// that the store *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. +static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, + const Type *LoadTy, + Instruction *InsertPt, const TargetData &TD){ + LLVMContext &Ctx = SrcVal->getType()->getContext(); + + uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + + IRBuilder<> Builder(InsertPt->getParent(), InsertPt); + + // Compute which bits of the stored value are being used by the load. Convert + // to an integer type to start with. + if (SrcVal->getType()->isPointerTy()) + SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp"); + if (!SrcVal->getType()->isIntegerTy()) + SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8), + "tmp"); + + // Shift the bits to the least significant depending on endianness. 
+ unsigned ShiftAmt; + if (TD.isLittleEndian()) + ShiftAmt = Offset*8; + else + ShiftAmt = (StoreSize-LoadSize-Offset)*8; + + if (ShiftAmt) + SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp"); + + if (LoadSize != StoreSize) + SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8), + "tmp"); + + return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); +} + +/// GetMemInstValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering mem intrinsic. +static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, + const Type *LoadTy, Instruction *InsertPt, + const TargetData &TD){ + LLVMContext &Ctx = LoadTy->getContext(); + uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; + + IRBuilder<> Builder(InsertPt->getParent(), InsertPt); + + // We know that this method is only called when the mem transfer fully + // provides the bits for the load. + if (MemSetInst *MSI = dyn_cast(SrcInst)) { + // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and + // independently of what the offset is. + Value *Val = MSI->getValue(); + if (LoadSize != 1) + Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8)); + + Value *OneElt = Val; + + // Splat the value out to the right number of bits. + for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) { + // If we can double the number of bytes set, do it. + if (NumBytesSet*2 <= LoadSize) { + Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8); + Val = Builder.CreateOr(Val, ShVal); + NumBytesSet <<= 1; + continue; + } + + // Otherwise insert one byte at a time. + Value *ShVal = Builder.CreateShl(Val, 1*8); + Val = Builder.CreateOr(OneElt, ShVal); + ++NumBytesSet; + } + + return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD); } + + // Otherwise, this is a memcpy/memmove from a constant global. + MemTransferInst *MTI = cast(SrcInst); + Constant *Src = cast(MTI->getSource()); + + // Otherwise, see if we can constant fold a load from the constant with the + // offset applied as appropriate. + Src = ConstantExpr::getBitCast(Src, + llvm::Type::getInt8PtrTy(Src->getContext())); + Constant *OffsetCst = + ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset); + Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1); + Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy)); + return ConstantFoldLoadFromConstPtr(Src, &TD); +} + + + +struct AvailableValueInBlock { + /// BB - The basic block in question. + BasicBlock *BB; + enum ValType { + SimpleVal, // A simple offsetted value that is accessed. + MemIntrin // A memory intrinsic which is loaded from. + }; + /// V - The value that is live out of the block. + PointerIntPair Val; + + /// Offset - The byte offset in Val that is interesting for the load query. 
+ unsigned Offset; + + static AvailableValueInBlock get(BasicBlock *BB, Value *V, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(V); + Res.Val.setInt(SimpleVal); + Res.Offset = Offset; + return Res; + } + + static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(MI); + Res.Val.setInt(MemIntrin); + Res.Offset = Offset; + return Res; + } + + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + Value *getSimpleValue() const { + assert(isSimpleValue() && "Wrong accessor"); + return Val.getPointer(); + } + + MemIntrinsic *getMemIntrinValue() const { + assert(!isSimpleValue() && "Wrong accessor"); + return cast(Val.getPointer()); + } + + /// MaterializeAdjustedValue - Emit code into this block to adjust the value + /// defined here to the specified type. This handles various coercion cases. + Value *MaterializeAdjustedValue(const Type *LoadTy, + const TargetData *TD) const { + Value *Res; + if (isSimpleValue()) { + Res = getSimpleValue(); + if (Res->getType() != LoadTy) { + assert(TD && "Need target data to handle type mismatch case"); + Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), + *TD); + + DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + << *getSimpleValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + } else { + Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, + LoadTy, BB->getTerminator(), *TD); + DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + << " " << *getMemIntrinValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } + return Res; + } +}; + +/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, +/// construct SSA form, allowing us to eliminate LI. This returns the value +/// that should be used at LI's definition site. +static Value *ConstructSSAForLoadSet(LoadInst *LI, + SmallVectorImpl &ValuesPerBlock, + const TargetData *TD, + const DominatorTree &DT, + AliasAnalysis *AA) { + // Check for the fully redundant, dominating load case. In this case, we can + // just use the dominating value directly. + if (ValuesPerBlock.size() == 1 && + DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) + return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD); + + // Otherwise, we have to construct SSA form. + SmallVector NewPHIs; + SSAUpdater SSAUpdate(&NewPHIs); + SSAUpdate.Initialize(LI); + + const Type *LoadTy = LI->getType(); + + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { + const AvailableValueInBlock &AV = ValuesPerBlock[i]; + BasicBlock *BB = AV.BB; + + if (SSAUpdate.HasValueForBlock(BB)) + continue; + + SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD)); + } + + // Perform PHI construction. + Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); + + // If new PHI nodes were created, notify alias analysis. + if (V->getType()->isPointerTy()) + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) + AA->copyValue(LI, NewPHIs[i]); + + return V; +} + +static bool isLifetimeStart(Instruction *Inst) { + if (IntrinsicInst* II = dyn_cast(Inst)) + return II->getIntrinsicID() == Intrinsic::lifetime_start; return false; } @@ -940,12 +1345,12 @@ SpeculationFailure: bool GVN::processNonLocalLoad(LoadInst *LI, SmallVectorImpl &toErase) { // Find the non-local dependencies of the load. 
- SmallVector Deps; + SmallVector Deps; MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(), Deps); - //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " + //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: " // << Deps.size() << *LI << '\n'); - + // If we had to process more than one hundred blocks to find the // dependencies, this load isn't worth worrying about. Optimizing // it will be too expensive. @@ -954,108 +1359,150 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (Deps.size() == 1 && Deps[0].second.isClobber()) { + if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { DEBUG( - errs() << "GVN: non-local load "; - WriteAsOperand(errs(), LI); - errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n'; + dbgs() << "GVN: non-local load "; + WriteAsOperand(dbgs(), LI); + dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n'; ); return false; } - + // Filter out useless results (non-locals, etc). Keep track of the blocks // where we have a value available in repl, also keep track of whether we see // dependencies that produce an unknown value for the load (such as a call // that could potentially clobber the load). - SmallVector, 16> ValuesPerBlock; + SmallVector ValuesPerBlock; SmallVector UnavailableBlocks; + + const TargetData *TD = 0; for (unsigned i = 0, e = Deps.size(); i != e; ++i) { - BasicBlock *DepBB = Deps[i].first; - MemDepResult DepInfo = Deps[i].second; - + BasicBlock *DepBB = Deps[i].getBB(); + MemDepResult DepInfo = Deps[i].getResult(); + if (DepInfo.isClobber()) { + // The address being loaded in this non-local block may not be the same as + // the pointer operand of the load if PHI translation occurs. Make sure + // to consider the right address. + Value *Address = Deps[i].getAddress(); + + // If the dependence is to a store that writes to a superset of the bits + // read by the load, we can extract the bits we need for the load from the + // stored value. + if (StoreInst *DepSI = dyn_cast(DepInfo.getInst())) { + if (TD == 0) + TD = getAnalysisIfAvailable(); + if (TD && Address) { + int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address, + DepSI, *TD); + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + DepSI->getOperand(0), + Offset)); + continue; + } + } + } + + // If the clobbering value is a memset/memcpy/memmove, see if we can + // forward a value on from it. + if (MemIntrinsic *DepMI = dyn_cast(DepInfo.getInst())) { + if (TD == 0) + TD = getAnalysisIfAvailable(); + if (TD && Address) { + int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address, + DepMI, *TD); + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI, + Offset)); + continue; + } + } + } + UnavailableBlocks.push_back(DepBB); continue; } - + Instruction *DepInst = DepInfo.getInst(); - + // Loading the allocation -> undef. - if (isa(DepInst)) { - ValuesPerBlock.push_back(std::make_pair(DepBB, - UndefValue::get(LI->getType()))); + if (isa(DepInst) || isMalloc(DepInst) || + // Loading immediately after lifetime begin -> undef. + isLifetimeStart(DepInst)) { + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + UndefValue::get(LI->getType()))); continue; } - - if (StoreInst* S = dyn_cast(DepInst)) { - // Reject loads and stores that are to the same address but are of - // different types. 
- // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because - // of bitfield access, it would be interesting to optimize for it at some - // point. + + if (StoreInst *S = dyn_cast(DepInst)) { + // Reject loads and stores that are to the same address but are of + // different types if we have to. if (S->getOperand(0)->getType() != LI->getType()) { - UnavailableBlocks.push_back(DepBB); - continue; + if (TD == 0) + TD = getAnalysisIfAvailable(); + + // If the stored value is larger or equal to the loaded value, we can + // reuse it. + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0), + LI->getType(), *TD)) { + UnavailableBlocks.push_back(DepBB); + continue; + } } - - ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0))); - - } else if (LoadInst* LD = dyn_cast(DepInst)) { + + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, + S->getOperand(0))); + continue; + } + + if (LoadInst *LD = dyn_cast(DepInst)) { + // If the types mismatch and we can't handle it, reject reuse of the load. if (LD->getType() != LI->getType()) { - UnavailableBlocks.push_back(DepBB); - continue; + if (TD == 0) + TD = getAnalysisIfAvailable(); + + // If the stored value is larger or equal to the loaded value, we can + // reuse it. + if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ + UnavailableBlocks.push_back(DepBB); + continue; + } } - ValuesPerBlock.push_back(std::make_pair(DepBB, LD)); - } else { - UnavailableBlocks.push_back(DepBB); + ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); continue; } + + UnavailableBlocks.push_back(DepBB); + continue; } - + // If we have no predecessors that produce a known value for this load, exit // early. if (ValuesPerBlock.empty()) return false; - + // If all of the instructions we depend on produce a known value for this // load, then it is fully redundant and we can use PHI insertion to compute // its value. Insert PHIs and remove the fully redundant value now. if (UnavailableBlocks.empty()) { - // Use cached PHI construction information from previous runs - SmallPtrSet &p = phiMap[LI->getPointerOperand()]; - // FIXME: What does phiMap do? Are we positive it isn't getting invalidated? - for (SmallPtrSet::iterator I = p.begin(), E = p.end(); - I != E; ++I) { - if ((*I)->getParent() == LI->getParent()) { - DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD #1: " << *LI << '\n'); - LI->replaceAllUsesWith(*I); - if (isa((*I)->getType())) - MD->invalidateCachedPointerInfo(*I); - toErase.push_back(LI); - NumGVNLoad++; - return true; - } - - ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); - } + DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); - DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); - - DenseMap BlockReplValues; - BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); // Perform PHI construction. - Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); - LI->replaceAllUsesWith(v); - - if (isa(v)) - v->takeName(LI); - if (isa(v->getType())) - MD->invalidateCachedPointerInfo(v); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, + VN.getAliasAnalysis()); + LI->replaceAllUsesWith(V); + + if (isa(V)) + V->takeName(LI); + if (V->getType()->isPointerTy()) + MD->invalidateCachedPointerInfo(V); + VN.erase(LI); toErase.push_back(LI); NumGVNLoad++; return true; } - + if (!EnablePRE || !EnableLoadPRE) return false; @@ -1066,7 +1513,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // prefer to not increase code size. 
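
Once such an offset is known, reusing the stored bits for a narrower load is essentially a shift and a mask, which is the idea behind the GetStoreValueForLoad-style coercion referenced above. A little-endian, integer-only sketch, assuming Offset + LoadBytes <= 8 so everything fits in one 64-bit word:

#include <cstdint>

// Extract the LoadBytes bytes starting at byte Offset of a stored integer:
// shift the wanted bytes down to bit 0, then mask off the rest.
std::uint64_t extractLoadedBits(std::uint64_t StoredVal, unsigned Offset,
                                unsigned LoadBytes) {
  std::uint64_t Shifted = StoredVal >> (Offset * 8);
  if (LoadBytes >= 8)
    return Shifted;
  std::uint64_t Mask = (std::uint64_t(1) << (LoadBytes * 8)) - 1;
  return Shifted & Mask;
}

For example, after storing 0x11223344 as an i32, the byte at offset 2 loads as 0x22 on a little-endian target.
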
As such, we only do this when we know // that we only have to insert *one* load (which means we're basically moving // the load, not inserting a new one). - + SmallPtrSet Blockers; for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) Blockers.insert(UnavailableBlocks[i]); @@ -1081,8 +1528,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI, while (TmpBB->getSinglePredecessor()) { isSinglePred = true; TmpBB = TmpBB->getSinglePredecessor(); - if (!TmpBB) // If haven't found any, bail now. - return false; if (TmpBB == LoadBB) // Infinite (unreachable) loop. return false; if (Blockers.count(TmpBB)) @@ -1090,28 +1535,36 @@ bool GVN::processNonLocalLoad(LoadInst *LI, if (TmpBB->getTerminator()->getNumSuccessors() != 1) allSingleSucc = false; } - + assert(TmpBB); LoadBB = TmpBB; - + // If we have a repl set with LI itself in it, this means we have a loop where // at least one of the values is LI. Since this means that we won't be able // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].second == LI) - return false; - + if (!EnableFullLoadPRE) { + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) + if (ValuesPerBlock[i].isSimpleValue() && + ValuesPerBlock[i].getSimpleValue() == LI) + return false; + } + + // FIXME: It is extremely unclear what this loop is doing, other than + // artificially restricting loadpre. if (isSinglePred) { bool isHot = false; - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (Instruction *I = dyn_cast(ValuesPerBlock[i].second)) - // "Hot" Instruction is in some loop (because it dominates its dep. - // instruction). - if (DT->dominates(LI, I)) { - isHot = true; - break; - } + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { + const AvailableValueInBlock &AV = ValuesPerBlock[i]; + if (AV.isSimpleValue()) + // "Hot" Instruction is in some loop (because it dominates its dep. + // instruction). + if (Instruction *I = dyn_cast(AV.getSimpleValue())) + if (DT->dominates(LI, I)) { + isHot = true; + break; + } + } // We are interested only in "hot" instructions. We don't want to do any // mis-optimizations here. @@ -1119,91 +1572,154 @@ bool GVN::processNonLocalLoad(LoadInst *LI, return false; } - // Okay, we have some hope :). Check to see if the loaded value is fully - // available in all but one predecessor. - // FIXME: If we could restructure the CFG, we could make a common pred with - // all the preds that don't have an available LI and insert a new load into - // that one block. - BasicBlock *UnavailablePred = 0; - + // Check to see how many predecessors have the loaded value fully + // available. + DenseMap PredLoads; DenseMap FullyAvailableBlocks; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - FullyAvailableBlocks[ValuesPerBlock[i].first] = true; + FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) FullyAvailableBlocks[UnavailableBlocks[i]] = false; for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { - if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks)) + BasicBlock *Pred = *PI; + if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) { continue; - - // If this load is not available in multiple predecessors, reject it. 
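
The loop above starts sorting predecessors into those where the value is already fully available and those that would need a newly inserted load; the code that follows rejects the transformation (unless full load-PRE is enabled) when more than one predecessor would need one. A standalone sketch of that decision, with illustrative names rather than the GVN interface:

#include <map>
#include <vector>

// Gather the predecessors where the loaded value is not fully available.
// With "full" load PRE off, inserting more than one new load is treated as a
// likely code-size loss and rejected.
template <typename BlockT>
bool worthDoingLoadPRE(const std::vector<BlockT *> &Preds,
                       const std::map<BlockT *, bool> &FullyAvailable,
                       bool EnableFullLoadPRE,
                       std::vector<BlockT *> &NeedLoad) {
  for (BlockT *P : Preds) {
    auto It = FullyAvailable.find(P);
    if (It == FullyAvailable.end() || !It->second)
      NeedLoad.push_back(P);              // a load must be materialized here
  }
  if (NeedLoad.empty())
    return false;                         // fully redundant, handled earlier
  return EnableFullLoadPRE || NeedLoad.size() == 1;
}
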
- if (UnavailablePred && UnavailablePred != *PI) + } + PredLoads[Pred] = 0; + + if (Pred->getTerminator()->getNumSuccessors() != 1) { + if (isa(Pred->getTerminator())) { + DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '" + << Pred->getName() << "': " << *LI << '\n'); + return false; + } + unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB); + toSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum)); return false; - UnavailablePred = *PI; + } } - - assert(UnavailablePred != 0 && + + // Decide whether PRE is profitable for this load. + unsigned NumUnavailablePreds = PredLoads.size(); + assert(NumUnavailablePreds != 0 && "Fully available value should be eliminated above!"); - - // If the loaded pointer is PHI node defined in this block, do PHI translation - // to get its value in the predecessor. - Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred); - - // Make sure the value is live in the predecessor. If it was defined by a - // non-PHI instruction in this block, we don't know how to recompute it above. - if (Instruction *LPInst = dyn_cast(LoadPtr)) - if (!DT->dominates(LPInst->getParent(), UnavailablePred)) { - DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " - << *LPInst << '\n' << *LI << "\n"); + if (!EnableFullLoadPRE) { + // If this load is unavailable in multiple predecessors, reject it. + // FIXME: If we could restructure the CFG, we could make a common pred with + // all the preds that don't have an available LI and insert a new load into + // that one block. + if (NumUnavailablePreds != 1) return false; + } + + // Check if the load can safely be moved to all the unavailable predecessors. + bool CanDoPRE = true; + SmallVector NewInsts; + for (DenseMap::iterator I = PredLoads.begin(), + E = PredLoads.end(); I != E; ++I) { + BasicBlock *UnavailablePred = I->first; + + // Do PHI translation to get its value in the predecessor if necessary. The + // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. + + // If all preds have a single successor, then we know it is safe to insert + // the load on the pred (?!?), so we can insert code to materialize the + // pointer if it is not available. + PHITransAddr Address(LI->getOperand(0), TD); + Value *LoadPtr = 0; + if (allSingleSucc) { + LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, + *DT, NewInsts); + } else { + Address.PHITranslateValue(LoadBB, UnavailablePred); + LoadPtr = Address.getAddr(); + + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null(LoadPtr)) + if (!DT->dominates(Inst->getParent(), UnavailablePred)) + LoadPtr = 0; } - - // We don't currently handle critical edges :( - if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { - DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" - << UnavailablePred->getName() << "': " << *LI << '\n'); - return false; + + // If we couldn't find or insert a computation of this phi translated value, + // we fail PRE. + if (LoadPtr == 0) { + DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " + << *LI->getOperand(0) << "\n"); + CanDoPRE = false; + break; + } + + // Make sure it is valid to move this load here. We have to watch out for: + // @1 = getelementptr (i8* p, ... + // test p and branch if == 0 + // load @1 + // It is valid to have the getelementptr before the test, even if p can be 0, + // as getelementptr only does address arithmetic. 
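
The PHITransAddr machinery used above rewrites the load's address into each unavailable predecessor. In the simplest case, where the address is itself a PHI defined in the load's block, translation just selects that PHI's incoming value for the predecessor; the real code also rebuilds GEPs and casts whose operands are PHIs, and can insert the missing computation when every path is safe. A sketch of only the trivial case, under that assumption:

#include <map>
#include <utility>

// Simplified model of PHI translation for the load address. If the address is
// not a PHI of the load's block it is reused as-is (the caller still checks
// that it dominates the predecessor); if it is, look up the incoming value.
template <typename ValueT, typename BlockT>
ValueT *translateLoadAddress(ValueT *Addr, BlockT *Pred,
                             const std::map<std::pair<ValueT *, BlockT *>,
                                            ValueT *> &PhiIncoming,
                             bool AddrIsPhiInLoadBB) {
  if (!AddrIsPhiInLoadBB)
    return Addr;
  auto It = PhiIncoming.find(std::make_pair(Addr, Pred));
  return It == PhiIncoming.end() ? nullptr : It->second;
}
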
+ // If we are not pushing the value through any multiple-successor blocks + // we do not have this case. Otherwise, check that the load is safe to + // put anywhere; this can be improved, but should be conservatively safe. + if (!allSingleSucc && + // FIXME: REEVALUTE THIS. + !isSafeToLoadUnconditionally(LoadPtr, + UnavailablePred->getTerminator(), + LI->getAlignment(), TD)) { + CanDoPRE = false; + break; + } + + I->second = LoadPtr; } - // Make sure it is valid to move this load here. We have to watch out for: - // @1 = getelementptr (i8* p, ... - // test p and branch if == 0 - // load @1 - // It is valid to have the getelementptr before the test, even if p can be 0, - // as getelementptr only does address arithmetic. - // If we are not pushing the value through any multiple-successor blocks - // we do not have this case. Otherwise, check that the load is safe to - // put anywhere; this can be improved, but should be conservatively safe. - if (!allSingleSucc && - !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) + if (!CanDoPRE) { + while (!NewInsts.empty()) + NewInsts.pop_back_val()->eraseFromParent(); return false; + } // Okay, we can eliminate this load by inserting a reload in the predecessor // and using PHI construction to get the value in the other predecessors, do // it. - DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); - - Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, - LI->getAlignment(), - UnavailablePred->getTerminator()); - - SmallPtrSet &p = phiMap[LI->getPointerOperand()]; - for (SmallPtrSet::iterator I = p.begin(), E = p.end(); - I != E; ++I) - ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); - - DenseMap BlockReplValues; - BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); - BlockReplValues[UnavailablePred] = NewLoad; - + DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); + DEBUG(if (!NewInsts.empty()) + dbgs() << "INSERTED " << NewInsts.size() << " INSTS: " + << *NewInsts.back() << '\n'); + + // Assign value numbers to the new instructions. + for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { + // FIXME: We really _ought_ to insert these value numbers into their + // parent's availability map. However, in doing so, we risk getting into + // ordering issues. If a block hasn't been processed yet, we would be + // marking a value as AVAIL-IN, which isn't what we intend. + VN.lookup_or_add(NewInsts[i]); + } + + for (DenseMap::iterator I = PredLoads.begin(), + E = PredLoads.end(); I != E; ++I) { + BasicBlock *UnavailablePred = I->first; + Value *LoadPtr = I->second; + + Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, + LI->getAlignment(), + UnavailablePred->getTerminator()); + + // Add the newly created load. + ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, + NewLoad)); + MD->invalidateCachedPointerInfo(LoadPtr); + DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); + } + // Perform PHI construction. 
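
The CanDoPRE handling above follows a speculate-then-roll-back pattern: instructions created while phi-translating the address are remembered in NewInsts, and if any predecessor turns out to be unsafe they are erased in reverse creation order before giving up. A standalone sketch of that cleanup, with the erase routine abstracted as a callback:

#include <vector>

// Destroy speculatively created instructions newest-first, so an instruction
// is never destroyed while a later one still uses it. DestroyFn stands in for
// something like eraseFromParent().
template <typename InstT, typename DestroyFn>
void rollBackSpeculativeInsts(std::vector<InstT *> &NewInsts, DestroyFn Destroy) {
  while (!NewInsts.empty()) {
    InstT *I = NewInsts.back();
    NewInsts.pop_back();
    Destroy(I);
  }
}
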
- Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); - LI->replaceAllUsesWith(v); - if (isa(v)) - v->takeName(LI); - if (isa(v->getType())) - MD->invalidateCachedPointerInfo(v); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, + VN.getAliasAnalysis()); + LI->replaceAllUsesWith(V); + if (isa(V)) + V->takeName(LI); + if (V->getType()->isPointerTy()) + MD->invalidateCachedPointerInfo(V); + VN.erase(LI); toErase.push_back(LI); NumPRELoad++; return true; @@ -1212,282 +1728,311 @@ bool GVN::processNonLocalLoad(LoadInst *LI, /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { + if (!MD) + return false; + if (L->isVolatile()) return false; - - Value* pointer = L->getPointerOperand(); // ... to a pointer that has been loaded from before... - MemDepResult dep = MD->getDependency(L); - + MemDepResult Dep = MD->getDependency(L); + // If the value isn't available, don't do anything! - if (dep.isClobber()) { + if (Dep.isClobber()) { + // Check to see if we have something like this: + // store i32 123, i32* %P + // %A = bitcast i32* %P to i8* + // %B = gep i8* %A, i32 1 + // %C = load i8* %B + // + // We could do that by recognizing if the clobber instructions are obviously + // a common base + constant offset, and if the previous store (or memset) + // completely covers this load. This sort of thing can happen in bitfield + // access code. + Value *AvailVal = 0; + if (StoreInst *DepSI = dyn_cast(Dep.getInst())) + if (const TargetData *TD = getAnalysisIfAvailable()) { + int Offset = AnalyzeLoadFromClobberingStore(L->getType(), + L->getPointerOperand(), + DepSI, *TD); + if (Offset != -1) + AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, + L->getType(), L, *TD); + } + + // If the clobbering value is a memset/memcpy/memmove, see if we can forward + // a value on from it. + if (MemIntrinsic *DepMI = dyn_cast(Dep.getInst())) { + if (const TargetData *TD = getAnalysisIfAvailable()) { + int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), + L->getPointerOperand(), + DepMI, *TD); + if (Offset != -1) + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD); + } + } + + if (AvailVal) { + DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n' + << *AvailVal << '\n' << *L << "\n\n\n"); + + // Replace the load! + L->replaceAllUsesWith(AvailVal); + if (AvailVal->getType()->isPointerTy()) + MD->invalidateCachedPointerInfo(AvailVal); + VN.erase(L); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + DEBUG( // fast print dep, using operator<< on instruction would be too slow - errs() << "GVN: load "; - WriteAsOperand(errs(), L); - Instruction *I = dep.getInst(); - errs() << " is clobbered by " << *I << '\n'; + dbgs() << "GVN: load "; + WriteAsOperand(dbgs(), L); + Instruction *I = Dep.getInst(); + dbgs() << " is clobbered by " << *I << '\n'; ); return false; } // If it is defined in another block, try harder. - if (dep.isNonLocal()) + if (Dep.isNonLocal()) return processNonLocalLoad(L, toErase); - Instruction *DepInst = dep.getInst(); + Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast(DepInst)) { - // Only forward substitute stores to loads of the same type. - // FIXME: Could do better! 
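
Among the cases the GetMemInstValueForLoad path above covers is a load satisfied by a memset: every byte the load reads is the fill byte, so the loaded integer is simply that byte splatted across the load width. A standalone sketch, assuming the load fits in 64 bits; the memcpy case is more involved (the real code looks through to the copy's source) and is not shown:

#include <cstdint>

// Value of an integer load whose bytes were all written by
// memset(p, FillByte, n). Endianness is irrelevant because every byte is
// identical; e.g. FillByte = 0xAB with a 4-byte load yields 0xABABABAB.
std::uint64_t memsetLoadValue(std::uint8_t FillByte, unsigned LoadBytes) {
  std::uint64_t V = 0;
  for (unsigned i = 0; i != LoadBytes; ++i)
    V |= std::uint64_t(FillByte) << (8 * i);
  return V;
}
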
- if (DepSI->getPointerOperand()->getType() != pointer->getType()) - return false; + Value *StoredVal = DepSI->getOperand(0); + // The store and load are to a must-aliased pointer, but they may not + // actually have the same type. See if we know how to reuse the stored + // value (depending on its type). + const TargetData *TD = 0; + if (StoredVal->getType() != L->getType()) { + if ((TD = getAnalysisIfAvailable())) { + StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), + L, *TD); + if (StoredVal == 0) + return false; + + DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal + << '\n' << *L << "\n\n\n"); + } + else + return false; + } + // Remove it! - L->replaceAllUsesWith(DepSI->getOperand(0)); - if (isa(DepSI->getOperand(0)->getType())) - MD->invalidateCachedPointerInfo(DepSI->getOperand(0)); + L->replaceAllUsesWith(StoredVal); + if (StoredVal->getType()->isPointerTy()) + MD->invalidateCachedPointerInfo(StoredVal); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; } if (LoadInst *DepLI = dyn_cast(DepInst)) { - // Only forward substitute stores to loads of the same type. - // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian. - if (DepLI->getType() != L->getType()) - return false; + Value *AvailableVal = DepLI; + + // The loads are of a must-aliased pointer, but they may not actually have + // the same type. See if we know how to reuse the previously loaded value + // (depending on its type). + const TargetData *TD = 0; + if (DepLI->getType() != L->getType()) { + if ((TD = getAnalysisIfAvailable())) { + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + if (AvailableVal == 0) + return false; + + DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal + << "\n" << *L << "\n\n\n"); + } + else + return false; + } // Remove it! - L->replaceAllUsesWith(DepLI); - if (isa(DepLI->getType())) + L->replaceAllUsesWith(AvailableVal); + if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; } - + // If this load really doesn't depend on anything, then we must be loading an // undef value. This can happen when loading for a fresh allocation with no // intervening stores, for example. - if (isa(DepInst)) { + if (isa(DepInst) || isMalloc(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; } + + // If this load occurs either right after a lifetime begin, + // then the loaded value is undefined. 
+ if (IntrinsicInst* II = dyn_cast(DepInst)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + L->replaceAllUsesWith(UndefValue::get(L->getType())); + VN.erase(L); + toErase.push_back(L); + NumGVNLoad++; + return true; + } + } return false; } -Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) { +Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { DenseMap::iterator I = localAvail.find(BB); if (I == localAvail.end()) return 0; - - ValueNumberScope* locals = I->second; - - while (locals) { - DenseMap::iterator I = locals->table.find(num); - if (I != locals->table.end()) + + ValueNumberScope *Locals = I->second; + while (Locals) { + DenseMap::iterator I = Locals->table.find(num); + if (I != Locals->table.end()) return I->second; - else - locals = locals->parent; + Locals = Locals->parent; } - + return 0; } -/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination -/// by inheritance from the dominator fails, see if we can perform phi -/// construction to eliminate the redundancy. -Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { - BasicBlock* BaseBlock = orig->getParent(); - - SmallPtrSet Visited; - SmallVector Stack; - Stack.push_back(BaseBlock); - - DenseMap Results; - - // Walk backwards through our predecessors, looking for instances of the - // value number we're looking for. Instances are recorded in the Results - // map, which is then used to perform phi construction. - while (!Stack.empty()) { - BasicBlock* Current = Stack.back(); - Stack.pop_back(); - - // If we've walked all the way to a proper dominator, then give up. Cases - // where the instance is in the dominator will have been caught by the fast - // path, and any cases that require phi construction further than this are - // probably not worth it anyways. Note that this is a SIGNIFICANT compile - // time improvement. - if (DT->properlyDominates(Current, orig->getParent())) return 0; - - DenseMap::iterator LA = - localAvail.find(Current); - if (LA == localAvail.end()) return 0; - DenseMap::iterator V = LA->second->table.find(valno); - - if (V != LA->second->table.end()) { - // Found an instance, record it. - Results.insert(std::make_pair(Current, V->second)); - continue; - } - - // If we reach the beginning of the function, then give up. - if (pred_begin(Current) == pred_end(Current)) - return 0; - - for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current); - PI != PE; ++PI) - if (Visited.insert(*PI)) - Stack.push_back(*PI); - } - - // If we didn't find instances, give up. Otherwise, perform phi construction. - if (Results.size() == 0) - return 0; - else - return GetValueForBlock(BaseBlock, orig, Results, true); -} /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I, SmallVectorImpl &toErase) { - if (LoadInst* L = dyn_cast(I)) { - bool changed = processLoad(L, toErase); - - if (!changed) { - unsigned num = VN.lookup_or_add(L); - localAvail[I->getParent()]->table.insert(std::make_pair(num, L)); + // Ignore dbg info intrinsics. 
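
The lookupNumber routine above searches a chain of per-block tables: each block's ValueNumberScope points at the scope of its immediate dominator, so anything recorded in a dominating block is visible without copying tables downward. A simplified sketch of that walk, using plain standard containers rather than the LLVM types:

#include <cstdint>
#include <map>

// One scope per block; Parent is the scope of the immediate dominator.
struct NumberScope {
  std::map<std::uint32_t, void *> Table;  // value number -> available value
  NumberScope *Parent = nullptr;
};

// Walk outward through dominating scopes until the value number is found.
void *lookupAvailable(NumberScope *S, std::uint32_t Num) {
  for (; S; S = S->Parent) {
    auto It = S->Table.find(Num);
    if (It != S->Table.end())
      return It->second;
  }
  return nullptr;                         // not available in any dominator
}
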
+ if (isa(I)) + return false; + + if (LoadInst *LI = dyn_cast(I)) { + bool Changed = processLoad(LI, toErase); + + if (!Changed) { + unsigned Num = VN.lookup_or_add(LI); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI)); } - - return changed; + + return Changed; } - - uint32_t nextNum = VN.getNextUnusedValueNumber(); - unsigned num = VN.lookup_or_add(I); - - if (BranchInst* BI = dyn_cast(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); - + + uint32_t NextNum = VN.getNextUnusedValueNumber(); + unsigned Num = VN.lookup_or_add(I); + + if (BranchInst *BI = dyn_cast(I)) { + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + if (!BI->isConditional() || isa(BI->getCondition())) return false; - - Value* branchCond = BI->getCondition(); - uint32_t condVN = VN.lookup_or_add(branchCond); - - BasicBlock* trueSucc = BI->getSuccessor(0); - BasicBlock* falseSucc = BI->getSuccessor(1); - - if (trueSucc->getSinglePredecessor()) - localAvail[trueSucc]->table[condVN] = - ConstantInt::getTrue(trueSucc->getContext()); - if (falseSucc->getSinglePredecessor()) - localAvail[falseSucc]->table[condVN] = - ConstantInt::getFalse(trueSucc->getContext()); + + Value *BranchCond = BI->getCondition(); + uint32_t CondVN = VN.lookup_or_add(BranchCond); + + BasicBlock *TrueSucc = BI->getSuccessor(0); + BasicBlock *FalseSucc = BI->getSuccessor(1); + + if (TrueSucc->getSinglePredecessor()) + localAvail[TrueSucc]->table[CondVN] = + ConstantInt::getTrue(TrueSucc->getContext()); + if (FalseSucc->getSinglePredecessor()) + localAvail[FalseSucc]->table[CondVN] = + ConstantInt::getFalse(TrueSucc->getContext()); return false; - + // Allocations are always uniquely numbered, so we can save time and memory - // by fast failing them. - } else if (isa(I) || isa(I)) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + // by fast failing them. + } else if (isa(I) || isa(I)) { + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); return false; } - + // Collapse PHI nodes if (PHINode* p = dyn_cast(I)) { - Value* constVal = CollapsePhi(p); - + Value *constVal = CollapsePhi(p); + if (constVal) { - for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end(); - PI != PE; ++PI) - PI->second.erase(p); - p->replaceAllUsesWith(constVal); - if (isa(constVal->getType())) + if (MD && constVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(constVal); VN.erase(p); - + toErase.push_back(p); } else { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); } - + // If the number we were assigned was a brand new VN, then we don't // need to do a lookup to see if the number already exists // somewhere in the domtree: it can't! - } else if (num == nextNum) { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); - + } else if (Num == NextNum) { + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); + // Perform fast-path value-number based elimination of values inherited from // dominators. - } else if (Value* repl = lookupNumber(I->getParent(), num)) { + } else if (Value *repl = lookupNumber(I->getParent(), Num)) { // Remove it! VN.erase(I); I->replaceAllUsesWith(repl); - if (isa(repl->getType())) + if (MD && repl->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); toErase.push_back(I); return true; -#if 0 - // Perform slow-pathvalue-number based elimination with phi construction. 
- } else if (Value* repl = AttemptRedundancyElimination(I, num)) { - // Remove it! - VN.erase(I); - I->replaceAllUsesWith(repl); - if (isa(repl->getType())) - MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); - return true; -#endif } else { - localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); + localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); } - + return false; } /// runOnFunction - This is the main transformation entry point for a function. bool GVN::runOnFunction(Function& F) { - MD = &getAnalysis(); + if (!NoLoads) + MD = &getAnalysis(); DT = &getAnalysis(); VN.setAliasAnalysis(&getAnalysis()); VN.setMemDep(MD); VN.setDomTree(DT); - - bool changed = false; - bool shouldContinue = true; - + + bool Changed = false; + bool ShouldContinue = true; + // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { - BasicBlock* BB = FI; + BasicBlock *BB = FI; ++FI; bool removedBlock = MergeBlockIntoPredecessor(BB, this); if (removedBlock) NumGVNBlocks++; - - changed |= removedBlock; + + Changed |= removedBlock; } - + unsigned Iteration = 0; - - while (shouldContinue) { - DEBUG(errs() << "GVN iteration: " << Iteration << "\n"); - shouldContinue = iterateOnFunction(F); - changed |= shouldContinue; + + while (ShouldContinue) { + DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); + ShouldContinue = iterateOnFunction(F); + if (splitCriticalEdges()) + ShouldContinue = true; + Changed |= ShouldContinue; ++Iteration; } - + if (EnablePRE) { bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); - changed |= PREChanged; + Changed |= PREChanged; } } // FIXME: Should perform GVN again after PRE does something. PRE can move @@ -1497,27 +2042,27 @@ bool GVN::runOnFunction(Function& F) { cleanupGlobalSets(); - return changed; + return Changed; } -bool GVN::processBlock(BasicBlock* BB) { +bool GVN::processBlock(BasicBlock *BB) { // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and // incrementing BI before processing an instruction). SmallVector toErase; - bool changed_function = false; - + bool ChangedFunction = false; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - changed_function |= processInstruction(BI, toErase); + ChangedFunction |= processInstruction(BI, toErase); if (toErase.empty()) { ++BI; continue; } - + // If we need some instructions deleted, do it now. NumGVNInstr += toErase.size(); - + // Avoid iterator invalidation. bool AtStart = BI == BB->begin(); if (!AtStart) @@ -1525,8 +2070,8 @@ bool GVN::processBlock(BasicBlock* BB) { for (SmallVector::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) { - DEBUG(errs() << "GVN removed: " << **I << '\n'); - MD->removeInstruction(*I); + DEBUG(dbgs() << "GVN removed: " << **I << '\n'); + if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); } @@ -1537,106 +2082,102 @@ bool GVN::processBlock(BasicBlock* BB) { else ++BI; } - - return changed_function; + + return ChangedFunction; } /// performPRE - Perform a purely local form of PRE that looks for diamond /// control flow patterns and attempts to perform simple PRE at the join point. 
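
The diamond test that performPRE implements below amounts to counting predecessors with and without the value: the expression must be missing from exactly one predecessor and present in at least one other, so a single inserted copy plus a PHI removes the original computation without growing code along any other path. A standalone sketch of that test:

#include <map>
#include <vector>

// Return the lone predecessor missing the value, or null if PRE should not
// be attempted (value missing in zero or several preds, or present nowhere).
template <typename BlockT>
BlockT *findSinglePredMissingValue(const std::vector<BlockT *> &Preds,
                                   const std::map<BlockT *, bool> &HasValue) {
  BlockT *Missing = nullptr;
  unsigned NumWith = 0, NumWithout = 0;
  for (BlockT *P : Preds) {
    auto It = HasValue.find(P);
    if (It != HasValue.end() && It->second) {
      ++NumWith;
    } else {
      ++NumWithout;
      Missing = P;
    }
  }
  return (NumWithout == 1 && NumWith != 0) ? Missing : nullptr;
}
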
-bool GVN::performPRE(Function& F) { +bool GVN::performPRE(Function &F) { bool Changed = false; - SmallVector, 4> toSplit; DenseMap predMap; for (df_iterator DI = df_begin(&F.getEntryBlock()), DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { - BasicBlock* CurrentBlock = *DI; - + BasicBlock *CurrentBlock = *DI; + // Nothing to PRE in the entry block. if (CurrentBlock == &F.getEntryBlock()) continue; - + for (BasicBlock::iterator BI = CurrentBlock->begin(), BE = CurrentBlock->end(); BI != BE; ) { Instruction *CurInst = BI++; - if (isa(CurInst) || isa(CurInst) || - isa(CurInst) || - (CurInst->getType() == Type::getVoidTy(F.getContext())) || + if (isa(CurInst) || + isa(CurInst) || isa(CurInst) || + CurInst->getType()->isVoidTy() || CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || isa(CurInst)) continue; - uint32_t valno = VN.lookup(CurInst); - + uint32_t ValNo = VN.lookup(CurInst); + // Look for the predecessors for PRE opportunities. We're // only trying to solve the basic diamond case, where // a value is computed in the successor and one predecessor, // but not the other. We also explicitly disallow cases // where the successor is its own predecessor, because they're // more complicated to get right. - unsigned numWith = 0; - unsigned numWithout = 0; - BasicBlock* PREPred = 0; + unsigned NumWith = 0; + unsigned NumWithout = 0; + BasicBlock *PREPred = 0; predMap.clear(); for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); PI != PE; ++PI) { // We're not interested in PRE where the block is its - // own predecessor, on in blocks with predecessors + // own predecessor, or in blocks with predecessors // that are not reachable. if (*PI == CurrentBlock) { - numWithout = 2; + NumWithout = 2; break; } else if (!localAvail.count(*PI)) { - numWithout = 2; + NumWithout = 2; break; } - - DenseMap::iterator predV = - localAvail[*PI]->table.find(valno); + + DenseMap::iterator predV = + localAvail[*PI]->table.find(ValNo); if (predV == localAvail[*PI]->table.end()) { PREPred = *PI; - numWithout++; + NumWithout++; } else if (predV->second == CurInst) { - numWithout = 2; + NumWithout = 2; } else { predMap[*PI] = predV->second; - numWith++; + NumWith++; } } - + // Don't do PRE when it might increase code size, i.e. when // we would need to insert instructions in more than one pred. - if (numWithout != 1 || numWith == 0) + if (NumWithout != 1 || NumWith == 0) continue; + // Don't do PRE across indirect branch. + if (isa(PREPred->getTerminator())) + continue; + // We can't do PRE safely on a critical edge, so instead we schedule // the edge to be split and perform the PRE the next time we iterate // on the function. - unsigned succNum = 0; - for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors(); - i != e; ++i) - if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) { - succNum = i; - break; - } - - if (isCriticalEdge(PREPred->getTerminator(), succNum)) { - toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum)); + unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); + if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { + toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); continue; } - - // Instantiate the expression the in predecessor that lacked it. + + // Instantiate the expression in the predecessor that lacked it. // Because we are going top-down through the block, all value numbers // will be available in the predecessor by the time we need them. 
Any - // that weren't original present will have been instantiated earlier + // that weren't originally present will have been instantiated earlier // in this loop. - Instruction* PREInstr = CurInst->clone(CurInst->getContext()); + Instruction *PREInstr = CurInst->clone(); bool success = true; for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) { Value *Op = PREInstr->getOperand(i); if (isa(Op) || isa(Op) || isa(Op)) continue; - + if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) { PREInstr->setOperand(i, V); } else { @@ -1644,25 +2185,25 @@ bool GVN::performPRE(Function& F) { break; } } - + // Fail out if we encounter an operand that is not available in - // the PRE predecessor. This is typically because of loads which + // the PRE predecessor. This is typically because of loads which // are not value numbered precisely. if (!success) { delete PREInstr; DEBUG(verifyRemoved(PREInstr)); continue; } - + PREInstr->insertBefore(PREPred->getTerminator()); PREInstr->setName(CurInst->getName() + ".pre"); predMap[PREPred] = PREInstr; - VN.add(PREInstr, valno); + VN.add(PREInstr, ValNo); NumGVNPRE++; - + // Update the availability map to include the new instruction. - localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr)); - + localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr)); + // Create a PHI to make the value available in this block. PHINode* Phi = PHINode::Create(CurInst->getType(), CurInst->getName() + ".pre-phi", @@ -1670,28 +2211,40 @@ bool GVN::performPRE(Function& F) { for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); PI != PE; ++PI) Phi->addIncoming(predMap[*PI], *PI); - - VN.add(Phi, valno); - localAvail[CurrentBlock]->table[valno] = Phi; - + + VN.add(Phi, ValNo); + localAvail[CurrentBlock]->table[ValNo] = Phi; + CurInst->replaceAllUsesWith(Phi); - if (isa(Phi->getType())) + if (MD && Phi->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); - - DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n'); - MD->removeInstruction(CurInst); + + DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); + if (MD) MD->removeInstruction(CurInst); CurInst->eraseFromParent(); DEBUG(verifyRemoved(CurInst)); Changed = true; } } - - for (SmallVector, 4>::iterator - I = toSplit.begin(), E = toSplit.end(); I != E; ++I) - SplitCriticalEdge(I->first, I->second, this); - - return Changed || toSplit.size(); + + if (splitCriticalEdges()) + Changed = true; + + return Changed; +} + +/// splitCriticalEdges - Split critical edges found during the previous +/// iteration that may enable further optimization. +bool GVN::splitCriticalEdges() { + if (toSplit.empty()) + return false; + do { + std::pair Edge = toSplit.pop_back_val(); + SplitCriticalEdge(Edge.first, Edge.second, this); + } while (!toSplit.empty()); + MD->invalidateCachedPredecessors(); + return true; } /// iterateOnFunction - Executes one iteration of GVN @@ -1708,25 +2261,24 @@ bool GVN::iterateOnFunction(Function &F) { } // Top-down walk of the dominator tree - bool changed = false; + bool Changed = false; #if 0 // Needed for value numbering with phi construction to work. 
   ReversePostOrderTraversal<Function*> RPOT(&F);
   for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
        RE = RPOT.end(); RI != RE; ++RI)
-    changed |= processBlock(*RI);
+    Changed |= processBlock(*RI);
 #else
   for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
        DE = df_end(DT->getRootNode()); DI != DE; ++DI)
-    changed |= processBlock(DI->getBlock());
+    Changed |= processBlock(DI->getBlock());
 #endif
 
-  return changed;
+  return Changed;
 }
 
 void GVN::cleanupGlobalSets() {
   VN.clear();
-  phiMap.clear();
 
   for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
        I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
@@ -1739,26 +2291,14 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
   VN.verifyRemoved(Inst);
 
-  // Walk through the PHI map to make sure the instruction isn't hiding in there
-  // somewhere.
-  for (PhiMapType::iterator
-       I = phiMap.begin(), E = phiMap.end(); I != E; ++I) {
-    assert(I->first != Inst && "Inst is still a key in PHI map!");
-
-    for (SmallPtrSet<Instruction*, 4>::iterator
-         II = I->second.begin(), IE = I->second.end(); II != IE; ++II) {
-      assert(*II != Inst && "Inst is still a value in PHI map!");
-    }
-  }
-
   // Walk through the value number scope to make sure the instruction isn't
   // ferreted away in it.
-  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
        I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
     const ValueNumberScope *VNS = I->second;
     while (VNS) {
-      for (DenseMap<uint32_t, Value*>::iterator
+      for (DenseMap<uint32_t, Value*>::const_iterator
            II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
         assert(II->second != Inst && "Inst still in value numbering scope!");
       }