X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FGVN.cpp;h=0d2822699d9c95e3784ff104b6f9aec0ca154c11;hb=aad94aa4375d5ed43be728e03d91751c102ff958;hp=eb6b901e92938f18be0ddca550e00f2a8994d9ff;hpb=9d2ed8e632b71914b2a668932f4f49b87c3ca0b1;p=oota-llvm.git

diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index eb6b901e929..0d2822699d9 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -17,7 +17,6 @@
 #define DEBUG_TYPE "gvn"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/BasicBlock.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
@@ -25,30 +24,32 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Operator.h"
-#include "llvm/Value.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include
 using namespace llvm;
 
 STATISTIC(NumGVNInstr,  "Number of instructions deleted");
@@ -60,7 +61,6 @@ STATISTIC(NumPRELoad,   "Number of loads PRE'd");
 
 static cl::opt<bool> EnablePRE("enable-pre",
                                cl::init(true), cl::Hidden);
 static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
-static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
 
 //===----------------------------------------------------------------------===//
 //                         ValueTable Class
 //===----------------------------------------------------------------------===//
@@ -127,21 +127,14 @@ namespace {
         return false;
       else if (function != other.function)
         return false;
-      else {
-        if (varargs.size() != other.varargs.size())
-          return false;
-
-        for (size_t i = 0; i < varargs.size(); ++i)
-          if (varargs[i] != other.varargs[i])
-            return false;
-
-        return true;
-      }
+      else if (varargs != other.varargs)
+        return false;
+      return true;
     }
 
-    bool operator!=(const Expression &other) const {
+    /*bool operator!=(const Expression &other) const {
       return !(*this == other);
-    }
+    }*/
   };
 
   class ValueTable {
@@ -164,7 +157,6 @@
       Expression create_expression(CastInst* C);
       Expression create_expression(GetElementPtrInst* G);
       Expression create_expression(CallInst* C);
-      Expression create_expression(Constant* C);
       Expression create_expression(ExtractValueInst* C);
       Expression create_expression(InsertValueInst* C);
@@ -176,7 +168,6 @@
       void add(Value *V, uint32_t num);
       void clear();
       void erase(Value *v);
-      unsigned size();
       void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
       AliasAnalysis *getAliasAnalysis() const { return AA; }
       void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
@@ -216,9 +207,6 @@ template <> struct DenseMapInfo<Expression> {
     return LHS == RHS;
   }
 };
-
-template <>
-struct isPodLike<Expression> { static const bool value = true; };
 
 }
@@ -271,7 +259,8 @@ Expression ValueTable::create_expression(CallInst* C) {
   e.function = C->getCalledFunction();
   e.opcode = Expression::CALL;
 
-  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+  CallSite CS(C);
+  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
        I != E; ++I)
     e.varargs.push_back(lookup_or_add(*I));
@@ -447,14 +436,14 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
   if (local_dep.isDef()) {
     CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
 
-    if (local_cdep->getNumOperands() != C->getNumOperands()) {
+    if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
       valueNumbering[C] = nextValueNumber;
       return nextValueNumber++;
     }
 
-    for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-      uint32_t c_vn = lookup_or_add(C->getOperand(i));
-      uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+      uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
       if (c_vn != cd_vn) {
         valueNumbering[C] = nextValueNumber;
         return nextValueNumber++;
@@ -504,13 +493,13 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
       return nextValueNumber++;
     }
 
-    if (cdep->getNumOperands() != C->getNumOperands()) {
+    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
       valueNumbering[C] = nextValueNumber;
       return nextValueNumber++;
    }
-    for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-      uint32_t c_vn = lookup_or_add(C->getOperand(i));
-      uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
       if (c_vn != cd_vn) {
         valueNumbering[C] = nextValueNumber;
         return nextValueNumber++;
@@ -662,17 +651,64 @@ namespace {
     bool runOnFunction(Function &F);
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit GVN(bool nopre = false, bool noloads = false)
-      : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }
+    explicit GVN(bool noloads = false)
+        : FunctionPass(ID), NoLoads(noloads), MD(0) {
+      initializeGVNPass(*PassRegistry::getPassRegistry());
+    }
 
   private:
-    bool NoPRE;
     bool NoLoads;
     MemoryDependenceAnalysis *MD;
     DominatorTree *DT;
+    const TargetData* TD;
 
     ValueTable VN;
-    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
+
+    /// NumberTable - A mapping from value numbers to lists of Value*'s that
+    /// have that value number.  Use lookupNumber to query it.
+    DenseMap<uint32_t, std::pair<Value*, void*> > NumberTable;
+    BumpPtrAllocator TableAllocator;
+
+    /// insert_table - Push a new Value to the NumberTable onto the list for
+    /// its value number.
+    void insert_table(uint32_t N, Value *V) {
+      std::pair<Value*, void*>& Curr = NumberTable[N];
+      if (!Curr.first) {
+        Curr.first = V;
+        return;
+      }
+
+      std::pair<Value*, void*>* Node =
+        TableAllocator.Allocate<std::pair<Value*, void*> >();
+      Node->first = V;
+      Node->second = Curr.second;
+      Curr.second = Node;
+    }
+
+    /// erase_table - Scan the list of values corresponding to a given value
+    /// number, and remove the given value if encountered.
+    void erase_table(uint32_t N, Value *V) {
+      std::pair<Value*, void*>* Prev = 0;
+      std::pair<Value*, void*>* Curr = &NumberTable[N];
+
+      while (Curr->first != V) {
+        Prev = Curr;
+        Curr = static_cast<std::pair<Value*, void*>*>(Curr->second);
+      }
+
+      if (Prev) {
+        Prev->second = Curr->second;
+      } else {
+        if (!Curr->second) {
+          Curr->first = 0;
+        } else {
+          std::pair<Value*, void*>* Next =
+            static_cast<std::pair<Value*, void*>*>(Curr->second);
+          Curr->first = Next->first;
+          Curr->second = Next->second;
+        }
+      }
+    }
 
     // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
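The hunk above replaces GVN's old per-block ValueNumberScope chain with a single table mapping each value number to a chain of candidate leaders: the first candidate lives inline in the DenseMap entry, and overflow entries are bump-allocated nodes linked through a type-erased void* field. A minimal standalone sketch of the same pattern, with std::map and std::deque standing in for LLVM's DenseMap and BumpPtrAllocator and the Value type reduced to a plain pointer (names here are illustrative, not from the patch):

    #include <cstdint>
    #include <deque>
    #include <map>

    struct LeaderTable {
      typedef std::pair<void*, void*> Node;  // (value, next); next is a Node*
      std::map<uint32_t, Node> Table;        // head node stored inline
      std::deque<Node> Storage;              // stable addresses, freed in bulk

      void insert(uint32_t N, void *V) {
        Node &Head = Table[N];
        if (!Head.first) { Head.first = V; return; }
        Storage.push_back(Node(V, Head.second)); // new node links to old chain
        Head.second = &Storage.back();
      }

      void erase(uint32_t N, void *V) {
        Node *Prev = 0, *Curr = &Table[N];
        while (Curr->first != V) {           // caller guarantees V is present
          Prev = Curr;
          Curr = static_cast<Node*>(Curr->second);
        }
        if (Prev)
          Prev->second = Curr->second;       // unlink an overflow node
        else if (!Curr->second)
          Curr->first = 0;                   // head was the only entry
        else {                               // promote the next node into head
          Node *Next = static_cast<Node*>(Curr->second);
          *Curr = *Next;
        }
      }
    };

Freeing is wholesale: dropping the deque (or resetting the allocator, in the real pass) releases every overflow node at once, which is why erase only unlinks and never deallocates.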
@@ -699,7 +735,6 @@ namespace {
     bool processBlock(BasicBlock *BB);
     void dump(DenseMap<uint32_t, Value*>& d);
     bool iterateOnFunction(Function &F);
-    Value *CollapsePhi(PHINode* p);
     bool performPRE(Function& F);
     Value *lookupNumber(BasicBlock *BB, uint32_t num);
     void cleanupGlobalSets();
@@ -711,12 +746,15 @@
 }
 
 // createGVNPass - The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
-  return new GVN(NoPRE, NoLoads);
+FunctionPass *llvm::createGVNPass(bool NoLoads) {
+  return new GVN(NoLoads);
 }
 
-static RegisterPass<GVN> X("gvn",
-                           "Global Value Numbering");
+INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
 
 void GVN::dump(DenseMap<uint32_t, Value*>& d) {
   errs() << "{\n";
@@ -728,33 +766,6 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
   errs() << "}\n";
 }
 
-static bool isSafeReplacement(PHINode* p, Instruction *inst) {
-  if (!isa<PHINode>(inst))
-    return true;
-
-  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
-       UI != E; ++UI)
-    if (PHINode* use_phi = dyn_cast<PHINode>(UI))
-      if (use_phi->getParent() == inst->getParent())
-        return false;
-
-  return true;
-}
-
-Value *GVN::CollapsePhi(PHINode *PN) {
-  Value *ConstVal = PN->hasConstantValue(DT);
-  if (!ConstVal) return 0;
-
-  Instruction *Inst = dyn_cast<Instruction>(ConstVal);
-  if (!Inst)
-    return ConstVal;
-
-  if (DT->dominates(Inst, PN))
-    if (isSafeReplacement(PN, Inst))
-      return Inst;
-  return 0;
-}
-
 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value
 /// we're analyzing is fully available in the specified block.  As we go, keep
 /// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
@@ -869,7 +880,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
 
   const Type *StoredValTy = StoredVal->getType();
 
-  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
+  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
   uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
 
   // If the store and reload are the same size, we can always reuse it.
@@ -938,47 +949,6 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
   return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
 }
 
-/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
-/// be expressed as a base pointer plus a constant offset.  Return the base and
-/// offset to the caller.
-static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
-                                        const TargetData &TD) {
-  Operator *PtrOp = dyn_cast<Operator>(Ptr);
-  if (PtrOp == 0) return Ptr;
-
-  // Just look through bitcasts.
-  if (PtrOp->getOpcode() == Instruction::BitCast)
-    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
-
-  // If this is a GEP with constant indices, we can look through it.
-  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
-  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
-
-  gep_type_iterator GTI = gep_type_begin(GEP);
-  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
-       ++I, ++GTI) {
-    ConstantInt *OpC = cast<ConstantInt>(*I);
-    if (OpC->isZero()) continue;
-
-    // Handle a struct and array indices which add their offset to the pointer.
-    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
-      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
-    } else {
-      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
-      Offset += OpC->getSExtValue()*Size;
-    }
-  }
-
-  // Re-sign extend from the pointer size if needed to get overflow edge cases
-  // right.
-  unsigned PtrSize = TD.getPointerSizeInBits();
-  if (PtrSize < 64)
-    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
-
-  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
-}
-
-
 /// AnalyzeLoadFromClobberingWrite - This function is called when we have a
 /// memdep query of a load that ends up being a clobbering memory write (store,
 /// memset, memcpy, memmove).  This means that the write *may* provide bits used
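The helper deleted above survives as GetPointerBaseWithConstantOffset in Analysis/ValueTracking (note the new ValueTracking.h include). One detail worth seeing in isolation is its final step: offsets are accumulated in 64 bits, then re-sign-extended from the target's pointer width so wrap-around matches real pointer arithmetic. A small illustration of just that step, relying on arithmetic right shift of signed values exactly as the deleted code did (function and values are invented for the example):

    #include <cassert>
    #include <cstdint>

    static int64_t SignExtendToPointerWidth(int64_t Offset, unsigned PtrSize) {
      // Keep only the low PtrSize bits, then smear the sign bit back out.
      if (PtrSize < 64)
        Offset = (Offset << (64 - PtrSize)) >> (64 - PtrSize);
      return Offset;
    }

    int main() {
      // 0xFFFFFFFF in 32-bit pointer arithmetic is really -1.
      assert(SignExtendToPointerWidth(0xFFFFFFFFLL, 32) == -1);
      assert(SignExtendToPointerWidth(42, 32) == 42);
      return 0;
    }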
@@ -997,32 +967,29 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
     return -1;
 
   int64_t StoreOffset = 0, LoadOffset = 0;
-  Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
-  Value *LoadBase =
-    GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
+  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
+  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
   if (StoreBase != LoadBase)
     return -1;
 
   // If the load and store are to the exact same address, they should have been
   // a must alias.  AA must have gotten confused.
-  // FIXME: Study to see if/when this happens.
-  if (LoadOffset == StoreOffset) {
+  // FIXME: Study to see if/when this happens.  One case is forwarding a memset
+  // to a load from the base of the memset.
 #if 0
+  if (LoadOffset == StoreOffset) {
     dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
     << "Base       = " << *StoreBase << "\n"
     << "Store Ptr  = " << *WritePtr << "\n"
     << "Store Offs = " << StoreOffset << "\n"
     << "Load Ptr   = " << *LoadPtr << "\n";
     abort();
-#endif
-    return -1;
   }
+#endif
 
   // If the load and store don't overlap at all, the store doesn't provide
   // anything to the load.  In this case, they really don't alias at all, AA
   // must have gotten confused.
-  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
-  // remove this check, as it is duplicated with what we have below.
   uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
 
   if ((WriteSizeInBits & 7) | (LoadSize & 7))
@@ -1032,11 +999,11 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
 
   bool isAAFailure = false;
-  if (StoreOffset < LoadOffset) {
+  if (StoreOffset < LoadOffset)
     isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
-  } else {
+  else
     isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
-  }
+
   if (isAAFailure) {
 #if 0
     dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
@@ -1068,12 +1035,12 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                           StoreInst *DepSI,
                                           const TargetData &TD) {
   // Cannot handle reading from store of first-class aggregate yet.
-  if (DepSI->getOperand(0)->getType()->isStructTy() ||
-      DepSI->getOperand(0)->getType()->isArrayTy())
+  if (DepSI->getValueOperand()->getType()->isStructTy() ||
+      DepSI->getValueOperand()->getType()->isArrayTy())
     return -1;
 
   Value *StorePtr = DepSI->getPointerOperand();
-  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
   return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                         StorePtr, StoreSize, TD);
 }
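Once both pointers reduce to the same base, AnalyzeLoadFromClobberingWrite boils down to an interval test: the store can forward to the load only if the load's byte range is contained in the store's, and the result is the load's byte offset within the stored value. A sketch of that arithmetic in isolation (the function name is made up; the real code works in bits and has additional bail-outs):

    #include <cassert>
    #include <cstdint>

    static int GetForwardingOffset(int64_t StoreOffset, uint64_t StoreSize,
                                   int64_t LoadOffset, uint64_t LoadSize) {
      // Disjoint intervals: an alias-analysis failure in the caller's terms.
      if (StoreOffset + (int64_t)StoreSize <= LoadOffset ||
          LoadOffset + (int64_t)LoadSize <= StoreOffset)
        return -1;
      // Partial overlap: the store alone can't satisfy the load.
      if (LoadOffset < StoreOffset ||
          LoadOffset + (int64_t)LoadSize > StoreOffset + (int64_t)StoreSize)
        return -1;
      return (int)(LoadOffset - StoreOffset); // byte offset within the store
    }

    int main() {
      assert(GetForwardingOffset(0, 8, 4, 4) == 4);  // i32 from tail of i64
      assert(GetForwardingOffset(0, 4, 2, 4) == -1); // straddles store's end
      assert(GetForwardingOffset(0, 4, 8, 4) == -1); // no overlap at all
      return 0;
    }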
@@ -1100,7 +1067,7 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
   Constant *Src = dyn_cast<Constant>(MTI->getSource());
   if (Src == 0) return -1;
 
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src));
   if (GV == 0 || !GV->isConstant()) return -1;
 
   // See if the access is within the bounds of the transfer.
@@ -1133,8 +1100,8 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                    Instruction *InsertPt, const TargetData &TD){
   LLVMContext &Ctx = SrcVal->getType()->getContext();
 
-  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
-  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
 
   IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
@@ -1218,7 +1185,7 @@
   return ConstantFoldLoadFromConstPtr(Src, &TD);
 }
 
-
+namespace {
 
 struct AvailableValueInBlock {
   /// BB - The basic block in question.
@@ -1292,6 +1259,8 @@ struct AvailableValueInBlock {
   }
 };
 
+}
+
 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
 /// construct SSA form, allowing us to eliminate LI.  This returns the value
 /// that should be used at LI's definition site.
@@ -1309,7 +1278,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
   // Otherwise, we have to construct SSA form.
   SmallVector<PHINode*, 8> NewPHIs;
   SSAUpdater SSAUpdate(&NewPHIs);
-  SSAUpdate.Initialize(LI);
+  SSAUpdate.Initialize(LI->getType(), LI->getName());
 
   const Type *LoadTy = LI->getType();
@@ -1334,8 +1303,8 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
   return V;
 }
 
-static bool isLifetimeStart(Instruction *Inst) {
-  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
+static bool isLifetimeStart(const Instruction *Inst) {
+  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
     return II->getIntrinsicID() == Intrinsic::lifetime_start;
   return false;
 }
@@ -1346,8 +1315,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
                               SmallVectorImpl<Instruction*> &toErase) {
   // Find the non-local dependencies of the load.
   SmallVector<NonLocalDepResult, 64> Deps;
-  MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
-                                   Deps);
+  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
   //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
   //             << Deps.size() << *LI << '\n');
@@ -1375,8 +1344,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
   SmallVector<BasicBlock*, 16> UnavailableBlocks;
 
-  const TargetData *TD = 0;
-
   for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
     BasicBlock *DepBB = Deps[i].getBB();
     MemDepResult DepInfo = Deps[i].getResult();
@@ -1391,14 +1358,12 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       // read by the load, we can extract the bits we need for the load from the
       // stored value.
       if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
-        if (TD == 0)
-          TD = getAnalysisIfAvailable<TargetData>();
         if (TD && Address) {
           int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                       DepSI, *TD);
           if (Offset != -1) {
             ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
-                                                           DepSI->getOperand(0),
+                                                       DepSI->getValueOperand(),
                                                                 Offset));
             continue;
           }
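When the offset test succeeds, GetStoreValueForLoad materializes the load's bits by shifting and truncating the stored value (the real code builds the shifts with IRBuilder and derives the shift amount from TargetData's endianness). A toy scalar version of the little-endian case, purely illustrative:

    #include <cassert>
    #include <cstdint>

    static uint64_t ExtractFromStored(uint64_t StoredVal, unsigned Offset,
                                      unsigned LoadSize /* bytes */) {
      StoredVal >>= 8 * Offset;                    // skip the leading bytes
      if (LoadSize < 8)
        StoredVal &= (1ULL << (8 * LoadSize)) - 1; // truncate to load width
      return StoredVal;
    }

    int main() {
      // A store of 0x1122334455667788 followed by an i16 load at byte
      // offset 2 sees 0x5566 on a little-endian machine.
      assert(ExtractFromStored(0x1122334455667788ULL, 2, 2) == 0x5566);
      return 0;
    }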
@@ -1408,8 +1373,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       // If the clobbering value is a memset/memcpy/memmove, see if we can
       // forward a value on from it.
       if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
-        if (TD == 0)
-          TD = getAnalysisIfAvailable<TargetData>();
         if (TD && Address) {
           int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                         DepMI, *TD);
@@ -1439,13 +1402,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
       // Reject loads and stores that are to the same address but are of
       // different types if we have to.
-      if (S->getOperand(0)->getType() != LI->getType()) {
-        if (TD == 0)
-          TD = getAnalysisIfAvailable<TargetData>();
-
+      if (S->getValueOperand()->getType() != LI->getType()) {
         // If the stored value is larger or equal to the loaded value, we can
         // reuse it.
-        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
+        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                                         LI->getType(), *TD)) {
           UnavailableBlocks.push_back(DepBB);
           continue;
@@ -1453,16 +1413,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       }
 
       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
-                                                          S->getOperand(0)));
+                                                         S->getValueOperand()));
       continue;
     }
 
     if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
       // If the types mismatch and we can't handle it, reject reuse of the load.
       if (LD->getType() != LI->getType()) {
-        if (TD == 0)
-          TD = getAnalysisIfAvailable<TargetData>();
-
         // If the stored value is larger or equal to the loaded value, we can
         // reuse it.
         if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
@@ -1499,7 +1456,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       MD->invalidateCachedPointerInfo(V);
     VN.erase(LI);
     toErase.push_back(LI);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
@@ -1532,24 +1489,19 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       return false;
     if (Blockers.count(TmpBB))
       return false;
+
+    // If any of these blocks has more than one successor (i.e. if the edge we
+    // just traversed was critical), then there are other paths through this
+    // block along which the load may not be anticipated.  Hoisting the load
+    // above this block would be adding the load to execution paths along
+    // which it was not previously executed.
     if (TmpBB->getTerminator()->getNumSuccessors() != 1)
-      allSingleSucc = false;
+      return false;
   }
 
   assert(TmpBB);
   LoadBB = TmpBB;
 
-  // If we have a repl set with LI itself in it, this means we have a loop where
-  // at least one of the values is LI.  Since this means that we won't be able
-  // to eliminate LI even if we insert uses in the other predecessors, we will
-  // end up increasing code size.  Reject this by scanning for LI.
-  if (!EnableFullLoadPRE) {
-    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-      if (ValuesPerBlock[i].isSimpleValue() &&
-          ValuesPerBlock[i].getSimpleValue() == LI)
-        return false;
-  }
-
   // FIXME: It is extremely unclear what this loop is doing, other than
   // artificially restricting loadpre.
   if (isSinglePred) {
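The new early return above leans on the property that the FullyAvailableBlocks map in the next hunk tracks: a load is a PRE candidate only where it is "fully available", i.e. obtainable along every path into the block. The real IsValueFullyAvailableInBlock uses a speculative recursion with a fix-up pass; this sketch computes the same property as a plain fixed point over a toy CFG (the Block type and names are invented for illustration):

    #include <set>
    #include <vector>

    struct Block { std::vector<Block*> Preds; };

    // Avail holds blocks where the value is available outright; the result is
    // the set of blocks where it is available along every incoming path.
    static std::set<Block*> FullyAvailable(const std::vector<Block*> &All,
                                           const std::set<Block*> &Avail) {
      // Start optimistically from "everywhere", then strip blocks reachable
      // along some uncovered path until nothing changes (greatest fixed point).
      std::set<Block*> FA(All.begin(), All.end());
      bool Changed = true;
      while (Changed) {
        Changed = false;
        for (size_t i = 0; i != All.size(); ++i) {
          Block *B = All[i];
          if (Avail.count(B) || !FA.count(B))
            continue;
          bool AllPredsOK = !B->Preds.empty(); // entry without the value fails
          for (size_t p = 0; p != B->Preds.size(); ++p)
            if (!FA.count(B->Preds[p])) { AllPredsOK = false; break; }
          if (!AllPredsOK) {
            FA.erase(B);
            Changed = true;
          }
        }
      }
      return FA;
    }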
@@ -1581,6 +1533,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
     FullyAvailableBlocks[UnavailableBlocks[i]] = false;
 
+  SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
   for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
        PI != E; ++PI) {
     BasicBlock *Pred = *PI;
@@ -1596,23 +1549,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
         return false;
       }
       unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
-      toSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
-      return false;
+      NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
     }
   }
+  if (!NeedToSplit.empty()) {
+    toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
+    return false;
+  }
 
   // Decide whether PRE is profitable for this load.
   unsigned NumUnavailablePreds = PredLoads.size();
   assert(NumUnavailablePreds != 0 &&
          "Fully available value should be eliminated above!");
-  if (!EnableFullLoadPRE) {
-    // If this load is unavailable in multiple predecessors, reject it.
-    // FIXME: If we could restructure the CFG, we could make a common pred with
-    // all the preds that don't have an available LI and insert a new load into
-    // that one block.
-    if (NumUnavailablePreds != 1)
+
+  // If this load is unavailable in multiple predecessors, reject it.
+  // FIXME: If we could restructure the CFG, we could make a common pred with
+  // all the preds that don't have an available LI and insert a new load into
+  // that one block.
+  if (NumUnavailablePreds != 1)
       return false;
-  }
 
   // Check if the load can safely be moved to all the unavailable predecessors.
   bool CanDoPRE = true;
@@ -1627,7 +1582,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     // If all preds have a single successor, then we know it is safe to insert
     // the load on the pred (?!?), so we can insert code to materialize the
     // pointer if it is not available.
-    PHITransAddr Address(LI->getOperand(0), TD);
+    PHITransAddr Address(LI->getPointerOperand(), TD);
     Value *LoadPtr = 0;
     if (allSingleSucc) {
       LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
@@ -1641,7 +1596,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     // we fail PRE.
     if (LoadPtr == 0) {
       DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
-            << *LI->getOperand(0) << "\n");
+            << *LI->getPointerOperand() << "\n");
       CanDoPRE = false;
       break;
     }
@@ -1695,9 +1650,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     BasicBlock *UnavailablePred = I->first;
     Value *LoadPtr = I->second;
 
-    Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
-                                  LI->getAlignment(),
-                                  UnavailablePred->getTerminator());
+    Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
+                                        LI->getAlignment(),
+                                        UnavailablePred->getTerminator());
+
+    // Transfer the old load's TBAA tag to the new load.
+    if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
+      NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
 
     // Add the newly created load.
     ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
@@ -1716,7 +1675,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     MD->invalidateCachedPointerInfo(V);
   VN.erase(LI);
   toErase.push_back(LI);
-  NumPRELoad++;
+  ++NumPRELoad;
   return true;
 }
@@ -1746,19 +1705,19 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
   // access code.
   Value *AvailVal = 0;
   if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
-    if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+    if (TD) {
       int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                   L->getPointerOperand(),
                                                   DepSI, *TD);
       if (Offset != -1)
-        AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
+        AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
                                         L->getType(), L, *TD);
     }
 
   // If the clobbering value is a memset/memcpy/memmove, see if we can forward
   // a value on from it.
   if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
-    if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+    if (TD) {
       int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                     L->getPointerOperand(),
                                                     DepMI, *TD);
@@ -1777,7 +1736,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
       MD->invalidateCachedPointerInfo(AvailVal);
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
@@ -1797,14 +1756,13 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
   Instruction *DepInst = Dep.getInst();
   if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
-    Value *StoredVal = DepSI->getOperand(0);
+    Value *StoredVal = DepSI->getValueOperand();
 
     // The store and load are to a must-aliased pointer, but they may not
     // actually have the same type.  See if we know how to reuse the stored
     // value (depending on its type).
-    const TargetData *TD = 0;
     if (StoredVal->getType() != L->getType()) {
-      if ((TD = getAnalysisIfAvailable<TargetData>())) {
+      if (TD) {
         StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                    L, *TD);
         if (StoredVal == 0)
@@ -1823,7 +1781,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
       MD->invalidateCachedPointerInfo(StoredVal);
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
@@ -1833,9 +1791,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     // The loads are of a must-aliased pointer, but they may not actually have
     // the same type.  See if we know how to reuse the previously loaded value
     // (depending on its type).
-    const TargetData *TD = 0;
     if (DepLI->getType() != L->getType()) {
-      if ((TD = getAnalysisIfAvailable<TargetData>())) {
+      if (TD) {
         AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD);
         if (AvailableVal == 0)
           return false;
@@ -1853,7 +1810,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
       MD->invalidateCachedPointerInfo(DepLI);
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
@@ -1864,7 +1821,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     L->replaceAllUsesWith(UndefValue::get(L->getType()));
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
@@ -1875,7 +1832,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     L->replaceAllUsesWith(UndefValue::get(L->getType()));
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
 }
@@ -1883,17 +1840,31 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
   return false;
 }
 
+// lookupNumber - In order to find a leader for a given value number at a
+// specific basic block, we first obtain the list of all Values for that number,
+// and then scan the list to find one whose block dominates the block in
+// question.  This is fast because dominator tree queries consist of only
+// a few comparisons of DFS numbers.
 Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
-  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
-  if (I == localAvail.end())
-    return 0;
-
-  ValueNumberScope *Locals = I->second;
-  while (Locals) {
-    DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num);
-    if (I != Locals->table.end())
-      return I->second;
-    Locals = Locals->parent;
+  std::pair<Value*, void*> Vals = NumberTable[num];
+  if (!Vals.first) return 0;
+  Instruction *Inst = dyn_cast<Instruction>(Vals.first);
+  if (!Inst) return Vals.first;
+  BasicBlock *Parent = Inst->getParent();
+  if (DT->dominates(Parent, BB))
+    return Inst;
+
+  std::pair<Value*, void*>* Next =
+    static_cast<std::pair<Value*, void*>*>(Vals.second);
+  while (Next) {
+    Instruction *CurrInst = dyn_cast<Instruction>(Next->first);
+    if (!CurrInst) return Next->first;
+
+    BasicBlock *Parent = CurrInst->getParent();
+    if (DT->dominates(Parent, BB))
+      return CurrInst;
+
+    Next = static_cast<std::pair<Value*, void*>*>(Next->second);
   }
 
   return 0;
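lookupNumber above scans one number's chain for a value whose defining block dominates the querying block; non-instruction values (constants, arguments) win immediately because they dominate everything. The same scan abstracted away from LLVM, where Candidate, Blk and the Dominates callback are invented for the sketch (Dominates stands in for DominatorTree::dominates):

    struct Blk;  // opaque basic-block stand-in, used only through pointers

    struct Candidate {
      void *Val;             // the value filed under this number
      const Blk *DefBlock;   // defining block; null for non-instructions
      Candidate *Next;       // next entry in the chain, or null
    };

    static void *FindLeader(Candidate *Chain, const Blk *QueryBlock,
                            bool (*Dominates)(const Blk *, const Blk *)) {
      for (Candidate *C = Chain; C; C = C->Next) {
        if (!C->DefBlock)    // constants and arguments dominate everything
          return C->Val;
        if (Dominates(C->DefBlock, QueryBlock))
          return C->Val;
      }
      return 0;  // no dominating leader; the caller keeps its own instruction
    }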
@@ -1908,12 +1879,25 @@ bool GVN::processInstruction(Instruction *I,
   if (isa<DbgInfoIntrinsic>(I))
     return false;
 
+  // If the instruction can be easily simplified then do so now in preference
+  // to value numbering it.  Value numbering often exposes redundancies, for
+  // example if it determines that %y is equal to %x then the instruction
+  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
+  if (Value *V = SimplifyInstruction(I, TD, DT)) {
+    I->replaceAllUsesWith(V);
+    if (MD && V->getType()->isPointerTy())
+      MD->invalidateCachedPointerInfo(V);
+    VN.erase(I);
+    toErase.push_back(I);
+    return true;
+  }
+
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     bool Changed = processLoad(LI, toErase);
 
     if (!Changed) {
       unsigned Num = VN.lookup_or_add(LI);
-      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
+      insert_table(Num, LI);
     }
 
     return Changed;
   }
@@ -1922,71 +1906,37 @@ bool GVN::processInstruction(Instruction *I,
   uint32_t NextNum = VN.getNextUnusedValueNumber();
   unsigned Num = VN.lookup_or_add(I);
 
-  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-
-    if (!BI->isConditional() || isa<ConstantInt>(BI->getCondition()))
-      return false;
-
-    Value *BranchCond = BI->getCondition();
-    uint32_t CondVN = VN.lookup_or_add(BranchCond);
-
-    BasicBlock *TrueSucc = BI->getSuccessor(0);
-    BasicBlock *FalseSucc = BI->getSuccessor(1);
-
-    if (TrueSucc->getSinglePredecessor())
-      localAvail[TrueSucc]->table[CondVN] =
-        ConstantInt::getTrue(TrueSucc->getContext());
-    if (FalseSucc->getSinglePredecessor())
-      localAvail[FalseSucc]->table[CondVN] =
-        ConstantInt::getFalse(TrueSucc->getContext());
-
-    return false;
-
   // Allocations are always uniquely numbered, so we can save time and memory
   // by fast failing them.
-  } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
+    insert_table(Num, I);
     return false;
   }
 
-  // Collapse PHI nodes
-  if (PHINode* p = dyn_cast<PHINode>(I)) {
-    Value *constVal = CollapsePhi(p);
-
-    if (constVal) {
-      p->replaceAllUsesWith(constVal);
-      if (MD && constVal->getType()->isPointerTy())
-        MD->invalidateCachedPointerInfo(constVal);
-      VN.erase(p);
-
-      toErase.push_back(p);
-    } else {
-      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-    }
-
   // If the number we were assigned was a brand new VN, then we don't
   // need to do a lookup to see if the number already exists
   // somewhere in the domtree: it can't!
-  } else if (Num == NextNum) {
-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
-
+  if (Num == NextNum) {
+    insert_table(Num, I);
+    return false;
+  }
+
   // Perform fast-path value-number based elimination of values inherited from
   // dominators.
-  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
-    // Remove it!
-    VN.erase(I);
-    I->replaceAllUsesWith(repl);
-    if (MD && repl->getType()->isPointerTy())
-      MD->invalidateCachedPointerInfo(repl);
-    toErase.push_back(I);
-    return true;
-
-  } else {
-    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
+  Value *repl = lookupNumber(I->getParent(), Num);
+  if (repl == 0) {
+    // Failure, just remember this instance for future use.
+    insert_table(Num, I);
+    return false;
   }
 
-  return false;
+  // Remove it!
+  VN.erase(I);
+  I->replaceAllUsesWith(repl);
+  if (MD && repl->getType()->isPointerTy())
+    MD->invalidateCachedPointerInfo(repl);
+  toErase.push_back(I);
+  return true;
 }
 
 /// runOnFunction - This is the main transformation entry point for a function.
@@ -1994,6 +1944,7 @@ bool GVN::runOnFunction(Function& F) {
   if (!NoLoads)
     MD = &getAnalysis<MemoryDependenceAnalysis>();
   DT = &getAnalysis<DominatorTree>();
+  TD = getAnalysisIfAvailable<TargetData>();
   VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
   VN.setMemDep(MD);
   VN.setDomTree(DT);
@@ -2007,7 +1958,7 @@ bool GVN::runOnFunction(Function& F) {
     BasicBlock *BB = FI;
     ++FI;
     bool removedBlock = MergeBlockIntoPredecessor(BB, this);
-    if (removedBlock) NumGVNBlocks++;
+    if (removedBlock) ++NumGVNBlocks;
 
     Changed |= removedBlock;
   }
@@ -2103,6 +2054,11 @@ bool GVN::performPRE(Function &F) {
           CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
           isa<DbgInfoIntrinsic>(CurInst))
         continue;
+
+      // We don't currently value number ANY inline asm calls.
+      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+        if (CallI->isInlineAsm())
+          continue;
 
       uint32_t ValNo = VN.lookup(CurInst);
@@ -2119,27 +2075,27 @@ bool GVN::performPRE(Function &F) {
       for (pred_iterator PI = pred_begin(CurrentBlock),
            PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+        BasicBlock *P = *PI;
         // We're not interested in PRE where the block is its
         // own predecessor, or in blocks with predecessors
         // that are not reachable.
-        if (*PI == CurrentBlock) {
+        if (P == CurrentBlock) {
           NumWithout = 2;
           break;
-        } else if (!localAvail.count(*PI))  {
+        } else if (!DT->dominates(&F.getEntryBlock(), P))  {
           NumWithout = 2;
           break;
         }
 
-        DenseMap<uint32_t, Value*>::iterator predV =
-                                            localAvail[*PI]->table.find(ValNo);
-        if (predV == localAvail[*PI]->table.end()) {
-          PREPred = *PI;
-          NumWithout++;
-        } else if (predV->second == CurInst) {
+        Value* predV = lookupNumber(P, ValNo);
+        if (predV == 0) {
+          PREPred = P;
+          ++NumWithout;
+        } else if (predV == CurInst) {
           NumWithout = 2;
         } else {
-          predMap[*PI] = predV->second;
-          NumWith++;
+          predMap[P] = predV;
+          ++NumWith;
        }
      }
@@ -2194,26 +2150,29 @@ bool GVN::performPRE(Function &F) {
       PREInstr->setName(CurInst->getName() + ".pre");
       predMap[PREPred] = PREInstr;
       VN.add(PREInstr, ValNo);
-      NumGVNPRE++;
+      ++NumGVNPRE;
 
       // Update the availability map to include the new instruction.
-      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
+      insert_table(ValNo, PREInstr);
 
       // Create a PHI to make the value available in this block.
       PHINode* Phi = PHINode::Create(CurInst->getType(),
                                      CurInst->getName() + ".pre-phi",
                                      CurrentBlock->begin());
       for (pred_iterator PI = pred_begin(CurrentBlock),
-           PE = pred_end(CurrentBlock); PI != PE; ++PI)
-        Phi->addIncoming(predMap[*PI], *PI);
+           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+        BasicBlock *P = *PI;
+        Phi->addIncoming(predMap[P], P);
+      }
 
       VN.add(Phi, ValNo);
-      localAvail[CurrentBlock]->table[ValNo] = Phi;
+      insert_table(ValNo, Phi);
 
       CurInst->replaceAllUsesWith(Phi);
       if (MD && Phi->getType()->isPointerTy())
         MD->invalidateCachedPointerInfo(Phi);
       VN.erase(CurInst);
+      erase_table(ValNo, CurInst);
 
       DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
       if (MD) MD->removeInstruction(CurInst);
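The NumWith/NumWithout bookkeeping above implements the classic PRE profitability rule: insert a copy only when the expression is already available in every predecessor except exactly one, so a single insertion plus a PHI removes the original computation. A compact sketch of just that decision, with availability abstracted to a plain vector (container and names are illustrative only):

    #include <cstddef>
    #include <vector>

    // Avail[i] is the leader available in predecessor i, or null.  Returns
    // the index of the one predecessor to insert into, or -1 if PRE is not
    // sensible here.
    static int ChoosePREPred(const std::vector<void*> &Avail) {
      int Missing = -1;
      for (size_t i = 0; i != Avail.size(); ++i) {
        if (Avail[i]) continue;
        if (Missing != -1)
          return -1;      // unavailable in two predecessors: not profitable
        Missing = (int)i;
      }
      return Missing;     // -1 also when fully available (handled elsewhere)
    }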
@@ -2238,23 +2197,14 @@ bool GVN::splitCriticalEdges() {
     std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
     SplitCriticalEdge(Edge.first, Edge.second, this);
   } while (!toSplit.empty());
-  MD->invalidateCachedPredecessors();
+  if (MD) MD->invalidateCachedPredecessors();
   return true;
 }
 
 /// iterateOnFunction - Executes one iteration of GVN
 bool GVN::iterateOnFunction(Function &F) {
   cleanupGlobalSets();
-
-  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
-       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
-    if (DI->getIDom())
-      localAvail[DI->getBlock()] =
-                   new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
-    else
-      localAvail[DI->getBlock()] = new ValueNumberScope(0);
-  }
-
+
   // Top-down walk of the dominator tree
   bool Changed = false;
 #if 0
@@ -2274,11 +2224,8 @@ bool GVN::iterateOnFunction(Function &F) {
 
 void GVN::cleanupGlobalSets() {
   VN.clear();
-
-  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
-       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
-    delete I->second;
-  localAvail.clear();
+  NumberTable.clear();
+  TableAllocator.Reset();
 }
 
 /// verifyRemoved - Verify that the specified instruction does not occur in our
@@ -2288,17 +2235,14 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
 
   // Walk through the value number scope to make sure the instruction isn't
   // ferreted away in it.
-  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
-       I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
-    const ValueNumberScope *VNS = I->second;
-
-    while (VNS) {
-      for (DenseMap<uint32_t, Value*>::const_iterator
-           II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
-        assert(II->second != Inst && "Inst still in value numbering scope!");
-      }
-
-      VNS = VNS->parent;
+  for (DenseMap<uint32_t, std::pair<Value*, void*> >::const_iterator
+       I = NumberTable.begin(), E = NumberTable.end(); I != E; ++I) {
+    std::pair<Value*, void*> const * Node = &I->second;
+    assert(Node->first != Inst && "Inst still in value numbering scope!");
+
+    while (Node->second) {
+      Node = static_cast<std::pair<Value*, void*> const *>(Node->second);
+      assert(Node->first != Inst && "Inst still in value numbering scope!");
    }
  }
 }