X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FEarlyCSE.cpp;h=de539d53a4f55b3adf2c07da85d42d53c3031d0c;hb=45ef74f29c3cbccc33cbf05e0c26bdc029ce997b;hp=5daac84aa3e1f0820c8ecb97de560275533db90d;hpb=80c55f265d1a68b7f03845a3e9356447f7d258c8;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 5daac84aa3e..de539d53a4f 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -16,8 +16,10 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -27,7 +29,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -42,10 +44,6 @@ STATISTIC(NumCSELoad, "Number of load instructions CSE'd"); STATISTIC(NumCSECall, "Number of call instructions CSE'd"); STATISTIC(NumDSE, "Number of trivial dead stores removed"); -static unsigned getHash(const void *V) { - return DenseMapInfo::getHashValue(V); -} - //===----------------------------------------------------------------------===// // SimpleValue //===----------------------------------------------------------------------===// @@ -239,16 +237,10 @@ template <> struct DenseMapInfo { unsigned DenseMapInfo::getHashValue(CallValue Val) { Instruction *Inst = Val.Inst; - // Hash in all of the operands as pointers. - unsigned Res = 0; - for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i) { - assert(!Inst->getOperand(i)->getType()->isMetadataTy() && - "Cannot value number calls with metadata operands"); - Res ^= getHash(Inst->getOperand(i)) << (i & 0xF); - } - - // Mix in the opcode. - return (Res << 1) ^ Inst->getOpcode(); + // Hash all of the operands as pointers and mix in the opcode. + return hash_combine( + Inst->getOpcode(), + hash_combine_range(Inst->value_op_begin(), Inst->value_op_end())); } bool DenseMapInfo::isEqual(CallValue LHS, CallValue RHS) { @@ -272,8 +264,6 @@ namespace { /// expected that a later pass of GVN will catch the interesting/hard cases. class EarlyCSE { public: - Function &F; - const DataLayout *DL; const TargetLibraryInfo &TLI; const TargetTransformInfo &TTI; DominatorTree &DT; @@ -299,12 +289,19 @@ public: /// current generation count. The current generation count is incremented /// after every possibly writing memory operation, which ensures that we only /// CSE loads with other loads that have no intervening store. - typedef RecyclingAllocator< - BumpPtrAllocator, - ScopedHashTableVal>> + struct LoadValue { + Value *Data; + unsigned Generation; + int MatchingId; + LoadValue() : Data(nullptr), Generation(0), MatchingId(-1) {} + LoadValue(Value *Data, unsigned Generation, unsigned MatchingId) + : Data(Data), Generation(Generation), MatchingId(MatchingId) {} + }; + typedef RecyclingAllocator> LoadMapAllocator; - typedef ScopedHashTable, - DenseMapInfo, LoadMapAllocator> LoadHTType; + typedef ScopedHashTable, + LoadMapAllocator> LoadHTType; LoadHTType AvailableLoads; /// \brief A scoped hash table of the current values of read-only call @@ -318,11 +315,9 @@ public: unsigned CurrentGeneration; /// \brief Set up the EarlyCSE runner for a particular function. - EarlyCSE(Function &F, const DataLayout *DL, const TargetLibraryInfo &TLI, - const TargetTransformInfo &TTI, DominatorTree &DT, - AssumptionCache &AC) - : F(F), DL(DL), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) { - } + EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, + DominatorTree &DT, AssumptionCache &AC) + : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {} bool run(); @@ -338,8 +333,8 @@ private: CallScope(AvailableCalls) {} private: - NodeScope(const NodeScope &) LLVM_DELETED_FUNCTION; - void operator=(const NodeScope &) LLVM_DELETED_FUNCTION; + NodeScope(const NodeScope &) = delete; + void operator=(const NodeScope &) = delete; ScopedHTType::ScopeTy Scope; LoadHTType::ScopeTy LoadScope; @@ -375,8 +370,8 @@ private: void process() { Processed = true; } private: - StackNode(const StackNode &) LLVM_DELETED_FUNCTION; - void operator=(const StackNode &) LLVM_DELETED_FUNCTION; + StackNode(const StackNode &) = delete; + void operator=(const StackNode &) = delete; // Members. unsigned CurrentGeneration; @@ -420,17 +415,17 @@ private: Ptr = SI->getPointerOperand(); } } - bool isLoad() { return Load; } - bool isStore() { return Store; } - bool isVolatile() { return Vol; } - bool isMatchingMemLoc(const ParseMemoryInst &Inst) { + bool isLoad() const { return Load; } + bool isStore() const { return Store; } + bool isVolatile() const { return Vol; } + bool isMatchingMemLoc(const ParseMemoryInst &Inst) const { return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId; } - bool isValid() { return Ptr != nullptr; } - int getMatchingId() { return MatchingId; } - Value *getPtr() { return Ptr; } - bool mayReadFromMemory() { return MayReadFromMemory; } - bool mayWriteToMemory() { return MayWriteToMemory; } + bool isValid() const { return Ptr != nullptr; } + int getMatchingId() const { return MatchingId; } + Value *getPtr() const { return Ptr; } + bool mayReadFromMemory() const { return MayReadFromMemory; } + bool mayWriteToMemory() const { return MayWriteToMemory; } private: bool Load; @@ -472,6 +467,30 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (!BB->getSinglePredecessor()) ++CurrentGeneration; + // If this node has a single predecessor which ends in a conditional branch, + // we can infer the value of the branch condition given that we took this + // path. We need the single predeccesor to ensure there's not another path + // which reaches this block where the condition might hold a different + // value. Since we're adding this to the scoped hash table (like any other + // def), it will have been popped if we encounter a future merge block. + if (BasicBlock *Pred = BB->getSinglePredecessor()) + if (auto *BI = dyn_cast(Pred->getTerminator())) + if (BI->isConditional()) + if (auto *CondInst = dyn_cast(BI->getCondition())) + if (SimpleValue::canHandle(CondInst)) { + assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB); + auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ? + ConstantInt::getTrue(BB->getContext()) : + ConstantInt::getFalse(BB->getContext()); + AvailableValues.insert(CondInst, ConditionalConstant); + DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '" + << CondInst->getName() << "' as " << *ConditionalConstant + << " in " << BB->getName() << "\n"); + // Replace all dominated uses with the known value + replaceDominatedUsesWith(CondInst, ConditionalConstant, DT, + BasicBlockEdge(Pred, BB)); + } + /// LastStore - Keep track of the last non-volatile store that we saw... for /// as long as there in no instruction that reads memory. If we see a store /// to the same location, we delete the dead store. This zaps trivial dead @@ -479,11 +498,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { Instruction *LastStore = nullptr; bool Changed = false; + const DataLayout &DL = BB->getModule()->getDataLayout(); // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { - Instruction *Inst = I++; + Instruction *Inst = &*I++; // Dead instructions should just be removed. if (isInstructionTriviallyDead(Inst, &TLI)) { @@ -537,18 +557,21 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Ignore volatile loads. if (MemInst.isVolatile()) { LastStore = nullptr; + // Don't CSE across synchronization boundaries. + if (Inst->mayWriteToMemory()) + ++CurrentGeneration; continue; } // If we have an available version of this load, and if it is the right // generation, replace this instruction. - std::pair InVal = - AvailableLoads.lookup(MemInst.getPtr()); - if (InVal.first != nullptr && InVal.second == CurrentGeneration) { - Value *Op = getOrCreateResult(InVal.first, Inst->getType()); + LoadValue InVal = AvailableLoads.lookup(MemInst.getPtr()); + if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration && + InVal.MatchingId == MemInst.getMatchingId()) { + Value *Op = getOrCreateResult(InVal.Data, Inst->getType()); if (Op != nullptr) { DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst - << " to: " << *InVal.first << '\n'); + << " to: " << *InVal.Data << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(Op); Inst->eraseFromParent(); @@ -559,8 +582,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { } // Otherwise, remember that we have this instruction. - AvailableLoads.insert(MemInst.getPtr(), std::pair( - Inst, CurrentGeneration)); + AvailableLoads.insert( + MemInst.getPtr(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); LastStore = nullptr; continue; } @@ -596,6 +620,17 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { continue; } + // A release fence requires that all stores complete before it, but does + // not prevent the reordering of following loads 'before' the fence. As a + // result, we don't need to consider it as writing to memory and don't need + // to advance the generation. We do need to prevent DSE across the fence, + // but that's handled above. + if (FenceInst *FI = dyn_cast(Inst)) + if (FI->getOrdering() == Release) { + assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above"); + continue; + } + // Okay, this isn't something we can CSE at all. Check to see if it is // something that could modify memory. If so, our available memory values // cannot be used so bump the generation count. @@ -623,8 +658,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // version of the pointer. It is safe to forward from volatile stores // to non-volatile loads, so we don't have to check for volatility of // the store. - AvailableLoads.insert(MemInst.getPtr(), std::pair( - Inst, CurrentGeneration)); + AvailableLoads.insert( + MemInst.getPtr(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); // Remember that this was the last store we saw for DSE. if (!MemInst.isVolatile()) @@ -641,7 +677,7 @@ bool EarlyCSE::run() { // gains over vector when the container becomes very large due to the // specific access patterns. For more information see the mailing list // discussion on this: - // http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html + // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html std::deque nodesToProcess; bool Changed = false; @@ -692,14 +728,12 @@ bool EarlyCSE::run() { PreservedAnalyses EarlyCSEPass::run(Function &F, AnalysisManager *AM) { - const DataLayout *DL = F.getParent()->getDataLayout(); - auto &TLI = AM->getResult(F); auto &TTI = AM->getResult(F); auto &DT = AM->getResult(F); auto &AC = AM->getResult(F); - EarlyCSE CSE(F, DL, TLI, TTI, DT, AC); + EarlyCSE CSE(TLI, TTI, DT, AC); if (!CSE.run()) return PreservedAnalyses::all(); @@ -731,14 +765,12 @@ public: if (skipOptnoneFunction(F)) return false; - DataLayoutPass *DLP = getAnalysisIfAvailable(); - auto *DL = DLP ? &DLP->getDataLayout() : nullptr; auto &TLI = getAnalysis().getTLI(); - auto &TTI = getAnalysis().getTTI(); + auto &TTI = getAnalysis().getTTI(F); auto &DT = getAnalysis().getDomTree(); auto &AC = getAnalysis().getAssumptionCache(F); - EarlyCSE CSE(F, DL, TLI, TTI, DT, AC); + EarlyCSE CSE(TLI, TTI, DT, AC); return CSE.run(); } @@ -748,6 +780,7 @@ public: AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.setPreservesCFG(); } };