X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FEarlyCSE.cpp;h=7ef062e71ff3ad7ba102fa1ec3cdc3625ddb3e83;hp=4c28d4bc5f7d4010e3f70ce95dd3854870c4d624;hb=912373de69045e491d6a301611ce31a2914a7d43;hpb=3f8a9448c501898a912830554762408460b9a61d

diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 4c28d4bc5f7..7ef062e71ff 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -281,21 +281,31 @@ public:
   /// that dominated values can succeed in their lookup.
   ScopedHTType AvailableValues;
 
-  /// \brief A scoped hash table of the current values of loads.
+  /// A scoped hash table of the current values of previously encountered
+  /// memory locations.
   ///
-  /// This allows us to get efficient access to dominating loads when we have
-  /// a fully redundant load. In addition to the most recent load, we keep
-  /// track of a generation count of the read, which is compared against the
-  /// current generation count. The current generation count is incremented
+  /// This allows us to get efficient access to dominating loads or stores when
+  /// we have a fully redundant load. In addition to the most recent load, we
+  /// keep track of a generation count of the read, which is compared against
+  /// the current generation count. The current generation count is incremented
   /// after every possibly writing memory operation, which ensures that we only
-  /// CSE loads with other loads that have no intervening store.
+  /// CSE loads with other loads that have no intervening store. Ordering
+  /// events (such as fences or atomic instructions) increment the generation
+  /// count as well; essentially, we model these as writes to all possible
+  /// locations. Note that atomic and/or volatile loads and stores can be
+  /// present in the table; it is the responsibility of the consumer to inspect
+  /// the atomicity/volatility if needed.
   struct LoadValue {
     Value *Data;
     unsigned Generation;
     int MatchingId;
-    LoadValue() : Data(nullptr), Generation(0), MatchingId(-1) {}
-    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId)
-        : Data(Data), Generation(Generation), MatchingId(MatchingId) {}
+    bool IsAtomic;
+    LoadValue()
+        : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+              bool IsAtomic)
+        : Data(Data), Generation(Generation), MatchingId(MatchingId),
+          IsAtomic(IsAtomic) {}
   };
   typedef RecyclingAllocator<BumpPtrAllocator,
                              ScopedHashTableVal<Value *, LoadValue>>
@@ -401,15 +411,42 @@ private:
       if (IsTargetMemInst) return Info.WriteMem;
       return isa<StoreInst>(Inst);
     }
-    bool isSimple() const {
-      if (IsTargetMemInst) return Info.IsSimple;
+    bool isAtomic() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return false;
+      }
+      return Inst->isAtomic();
+    }
+    bool isUnordered() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return true;
+      }
       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-        return LI->isSimple();
+        return LI->isUnordered();
       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-        return SI->isSimple();
+        return SI->isUnordered();
       }
-      return Inst->isAtomic();
+      // Conservative answer
+      return !Inst->isAtomic();
+    }
+
+    bool isVolatile() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return false;
+      }
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isVolatile();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->isVolatile();
+      }
+      // Conservative answer
+      return true;
     }
+
+
     bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
       return (getPointerOperand() == Inst.getPointerOperand() &&
               getMatchingId() == Inst.getMatchingId());
@@ -433,8 +470,14 @@ private:
       }
       return nullptr;
     }
-    bool mayReadFromMemory() const { return Inst->mayReadFromMemory(); }
-    bool mayWriteToMemory() const { return Inst->mayWriteToMemory(); }
+    bool mayReadFromMemory() const {
+      if (IsTargetMemInst) return Info.ReadMem;
+      return Inst->mayReadFromMemory();
+    }
+    bool mayWriteToMemory() const {
+      if (IsTargetMemInst) return Info.WriteMem;
+      return Inst->mayWriteToMemory();
+    }
 
   private:
     bool IsTargetMemInst;
@@ -555,20 +598,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     ParseMemoryInst MemInst(Inst, TTI);
     // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
-      // Ignore volatile or ordered loads.
-      if (!MemInst.isSimple()) {
+      // (conservatively) we can't peek past the ordering implied by this
+      // operation, but we can add this load to our set of available values
+      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
         LastStore = nullptr;
-        // Don't CSE across synchronization boundaries.
-        if (Inst->mayWriteToMemory())
-          ++CurrentGeneration;
-        continue;
+        ++CurrentGeneration;
       }
 
      // If we have an available version of this load, and if it is the right
      // generation, replace this instruction.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
-          InVal.MatchingId == MemInst.getMatchingId()) {
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing loads with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered() &&
+          // We can't replace an atomic load with one which isn't also atomic.
+          InVal.IsAtomic >= MemInst.isAtomic()) {
        Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
        if (Op != nullptr) {
          DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
@@ -585,7 +630,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       // Otherwise, remember that we have this instruction.
       AvailableLoads.insert(
           MemInst.getPointerOperand(),
-          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
+          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                    MemInst.isAtomic()));
       LastStore = nullptr;
       continue;
     }
@@ -632,6 +678,33 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       continue;
     }
 
+    // write back DSE - If we write back the same value we just loaded from
+    // the same location and haven't passed any intervening writes or ordering
+    // operations, we can remove the write. The primary benefit is in allowing
+    // the available load table to remain valid and value forward past where
+    // the store originally was.
+    if (MemInst.isValid() && MemInst.isStore()) {
+      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+      if (InVal.Data &&
+          InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+          InVal.Generation == CurrentGeneration &&
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing stores with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered()) {
+        assert((!LastStore ||
+                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
+                MemInst.getPointerOperand()) &&
+               "can't have an intervening store!");
+        DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumDSE;
+        // We can avoid incrementing the generation count since we were able
+        // to eliminate this store.
+        continue;
+      }
+    }
+
     // Okay, this isn't something we can CSE at all. Check to see if it is
     // something that could modify memory. If so, our available memory values
     // cannot be used so bump the generation count.
@@ -641,8 +714,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads. Delete the earlier store.
+        // At the moment, we don't remove ordered stores, but do remove
+        // unordered atomic stores. There's no special requirement (for
+        // unordered atomics) about removing atomic stores only in favor of
+        // other atomic stores since we're going to execute the non-atomic
+        // one anyway and the atomic one might never have become visible.
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+          assert(LastStoreMemInst.isUnordered() &&
+                 !LastStoreMemInst.isVolatile() &&
+                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                         << " due to: " << *Inst << '\n');
@@ -661,11 +742,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
        // the store.
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
-            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
-
-        // Remember that this was the last normal store we saw for DSE.
-        if (MemInst.isSimple())
+            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                      MemInst.isAtomic()));
+
+        // Remember that this was the last unordered store we saw for DSE. We
+        // don't yet handle DSE on ordered or volatile stores since we don't
+        // have a good way to model the ordering requirement for following
+        // passes once the store is removed. We could insert a fence, but
+        // since fences are slightly stronger than stores in their ordering,
+        // it's not clear this is a profitable transform. Another option would
+        // be to merge the ordering with that of the post dominating store.
+        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
+        else
+          LastStore = nullptr;
      }
    }
  }
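
A note on the atomicity check added in the load-CSE path above: because C++ compares bools as integers, the expression InVal.IsAtomic >= MemInst.isAtomic() encodes "the available value must come from an access at least as atomic as the load being removed." The following is a minimal standalone sketch of that rule, not taken from the patch; the struct, function, and parameter names here are invented purely for illustration.

// Hypothetical illustration of the "at least as atomic" rule used in the
// load-CSE check above; names are invented for this sketch.
#include <cassert>

struct AvailableValue {
  bool IsAtomic; // was the instruction that produced this value atomic?
};

// A cached value may stand in for a later load only if the access that
// produced it was at least as atomic as the load being removed. With
// bools, >= expresses exactly that implication: true >= false holds,
// but false >= true does not.
static bool canReplaceLoad(const AvailableValue &InVal, bool LoadIsAtomic) {
  return InVal.IsAtomic >= LoadIsAtomic;
}

int main() {
  assert(canReplaceLoad({true}, false));   // atomic value, plain load: OK
  assert(canReplaceLoad({true}, true));    // atomic value, atomic load: OK
  assert(canReplaceLoad({false}, false));  // plain value, plain load: OK
  assert(!canReplaceLoad({false}, true));  // plain value, atomic load: not OK
  return 0;
}

In the patch itself this comparison only fires after the volatility and ordering guards (!MemInst.isVolatile() && MemInst.isUnordered()), so the only atomic loads that can actually be removed are unordered ones.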