X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FEarlyCSE.cpp;h=7ef062e71ff3ad7ba102fa1ec3cdc3625ddb3e83;hp=4c28d4bc5f7d4010e3f70ce95dd3854870c4d624;hb=912373de69045e491d6a301611ce31a2914a7d43;hpb=3f8a9448c501898a912830554762408460b9a61d

diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 4c28d4bc5f7..7ef062e71ff 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -281,21 +281,31 @@ public:
   /// that dominated values can succeed in their lookup.
   ScopedHTType AvailableValues;
 
-  /// \brief A scoped hash table of the current values of loads.
+  /// A scoped hash table of the current values of previously encountered
+  /// memory locations.
   ///
-  /// This allows us to get efficient access to dominating loads when we have
-  /// a fully redundant load. In addition to the most recent load, we keep
-  /// track of a generation count of the read, which is compared against the
-  /// current generation count. The current generation count is incremented
+  /// This allows us to get efficient access to dominating loads or stores when
+  /// we have a fully redundant load. In addition to the most recent load, we
+  /// keep track of a generation count of the read, which is compared against
+  /// the current generation count. The current generation count is incremented
   /// after every possibly writing memory operation, which ensures that we only
-  /// CSE loads with other loads that have no intervening store.
+  /// CSE loads with other loads that have no intervening store. Ordering
+  /// events (such as fences or atomic instructions) increment the generation
+  /// count as well; essentially, we model these as writes to all possible
+  /// locations. Note that atomic and/or volatile loads and stores can be
+  /// present in the table; it is the responsibility of the consumer to inspect
+  /// the atomicity/volatility if needed.
   struct LoadValue {
     Value *Data;
     unsigned Generation;
     int MatchingId;
-    LoadValue() : Data(nullptr), Generation(0), MatchingId(-1) {}
-    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId)
-        : Data(Data), Generation(Generation), MatchingId(MatchingId) {}
+    bool IsAtomic;
+    LoadValue()
+        : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+              bool IsAtomic)
+        : Data(Data), Generation(Generation), MatchingId(MatchingId),
+          IsAtomic(IsAtomic) {}
   };
   typedef RecyclingAllocator<BumpPtrAllocator,
                              ScopedHashTableVal<Value *, LoadValue>>
@@ -401,15 +411,42 @@ private:
       if (IsTargetMemInst) return Info.WriteMem;
       return isa<StoreInst>(Inst);
     }
-    bool isSimple() const {
-      if (IsTargetMemInst) return Info.IsSimple;
+    bool isAtomic() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return false;
+      }
+      return Inst->isAtomic();
+    }
+    bool isUnordered() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return true;
+      }
       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-        return LI->isSimple();
+        return LI->isUnordered();
       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-        return SI->isSimple();
+        return SI->isUnordered();
       }
-      return Inst->isAtomic();
+      // Conservative answer
+      return !Inst->isAtomic();
+    }
+
+    bool isVolatile() const {
+      if (IsTargetMemInst) {
+        assert(Info.IsSimple && "need to refine IsSimple in TTI");
+        return false;
+      }
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isVolatile();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->isVolatile();
+      }
+      // Conservative answer
+      return true;
     }
+
+
     bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
       return (getPointerOperand() == Inst.getPointerOperand() &&
               getMatchingId() == Inst.getMatchingId());
@@ -433,8 +470,14 @@ private:
       }
       return nullptr;
     }
-    bool mayReadFromMemory() const { return Inst->mayReadFromMemory(); }
-    bool mayWriteToMemory() const { return Inst->mayWriteToMemory(); }
+    bool mayReadFromMemory() const {
+      if (IsTargetMemInst) return Info.ReadMem;
+      return Inst->mayReadFromMemory();
+    }
+    bool mayWriteToMemory() const {
+      if (IsTargetMemInst) return Info.WriteMem;
+      return Inst->mayWriteToMemory();
+    }
 
   private:
     bool IsTargetMemInst;
@@ -555,20 +598,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     ParseMemoryInst MemInst(Inst, TTI);
     // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
-      // Ignore volatile or ordered loads.
-      if (!MemInst.isSimple()) {
+      // (conservatively) we can't peek past the ordering implied by this
+      // operation, but we can add this load to our set of available values
+      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
         LastStore = nullptr;
-        // Don't CSE across synchronization boundaries.
-        if (Inst->mayWriteToMemory())
-          ++CurrentGeneration;
-        continue;
+        ++CurrentGeneration;
       }
 
      // If we have an available version of this load, and if it is the right
      // generation, replace this instruction.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
-          InVal.MatchingId == MemInst.getMatchingId()) {
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing loads with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered() &&
+          // We can't replace an atomic load with one which isn't also atomic.
+          InVal.IsAtomic >= MemInst.isAtomic()) {
        Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
        if (Op != nullptr) {
          DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
@@ -585,7 +630,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       // Otherwise, remember that we have this instruction.
       AvailableLoads.insert(
           MemInst.getPointerOperand(),
-          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
+          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                    MemInst.isAtomic()));
       LastStore = nullptr;
       continue;
     }
@@ -632,6 +678,33 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       continue;
     }
 
+    // write back DSE - If we write back the same value we just loaded from
+    // the same location and haven't passed any intervening writes or ordering
+    // operations, we can remove the write. The primary benefit is in allowing
+    // the available load table to remain valid and value forward past where
+    // the store originally was.
+    if (MemInst.isValid() && MemInst.isStore()) {
+      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+      if (InVal.Data &&
+          InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+          InVal.Generation == CurrentGeneration &&
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing stores with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered()) {
+        assert((!LastStore ||
+                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
+                MemInst.getPointerOperand()) &&
+               "can't have an intervening store!");
+        DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumDSE;
+        // We can avoid incrementing the generation count since we were able
+        // to eliminate this store.
+        continue;
+      }
+    }
+
     // Okay, this isn't something we can CSE at all. Check to see if it is
     // something that could modify memory. If so, our available memory values
     // cannot be used so bump the generation count.
@@ -641,8 +714,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads. Delete the earlier store.
+        // At the moment, we don't remove ordered stores, but do remove
+        // unordered atomic stores. There's no special requirement (for
+        // unordered atomics) about removing atomic stores only in favor of
+        // other atomic stores since we're going to execute the non-atomic
+        // one anyway and the atomic one might never have become visible.
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+          assert(LastStoreMemInst.isUnordered() &&
+                 !LastStoreMemInst.isVolatile() &&
+                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                         << " due to: " << *Inst << '\n');
@@ -661,11 +742,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
        // the store.
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
-            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
-
-        // Remember that this was the last normal store we saw for DSE.
-        if (MemInst.isSimple())
+            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                      MemInst.isAtomic()));
+
+        // Remember that this was the last unordered store we saw for DSE. We
+        // don't yet handle DSE on ordered or volatile stores since we don't
+        // have a good way to model the ordering requirement for following
+        // passes once the store is removed. We could insert a fence, but
+        // since fences are slightly stronger than stores in their ordering,
+        // it's not clear this is a profitable transform. Another option would
+        // be to merge the ordering with that of the post dominating store.
+        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
+        else
+          LastStore = nullptr;
      }
    }
  }
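
A note on the atomicity check added in the load-CSE path above: because C++ compares bools as integers, the expression InVal.IsAtomic >= MemInst.isAtomic() encodes "the available value must come from an access at least as atomic as the load being removed." The following is a minimal standalone sketch of that rule, not taken from the patch; the struct, function, and parameter names here are invented purely for illustration.

// Hypothetical illustration of the "at least as atomic" rule used in the
// load-CSE check above; names are invented for this sketch.
#include <cassert>

struct AvailableValue {
  bool IsAtomic; // was the instruction that produced this value atomic?
};

// A cached value may stand in for a later load only if the access that
// produced it was at least as atomic as the load being removed. With
// bools, >= expresses exactly that implication: true >= false holds,
// but false >= true does not.
static bool canReplaceLoad(const AvailableValue &InVal, bool LoadIsAtomic) {
  return InVal.IsAtomic >= LoadIsAtomic;
}

int main() {
  assert(canReplaceLoad({true}, false));   // atomic value, plain load: OK
  assert(canReplaceLoad({true}, true));    // atomic value, atomic load: OK
  assert(canReplaceLoad({false}, false));  // plain value, plain load: OK
  assert(!canReplaceLoad({false}, true));  // plain value, atomic load: not OK
  return 0;
}

In the patch itself this comparison only fires after the volatility and ordering guards (!MemInst.isVolatile() && MemInst.isUnordered()), so the only atomic loads that can actually be removed are unordered ones.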