[EarlyCSE] DSE of atomic unordered stores

author Philip Reames <listmail@philipreames.com>

Thu, 17 Dec 2015 18:50:50 +0000 (18:50 +0000)

committer Philip Reames <listmail@philipreames.com>

Thu, 17 Dec 2015 18:50:50 +0000 (18:50 +0000)
author Philip Reames <listmail@philipreames.com>
Thu, 17 Dec 2015 18:50:50 +0000 (18:50 +0000)
committer Philip Reames <listmail@philipreames.com>
Thu, 17 Dec 2015 18:50:50 +0000 (18:50 +0000)
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp

index 03f8c05c9d32837a42a587654fda21994f29e62b..7ef062e71ff3ad7ba102fa1ec3cdc3625ddb3e83 100644 (file)
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -411,15 +411,6 @@ private:
        if (IsTargetMemInst) return Info.WriteMem;
        return isa<StoreInst>(Inst);
      }
-    bool isSimple() const {
-      if (IsTargetMemInst) return Info.IsSimple;
-      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
-        return LI->isSimple();
-      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
-        return SI->isSimple();
-      }
-      return Inst->isAtomic();
-    }
      bool isAtomic() const {
        if (IsTargetMemInst) {
          assert(Info.IsSimple && "need to refine IsSimple in TTI");
@@ -722,12 +713,17 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
  
        if (MemInst.isValid() && MemInst.isStore()) {
          // We do a trivial form of DSE if there are two stores to the same
-        // location with no intervening loads.  Delete the earlier store. Note
-        // that we can delete an earlier simple store even if the following one
-        // is ordered/volatile/atomic store.
+        // location with no intervening loads.  Delete the earlier store.
+        // At the moment, we don't remove ordered stores, but do remove
+        // unordered atomic stores.  There's no special requirement (for
+        // unordered atomics) about removing atomic stores only in favor of
+        // other atomic stores since we we're going to execute the non-atomic
+        // one anyway and the atomic one might never have become visible.
          if (LastStore) {
            ParseMemoryInst LastStoreMemInst(LastStore, TTI);
-          assert(LastStoreMemInst.isSimple() && "Violated invariant");
+          assert(LastStoreMemInst.isUnordered() &&
+                 !LastStoreMemInst.isVolatile() &&
+                 "Violated invariant");
            if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
              DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                           << "  due to: " << *Inst << '\n');
@@ -749,11 +745,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
              LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                        MemInst.isAtomic()));
  
-        // Remember that this was the last normal store we saw for DSE.
-        // Note that we can't delete an earlier atomic or volatile store in
-        // favor of a later one which isn't.  We could in principle remove an
-        // earlier unordered store if the later one is also unordered.
-        if (MemInst.isSimple())
+        // Remember that this was the last unordered store we saw for DSE. We
+        // don't yet handle DSE on ordered or volatile stores since we don't
+        // have a good way to model the ordering requirement for following
+        // passes  once the store is removed.  We could insert a fence, but
+        // since fences are slightly stronger than stores in their ordering,
+        // it's not clear this is a profitable transform. Another option would
+        // be to merge the ordering with that of the post dominating store.
+        if (MemInst.isUnordered() && !MemInst.isVolatile())
            LastStore = Inst;
          else
            LastStore = nullptr;
diff --git a/test/Transforms/EarlyCSE/atomics.ll b/test/Transforms/EarlyCSE/atomics.ll

index ea85a86fa911bc93c8fcc20c6f0bd041021674d5..21c19cd8e88091bd346fd4f6e6a83da085749ec9 100644 (file)
--- a/test/Transforms/EarlyCSE/atomics.ll
+++ b/test/Transforms/EarlyCSE/atomics.ll
@@ -181,5 +181,79 @@ define void @test22(i1 %B, i32* %P1, i32* %P2) {
    ret void
  }
  
+; Can also DSE a unordered store in favor of a normal one
+define void @test23(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test23
+; CHECK-NEXT: store i32 0
+  store atomic i32 3, i32* %P1 unordered, align 4
+  store i32 0, i32* %P1, align 4
+  ret void
+}
+
+; As an implementation limitation, can't remove ordered stores
+; Note that we could remove the earlier store if we could
+; represent the required ordering.
+define void @test24(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test24
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: store i32 0
+  store atomic i32 3, i32* %P1 release, align 4
+  store i32 0, i32* %P1, align 4
+  ret void
+}
  
+; Can't remove volatile stores - each is independently observable and
+; the count of such stores is an observable program side effect.
+define void @test25(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test25
+; CHECK-NEXT: store volatile
+; CHECK-NEXT: store volatile
+  store volatile i32 3, i32* %P1, align 4
+  store volatile i32 0, i32* %P1, align 4
+  ret void
+}
  
+; Can DSE a unordered store in favor of a unordered one
+define void @test26(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test26
+; CHECK-NEXT: store atomic i32 3, i32* %P1 unordered, align 4
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %P1 unordered, align 4
+  store atomic i32 3, i32* %P1 unordered, align 4
+  ret void
+}
+
+; Can DSE a unordered store in favor of a ordered one,
+; but current don't due to implementation limits
+define void @test27(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test27
+; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4
+; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %P1 unordered, align 4
+  store atomic i32 3, i32* %P1 release, align 4
+  ret void
+}
+
+; Can DSE an unordered atomic store in favor of an
+; ordered one, but current don't due to implementation limits
+define void @test28(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test28
+; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4
+; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %P1 unordered, align 4
+  store atomic i32 3, i32* %P1 release, align 4
+  ret void
+}
+
+; As an implementation limitation, can't remove ordered stores
+; see also: @test24
+define void @test29(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test29
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: store atomic
+  store atomic i32 3, i32* %P1 release, align 4
+  store atomic i32 0, i32* %P1 unordered, align 4
+  ret void
+}
author	Philip Reames <listmail@philipreames.com>
	Thu, 17 Dec 2015 18:50:50 +0000 (18:50 +0000)
committer	Philip Reames <listmail@philipreames.com>
	Thu, 17 Dec 2015 18:50:50 +0000 (18:50 +0000)
lib/Transforms/Scalar/EarlyCSE.cpp		patch \| blob \| history
test/Transforms/EarlyCSE/atomics.ll		patch \| blob \| history