[InstCombine] Extend peephole DSE to handle unordered atomics

author Philip Reames <listmail@philipreames.com>

Thu, 17 Dec 2015 22:19:27 +0000 (22:19 +0000)

committer Philip Reames <listmail@philipreames.com>

Thu, 17 Dec 2015 22:19:27 +0000 (22:19 +0000)
author Philip Reames <listmail@philipreames.com>
Thu, 17 Dec 2015 22:19:27 +0000 (22:19 +0000)
committer Philip Reames <listmail@philipreames.com>
Thu, 17 Dec 2015 22:19:27 +0000 (22:19 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

index 12566922c8389fba9b33e7cff53a17f441ceda65..aca014f04cd84e6cca9efecc27ed9dcb3f732c90 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1038,9 +1038,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
        return &SI;
    }
  
        return &SI;
    }
  
-  // Don't hack volatile/atomic stores.
-  // FIXME: Some bits are legal for atomic stores; needs refactoring.
-  if (!SI.isSimple()) return nullptr;
+  // Don't hack volatile/ordered stores.
+  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
+  if (!SI.isUnordered()) return nullptr;
  
    // If the RHS is an alloca with a single use, zapify the store, making the
    // alloca dead.
  
    // If the RHS is an alloca with a single use, zapify the store, making the
    // alloca dead.
@@ -1072,7 +1072,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  
      if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
        // Prev store isn't volatile, and stores to the same location?
  
      if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
        // Prev store isn't volatile, and stores to the same location?
-      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
+      if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
                                                          SI.getOperand(1))) {
          ++NumDeadStore;
          ++BBI;
                                                          SI.getOperand(1))) {
          ++NumDeadStore;
          ++BBI;
@@ -1086,9 +1086,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
      // the pointer we're loading and is producing the pointer we're storing,
      // then *this* store is dead (X = load P; store X -> P).
      if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      // the pointer we're loading and is producing the pointer we're storing,
      // then *this* store is dead (X = load P; store X -> P).
      if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
-      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
-          LI->isSimple())
+      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
+        assert(SI.isUnordered() && "can't eliminate ordering operation");
          return EraseInstFromFunction(SI);
          return EraseInstFromFunction(SI);
+      }
  
        // Otherwise, this is a load from some other location.  Stores before it
        // may not be dead.
  
        // Otherwise, this is a load from some other location.  Stores before it
        // may not be dead.
@@ -1114,6 +1115,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
    if (isa<UndefValue>(Val))
      return EraseInstFromFunction(SI);
  
    if (isa<UndefValue>(Val))
      return EraseInstFromFunction(SI);
  
+  // The code below needs to be audited and adjusted for unordered atomics
+  if (!SI.isSimple())
+    return nullptr;
+
    // If this store is the last instruction in the basic block (possibly
    // excepting debug info instructions), and if the block ends with an
    // unconditional branch, try to move it to the successor block.
    // If this store is the last instruction in the basic block (possibly
    // excepting debug info instructions), and if the block ends with an
    // unconditional branch, try to move it to the successor block.
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll

index 5dfbd7140901ff5209ce2bceb79d1fe7af7ca80d..b8730413f1b51b2da9c9bb4a1c5a2e139958ee24 100644 (file)
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -113,6 +113,119 @@ for.end:                                          ; preds = %for.cond
  ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
  }
  
  ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
  }
  
+define void @dse1(i32* %p) {
+; CHECK-LABEL: dse1
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store i32 0, i32* %p
+  store i32 0, i32* %p
+  ret void
+} 
+
+; Slightly subtle: if we're mixing atomic and non-atomic access to the
+; same location, then the contents of the location are undefined if there's
+; an actual race.  As such, we're free to pick either store under the 
+; assumption that we're not racing with any other thread.
+define void @dse2(i32* %p) {
+; CHECK-LABEL: dse2
+; CHECK-NEXT: store i32 0, i32* %p
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %p unordered, align 4
+  store i32 0, i32* %p
+  ret void
+} 
+
+define void @dse3(i32* %p) {
+; CHECK-LABEL: dse3
+; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4
+; CHECK-NEXT: ret
+  store i32 0, i32* %p
+  store atomic i32 0, i32* %p unordered, align 4
+  ret void
+} 
+
+define void @dse4(i32* %p) {
+; CHECK-LABEL: dse4
+; CHECK-NEXT: store atomic i32 0, i32* %p unordered, align 4
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %p unordered, align 4
+  store atomic i32 0, i32* %p unordered, align 4
+  ret void
+} 
+
+; Implementation limit - could remove unordered store here, but
+; currently don't.
+define void @dse5(i32* %p) {
+; CHECK-LABEL: dse5
+; CHECK-NEXT: store
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  store atomic i32 0, i32* %p unordered, align 4
+  store atomic i32 0, i32* %p seq_cst, align 4
+  ret void
+}
+
+define void @write_back1(i32* %p) {
+; CHECK-LABEL: write_back1
+; CHECK-NEXT: ret
+  %v = load i32, i32* %p
+  store i32 %v, i32* %p
+  ret void
+} 
+
+define void @write_back2(i32* %p) {
+; CHECK-LABEL: write_back2
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p unordered, align 4
+  store i32 %v, i32* %p
+  ret void
+} 
+
+define void @write_back3(i32* %p) {
+; CHECK-LABEL: write_back3
+; CHECK-NEXT: ret
+  %v = load i32, i32* %p
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+} 
+
+define void @write_back4(i32* %p) {
+; CHECK-LABEL: write_back4
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p unordered, align 4
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+} 
+
+; Can't remove store due to ordering side effect
+define void @write_back5(i32* %p) {
+; CHECK-LABEL: write_back5
+; CHECK-NEXT: load
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p unordered, align 4
+  store atomic i32 %v, i32* %p seq_cst, align 4
+  ret void
+}
+
+define void @write_back6(i32* %p) {
+; CHECK-LABEL: write_back6
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+  %v = load atomic i32, i32* %p seq_cst, align 4
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+}
+
+define void @write_back7(i32* %p) {
+; CHECK-LABEL: write_back7
+; CHECK-NEXT: load
+; CHECK-NEXT: ret
+  %v = load atomic volatile i32, i32* %p seq_cst, align 4
+  store atomic i32 %v, i32* %p unordered, align 4
+  ret void
+}
+
  !0 = !{!4, !4, i64 0}
  !1 = !{!"omnipotent char", !2}
  !2 = !{!"Simple C/C++ TBAA"}
  !0 = !{!4, !4, i64 0}
  !1 = !{!"omnipotent char", !2}
  !2 = !{!"Simple C/C++ TBAA"}
author	Philip Reames <listmail@philipreames.com>
	Thu, 17 Dec 2015 22:19:27 +0000 (22:19 +0000)
committer	Philip Reames <listmail@philipreames.com>
	Thu, 17 Dec 2015 22:19:27 +0000 (22:19 +0000)
lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp		patch \| blob \| history
test/Transforms/InstCombine/store.ll		patch \| blob \| history