Relax the constraint more in MemoryDependencyAnalysis.cpp

author Robin Morisset <morisset@google.com>

Fri, 29 Aug 2014 20:32:58 +0000 (20:32 +0000)

committer Robin Morisset <morisset@google.com>

Fri, 29 Aug 2014 20:32:58 +0000 (20:32 +0000)
author Robin Morisset <morisset@google.com>
Fri, 29 Aug 2014 20:32:58 +0000 (20:32 +0000)
committer Robin Morisset <morisset@google.com>
Fri, 29 Aug 2014 20:32:58 +0000 (20:32 +0000)
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp

index 5657aea0d9f810fd4ed6ca9880e3705e64ecb04d..93e64888049bd4ff1e9464cebd40770eff71af90 100644 (file)
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -370,6 +370,36 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
    int64_t MemLocOffset = 0;
    unsigned Limit = BlockScanLimit;
    bool isInvariantLoad = false;
+
+  // We must be careful with atomic accesses, as they may allow another thread
+  //   to touch this location, cloberring it. We are conservative: if the
+  //   QueryInst is not a simple (non-atomic) memory access, we automatically
+  //   return getClobber.
+  // If it is simple, we know based on the results of
+  // "Compiler testing via a theory of sound optimisations in the C11/C++11
+  //   memory model" in PLDI 2013, that a non-atomic location can only be
+  //   clobbered between a pair of a release and an acquire action, with no
+  //   access to the location in between.
+  // Here is an example for giving the general intuition behind this rule.
+  // In the following code:
+  //   store x 0;
+  //   release action; [1]
+  //   acquire action; [4]
+  //   %val = load x;
+  // It is unsafe to replace %val by 0 because another thread may be running:
+  //   acquire action; [2]
+  //   store x 42;
+  //   release action; [3]
+  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
+  // being 42. A key property of this program however is that if either
+  // 1 or 4 were missing, there would be a race between the store of 42
+  // either the store of 0 or the load (making the whole progam racy).
+  // The paper mentionned above shows that the same property is respected
+  // by every program that can detect any optimisation of that kind: either
+  // it is racy (undefined) or there is a release followed by an acquire
+  // between the pair of accesses under consideration.
+  bool HasSeenAcquire = false;
+
    if (isLoad && QueryInst) {
      LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
      if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
@@ -412,19 +442,21 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
      // be accessing the location.
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        // Atomic loads have complications involved.
-      // A monotonic load is OK if the query inst is itself not atomic.
+      // A Monotonic (or higher) load is OK if the query inst is itself not atomic.
+      // An Acquire (or higher) load sets the HasSeenAcquire flag, so that any
+      //   release store will know to return getClobber.
        // FIXME: This is overly conservative.
        if (!LI->isUnordered()) {
          if (!QueryInst)
            return MemDepResult::getClobber(LI);
-        if (LI->getOrdering() != Monotonic)
-          return MemDepResult::getClobber(LI);
          if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst))
            if (!QueryLI->isSimple())
              return MemDepResult::getClobber(LI);
          if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst))
            if (!QuerySI->isSimple())
              return MemDepResult::getClobber(LI);
+        if (isAtLeastAcquire(LI->getOrdering()))
+          HasSeenAcquire = true;
        }
  
        // FIXME: this is overly conservative.
@@ -490,19 +522,21 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
  
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        // Atomic stores have complications involved.
-      // A monotonic store is OK if the query inst is itself not atomic.
+      // A Monotonic store is OK if the query inst is itself not atomic.
+      // A Release (or higher) store further requires that no acquire load
+      //   has been seen.
        // FIXME: This is overly conservative.
        if (!SI->isUnordered()) {
          if (!QueryInst)
            return MemDepResult::getClobber(SI);
-        if (SI->getOrdering() != Monotonic)
-          return MemDepResult::getClobber(SI);
          if (auto *QueryLI = dyn_cast<LoadInst>(QueryInst))
            if (!QueryLI->isSimple())
              return MemDepResult::getClobber(SI);
          if (auto *QuerySI = dyn_cast<StoreInst>(QueryInst))
            if (!QuerySI->isSimple())
              return MemDepResult::getClobber(SI);
+        if (HasSeenAcquire && isAtLeastRelease(SI->getOrdering()))
+          return MemDepResult::getClobber(SI);
        }
  
        // FIXME: this is overly conservative.
diff --git a/test/Transforms/DeadStoreElimination/atomic.ll b/test/Transforms/DeadStoreElimination/atomic.ll

index 621958dbb794459bc92124c82875f957f541e245..af303fa983ee16046938b31d55c68d521bc94aef 100644 (file)
--- a/test/Transforms/DeadStoreElimination/atomic.ll
+++ b/test/Transforms/DeadStoreElimination/atomic.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-macosx10.7.0"
  
  ; Sanity tests for atomic stores.
  ; Note that it turns out essentially every transformation DSE does is legal on
-; atomic ops, just some transformations are not allowed across them. 
+; atomic ops, just some transformations are not allowed across release-acquire pairs.
  
  @x = common global i32 0, align 4
  @y = common global i32 0, align 4
@@ -13,35 +13,32 @@ target triple = "x86_64-apple-macosx10.7.0"
  declare void @randomop(i32*)
  
  ; DSE across unordered store (allowed)
-define void @test1()  nounwind uwtable ssp {
-; CHECK: test1
+define void @test1() {
+; CHECK-LABEL: test1
  ; CHECK-NOT: store i32 0
  ; CHECK: store i32 1
-entry:
    store i32 0, i32* @x
    store atomic i32 0, i32* @y unordered, align 4
    store i32 1, i32* @x
    ret void
  }
  
-; DSE across seq_cst load (allowed in theory; not implemented ATM)
-define i32 @test2()  nounwind uwtable ssp {
-; CHECK: test2
-; CHECK: store i32 0
+; DSE across seq_cst load (allowed)
+define i32 @test2() {
+; CHECK-LABEL: test2
+; CHECK-NOT: store i32 0
  ; CHECK: store i32 1
-entry:
    store i32 0, i32* @x
    %x = load atomic i32* @y seq_cst, align 4
    store i32 1, i32* @x
    ret i32 %x
  }
  
-; DSE across seq_cst store (store before atomic store must not be removed)
-define void @test3()  nounwind uwtable ssp {
-; CHECK: test3
-; CHECK: store i32
+; DSE across seq_cst store (allowed)
+define void @test3() {
+; CHECK-LABEL: test3
+; CHECK-NOT: store i32 0
  ; CHECK: store atomic i32 2
-entry:
    store i32 0, i32* @x
    store atomic i32 2, i32* @y seq_cst, align 4
    store i32 1, i32* @x
@@ -49,32 +46,29 @@ entry:
  }
  
  ; DSE remove unordered store (allowed)
-define void @test4()  nounwind uwtable ssp {
-; CHECK: test4
+define void @test4() {
+; CHECK-LABEL: test4
  ; CHECK-NOT: store atomic
  ; CHECK: store i32 1
-entry:
    store atomic i32 0, i32* @x unordered, align 4
    store i32 1, i32* @x
    ret void
  }
  
  ; DSE unordered store overwriting non-atomic store (allowed)
-define void @test5()  nounwind uwtable ssp {
-; CHECK: test5
+define void @test5() {
+; CHECK-LABEL: test5
  ; CHECK: store atomic i32 1
-entry:
    store i32 0, i32* @x
    store atomic i32 1, i32* @x unordered, align 4
    ret void
  }
  
  ; DSE no-op unordered atomic store (allowed)
-define void @test6()  nounwind uwtable ssp {
-; CHECK: test6
+define void @test6() {
+; CHECK-LABEL: test6
  ; CHECK-NOT: store
  ; CHECK: ret void
-entry:
    %x = load atomic i32* @x unordered, align 4
    store atomic i32 %x, i32* @x unordered, align 4
    ret void
@@ -82,10 +76,9 @@ entry:
  
  ; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
  ; to reason about atomic operations).
-define void @test7()  nounwind uwtable ssp {
-; CHECK: test7
-; CHECK: store atomic 
-entry:
+define void @test7() {
+; CHECK-LABEL: test7
+; CHECK: store atomic
    %a = alloca i32
    store atomic i32 0, i32* %a seq_cst, align 4
    ret void
@@ -93,11 +86,10 @@ entry:
  
  ; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
  ; to reason about atomic operations).
-define i32 @test8()  nounwind uwtable ssp {
-; CHECK: test8
+define i32 @test8() {
+; CHECK-LABEL: test8
  ; CHECK: store
-; CHECK: load atomic 
-entry:
+; CHECK: load atomic
    %a = alloca i32
    call void @randomop(i32* %a)
    store i32 0, i32* %a, align 4
@@ -106,11 +98,10 @@ entry:
  }
  
  ; DSE across monotonic load (allowed as long as the eliminated store isUnordered)
-define i32 @test9()  nounwind uwtable ssp {
-; CHECK: test9
+define i32 @test9() {
+; CHECK-LABEL: test9
  ; CHECK-NOT: store i32 0
  ; CHECK: store i32 1
-entry:
    store i32 0, i32* @x
    %x = load atomic i32* @y monotonic, align 4
    store i32 1, i32* @x
@@ -118,11 +109,10 @@ entry:
  }
  
  ; DSE across monotonic store (allowed as long as the eliminated store isUnordered)
-define void @test10()  nounwind uwtable ssp {
-; CHECK: test10
+define void @test10() {
+; CHECK-LABEL: test10
  ; CHECK-NOT: store i32 0
  ; CHECK: store i32 1
-entry:
    store i32 0, i32* @x
    store atomic i32 42, i32* @y monotonic, align 4
    store i32 1, i32* @x
@@ -130,11 +120,10 @@ entry:
  }
  
  ; DSE across monotonic load (forbidden since the eliminated store is atomic)
-define i32 @test11()  nounwind uwtable ssp {
-; CHECK: test11
+define i32 @test11() {
+; CHECK-LABEL: test11
  ; CHECK: store atomic i32 0
  ; CHECK: store atomic i32 1
-entry:
    store atomic i32 0, i32* @x monotonic, align 4
    %x = load atomic i32* @y monotonic, align 4
    store atomic i32 1, i32* @x monotonic, align 4
@@ -142,13 +131,48 @@ entry:
  }
  
  ; DSE across monotonic store (forbidden since the eliminated store is atomic)
-define void @test12()  nounwind uwtable ssp {
-; CHECK: test12
+define void @test12() {
+; CHECK-LABEL: test12
  ; CHECK: store atomic i32 0
  ; CHECK: store atomic i32 1
-entry:
    store atomic i32 0, i32* @x monotonic, align 4
    store atomic i32 42, i32* @y monotonic, align 4
    store atomic i32 1, i32* @x monotonic, align 4
    ret void
  }
+
+; DSE is allowed across a pair of an atomic read and then write.
+define i32 @test13() {
+; CHECK-LABEL: test13
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  %x = load atomic i32* @y seq_cst, align 4
+  store atomic i32 %x, i32* @y seq_cst, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; Same if it is acquire-release instead of seq_cst/seq_cst
+define i32 @test14() {
+; CHECK-LABEL: test14
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  %x = load atomic i32* @y acquire, align 4
+  store atomic i32 %x, i32* @y release, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; But DSE is not allowed across a release-acquire pair.
+define i32 @test15() {
+; CHECK-LABEL: test15
+; CHECK: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  store atomic i32 0, i32* @y release, align 4
+  %x = load atomic i32* @y acquire, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
diff --git a/test/Transforms/GVN/atomic.ll b/test/Transforms/GVN/atomic.ll

index 16ba94066a8e324329c90da333584b09826a6a62..8c13d209c53a1bc6231b5e5bc36199db49dd623a 100644 (file)
--- a/test/Transforms/GVN/atomic.ll
+++ b/test/Transforms/GVN/atomic.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.7.0"
  
  ; GVN across unordered store (allowed)
  define i32 @test1() nounwind uwtable ssp {
-; CHECK: test1
+; CHECK-LABEL: test1
  ; CHECK: add i32 %x, %x
  entry:
    %x = load i32* @y
@@ -18,10 +18,10 @@ entry:
    ret i32 %z
  }
  
-; GVN across seq_cst store (allowed in theory; not implemented ATM)
+; GVN across seq_cst store (allowed)
  define i32 @test2() nounwind uwtable ssp {
-; CHECK: test2
-; CHECK: add i32 %x, %y
+; CHECK-LABEL: test2
+; CHECK: add i32 %x, %x
  entry:
    %x = load i32* @y
    store atomic i32 %x, i32* @x seq_cst, align 4
@@ -32,7 +32,7 @@ entry:
  
  ; GVN across unordered load (allowed)
  define i32 @test3() nounwind uwtable ssp {
-; CHECK: test3
+; CHECK-LABEL: test3
  ; CHECK: add i32 %x, %x
  entry:
    %x = load i32* @y
@@ -43,11 +43,11 @@ entry:
    ret i32 %b
  }
  
-; GVN across acquire load (load after atomic load must not be removed)
+; GVN across acquire load (allowed as the original load was not atomic)
  define i32 @test4() nounwind uwtable ssp {
-; CHECK: test4
+; CHECK-LABEL: test4
  ; CHECK: load atomic i32* @x
-; CHECK: load i32* @y
+; CHECK-NOT: load i32* @y
  entry:
    %x = load i32* @y
    %y = load atomic i32* @x seq_cst, align 4
@@ -59,7 +59,7 @@ entry:
  
  ; GVN load to unordered load (allowed)
  define i32 @test5() nounwind uwtable ssp {
-; CHECK: test5
+; CHECK-LABEL: test5
  ; CHECK: add i32 %x, %x
  entry:
    %x = load atomic i32* @x unordered, align 4
@@ -70,7 +70,7 @@ entry:
  
  ; GVN unordered load to load (unordered load must not be removed)
  define i32 @test6() nounwind uwtable ssp {
-; CHECK: test6
+; CHECK-LABEL: test6
  ; CHECK: load atomic i32* @x unordered
  entry:
    %x = load i32* @x
@@ -79,9 +79,35 @@ entry:
    ret i32 %x3
  }
  
-; GVN across monotonic store (allowed)
+; GVN across release-acquire pair (forbidden)
  define i32 @test7() nounwind uwtable ssp {
-; CHECK: test7
+; CHECK-LABEL: test7
+; CHECK: add i32 %x, %y
+entry:
+  %x = load i32* @y
+  store atomic i32 %x, i32* @x release, align 4
+  %w = load atomic i32* @x acquire, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN across acquire-release pair (allowed)
+define i32 @test8() nounwind uwtable ssp {
+; CHECK-LABEL: test8
+; CHECK: add i32 %x, %x
+entry:
+  %x = load i32* @y
+  %w = load atomic i32* @x acquire, align 4
+  store atomic i32 %x, i32* @x release, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN across monotonic store (allowed)
+define i32 @test9() nounwind uwtable ssp {
+; CHECK-LABEL: test9
  ; CHECK: add i32 %x, %x
  entry:
    %x = load i32* @y
@@ -92,8 +118,8 @@ entry:
  }
  
  ; GVN of an unordered across monotonic load (not allowed)
-define i32 @test8() nounwind uwtable ssp {
-; CHECK: test8
+define i32 @test10() nounwind uwtable ssp {
+; CHECK-LABEL: test10
  ; CHECK: add i32 %x, %y
  entry:
    %x = load atomic i32* @y unordered, align 4
@@ -103,4 +129,3 @@ entry:
    ret i32 %z
  }
  
-
author	Robin Morisset <morisset@google.com>
	Fri, 29 Aug 2014 20:32:58 +0000 (20:32 +0000)
committer	Robin Morisset <morisset@google.com>
	Fri, 29 Aug 2014 20:32:58 +0000 (20:32 +0000)
lib/Analysis/MemoryDependenceAnalysis.cpp		patch \| blob \| history
test/Transforms/DeadStoreElimination/atomic.ll		patch \| blob \| history
test/Transforms/GVN/atomic.ll		patch \| blob \| history