[CodeGen] Fix AtomicExpand invalidation issue caused by r247429.

[oota-llvm.git] / lib / CodeGen / AtomicExpandPass.cpp
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp

index 4b64be053dfd31fa928895369e86ff7dab515cfa..73102ccfece1006a23f2708a5f14e403b3380c45 100644 (file)
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,10 +8,12 @@
  //===----------------------------------------------------------------------===//
  //
  // This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
+// either (intrinsic-based) load-linked/store-conditional loops or
+// AtomicCmpXchg.
  //
  //===----------------------------------------------------------------------===//
  
+#include "llvm/CodeGen/AtomicExpandUtils.h"
  #include "llvm/CodeGen/Passes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
@@ -44,13 +46,12 @@ namespace {
    private:
      bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                 bool IsStore, bool IsLoad);
-    bool expandAtomicLoad(LoadInst *LI);
+    bool tryExpandAtomicLoad(LoadInst *LI);
      bool expandAtomicLoadToLL(LoadInst *LI);
      bool expandAtomicLoadToCmpXchg(LoadInst *LI);
      bool expandAtomicStore(StoreInst *SI);
-    bool expandAtomicRMW(AtomicRMWInst *AI);
+    bool tryExpandAtomicRMW(AtomicRMWInst *AI);
      bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
-    bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
      bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
      bool isIdempotentRMW(AtomicRMWInst *AI);
      bool simplifyIdempotentRMW(AtomicRMWInst *AI);
@@ -108,7 +109,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
          FenceOrdering = RMWI->getOrdering();
          RMWI->setOrdering(Monotonic);
          IsStore = IsLoad = true;
-      } else if (CASI && !TLI->hasLoadLinkedStoreConditional() &&
+      } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
                   (isAtLeastRelease(CASI->getSuccessOrdering()) ||
                    isAtLeastAcquire(CASI->getSuccessOrdering()))) {
          // If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -126,8 +127,8 @@ bool AtomicExpand::runOnFunction(Function &F) {
        }
      }
  
-    if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) {
-      MadeChange |= expandAtomicLoad(LI);
+    if (LI) {
+      MadeChange |= tryExpandAtomicLoad(LI);
      } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
        MadeChange |= expandAtomicStore(SI);
      } else if (RMWI) {
@@ -135,10 +136,13 @@ bool AtomicExpand::runOnFunction(Function &F) {
        // - into a load if it is idempotent
        // - into a Cmpxchg/LL-SC loop otherwise
        // we try them in that order.
-      MadeChange |=
-          (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
-          (TLI->shouldExpandAtomicRMWInIR(RMWI) && expandAtomicRMW(RMWI));
-    } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
+
+      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+        MadeChange = true;
+      } else {
+        MadeChange |= tryExpandAtomicRMW(RMWI);
+      }
+    } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) {
        MadeChange |= expandAtomicCmpXchg(CASI);
      }
    }
@@ -166,11 +170,18 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
    return (LeadingFence || TrailingFence);
  }
  
-bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
-  if (TLI->hasLoadLinkedStoreConditional())
+bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
+  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
+  case TargetLoweringBase::AtomicExpansionKind::None:
+    return false;
+  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
      return expandAtomicLoadToLL(LI);
-  else
+  }
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
      return expandAtomicLoadToCmpXchg(LI);
+  }
+  }
+  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
  
  bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -211,7 +222,7 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
    // atomic if implemented as a native store. So we replace them by an
    // atomic swap, that can be implemented for example as a ldrex/strex on ARM
    // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
-  // It is the responsibility of the target to only return true in
+  // It is the responsibility of the target to only signal expansion via
    // shouldExpandAtomicRMW in cases where this is required and possible.
    IRBuilder<> Builder(SI);
    AtomicRMWInst *AI =
@@ -220,14 +231,32 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
    SI->eraseFromParent();
  
    // Now we have an appropriate swap instruction, lower it as usual.
-  return expandAtomicRMW(AI);
+  return tryExpandAtomicRMW(AI);
  }
  
-bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
-  if (TLI->hasLoadLinkedStoreConditional())
+static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
+                                 Value *Loaded, Value *NewVal,
+                                 AtomicOrdering MemOpOrder,
+                                 Value *&Success, Value *&NewLoaded) {
+  Value* Pair = Builder.CreateAtomicCmpXchg(
+      Addr, Loaded, NewVal, MemOpOrder,
+      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
+  Success = Builder.CreateExtractValue(Pair, 1, "success");
+  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+}
+
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+  case TargetLoweringBase::AtomicExpansionKind::None:
+    return false;
+  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
      return expandAtomicRMWToLLSC(AI);
-  else
-    return expandAtomicRMWToCmpXchg(AI);
+  }
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
+    return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+  }
+  }
+  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
  
  /// Emit IR to implement the given atomicrmw operation on values in registers,
@@ -322,70 +351,6 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
    return true;
  }
  
-bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
-  AtomicOrdering MemOpOrder =
-      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
-  Value *Addr = AI->getPointerOperand();
-  BasicBlock *BB = AI->getParent();
-  Function *F = BB->getParent();
-  LLVMContext &Ctx = F->getContext();
-
-  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
-  //
-  // The standard expansion we produce is:
-  //     [...]
-  //     %init_loaded = load atomic iN* %addr
-  //     br label %loop
-  // loop:
-  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
-  //     %new = some_op iN %loaded, %incr
-  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
-  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
-  //     %success = extractvalue { iN, i1 } %pair, 1
-  //     br i1 %success, label %atomicrmw.end, label %loop
-  // atomicrmw.end:
-  //     [...]
-  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
-  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
-
-  // This grabs the DebugLoc from AI.
-  IRBuilder<> Builder(AI);
-
-  // The split call above "helpfully" added a branch at the end of BB (to the
-  // wrong place), but we want a load. It's easiest to just remove
-  // the branch entirely.
-  std::prev(BB->end())->eraseFromParent();
-  Builder.SetInsertPoint(BB);
-  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
-  // Atomics require at least natural alignment.
-  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
-  Builder.CreateBr(LoopBB);
-
-  // Start the main loop block now that we've taken care of the preliminaries.
-  Builder.SetInsertPoint(LoopBB);
-  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
-  Loaded->addIncoming(InitLoaded, BB);
-
-  Value *NewVal =
-      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
-
-  Value *Pair = Builder.CreateAtomicCmpXchg(
-      Addr, Loaded, NewVal, MemOpOrder,
-      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
-  Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
-  Loaded->addIncoming(NewLoaded, LoopBB);
-
-  Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
-  Builder.CreateCondBr(Success, ExitBB, LoopBB);
-
-  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-
-  AI->replaceAllUsesWith(NewLoaded);
-  AI->eraseFromParent();
-
-  return true;
-}
-
  bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
    AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
    AtomicOrdering FailureOrder = CI->getFailureOrdering();
@@ -449,7 +414,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
    Value *ShouldStore =
        Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
  
-  // If the the cmpxchg doesn't actually need any ordering when it fails, we can
+  // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
  
@@ -541,9 +506,77 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
  
  bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
    if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
-    if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
-      expandAtomicLoad(ResultingLoad);
+    tryExpandAtomicLoad(ResultingLoad);
      return true;
    }
    return false;
  }
+
+bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+                                    CreateCmpXchgInstFun CreateCmpXchg) {
+  assert(AI);
+
+  AtomicOrdering MemOpOrder =
+      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+  Value *Addr = AI->getPointerOperand();
+  BasicBlock *BB = AI->getParent();
+  Function *F = BB->getParent();
+  LLVMContext &Ctx = F->getContext();
+
+  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+  //
+  // The standard expansion we produce is:
+  //     [...]
+  //     %init_loaded = load atomic iN* %addr
+  //     br label %loop
+  // loop:
+  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+  //     %new = some_op iN %loaded, %incr
+  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
+  //     %success = extractvalue { iN, i1 } %pair, 1
+  //     br i1 %success, label %atomicrmw.end, label %loop
+  // atomicrmw.end:
+  //     [...]
+  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+  // This grabs the DebugLoc from AI.
+  IRBuilder<> Builder(AI);
+
+  // The split call above "helpfully" added a branch at the end of BB (to the
+  // wrong place), but we want a load. It's easiest to just remove
+  // the branch entirely.
+  std::prev(BB->end())->eraseFromParent();
+  Builder.SetInsertPoint(BB);
+  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+  // Atomics require at least natural alignment.
+  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
+  Builder.CreateBr(LoopBB);
+
+  // Start the main loop block now that we've taken care of the preliminaries.
+  Builder.SetInsertPoint(LoopBB);
+  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+  Loaded->addIncoming(InitLoaded, BB);
+
+  Value *NewVal =
+      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+  Value *NewLoaded = nullptr;
+  Value *Success = nullptr;
+
+  CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder,
+                Success, NewLoaded);
+  assert(Success && NewLoaded);
+
+  Loaded->addIncoming(NewLoaded, LoopBB);
+
+  Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+
+  AI->replaceAllUsesWith(NewLoaded);
+  AI->eraseFromParent();
+
+  return true;
+}