Taints the non-acquire RMW's store address with the load part

[oota-llvm.git] / lib / CodeGen / AtomicExpandPass.cpp
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp

index e4b7c5a62780ce919d07f98c3b07fa1e714438ae..045c8076d7e7c4c7288a8906855435485c8392c4 100644 (file)
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,11 +8,19 @@
  //===----------------------------------------------------------------------===//
  //
  // This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) load-linked/store-conditional loops or
-// AtomicCmpXchg.
+// target specific instruction which implement the same semantics in a way
+// which better fits the target backend.  This can include the use of either
+// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or
+// type coercions.
  //
  //===----------------------------------------------------------------------===//
  
+#include "TaintRelaxedAtomicsUtils.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/MemoryLocation.h"
  #include "llvm/CodeGen/AtomicExpandUtils.h"
  #include "llvm/CodeGen/Passes.h"
  #include "llvm/IR/Function.h"
@@ -21,10 +29,13 @@
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
  #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetLowering.h"
  #include "llvm/Target/TargetMachine.h"
  #include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
  
  using namespace llvm;
  
@@ -46,9 +57,12 @@ namespace {
    private:
      bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                 bool IsStore, bool IsLoad);
+    IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+    LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
      bool tryExpandAtomicLoad(LoadInst *LI);
      bool expandAtomicLoadToLL(LoadInst *LI);
      bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+    StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
      bool expandAtomicStore(StoreInst *SI);
      bool tryExpandAtomicRMW(AtomicRMWInst *AI);
      bool expandAtomicOpToLLSC(
@@ -58,6 +72,33 @@ namespace {
      bool isIdempotentRMW(AtomicRMWInst *AI);
      bool simplifyIdempotentRMW(AtomicRMWInst *AI);
    };
+
+
+  // If 'LI' is a relaxed load, and it is immediately followed by a
+// atomic read-modify-write that has acq_rel parameter, we don't have to do
+// anything since the rmw serves as a natural barrier.
+void MarkRelaxedLoadBeforeAcqrelRMW(LoadInst* LI) {
+  auto* BB = LI->getParent();
+  auto BBI = LI->getIterator();
+  for (BBI++; BBI != BB->end(); BBI++) {
+    Instruction* CurInst = &*BBI;
+    if (!CurInst) {
+      return;
+    }
+    if (!CurInst->isAtomic()) {
+      continue;
+    }
+    auto* RMW = dyn_cast<AtomicRMWInst>(CurInst);
+    if (!RMW) {
+      return;
+    }
+    if (RMW->getOrdering() == AcquireRelease ||
+        RMW->getOrdering() == SequentiallyConsistent) {
+      LI->setHasSubsequentAcqlRMW(true);
+    }
+  }
+}
+
  }
  
  char AtomicExpand::ID = 0;
@@ -76,12 +117,61 @@ bool AtomicExpand::runOnFunction(Function &F) {
    TLI = TM->getSubtargetImpl(F)->getTargetLowering();
  
    SmallVector<Instruction *, 1> AtomicInsts;
+  SmallVector<LoadInst*, 1> MonotonicLoadInsts;
  
    // Changing control-flow while iterating through it is a bad idea, so gather a
    // list of all atomic instructions before we start.
    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
-    if (I->isAtomic())
+    // XXX-update: For relaxed loads, change them to acquire. This includes
+    // relaxed loads, relaxed atomic RMW & relaxed atomic compare exchange.
+    if (I->isAtomic()) {
+      switch (I->getOpcode()) {
+        case Instruction::AtomicCmpXchg: {
+          // XXX-comment: AtomicCmpXchg in AArch64 will be translated to a
+          // conditional branch that contains the value of the load anyway, so
+          // we don't need to do anything.
+          /*
+          auto* CmpXchg = dyn_cast<AtomicCmpXchgInst>(&*I);
+          auto SuccOrdering = CmpXchg->getSuccessOrdering();
+          if (SuccOrdering == Monotonic) {
+            CmpXchg->setSuccessOrdering(Acquire);
+          } else if (SuccOrdering == Release) {
+            CmpXchg->setSuccessOrdering(AcquireRelease);
+          }
+          */
+          break;
+        }
+        case Instruction::AtomicRMW: {
+          // XXX-comment: Similar to AtomicCmpXchg. These instructions in
+          // AArch64 will be translated to a loop whose condition depends on the
+          // store status, which further depends on the load value.
+          /*
+          auto* RMW = dyn_cast<AtomicRMWInst>(&*I);
+          if (RMW->getOrdering() == Monotonic) {
+            RMW->setOrdering(Acquire);
+          }
+          */
+          break;
+        }
+        case Instruction::Load: {
+          auto* LI = dyn_cast<LoadInst>(&*I);
+          if (LI->getOrdering() == Monotonic) {
+            /*
+            DEBUG(dbgs() << "Transforming relaxed loads to acquire loads: "
+                         << *LI << '\n');
+            LI->setOrdering(Acquire);
+            */
+//            MonotonicLoadInsts.push_back(LI);
+            MarkRelaxedLoadBeforeAcqrelRMW(LI);
+          }
+          break;
+        }
+        default: {
+          break;
+        }
+      }
        AtomicInsts.push_back(&*I);
+    }
    }
  
    bool MadeChange = false;
@@ -98,7 +188,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
      if (TLI->getInsertFencesForAtomic()) {
        if (LI && isAtLeastAcquire(LI->getOrdering())) {
          FenceOrdering = LI->getOrdering();
-        LI->setOrdering(Monotonic);
+//        AddFakeConditionalBranch(
          IsStore = false;
          IsLoad = true;
        } else if (SI && isAtLeastRelease(SI->getOrdering())) {
@@ -130,9 +220,27 @@ bool AtomicExpand::runOnFunction(Function &F) {
      }
  
      if (LI) {
+      if (LI->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
+        LI = convertAtomicLoadToIntegerType(LI);
+        assert(LI->getType()->isIntegerTy() && "invariant broken");
+        MadeChange = true;
+      }
+      
        MadeChange |= tryExpandAtomicLoad(LI);
-    } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
-      MadeChange |= expandAtomicStore(SI);
+    } else if (SI) {
+      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
+        SI = convertAtomicStoreToIntegerType(SI);
+        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
+               "invariant broken");
+        MadeChange = true;
+      }
+
+      if (TLI->shouldExpandAtomicStoreInIR(SI))
+        MadeChange |= expandAtomicStore(SI);
      } else if (RMWI) {
        // There are two different ways of expanding RMW instructions:
        // - into a load if it is idempotent
@@ -148,6 +256,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
        MadeChange |= expandAtomicCmpXchg(CASI);
      }
    }
+
    return MadeChange;
  }
  
@@ -172,6 +281,42 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
    return (LeadingFence || TrailingFence);
  }
  
+/// Get the iX type with the same bitwidth as T.
+IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
+                                                       const DataLayout &DL) {
+  EVT VT = TLI->getValueType(DL, T);
+  unsigned BitWidth = VT.getStoreSizeInBits();
+  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
+  return IntegerType::get(T->getContext(), BitWidth);
+}
+
+/// Convert an atomic load of a non-integral type to an integer load of the
+/// equivelent bitwidth.  See the function comment on
+/// convertAtomicStoreToIntegerType for background.  
+LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+  auto *M = LI->getModule();
+  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
+                                            M->getDataLayout());
+
+  IRBuilder<> Builder(LI);
+  
+  Value *Addr = LI->getPointerOperand();
+  Type *PT = PointerType::get(NewTy,
+                              Addr->getType()->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+  
+  auto *NewLI = Builder.CreateLoad(NewAddr);
+  NewLI->setAlignment(LI->getAlignment());
+  NewLI->setVolatile(LI->isVolatile());
+  NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+  
+  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+  LI->replaceAllUsesWith(NewVal);
+  LI->eraseFromParent();
+  return NewLI;
+}
+
  bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
    switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
    case TargetLoweringBase::AtomicExpansionKind::None:
@@ -222,6 +367,35 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
    return true;
  }
  
+/// Convert an atomic store of a non-integral type to an integer store of the
+/// equivelent bitwidth.  We used to not support floating point or vector
+/// atomics in the IR at all.  The backends learned to deal with the bitcast
+/// idiom because that was the only way of expressing the notion of a atomic
+/// float or vector store.  The long term plan is to teach each backend to
+/// instruction select from the original atomic store, but as a migration
+/// mechanism, we convert back to the old format which the backends understand.
+/// Each backend will need individual work to recognize the new format.
+StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
+  IRBuilder<> Builder(SI);
+  auto *M = SI->getModule();
+  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
+                                            M->getDataLayout());
+  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
+  
+  Value *Addr = SI->getPointerOperand();
+  Type *PT = PointerType::get(NewTy,
+                              Addr->getType()->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
+  NewSI->setAlignment(SI->getAlignment());
+  NewSI->setVolatile(SI->isVolatile());
+  NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
+  DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
+  SI->eraseFromParent();
+  return NewSI;
+}
+
  bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
    // This function is only called on atomic stores that are too large to be
    // atomic if implemented as a native store. So we replace them by an
@@ -343,10 +517,25 @@ bool AtomicExpand::expandAtomicOpToLLSC(
    Builder.SetInsertPoint(LoopBB);
    Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
  
+  // XXX-update: For relaxed RMWs (i.e., fetch_* operations), we still need to
+  // taint the load part. However, we only need to taint those whose results are
+  // not immediately used by a conditional branch or a store address.
+  Value* StoreAddr = Addr;
+  auto* LoadedPartInst = dyn_cast<Instruction>(Loaded);
+  assert(LoadedPartInst && "Load part of RMW should be an instruction!");
+  if (MemOpOrder != Acquire && MemOpOrder != AcquireRelease &&
+      MemOpOrder != SequentiallyConsistent) {
+    // Also check whether the result is used immediately. If so, taint the
+    // address of the upcoming store-exclusive.
+    if (NeedExtraConstraints(I)) {
+      StoreAddr = taintRMWStoreAddressWithLoadPart(Builder, Addr, LoadedPartInst);
+    }
+  }
+
    Value *NewVal = PerformOp(Builder, Loaded);
  
    Value *StoreSuccess =
-      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
+      TLI->emitStoreConditional(Builder, NewVal, StoreAddr, MemOpOrder);
    Value *TryAgain = Builder.CreateICmpNE(
        StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
    Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);