//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) ldrex/strex loops or AtomicCmpXchg.
+// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
//
//===----------------------------------------------------------------------===//
bool runOnFunction(Function &F) override;
private:
+ bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+ bool IsStore, bool IsLoad);
bool expandAtomicLoad(LoadInst *LI);
+ bool expandAtomicLoadToLL(LoadInst *LI);
+ bool expandAtomicLoadToCmpXchg(LoadInst *LI);
bool expandAtomicStore(StoreInst *SI);
bool expandAtomicRMW(AtomicRMWInst *AI);
bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+ bool isIdempotentRMW(AtomicRMWInst *AI);
+ bool simplifyIdempotentRMW(AtomicRMWInst *AI);
};
}
auto SI = dyn_cast<StoreInst>(I);
auto RMWI = dyn_cast<AtomicRMWInst>(I);
auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
-
assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) &&
"Unknown atomic instruction");
+ auto FenceOrdering = Monotonic;
+ bool IsStore, IsLoad;
+ if (TargetLowering->getInsertFencesForAtomic()) {
+ if (LI && isAtLeastAcquire(LI->getOrdering())) {
+ FenceOrdering = LI->getOrdering();
+ LI->setOrdering(Monotonic);
+ IsStore = false;
+ IsLoad = true;
+ } else if (SI && isAtLeastRelease(SI->getOrdering())) {
+ FenceOrdering = SI->getOrdering();
+ SI->setOrdering(Monotonic);
+ IsStore = true;
+ IsLoad = false;
+ } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) ||
+ isAtLeastAcquire(RMWI->getOrdering()))) {
+ FenceOrdering = RMWI->getOrdering();
+ RMWI->setOrdering(Monotonic);
+ IsStore = IsLoad = true;
+ } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
+ (isAtLeastRelease(CASI->getSuccessOrdering()) ||
+ isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+ // If a compare and swap is lowered to LL/SC, we can do smarter fence
+ // insertion, with a stronger one on the success path than on the
+ // failure path. As a result, fence insertion is directly done by
+ // expandAtomicCmpXchg in that case.
+ FenceOrdering = CASI->getSuccessOrdering();
+ CASI->setSuccessOrdering(Monotonic);
+ CASI->setFailureOrdering(Monotonic);
+ IsStore = IsLoad = true;
+ }
+
+ if (FenceOrdering != Monotonic) {
+ MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
+ }
+ }
+
if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
MadeChange |= expandAtomicLoad(LI);
} else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
MadeChange |= expandAtomicStore(SI);
- } else if (RMWI && TargetLowering->shouldExpandAtomicRMWInIR(RMWI)) {
- MadeChange |= expandAtomicRMW(RMWI);
+ } else if (RMWI) {
+ // There are two different ways of expanding RMW instructions:
+ // - into a load if it is idempotent
+ // - into a Cmpxchg/LL-SC loop otherwise
+ // we try them in that order.
+ MadeChange |= (isIdempotentRMW(RMWI) &&
+ simplifyIdempotentRMW(RMWI)) ||
+ (TargetLowering->shouldExpandAtomicRMWInIR(RMWI) &&
+ expandAtomicRMW(RMWI));
} else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
MadeChange |= expandAtomicCmpXchg(CASI);
}
return MadeChange;
}
+bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
+ bool IsStore, bool IsLoad) {
+ IRBuilder<> Builder(I);
+
+ auto LeadingFence =
+ TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
+ Builder, Order, IsStore, IsLoad);
+
+ auto TrailingFence =
+ TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
+ Builder, Order, IsStore, IsLoad);
+ // The trailing fence is emitted before the instruction instead of after
+ // because there is no easy way of setting Builder insertion point after
+ // an instruction. So we must erase it from the BB, and insert it back
+ // in the right place.
+ // We have a guard here because not every atomic operation generates a
+ // trailing fence.
+ if (TrailingFence) {
+ TrailingFence->removeFromParent();
+ TrailingFence->insertAfter(I);
+ }
+
+ return (LeadingFence || TrailingFence);
+}
+
bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
+ if (TM->getSubtargetImpl()
+ ->getTargetLowering()
+ ->hasLoadLinkedStoreConditional())
+ return expandAtomicLoadToLL(LI);
+ else
+ return expandAtomicLoadToCmpXchg(LI);
+}
+
+bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
auto TLI = TM->getSubtargetImpl()->getTargetLowering();
- // If getInsertFencesForAtomic() returns true, then the target does not want
- // to deal with memory orders, and emitLeading/TrailingFence should take care
- // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
- // should preserve the ordering.
- AtomicOrdering MemOpOrder =
- TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
IRBuilder<> Builder(LI);
- // Note that although no fence is required before atomic load on ARM, it is
- // required before SequentiallyConsistent loads for the recommended Power
- // mapping (see http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html).
- // So we let the target choose what to emit.
- TLI->emitLeadingFence(Builder, LI->getOrdering(),
- /*IsStore=*/false, /*IsLoad=*/true);
-
- // The only 64-bit load guaranteed to be single-copy atomic by ARM is
- // an ldrexd (A3.5.3).
+ // On some architectures, load-linked instructions are atomic for larger
+ // sizes than normal loads. For example, the only 64-bit load guaranteed
+ // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
Value *Val =
- TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
-
- TLI->emitTrailingFence(Builder, LI->getOrdering(),
- /*IsStore=*/false, /*IsLoad=*/true);
+ TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
LI->replaceAllUsesWith(Val);
LI->eraseFromParent();
return true;
}
+bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
+ IRBuilder<> Builder(LI);
+ AtomicOrdering Order = LI->getOrdering();
+ Value *Addr = LI->getPointerOperand();
+ Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+ Constant *DummyVal = Constant::getNullValue(Ty);
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, DummyVal, DummyVal, Order,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+ Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+
+ LI->replaceAllUsesWith(Loaded);
+ LI->eraseFromParent();
+
+ return true;
+}
+
bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
// atomic if implemented as a native store. So we replace them by an
bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
auto TLI = TM->getSubtargetImpl()->getTargetLowering();
- AtomicOrdering FenceOrder = AI->getOrdering();
+ AtomicOrdering MemOpOrder = AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();
- // If getInsertFencesForAtomic() returns true, then the target does not want
- // to deal with memory orders, and emitLeading/TrailingFence should take care
- // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
- // should preserve the ordering.
- AtomicOrdering MemOpOrder =
- TLI->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
// Given: atomicrmw some_op iN* %addr, iN %incr ordering
//
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- TLI->emitLeadingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- TLI->emitTrailingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
}
bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
- auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
- AtomicOrdering FenceOrder =
- AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
AtomicOrdering MemOpOrder =
- TargetLowering->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
+ AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
Function *F = BB->getParent();
// the branch entirely.
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- TargetLowering->emitLeadingFence(Builder, FenceOrder,
- /*IsStore=*/true, /*IsLoad=*/true);
LoadInst *InitLoaded = Builder.CreateLoad(Addr);
// Atomics require at least natural alignment.
InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
Builder.CreateCondBr(Success, ExitBB, LoopBB);
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- TargetLowering->emitTrailingFence(Builder, FenceOrder,
- /*IsStore=*/true, /*IsLoad=*/true);
AI->replaceAllUsesWith(NewLoaded);
AI->eraseFromParent();
CI->eraseFromParent();
return true;
}
+
+bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
+ auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
+ if(!C)
+ return false;
+
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ switch(Op) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+ default:
+ return false;
+ }
+}
+
+bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
+ auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+
+ if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
+ if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
+ expandAtomicLoad(ResultingLoad);
+ return true;
+ }
+
+ return false;
+}