private:
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
bool IsStore, bool IsLoad);
- bool expandAtomicLoad(LoadInst *LI);
+ bool tryExpandAtomicLoad(LoadInst *LI);
bool expandAtomicLoadToLL(LoadInst *LI);
bool expandAtomicLoadToCmpXchg(LoadInst *LI);
bool expandAtomicStore(StoreInst *SI);
bool tryExpandAtomicRMW(AtomicRMWInst *AI);
- bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
+ bool expandAtomicOpToLLSC(
+ Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+ std::function<Value *(IRBuilder<> &, Value *)> PerformOp);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
bool isIdempotentRMW(AtomicRMWInst *AI);
bool simplifyIdempotentRMW(AtomicRMWInst *AI);
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(Monotonic);
IsStore = IsLoad = true;
- } else if (CASI && !TLI->hasLoadLinkedStoreConditional() &&
+ } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
(isAtLeastRelease(CASI->getSuccessOrdering()) ||
isAtLeastAcquire(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
}
}
- if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) {
- MadeChange |= expandAtomicLoad(LI);
+ if (LI) {
+ MadeChange |= tryExpandAtomicLoad(LI);
} else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
MadeChange |= expandAtomicStore(SI);
} else if (RMWI) {
} else {
MadeChange |= tryExpandAtomicRMW(RMWI);
}
- } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
+ } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) {
MadeChange |= expandAtomicCmpXchg(CASI);
}
}
return (LeadingFence || TrailingFence);
}
-bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
- if (TLI->hasLoadLinkedStoreConditional())
+bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { // Picks the lowering the target requests for LI.
+ switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false; // No expansion requested; nothing is changed.
+ case TargetLoweringBase::AtomicExpansionKind::LLSC:
+ return expandAtomicOpToLLSC(
+ LI, LI->getPointerOperand(), LI->getOrdering(),
+ [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); // Identity op: the value handed to the store-conditional is the loaded value itself.
+ case TargetLoweringBase::AtomicExpansionKind::LLOnly:
return expandAtomicLoadToLL(LI);
- else
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
return expandAtomicLoadToCmpXchg(LI);
+ }
+ llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); // Every case above returns, so this is reached only for a kind with no case.
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
// to be single-copy atomic by ARM is an ldrexd (A3.5.3).
Value *Val =
TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
LI->replaceAllUsesWith(Val);
LI->eraseFromParent();
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}
-bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
- switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
- case TargetLoweringBase::AtomicRMWExpansionKind::None:
- return false;
- case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: {
- assert(TLI->hasLoadLinkedStoreConditional() &&
- "TargetLowering requested we expand AtomicRMW instruction into "
- "load-linked/store-conditional combos, but such instructions aren't "
- "supported");
-
- return expandAtomicRMWToLLSC(AI);
- }
- case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
- return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
- }
- }
- llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
-}
-
/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
}
}
-bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
- AtomicOrdering MemOpOrder = AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { // Picks the lowering the target requests for AI.
+ switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false; // No expansion requested; nothing is changed.
+ case TargetLoweringBase::AtomicExpansionKind::LLSC:
+ return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
+ [&](IRBuilder<> &Builder, Value *Loaded) { // Callback: derives the value to store from the load-linked result.
+ return performAtomicOp(AI->getOperation(),
+ Builder, Loaded,
+ AI->getValOperand());
+ });
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
+ return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+ default: // Any kind without a case above (e.g. LLOnly) is treated as a programming error.
+ llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
+ }
+}
+
+bool AtomicExpand::expandAtomicOpToLLSC(
+ Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+ std::function<Value *(IRBuilder<> &, Value *)> PerformOp) {
+ BasicBlock *BB = I->getParent();
Function *F = BB->getParent();
LLVMContext &Ctx = F->getContext();
// atomicrmw.end:
// fence?
// [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end");
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
+ // This grabs the DebugLoc from I.
+ IRBuilder<> Builder(I);
// The split call above "helpfully" added a branch at the end of BB (to the
// wrong place), but we might want a fence too. It's easiest to just remove
Builder.SetInsertPoint(LoopBB);
Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
- Value *NewVal =
- performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+ Value *NewVal = PerformOp(Builder, Loaded);
Value *StoreSuccess =
TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
- AI->replaceAllUsesWith(Loaded);
- AI->eraseFromParent();
+ I->replaceAllUsesWith(Loaded);
+ I->eraseFromParent();
return true;
}
// %loaded = @load.linked(%addr)
// %should_store = icmp eq %loaded, %desired
// br i1 %should_store, label %cmpxchg.trystore,
- // label %cmpxchg.failure
+ // label %cmpxchg.nostore
// cmpxchg.trystore:
// %stored = @store_conditional(%new, %addr)
// %success = icmp eq i32 %stored, 0
// cmpxchg.success:
// fence?
// br label %cmpxchg.end
+ // cmpxchg.nostore:
+ // @load_linked_fail_balance()?
+ // br label %cmpxchg.failure
// cmpxchg.failure:
// fence?
// br label %cmpxchg.end
// %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
// %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
// [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
+ BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
- auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+ auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
Builder.SetInsertPoint(TryStoreBB);
Value *StoreSuccess = TLI->emitStoreConditional(
/*IsLoad=*/true);
Builder.CreateBr(ExitBB);
+ Builder.SetInsertPoint(NoStoreBB);
+ // In the failing case, where we don't execute the store-conditional, the
+ // target might want to balance out the load-linked with a dedicated
+ // instruction (e.g., on ARM, clearing the exclusive monitor).
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
+ Builder.CreateBr(FailureBB);
+
Builder.SetInsertPoint(FailureBB);
TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
/*IsLoad=*/true);
bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
- if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
- expandAtomicLoad(ResultingLoad);
+ tryExpandAtomicLoad(ResultingLoad);
return true;
}
return false;
// br i1 %success, label %atomicrmw.end, label %loop
// atomicrmw.end:
// [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end");
BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
// This grabs the DebugLoc from AI.
Builder.SetInsertPoint(BB);
LoadInst *InitLoaded = Builder.CreateLoad(Addr);
// Atomics require at least natural alignment.
- InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
+ InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.