//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
+// either (intrinsic-based) load-linked/store-conditional loops or
+// AtomicCmpXchg.
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
namespace {
class AtomicExpand: public FunctionPass {
const TargetMachine *TM;
+ const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
explicit AtomicExpand(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {
+ : FunctionPass(ID), TM(TM), TLI(nullptr) {
initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
}
private:
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
bool IsStore, bool IsLoad);
- bool expandAtomicLoad(LoadInst *LI);
+ bool tryExpandAtomicLoad(LoadInst *LI);
+ bool expandAtomicLoadToLL(LoadInst *LI);
+ bool expandAtomicLoadToCmpXchg(LoadInst *LI);
bool expandAtomicStore(StoreInst *SI);
- bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool tryExpandAtomicRMW(AtomicRMWInst *AI);
bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
- bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+ bool isIdempotentRMW(AtomicRMWInst *AI);
+ bool simplifyIdempotentRMW(AtomicRMWInst *AI);
};
}
}
bool AtomicExpand::runOnFunction(Function &F) {
- if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
+ if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
return false;
- auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
SmallVector<Instruction *, 1> AtomicInsts;
auto FenceOrdering = Monotonic;
bool IsStore, IsLoad;
- if (TargetLowering->getInsertFencesForAtomic()) {
+ if (TLI->getInsertFencesForAtomic()) {
if (LI && isAtLeastAcquire(LI->getOrdering())) {
FenceOrdering = LI->getOrdering();
LI->setOrdering(Monotonic);
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(Monotonic);
IsStore = IsLoad = true;
- } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
- (isAtLeastRelease(CASI->getSuccessOrdering()) ||
- isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+ } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
+ (isAtLeastRelease(CASI->getSuccessOrdering()) ||
+ isAtLeastAcquire(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
// insertion, with a stronger one on the success path than on the
// failure path. As a result, fence insertion is directly done by
}
}
- if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
- MadeChange |= expandAtomicLoad(LI);
- } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
+ if (LI) {
+ MadeChange |= tryExpandAtomicLoad(LI);
+ } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
MadeChange |= expandAtomicStore(SI);
- } else if (RMWI && TargetLowering->shouldExpandAtomicRMWInIR(RMWI)) {
- MadeChange |= expandAtomicRMW(RMWI);
- } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
+ } else if (RMWI) {
+ // There are two different ways of expanding RMW instructions:
+ // - into a load if it is idempotent
+ // - into a Cmpxchg/LL-SC loop otherwise
+ // we try them in that order.
+
+ if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+ MadeChange = true;
+ } else {
+ MadeChange |= tryExpandAtomicRMW(RMWI);
+ }
+ } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) {
MadeChange |= expandAtomicCmpXchg(CASI);
}
}
bool IsStore, bool IsLoad) {
IRBuilder<> Builder(I);
- auto LeadingFence =
- TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
- Builder, Order, IsStore, IsLoad);
+ auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad);
- auto TrailingFence =
- TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
- Builder, Order, IsStore, IsLoad);
+ auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad);
// The trailing fence is emitted before the instruction instead of after
// because there is no easy way of setting Builder insertion point after
// an instruction. So we must erase it from the BB, and insert it back
return (LeadingFence || TrailingFence);
}
-bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
+ switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+ return expandAtomicLoadToLL(LI);
+ }
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
+ return expandAtomicLoadToCmpXchg(LI);
+ }
+ }
+ llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
+}
+
+bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
IRBuilder<> Builder(LI);
// On some architectures, load-linked instructions are atomic for larger
return true;
}
+bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
+ IRBuilder<> Builder(LI);
+ AtomicOrdering Order = LI->getOrdering();
+ Value *Addr = LI->getPointerOperand();
+ Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+ Constant *DummyVal = Constant::getNullValue(Ty);
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, DummyVal, DummyVal, Order,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+ Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+
+ LI->replaceAllUsesWith(Loaded);
+ LI->eraseFromParent();
+
+ return true;
+}
+
bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
// atomic if implemented as a native store. So we replace them by an
// atomic swap, that can be implemented for example as a ldrex/strex on ARM
// or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
- // It is the responsibility of the target to only return true in
+ // It is the responsibility of the target to only signal expansion via
// shouldExpandAtomicRMW in cases where this is required and possible.
IRBuilder<> Builder(SI);
AtomicRMWInst *AI =
SI->eraseFromParent();
// Now we have an appropriate swap instruction, lower it as usual.
- return expandAtomicRMW(AI);
+ return tryExpandAtomicRMW(AI);
+}
+
+static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
+ Value *Loaded, Value *NewVal,
+ AtomicOrdering MemOpOrder,
+ Value *&Success, Value *&NewLoaded) {
+ Value* Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, MemOpOrder,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
+ Success = Builder.CreateExtractValue(Pair, 1, "success");
+ NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}
-bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
- if (TM->getSubtargetImpl()
- ->getTargetLowering()
- ->hasLoadLinkedStoreConditional())
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+ switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ case TargetLoweringBase::AtomicExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicExpansionKind::LLSC: {
return expandAtomicRMWToLLSC(AI);
- else
- return expandAtomicRMWToCmpXchg(AI);
+ }
+ case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
+ return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+ }
+ }
+ llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
/// Emit IR to implement the given atomicrmw operation on values in registers,
}
bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
AtomicOrdering MemOpOrder = AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
return true;
}
-bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
- AtomicOrdering MemOpOrder =
- AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
- Value *Addr = AI->getPointerOperand();
- BasicBlock *BB = AI->getParent();
- Function *F = BB->getParent();
- LLVMContext &Ctx = F->getContext();
-
- // Given: atomicrmw some_op iN* %addr, iN %incr ordering
- //
- // The standard expansion we produce is:
- // [...]
- // %init_loaded = load atomic iN* %addr
- // br label %loop
- // loop:
- // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
- // %new = some_op iN %loaded, %incr
- // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
- // %new_loaded = extractvalue { iN, i1 } %pair, 0
- // %success = extractvalue { iN, i1 } %pair, 1
- // br i1 %success, label %atomicrmw.end, label %loop
- // atomicrmw.end:
- // [...]
- BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
- BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
-
- // This grabs the DebugLoc from AI.
- IRBuilder<> Builder(AI);
-
- // The split call above "helpfully" added a branch at the end of BB (to the
- // wrong place), but we want a load. It's easiest to just remove
- // the branch entirely.
- std::prev(BB->end())->eraseFromParent();
- Builder.SetInsertPoint(BB);
- LoadInst *InitLoaded = Builder.CreateLoad(Addr);
- // Atomics require at least natural alignment.
- InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
- Builder.CreateBr(LoopBB);
-
- // Start the main loop block now that we've taken care of the preliminaries.
- Builder.SetInsertPoint(LoopBB);
- PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
- Loaded->addIncoming(InitLoaded, BB);
-
- Value *NewVal =
- performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
-
- Value *Pair = Builder.CreateAtomicCmpXchg(
- Addr, Loaded, NewVal, MemOpOrder,
- AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
- Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
- Loaded->addIncoming(NewLoaded, LoopBB);
-
- Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
- Builder.CreateCondBr(Success, ExitBB, LoopBB);
-
- Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-
- AI->replaceAllUsesWith(NewLoaded);
- AI->eraseFromParent();
-
- return true;
-}
-
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
Value *Addr = CI->getPointerOperand();
Value *ShouldStore =
Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
- // If the the cmpxchg doesn't actually need any ordering when it fails, we can
+ // If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
CI->eraseFromParent();
return true;
}
+
+bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
+ auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
+ if(!C)
+ return false;
+
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ switch(Op) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+ default:
+ return false;
+ }
+}
+
+bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
+ if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
+ tryExpandAtomicLoad(ResultingLoad);
+ return true;
+ }
+ return false;
+}
+
+bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
+ CreateCmpXchgInstFun CreateCmpXchg) {
+ assert(AI);
+
+ AtomicOrdering MemOpOrder =
+ AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // %init_loaded = load atomic iN* %addr
+ // br label %loop
+ // loop:
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+ // %new = some_op iN %loaded, %incr
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
+ // %success = extractvalue { iN, i1 } %pair, 1
+ // br i1 %success, label %atomicrmw.end, label %loop
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we want a load. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+ // Atomics require at least natural alignment.
+ InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+ Loaded->addIncoming(InitLoaded, BB);
+
+ Value *NewVal =
+ performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+ Value *NewLoaded = nullptr;
+ Value *Success = nullptr;
+
+ CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder,
+ Success, NewLoaded);
+ assert(Success && NewLoaded);
+
+ Loaded->addIncoming(NewLoaded, LoopBB);
+
+ Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+
+ AI->replaceAllUsesWith(NewLoaded);
+ AI->eraseFromParent();
+
+ return true;
+}