namespace {
class AtomicExpand: public FunctionPass {
const TargetMachine *TM;
+ const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
explicit AtomicExpand(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {
+ : FunctionPass(ID), TM(TM), TLI(nullptr) {
initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
}
bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
bool IsStore, bool IsLoad);
bool expandAtomicLoad(LoadInst *LI);
+ bool expandAtomicLoadToLL(LoadInst *LI);
+ bool expandAtomicLoadToCmpXchg(LoadInst *LI);
bool expandAtomicStore(StoreInst *SI);
- bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool tryExpandAtomicRMW(AtomicRMWInst *AI);
bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+ bool isIdempotentRMW(AtomicRMWInst *AI);
+ bool simplifyIdempotentRMW(AtomicRMWInst *AI);
};
}
}
bool AtomicExpand::runOnFunction(Function &F) {
- if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
+ if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
return false;
- auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
+ TLI = TM->getSubtargetImpl(F)->getTargetLowering();
SmallVector<Instruction *, 1> AtomicInsts;
auto FenceOrdering = Monotonic;
bool IsStore, IsLoad;
- if (TargetLowering->getInsertFencesForAtomic()) {
+ if (TLI->getInsertFencesForAtomic()) {
if (LI && isAtLeastAcquire(LI->getOrdering())) {
FenceOrdering = LI->getOrdering();
LI->setOrdering(Monotonic);
FenceOrdering = RMWI->getOrdering();
RMWI->setOrdering(Monotonic);
IsStore = IsLoad = true;
- } else if (CASI && !TargetLowering->hasLoadLinkedStoreConditional() &&
- (isAtLeastRelease(CASI->getSuccessOrdering()) ||
- isAtLeastAcquire(CASI->getSuccessOrdering()))) {
+ } else if (CASI && !TLI->hasLoadLinkedStoreConditional() &&
+ (isAtLeastRelease(CASI->getSuccessOrdering()) ||
+ isAtLeastAcquire(CASI->getSuccessOrdering()))) {
// If a compare and swap is lowered to LL/SC, we can do smarter fence
// insertion, with a stronger one on the success path than on the
// failure path. As a result, fence insertion is directly done by
}
}
- if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
+ if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) {
MadeChange |= expandAtomicLoad(LI);
- } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
+ } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
MadeChange |= expandAtomicStore(SI);
- } else if (RMWI && TargetLowering->shouldExpandAtomicRMWInIR(RMWI)) {
- MadeChange |= expandAtomicRMW(RMWI);
- } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
+ } else if (RMWI) {
+ // There are two different ways of expanding RMW instructions:
+ // - into a load if it is idempotent
+ // - into a Cmpxchg/LL-SC loop otherwise
+ // we try them in that order.
+
+ if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+ MadeChange = true;
+ } else {
+ MadeChange |= tryExpandAtomicRMW(RMWI);
+ }
+ } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
MadeChange |= expandAtomicCmpXchg(CASI);
}
}
bool IsStore, bool IsLoad) {
IRBuilder<> Builder(I);
- auto LeadingFence =
- TM->getSubtargetImpl()->getTargetLowering()->emitLeadingFence(
- Builder, Order, IsStore, IsLoad);
+ auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad);
- auto TrailingFence =
- TM->getSubtargetImpl()->getTargetLowering()->emitTrailingFence(
- Builder, Order, IsStore, IsLoad);
+ auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad);
// The trailing fence is emitted before the instruction instead of after
// because there is no easy way of setting Builder insertion point after
// an instruction. So we must erase it from the BB, and insert it back
}
bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+ if (TLI->hasLoadLinkedStoreConditional())
+ return expandAtomicLoadToLL(LI);
+ else
+ return expandAtomicLoadToCmpXchg(LI);
+}
+
+bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
IRBuilder<> Builder(LI);
// On some architectures, load-linked instructions are atomic for larger
return true;
}
+bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
+ IRBuilder<> Builder(LI);
+ AtomicOrdering Order = LI->getOrdering();
+ Value *Addr = LI->getPointerOperand();
+ Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
+ Constant *DummyVal = Constant::getNullValue(Ty);
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, DummyVal, DummyVal, Order,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+ Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+
+ LI->replaceAllUsesWith(Loaded);
+ LI->eraseFromParent();
+
+ return true;
+}
+
bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
// This function is only called on atomic stores that are too large to be
// atomic if implemented as a native store. So we replace them by an
// atomic swap, that can be implemented for example as a ldrex/strex on ARM
// or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
- // It is the responsibility of the target to only return true in
+ // It is the responsibility of the target to only signal expansion via
// shouldExpandAtomicRMW in cases where this is required and possible.
IRBuilder<> Builder(SI);
AtomicRMWInst *AI =
SI->eraseFromParent();
// Now we have an appropriate swap instruction, lower it as usual.
- return expandAtomicRMW(AI);
+ return tryExpandAtomicRMW(AI);
}
-bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
- if (TM->getSubtargetImpl()
- ->getTargetLowering()
- ->hasLoadLinkedStoreConditional())
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+ switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ case TargetLoweringBase::AtomicRMWExpansionKind::None:
+ return false;
+ case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: {
+ assert(TLI->hasLoadLinkedStoreConditional() &&
+ "TargetLowering requested we expand AtomicRMW instruction into "
+ "load-linked/store-conditional combos, but such instructions aren't "
+ "supported");
+
return expandAtomicRMWToLLSC(AI);
- else
+ }
+ case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: {
return expandAtomicRMWToCmpXchg(AI);
+ }
+ }
+ llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
}
/// Emit IR to implement the given atomicrmw operation on values in registers,
}
bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
AtomicOrdering MemOpOrder = AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
}
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
- auto TLI = TM->getSubtargetImpl()->getTargetLowering();
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
Value *Addr = CI->getPointerOperand();
Value *ShouldStore =
Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
- // If the the cmpxchg doesn't actually need any ordering when it fails, we can
+ // If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
CI->eraseFromParent();
return true;
}
+
+bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
+ auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
+ if(!C)
+ return false;
+
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ switch(Op) {
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::Xor:
+ return C->isZero();
+ case AtomicRMWInst::And:
+ return C->isMinusOne();
+ // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+ default:
+ return false;
+ }
+}
+
+bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
+ if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
+ if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad))
+ expandAtomicLoad(ResultingLoad);
+ return true;
+ }
+ return false;
+}