lib/CodeGen/AtomicExpandPass.cpp

   1 //===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file contains a pass (at IR level) to replace atomic instructions with
  11 // either (intrinsic-based) ldrex/strex loops or AtomicCmpXchg.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/CodeGen/Passes.h"
  16 #include "llvm/IR/Function.h"
  17 #include "llvm/IR/IRBuilder.h"
  18 #include "llvm/IR/InstIterator.h"
  19 #include "llvm/IR/Instructions.h"
  20 #include "llvm/IR/Intrinsics.h"
  21 #include "llvm/IR/Module.h"
  22 #include "llvm/Support/Debug.h"
  23 #include "llvm/Target/TargetLowering.h"
  24 #include "llvm/Target/TargetMachine.h"
  25 #include "llvm/Target/TargetSubtargetInfo.h"
  26
  27 using namespace llvm;
  28
  29 #define DEBUG_TYPE "atomic-expand"
  30
  31 namespace {
  32   class AtomicExpand: public FunctionPass {
  33     const TargetMachine *TM;
  34   public:
  35     static char ID; // Pass identification, replacement for typeid
  36     explicit AtomicExpand(const TargetMachine *TM = nullptr)
  37       : FunctionPass(ID), TM(TM) {
  38       initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
  39     }
  40
  41     bool runOnFunction(Function &F) override;
  42
  43   private:
  44     bool expandAtomicLoad(LoadInst *LI);
  45     bool expandAtomicStore(StoreInst *SI);
  46     bool expandAtomicRMW(AtomicRMWInst *AI);
  47     bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
  48     bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
  49     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  50   };
  51 }
  52
  53 char AtomicExpand::ID = 0;
  54 char &llvm::AtomicExpandID = AtomicExpand::ID;
  55 INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand",
  56     "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg",
  57     false, false)
  58
  59 FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
  60   return new AtomicExpand(TM);
  61 }
  62
  63 bool AtomicExpand::runOnFunction(Function &F) {
  64   if (!TM || !TM->getSubtargetImpl()->enableAtomicExpand())
  65     return false;
  66   auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
  67
  68   SmallVector<Instruction *, 1> AtomicInsts;
  69
  70   // Changing control-flow while iterating through it is a bad idea, so gather a
  71   // list of all atomic instructions before we start.
  72   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
  73     if (I->isAtomic())
  74       AtomicInsts.push_back(&*I);
  75   }
  76
  77   bool MadeChange = false;
  78   for (auto I : AtomicInsts) {
  79     auto LI = dyn_cast<LoadInst>(I);
  80     auto SI = dyn_cast<StoreInst>(I);
  81     auto RMWI = dyn_cast<AtomicRMWInst>(I);
  82     auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
  83
  84     assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) &&
  85            "Unknown atomic instruction");
  86
  87     if (LI && TargetLowering->shouldExpandAtomicLoadInIR(LI)) {
  88       MadeChange |= expandAtomicLoad(LI);
  89     } else if (SI && TargetLowering->shouldExpandAtomicStoreInIR(SI)) {
  90       MadeChange |= expandAtomicStore(SI);
  91     } else if (RMWI && TargetLowering->shouldExpandAtomicRMWInIR(RMWI)) {
  92       MadeChange |= expandAtomicRMW(RMWI);
  93     } else if (CASI && TargetLowering->hasLoadLinkedStoreConditional()) {
  94       MadeChange |= expandAtomicCmpXchg(CASI);
  95     }
  96   }
  97   return MadeChange;
  98 }
  99
 100 bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
 101   auto TLI = TM->getSubtargetImpl()->getTargetLowering();
 102   // If getInsertFencesForAtomic() returns true, then the target does not want
 103   // to deal with memory orders, and emitLeading/TrailingFence should take care
 104   // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
 105   // should preserve the ordering.
 106   AtomicOrdering MemOpOrder =
 107       TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
 108   IRBuilder<> Builder(LI);
 109
 110   // Note that although no fence is required before atomic load on ARM, it is
 111   // required before SequentiallyConsistent loads for the recommended Power
 112   // mapping (see http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html).
 113   // So we let the target choose what to emit.
 114   TLI->emitLeadingFence(Builder, LI->getOrdering(),
 115                         /*IsStore=*/false, /*IsLoad=*/true);
 116
 117   // The only 64-bit load guaranteed to be single-copy atomic by ARM is
 118   // an ldrexd (A3.5.3).
 119   Value *Val =
 120       TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
 121
 122   TLI->emitTrailingFence(Builder, LI->getOrdering(),
 123                          /*IsStore=*/false, /*IsLoad=*/true);
 124
 125   LI->replaceAllUsesWith(Val);
 126   LI->eraseFromParent();
 127
 128   return true;
 129 }
 130
 131 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
 132   // This function is only called on atomic stores that are too large to be
 133   // atomic if implemented as a native store. So we replace them by an
 134   // atomic swap, that can be implemented for example as a ldrex/strex on ARM
 135   // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
 136   // It is the responsibility of the target to only return true in
 137   // shouldExpandAtomicRMW in cases where this is required and possible.
 138   IRBuilder<> Builder(SI);
 139   AtomicRMWInst *AI =
 140       Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
 141                               SI->getValueOperand(), SI->getOrdering());
 142   SI->eraseFromParent();
 143
 144   // Now we have an appropriate swap instruction, lower it as usual.
 145   return expandAtomicRMW(AI);
 146 }
 147
 148 bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
 149   if (TM->getSubtargetImpl()
 150           ->getTargetLowering()
 151           ->hasLoadLinkedStoreConditional())
 152     return expandAtomicRMWToLLSC(AI);
 153   else
 154     return expandAtomicRMWToCmpXchg(AI);
 155 }
 156
 157 /// Emit IR to implement the given atomicrmw operation on values in registers,
 158 /// returning the new value.
 159 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
 160                               Value *Loaded, Value *Inc) {
 161   Value *NewVal;
 162   switch (Op) {
 163   case AtomicRMWInst::Xchg:
 164     return Inc;
 165   case AtomicRMWInst::Add:
 166     return Builder.CreateAdd(Loaded, Inc, "new");
 167   case AtomicRMWInst::Sub:
 168     return Builder.CreateSub(Loaded, Inc, "new");
 169   case AtomicRMWInst::And:
 170     return Builder.CreateAnd(Loaded, Inc, "new");
 171   case AtomicRMWInst::Nand:
 172     return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
 173   case AtomicRMWInst::Or:
 174     return Builder.CreateOr(Loaded, Inc, "new");
 175   case AtomicRMWInst::Xor:
 176     return Builder.CreateXor(Loaded, Inc, "new");
 177   case AtomicRMWInst::Max:
 178     NewVal = Builder.CreateICmpSGT(Loaded, Inc);
 179     return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
 180   case AtomicRMWInst::Min:
 181     NewVal = Builder.CreateICmpSLE(Loaded, Inc);
 182     return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
 183   case AtomicRMWInst::UMax:
 184     NewVal = Builder.CreateICmpUGT(Loaded, Inc);
 185     return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
 186   case AtomicRMWInst::UMin:
 187     NewVal = Builder.CreateICmpULE(Loaded, Inc);
 188     return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
 189   default:
 190     llvm_unreachable("Unknown atomic op");
 191   }
 192 }
 193
 194 bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
 195   auto TLI = TM->getSubtargetImpl()->getTargetLowering();
 196   AtomicOrdering FenceOrder = AI->getOrdering();
 197   Value *Addr = AI->getPointerOperand();
 198   BasicBlock *BB = AI->getParent();
 199   Function *F = BB->getParent();
 200   LLVMContext &Ctx = F->getContext();
 201   // If getInsertFencesForAtomic() returns true, then the target does not want
 202   // to deal with memory orders, and emitLeading/TrailingFence should take care
 203   // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
 204   // should preserve the ordering.
 205   AtomicOrdering MemOpOrder =
 206       TLI->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
 207
 208   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
 209   //
 210   // The standard expansion we produce is:
 211   //     [...]
 212   //     fence?
 213   // atomicrmw.start:
 214   //     %loaded = @load.linked(%addr)
 215   //     %new = some_op iN %loaded, %incr
 216   //     %stored = @store_conditional(%new, %addr)
 217   //     %try_again = icmp i32 ne %stored, 0
 218   //     br i1 %try_again, label %loop, label %atomicrmw.end
 219   // atomicrmw.end:
 220   //     fence?
 221   //     [...]
 222   BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
 223   BasicBlock *LoopBB =  BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
 224
 225   // This grabs the DebugLoc from AI.
 226   IRBuilder<> Builder(AI);
 227
 228   // The split call above "helpfully" added a branch at the end of BB (to the
 229   // wrong place), but we might want a fence too. It's easiest to just remove
 230   // the branch entirely.
 231   std::prev(BB->end())->eraseFromParent();
 232   Builder.SetInsertPoint(BB);
 233   TLI->emitLeadingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
 234   Builder.CreateBr(LoopBB);
 235
 236   // Start the main loop block now that we've taken care of the preliminaries.
 237   Builder.SetInsertPoint(LoopBB);
 238   Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
 239
 240   Value *NewVal =
 241       performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
 242
 243   Value *StoreSuccess =
 244       TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
 245   Value *TryAgain = Builder.CreateICmpNE(
 246       StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
 247   Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
 248
 249   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
 250   TLI->emitTrailingFence(Builder, FenceOrder, /*IsStore=*/true, /*IsLoad=*/true);
 251
 252   AI->replaceAllUsesWith(Loaded);
 253   AI->eraseFromParent();
 254
 255   return true;
 256 }
 257
 258 bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) {
 259   auto TargetLowering = TM->getSubtargetImpl()->getTargetLowering();
 260   AtomicOrdering FenceOrder =
 261       AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
 262   AtomicOrdering MemOpOrder =
 263       TargetLowering->getInsertFencesForAtomic() ? Monotonic : FenceOrder;
 264   Value *Addr = AI->getPointerOperand();
 265   BasicBlock *BB = AI->getParent();
 266   Function *F = BB->getParent();
 267   LLVMContext &Ctx = F->getContext();
 268
 269   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
 270   //
 271   // The standard expansion we produce is:
 272   //     [...]
 273   //     %init_loaded = load atomic iN* %addr
 274   //     br label %loop
 275   // loop:
 276   //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
 277   //     %new = some_op iN %loaded, %incr
 278   //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
 279   //     %new_loaded = extractvalue { iN, i1 } %pair, 0
 280   //     %success = extractvalue { iN, i1 } %pair, 1
 281   //     br i1 %success, label %atomicrmw.end, label %loop
 282   // atomicrmw.end:
 283   //     [...]
 284   BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
 285   BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
 286
 287   // This grabs the DebugLoc from AI.
 288   IRBuilder<> Builder(AI);
 289
 290   // The split call above "helpfully" added a branch at the end of BB (to the
 291   // wrong place), but we want a load. It's easiest to just remove
 292   // the branch entirely.
 293   std::prev(BB->end())->eraseFromParent();
 294   Builder.SetInsertPoint(BB);
 295   TargetLowering->emitLeadingFence(Builder, FenceOrder,
 296                                    /*IsStore=*/true, /*IsLoad=*/true);
 297   LoadInst *InitLoaded = Builder.CreateLoad(Addr);
 298   // Atomics require at least natural alignment.
 299   InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits());
 300   Builder.CreateBr(LoopBB);
 301
 302   // Start the main loop block now that we've taken care of the preliminaries.
 303   Builder.SetInsertPoint(LoopBB);
 304   PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
 305   Loaded->addIncoming(InitLoaded, BB);
 306
 307   Value *NewVal =
 308       performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
 309
 310   Value *Pair = Builder.CreateAtomicCmpXchg(
 311       Addr, Loaded, NewVal, MemOpOrder,
 312       AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
 313   Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
 314   Loaded->addIncoming(NewLoaded, LoopBB);
 315
 316   Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
 317   Builder.CreateCondBr(Success, ExitBB, LoopBB);
 318
 319   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
 320   TargetLowering->emitTrailingFence(Builder, FenceOrder,
 321                                     /*IsStore=*/true, /*IsLoad=*/true);
 322
 323   AI->replaceAllUsesWith(NewLoaded);
 324   AI->eraseFromParent();
 325
 326   return true;
 327 }
 328
 329 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 330   auto TLI = TM->getSubtargetImpl()->getTargetLowering();
 331   AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
 332   AtomicOrdering FailureOrder = CI->getFailureOrdering();
 333   Value *Addr = CI->getPointerOperand();
 334   BasicBlock *BB = CI->getParent();
 335   Function *F = BB->getParent();
 336   LLVMContext &Ctx = F->getContext();
 337   // If getInsertFencesForAtomic() returns true, then the target does not want
 338   // to deal with memory orders, and emitLeading/TrailingFence should take care
 339   // of everything. Otherwise, emitLeading/TrailingFence are no-op and we
 340   // should preserve the ordering.
 341   AtomicOrdering MemOpOrder =
 342       TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
 343
 344   // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
 345   //
 346   // The full expansion we produce is:
 347   //     [...]
 348   //     fence?
 349   // cmpxchg.start:
 350   //     %loaded = @load.linked(%addr)
 351   //     %should_store = icmp eq %loaded, %desired
 352   //     br i1 %should_store, label %cmpxchg.trystore,
 353   //                          label %cmpxchg.failure
 354   // cmpxchg.trystore:
 355   //     %stored = @store_conditional(%new, %addr)
 356   //     %success = icmp eq i32 %stored, 0
 357   //     br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
 358   // cmpxchg.success:
 359   //     fence?
 360   //     br label %cmpxchg.end
 361   // cmpxchg.failure:
 362   //     fence?
 363   //     br label %cmpxchg.end
 364   // cmpxchg.end:
 365   //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
 366   //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
 367   //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
 368   //     [...]
 369   BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
 370   auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
 371   auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
 372   auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
 373   auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
 374
 375   // This grabs the DebugLoc from CI
 376   IRBuilder<> Builder(CI);
 377
 378   // The split call above "helpfully" added a branch at the end of BB (to the
 379   // wrong place), but we might want a fence too. It's easiest to just remove
 380   // the branch entirely.
 381   std::prev(BB->end())->eraseFromParent();
 382   Builder.SetInsertPoint(BB);
 383   TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
 384                         /*IsLoad=*/true);
 385   Builder.CreateBr(LoopBB);
 386
 387   // Start the main loop block now that we've taken care of the preliminaries.
 388   Builder.SetInsertPoint(LoopBB);
 389   Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
 390   Value *ShouldStore =
 391       Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
 392
 393   // If the the cmpxchg doesn't actually need any ordering when it fails, we can
 394   // jump straight past that fence instruction (if it exists).
 395   Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
 396
 397   Builder.SetInsertPoint(TryStoreBB);
 398   Value *StoreSuccess = TLI->emitStoreConditional(
 399       Builder, CI->getNewValOperand(), Addr, MemOpOrder);
 400   StoreSuccess = Builder.CreateICmpEQ(
 401       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
 402   Builder.CreateCondBr(StoreSuccess, SuccessBB,
 403                        CI->isWeak() ? FailureBB : LoopBB);
 404
 405   // Make sure later instructions don't get reordered with a fence if necessary.
 406   Builder.SetInsertPoint(SuccessBB);
 407   TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
 408                          /*IsLoad=*/true);
 409   Builder.CreateBr(ExitBB);
 410
 411   Builder.SetInsertPoint(FailureBB);
 412   TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
 413                          /*IsLoad=*/true);
 414   Builder.CreateBr(ExitBB);
 415
 416   // Finally, we have control-flow based knowledge of whether the cmpxchg
 417   // succeeded or not. We expose this to later passes by converting any
 418   // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
 419
 420   // Setup the builder so we can create any PHIs we need.
 421   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
 422   PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
 423   Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
 424   Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
 425
 426   // Look for any users of the cmpxchg that are just comparing the loaded value
 427   // against the desired one, and replace them with the CFG-derived version.
 428   SmallVector<ExtractValueInst *, 2> PrunedInsts;
 429   for (auto User : CI->users()) {
 430     ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
 431     if (!EV)
 432       continue;
 433
 434     assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
 435            "weird extraction from { iN, i1 }");
 436
 437     if (EV->getIndices()[0] == 0)
 438       EV->replaceAllUsesWith(Loaded);
 439     else
 440       EV->replaceAllUsesWith(Success);
 441
 442     PrunedInsts.push_back(EV);
 443   }
 444
 445   // We can remove the instructions now we're no longer iterating through them.
 446   for (auto EV : PrunedInsts)
 447     EV->eraseFromParent();
 448
 449   if (!CI->use_empty()) {
 450     // Some use of the full struct return that we don't understand has happened,
 451     // so we've got to reconstruct it properly.
 452     Value *Res;
 453     Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
 454     Res = Builder.CreateInsertValue(Res, Success, 1);
 455
 456     CI->replaceAllUsesWith(Res);
 457   }
 458
 459   CI->eraseFromParent();
 460   return true;
 461 }