Atomics: promote ARM's IR-based atomics pass to CodeGen.
lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// appropriate (intrinsic-based) ldrex/strex loops.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-atomic-expand"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

namespace {
  class AtomicExpandLoadLinked : public FunctionPass {
    const TargetLowering *TLI;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr)
      : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : nullptr) {
      initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
    }

    bool runOnFunction(Function &F) override;
    bool expandAtomicInsts(Function &F);

    bool expandAtomicLoad(LoadInst *LI);
    bool expandAtomicStore(StoreInst *SI);
    bool expandAtomicRMW(AtomicRMWInst *AI);
    bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);

    AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
    void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
  };
}

char AtomicExpandLoadLinked::ID = 0;
char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;

static void *initializeAtomicExpandLoadLinkedPassOnce(PassRegistry &Registry) {
  PassInfo *PI = new PassInfo(
      "Expand Atomic calls in terms of load-linked & store-conditional",
      "atomic-ll-sc", &AtomicExpandLoadLinked::ID,
      PassInfo::NormalCtor_t(callDefaultCtor<AtomicExpandLoadLinked>), false,
      false, PassInfo::TargetMachineCtor_t(
                 callTargetMachineCtor<AtomicExpandLoadLinked>));
  Registry.registerPass(*PI, true);
  return PI;
}

void llvm::initializeAtomicExpandLoadLinkedPass(PassRegistry &Registry) {
  CALL_ONCE_INITIALIZATION(initializeAtomicExpandLoadLinkedPassOnce)
}

FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
  return new AtomicExpandLoadLinked(TM);
}

bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
  if (!TLI)
    return false;

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather a
  // list of all atomic instructions before we start.
  for (BasicBlock &BB : F)
    for (Instruction &Inst : BB) {
      if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
          (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
        AtomicInsts.push_back(&Inst);
    }

  bool MadeChange = false;
  for (Instruction *Inst : AtomicInsts) {
    if (!TLI->shouldExpandAtomicInIR(Inst))
      continue;

    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
      MadeChange |= expandAtomicRMW(AI);
    else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
      MadeChange |= expandAtomicCmpXchg(CI);
    else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
      MadeChange |= expandAtomicLoad(LI);
    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      MadeChange |= expandAtomicStore(SI);
    else
      llvm_unreachable("Unknown atomic instruction");
  }

  return MadeChange;
}

bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
  // Load instructions don't actually need a leading fence, even in the
  // SequentiallyConsistent case.
  AtomicOrdering MemOpOrder =
    TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();

  // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
  // an ldrexd (A3.5.3).
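  // Illustrative shape of the result on an LL/SC target such as ARM (an
  // assumed example, not target-independent output):
  //     "%v = load atomic i32* %p acquire, align 4"
  // becomes roughly
  //     %v = call i32 @llvm.arm.ldrex.p0i32(i32* %p)
  //     fence acquire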
  IRBuilder<> Builder(LI);
  Value *Val =
      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);

  insertTrailingFence(Builder, LI->getOrdering());

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) {
  // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
  // we need a loop and the entire instruction is essentially an "atomicrmw
  // xchg" that ignores the value loaded.
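  // E.g. "store atomic i32 %v, i32* %p release, align 4" is rewritten as
  // "atomicrmw xchg i32* %p, i32 %v release", whose loaded result is unused.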
  IRBuilder<> Builder(SI);
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), SI->getOrdering());
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  return expandAtomicRMW(AI);
}

bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
  AtomicOrdering Order = AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     fence?
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp ne i32 %stored, 0
  //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
  // atomicrmw.end:
  //     fence?
  //     [...]
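  // For example, on ARM an "atomicrmw add i32* %p, i32 1 seq_cst" should end
  // up, once the intrinsics are selected, as roughly
  // "dmb ish; ldrex/add/strex retry loop; dmb ish".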
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);

  Value *NewVal;
  switch (AI->getOperation()) {
  case AtomicRMWInst::Xchg:
    NewVal = AI->getValOperand();
    break;
  case AtomicRMWInst::Add:
    NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Sub:
    NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::And:
    NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Nand:
    // Per the LangRef, nand computes ~(old & val), not (old & ~val).
    NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
                               "new");
    break;
  case AtomicRMWInst::Or:
    NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Xor:
    NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand());
    NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new");
    break;
  default:
    llvm_unreachable("Unknown atomic op");
  }

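  // emitStoreConditional returns an i32 status that is 0 on success (the
  // strex convention on ARM); any non-zero value means the exclusive
  // reservation was lost, so the loop must retry.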
  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  insertTrailingFence(Builder, Order);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();

  return true;
}

bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: cmpxchg iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     fence?
  // cmpxchg.start:
  //     %loaded = @load.linked(%addr)
  //     %should_store = icmp eq %loaded, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.barrier (or %cmpxchg.end if the
  //                          failure ordering is monotonic)
  // cmpxchg.trystore:
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp ne i32 %stored, 0
  //     br i1 %try_again, label %cmpxchg.start, label %cmpxchg.barrier
  // cmpxchg.barrier:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     [...]
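  // For example, "cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst" on ARM
  // should become roughly "dmb ish; loop { ldrex, compare, strex }; dmb ish".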
  BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
  auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, ExitBB);
  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, BarrierBB);
  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);

  // This grabs the DebugLoc from CI.
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
  Value *ShouldStore =
      Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB;
  Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);

  Builder.SetInsertPoint(TryStoreBB);
  Value *StoreSuccess = TLI->emitStoreConditional(
      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);

  // Finally, make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(BarrierBB);
  insertTrailingFence(Builder, SuccessOrder);
  Builder.CreateBr(ExitBB);

  CI->replaceAllUsesWith(Loaded);
  CI->eraseFromParent();

  return true;
}

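// When the target asks for explicit fences (getInsertFencesForAtomic), the
// LL/SC sequence itself runs with monotonic ordering, and the requested
// semantics are recovered by the two helpers below: a release fence before
// any releasing operation, and an acquire or seq_cst fence after any
// acquiring one.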
AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
                                                          AtomicOrdering Ord) {
  if (!TLI->getInsertFencesForAtomic())
    return Ord;

  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    Builder.CreateFence(Release);

  // The exclusive operations don't need any barrier if we're adding separate
  // fences.
  return Monotonic;
}


void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
                                                 AtomicOrdering Ord) {
  if (!TLI->getInsertFencesForAtomic())
    return;

  if (Ord == Acquire || Ord == AcquireRelease)
    Builder.CreateFence(Acquire);
  else if (Ord == SequentiallyConsistent)
    Builder.CreateFence(SequentiallyConsistent);
}