1 //===- RSProfiling.cpp - Various profiling using random sampling ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // These passes implement a random sampling based profiling. Different methods
11 // of choosing when to sample are supported, as well as different types of
12 // profiling. This is done as two passes. The first is a sequence of profiling
13 // passes which insert profiling into the program, and remember what they
16 // The second stage duplicates all instructions in a function, ignoring the
17 // profiling code, then connects the two versions together at the entry and at
18 // backedges. At each connection point a choice is made as to whether to jump
19 // to the profiled code (take a sample) or execute the unprofiled code.
21 // It is highly recommended that after this pass one runs mem2reg and adce
22 // (instcombine load-vn gdce dse also are good to run afterwards)
24 // This design is intended to make the profiling passes independent of the RS
25 // framework, but any profiling pass that implements the RSProfiling interface
26 // is compatible with the rs framework (and thus can be sampled)
28 // TODO: obviously the block and function profiling are almost identical to the
29 // existing ones, so they can be unified (esp since these passes are valid
30 // without the rs framework).
31 // TODO: Fix choice code so that frequency is not hard coded
33 //===----------------------------------------------------------------------===//
35 #include "llvm/Pass.h"
36 #include "llvm/Module.h"
37 #include "llvm/Instructions.h"
38 #include "llvm/Constants.h"
39 #include "llvm/DerivedTypes.h"
40 #include "llvm/Transforms/Scalar.h"
41 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Compiler.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Transforms/Instrumentation.h"
46 #include "RSProfiling.h"
// Command-line option "profile-randomness": selects the policy used to decide
// when a sample is taken. The choices visible here are "global" (GBV),
// "ra_global" (GBVO) and "rdcc" (HOSTCC). ProfilerRS::doInitialization
// switches on this option when it constructs its Chooser.
58 cl::opt<RandomMeth> RandomMethod("profile-randomness",
59 cl::desc("How to randomly choose to profile:"),
61 clEnumValN(GBV, "global", "global counter"),
62 clEnumValN(GBVO, "ra_global",
63 "register allocated global counter"),
64 clEnumValN(HOSTCC, "rdcc", "cycle counter"),
67 /// NullProfilerRS - The basic profiler that does nothing. It is the default
68 /// profiler and thus terminates RSProfiler chains. It is useful for
69 /// measuring framework overhead
70 class VISIBILITY_HIDDEN NullProfilerRS : public RSProfilers {
// RSProfilers query: is the value v part of inserted profiling code?
// NOTE(review): presumably always answers "no" for the null profiler - confirm.
72 bool isProfiling(Value* v) {
// Module-level entry point of the pass (it is handed out as a ModulePass by
// llvm::createNullProfilerRSPass).
75 bool runOnModule(Module &M) {
// Reports this pass's analysis usage to the pass manager.
78 void getAnalysisUsage(AnalysisUsage &AU) const {
// Pass registration:
//  A   - declares the RSProfilers analysis group ("Profiling passes").
//  NP  - registers NullProfilerRS under the name "insert-null-profiling-rs".
//  NPT - adds NullProfilerRS to the RSProfilers group; the `true` template
//        argument presumably marks it as the group's default implementation,
//        matching the "default profiler" wording on the class - confirm.
83 static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
84 static RegisterPass<NullProfilerRS> NP("insert-null-profiling-rs",
85 "Measure profiling framework overhead");
86 static RegisterAnalysisGroup<RSProfilers, true> NPT(NP);
88 /// Chooser - Something that chooses when to make a sample of the profiled code
// Abstract interface; the concrete policies in this file are
// GlobalRandomCounter, GlobalRandomCounterOpt and CycleCounter.
89 class VISIBILITY_HIDDEN Chooser {
91 /// ProcessChoicePoint - is called for each basic block inserted to choose
92 /// between normal and sample code
// ProfilerRS::runOnFunction calls this for every block in its ChoicePoints
// set, which also includes the function's entry block.
93 virtual void ProcessChoicePoint(BasicBlock*) = 0;
94 /// PrepFunction - is called once per function before other work is done.
95 /// This gives the opportunity to insert new allocas and such.
96 virtual void PrepFunction(Function*) = 0;
100 //Things that implement sampling policies
101 //A global value that is read-mod-stored to choose when to sample.
102 //A sample is taken when the global counter hits 0
103 class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
// The module-level global ("RandomSteeringCounter") created by the constructor.
104 GlobalVariable* Counter;
// Creates the counter global of type t in M, initialised to resetval.
108 GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
109 virtual ~GlobalRandomCounter();
// Empty for this policy: no per-function setup is performed.
110 virtual void PrepFunction(Function* F);
// Inserts the load / compare-with-zero / decrement / store sequence before
// bb's terminator, plus a "reset" block on the edge to successor 0.
111 virtual void ProcessChoicePoint(BasicBlock* bb);
114 //Same as GRC, but allow register allocation of the global counter
// The global is cached in a per-function alloca (see PrepFunction) and the
// choice-point code operates on that local copy instead of the global.
115 class VISIBILITY_HIDDEN GlobalRandomCounterOpt : public Chooser {
// The module-level global ("RandomSteeringCounter") created by the constructor.
116 GlobalVariable* Counter;
121 GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
122 virtual ~GlobalRandomCounterOpt();
// Creates the local cache of the counter and inserts the code that keeps it
// in sync with the global around calls, invokes, unwinds and returns.
123 virtual void PrepFunction(Function* F);
// Like GlobalRandomCounter::ProcessChoicePoint, but reads and writes the
// local cache rather than the global.
124 virtual void ProcessChoicePoint(BasicBlock* bb);
127 //Use the cycle counter intrinsic as a source of pseudo randomness when
128 //deciding when to sample.
129 class VISIBILITY_HIDDEN CycleCounter : public Chooser {
// Obtains "llvm.readcyclecounter" from module m and remembers resetmask.
133 CycleCounter(Module& m, uint64_t resetmask);
134 virtual ~CycleCounter();
// Empty for this policy: no per-function setup is performed.
135 virtual void PrepFunction(Function* F);
// Inserts a call to the cycle counter before bb's terminator, masks the
// result with the reset mask and compares it against zero.
136 virtual void ProcessChoicePoint(BasicBlock* bb);
139 /// ProfilerRS - Insert the random sampling framework
140 struct VISIBILITY_HIDDEN ProfilerRS : public FunctionPass {
// Maps an original value to its translation (its duplicate, or the value
// itself when it is not duplicated, e.g. the entry block). Filled by Translate.
141 std::map<Value*, Value*> TransCache;
// Blocks handed to the Chooser's ProcessChoicePoint; cleared again by
// runOnFunction after they have been processed.
142 std::set<BasicBlock*> ChoicePoints;
145 //Translate and duplicate values for the new profile free version of stuff
146 Value* Translate(Value* v);
147 //Duplicate an entire function (without profiling)
148 void Duplicate(Function& F, RSProfilers& LI);
149 //Called once for each backedge, handle the insertion of choice points and
150 //the interconnection of the two versions of the code
151 void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F);
// Per-function driver: finds back edges, connects the two code versions and
// lets the Chooser rewrite every choice point.
152 bool runOnFunction(Function& F);
// Constructs the Chooser selected by the RandomMethod command-line option.
153 bool doInitialization(Module &M);
// Requires the RSProfilers analysis group and DemoteRegisterToMemoryID.
154 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
// Registers ProfilerRS with the pass registry under the name
// "insert-rs-profiling-framework".
157 RegisterPass<ProfilerRS> X("insert-rs-profiling-framework",
158 "Insert random sampling instrumentation framework");
// Forward declarations of the file-local helpers defined further down.
// ReplacePhiPred: in every phi of btarget, rename predecessor bold to bnew.
162 static void ReplacePhiPred(BasicBlock* btarget,
163 BasicBlock* bold, BasicBlock* bnew);
// CollapsePhi: drop duplicate incoming entries from the phis of btarget.
165 static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc);
// recBackEdge: recursive worker used by getBackEdges.
168 static void recBackEdge(BasicBlock* bb, T& BackEdges,
169 std::map<BasicBlock*, int>& color,
170 std::map<BasicBlock*, int>& depth,
171 std::map<BasicBlock*, int>& finish,
174 //find the back edges and where they go to
176 static void getBackEdges(Function& F, T& BackEdges);
179 ///////////////////////////////////////
180 // Methods of choosing when to profile
181 ///////////////////////////////////////
// Constructor: records the counter type t and creates the internal-linkage
// global "RandomSteeringCounter" of that type in module M, with resetval as
// its initial value.
183 GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t,
184 uint64_t resetval) : T(t) {
185 ConstantInt* Init = ConstantInt::get(T, resetval);
187 Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
188 Init, "RandomSteeringCounter", &M);
// Destructor: nothing to release here; the body is empty.
191 GlobalRandomCounter::~GlobalRandomCounter() {}
// This policy works directly on the global, so no per-function preparation
// is done (empty body).
193 void GlobalRandomCounter::PrepFunction(Function* F) {}
// Turns bb into a sampling decision based on the global counter.
// bb must end in a BranchInst (enforced by the cast<>). Before that branch
// the code loads the counter, compares it with zero, subtracts one and stores
// the result back. A new "reset" block is then placed on the edge to
// successor 0; it stores ResetValue into the counter and continues to the
// old successor.
// NOTE(review): the compare result `s` is presumably installed as the branch
// condition; that statement is not among the lines below - confirm.
195 void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
196 BranchInst* t = cast<BranchInst>(bb->getTerminator());
// counter value on entry to the choice point
199 LoadInst* l = new LoadInst(Counter, "counter", t);
// s: counter == 0
201 ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0),
// nv: counter - 1, written back to the global
204 Value* nv = BinaryOperator::createSub(l, ConstantInt::get(T, 1),
206 new StoreInst(nv, Counter, t);
// Route successor 0 through a block that re-arms the counter.
210 BasicBlock* oldnext = t->getSuccessor(0);
211 BasicBlock* resetblock = new BasicBlock("reset", oldnext->getParent(),
213 TerminatorInst* t2 = new BranchInst(oldnext, resetblock);
214 t->setSuccessor(0, resetblock);
215 new StoreInst(ResetValue, Counter, t2);
// oldnext is now reached from resetblock instead of bb; fix up its phis.
216 ReplacePhiPred(oldnext, bb, resetblock);
// Constructor: creates the internal-linkage global "RandomSteeringCounter"
// of type T in module M, initialised to resetval.
219 GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t,
222 ConstantInt* Init = ConstantInt::get(T, resetval);
224 Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
225 Init, "RandomSteeringCounter", &M);
// Destructor: nothing to release here; the body is empty.
228 GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {}
// Sets up the per-function local copy of the counter.
// An alloca ("localcounter") is created at the top of the entry block and
// filled from the global. The function body is then scanned and
// synchronisation code is inserted so the global is up to date whenever
// control may leave the function, and the local copy is refreshed afterwards.
230 void GlobalRandomCounterOpt::PrepFunction(Function* F) {
231 //make a local temporary to cache the global
232 BasicBlock& bb = F->getEntryBlock();
233 BasicBlock::iterator InsertPt = bb.begin();
234 AI = new AllocaInst(T, 0, "localcounter", InsertPt);
235 LoadInst* l = new LoadInst(Counter, "counterload", InsertPt);
236 new StoreInst(l, AI, InsertPt);
238 //around every call, invoke, unwind and return, copy the local variable
239 //to/from the global variable
240 for(Function::iterator fib = F->begin(), fie = F->end();
242 for(BasicBlock::iterator bib = fib->begin(), bie = fib->end();
244 if (isa<CallInst>(bib)) {
// call: write local -> global before it, read global -> local right after it
// (the ++bib / bib-- pair steps past the call and back again).
245 LoadInst* l = new LoadInst(AI, "counter", bib);
246 new StoreInst(l, Counter, bib);
247 l = new LoadInst(Counter, "counter", ++bib);
248 new StoreInst(l, AI, bib--);
249 } else if (isa<InvokeInst>(bib)) {
// invoke: write local -> global before it, then reload the local copy after
// the leading PHI nodes of the normal and the unwind destination.
250 LoadInst* l = new LoadInst(AI, "counter", bib);
251 new StoreInst(l, Counter, bib);
253 BasicBlock* bb = cast<InvokeInst>(bib)->getNormalDest();
254 BasicBlock::iterator i = bb->begin();
255 while (isa<PHINode>(i))
257 l = new LoadInst(Counter, "counter", i);
259 bb = cast<InvokeInst>(bib)->getUnwindDest();
261 while (isa<PHINode>(i)) ++i;
262 l = new LoadInst(Counter, "counter", i);
263 new StoreInst(l, AI, i);
264 } else if (isa<UnwindInst>(&*bib) || isa<ReturnInst>(&*bib)) {
// unwind / return: only the write-back local -> global is needed.
265 LoadInst* l = new LoadInst(AI, "counter", bib);
266 new StoreInst(l, Counter, bib);
// Same transformation as GlobalRandomCounter::ProcessChoicePoint, except that
// all loads and stores go to the local alloca AI instead of the global.
// bb must end in a BranchInst (enforced by the cast<>).
// NOTE(review): the compare result `s` is presumably installed as the branch
// condition; that statement is not among the lines below - confirm.
270 void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
271 BranchInst* t = cast<BranchInst>(bb->getTerminator());
// local counter value on entry to the choice point
274 LoadInst* l = new LoadInst(AI, "counter", t);
// s: counter == 0
276 ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0),
// nv: counter - 1, written back to the local copy
279 Value* nv = BinaryOperator::createSub(l, ConstantInt::get(T, 1),
281 new StoreInst(nv, AI, t);
// Route successor 0 through a block that re-arms the local counter.
285 BasicBlock* oldnext = t->getSuccessor(0);
286 BasicBlock* resetblock = new BasicBlock("reset", oldnext->getParent(),
288 TerminatorInst* t2 = new BranchInst(oldnext, resetblock);
289 t->setSuccessor(0, resetblock);
290 new StoreInst(ResetValue, AI, t2);
// oldnext is now reached from resetblock instead of bb; fix up its phis.
291 ReplacePhiPred(oldnext, bb, resetblock);
// Constructor: stores the mask in rm and obtains (declaring it in the module
// if needed) the function "llvm.readcyclecounter", declared here as returning
// a 64-bit integer and taking no arguments.
295 CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) {
296 F = m.getOrInsertFunction("llvm.readcyclecounter", Type::Int64Ty, NULL);
// Destructor: nothing to release here; the body is empty.
299 CycleCounter::~CycleCounter() {}
// The cycle-counter policy keeps no per-function state (empty body).
301 void CycleCounter::PrepFunction(Function* F) {}
// Turns bb into a sampling decision based on the cycle counter.
// Before bb's branch a call to the cycle counter function is inserted; its
// result is ANDed with the mask rm and the masked value is compared with zero.
// NOTE(review): `b` is presumably the result of the createAnd below, and `s`
// presumably becomes the branch condition; neither statement is fully visible
// in the lines below - confirm.
303 void CycleCounter::ProcessChoicePoint(BasicBlock* bb) {
304 BranchInst* t = cast<BranchInst>(bb->getTerminator());
306 CallInst* c = new CallInst(F, "rdcc", t);
308 BinaryOperator::createAnd(c, ConstantInt::get(Type::Int64Ty, rm),
311 ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b,
312 ConstantInt::get(Type::Int64Ty, 0),
318 ///////////////////////////////////////
320 ///////////////////////////////////////
// Answers whether v is profiling code. The value is first looked up in this
// profiler's own profcode set; if it is not found there, the question is
// forwarded to the RSProfilers analysis this pass requires (the next profiler
// in the chain).
// NOTE(review): a hit in profcode presumably returns true - the return
// statement is not among the lines below.
321 bool RSProfilers_std::isProfiling(Value* v) {
322 if (profcode.find(v) != profcode.end())
325 RSProfilers& LI = getAnalysis<RSProfilers>();
326 return LI.isProfiling(v);
// Inserts "CounterArray[CounterNum] += 1" into BB and records the inserted
// instructions as profiling code.
//   BB           - block that receives the increment
//   CounterNum   - index of the counter inside CounterArray
//   CounterArray - global holding the counters
// The load, add and store are all added to profcode so that isProfiling()
// recognises them later.
329 void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
330 GlobalValue *CounterArray) {
331 // Insert the increment after any alloca or PHI instructions...
332 BasicBlock::iterator InsertPos = BB->begin();
333 while (isa<AllocaInst>(InsertPos) || isa<PHINode>(InsertPos))
336 // Create the getelementptr constant expression
// Indices are {0, CounterNum}: first through the pointer to the global, then
// to the selected element.
337 std::vector<Constant*> Indices(2);
338 Indices[0] = Constant::getNullValue(Type::Int32Ty);
339 Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum);
340 Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray,
343 // Load, increment and store the value back.
344 Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
345 profcode.insert(OldVal);
346 Value *NewVal = BinaryOperator::createAdd(OldVal,
347 ConstantInt::get(Type::Int32Ty, 1),
348 "NewCounter", InsertPos);
349 profcode.insert(NewVal);
350 profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos));
// Declares the dependency on the RSProfilers analysis group, which
// isProfiling() queries for values it does not own itself.
353 void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const {
354 //grab any outstanding profiler, or get the null one
355 AU.addRequired<RSProfilers>();
358 ///////////////////////////////////////
360 ///////////////////////////////////////
// Returns the counterpart of v in the duplicated code, creating it on demand
// and remembering the result in TransCache.
//  - BasicBlock: the entry block maps to itself; any other block gets a new
//    block named "dup_<name>" in the same function.
//  - Instruction: instructions living in the entry block get special
//    treatment (see the existing comment below); other instructions are
//    cloned, renamed "dup_<name>", and every operand of the clone is
//    translated recursively.
//  - Function / Constant / Argument: handled by the last branch.
//    NOTE(review): presumably these map to themselves - the branch body is
//    not among the lines below.
//  - anything else trips the "Value not handled" assertion.
362 Value* ProfilerRS::Translate(Value* v) {
364 return TransCache[v];
366 if (BasicBlock* bb = dyn_cast<BasicBlock>(v)) {
367 if (bb == &bb->getParent()->getEntryBlock())
368 TransCache[bb] = bb; //don't translate entry block
370 TransCache[bb] = new BasicBlock("dup_" + bb->getName(), bb->getParent(),
372 return TransCache[bb];
373 } else if (Instruction* i = dyn_cast<Instruction>(v)) {
374 //we have already translated this
375 //do not translate entry block allocas
376 if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) {
381 Instruction* i2 = i->clone();
383 i2->setName("dup_" + i->getName());
// Rewire the clone so that it refers to duplicated operands.
386 for (unsigned x = 0; x < i2->getNumOperands(); ++x)
387 i2->setOperand(x, Translate(i2->getOperand(x)));
390 } else if (isa<Function>(v) || isa<Constant>(v) || isa<Argument>(v)) {
394 assert(0 && "Value not handled");
// Builds the second copy of F's body, leaving out profiling code.
//   F  - function being duplicated
//   LI - profiler chain used to decide which instructions are profiling code
// Two phases are visible: a first loop over all blocks of F that translates
// them (creating the duplicate blocks in function order), then a worklist
// walk starting at the entry block's first successor that copies every
// instruction for which LI.isProfiling() is false into the duplicate block.
// The entry block itself is marked as seen up front and is never copied.
398 void ProfilerRS::Duplicate(Function& F, RSProfilers& LI)
400 //perform a breadth first search, building up a duplicate of the code
401 std::queue<BasicBlock*> worklist;
402 std::set<BasicBlock*> seen;
404 //This loop ensures proper BB order, to help performance
405 for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib)
407 while (!worklist.empty()) {
408 Translate(worklist.front());
412 //remember that reg2mem created a new entry block we don't want to duplicate
413 worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0));
414 seen.insert(&F.getEntryBlock());
416 while (!worklist.empty()) {
417 BasicBlock* bb = worklist.front();
419 if(seen.find(bb) == seen.end()) {
420 BasicBlock* bbtarget = cast<BasicBlock>(Translate(bb));
421 BasicBlock::InstListType& instlist = bbtarget->getInstList();
422 for (BasicBlock::iterator iib = bb->begin(), iie = bb->end();
425 if (!LI.isProfiling(&*iib)) {
// Non-profiling instruction: append its translation to the duplicate block.
426 Instruction* i = cast<Instruction>(Translate(iib));
427 instlist.insert(bbtarget->end(), i);
430 //updated search state;
432 TerminatorInst* ti = bb->getTerminator();
433 for (unsigned x = 0; x < ti->getNumSuccessors(); ++x) {
434 BasicBlock* bbs = ti->getSuccessor(x);
435 if (seen.find(bbs) == seen.end()) {
// Rewires one back edge src -> dst so that both versions of the code pass
// through a "choice" block on that edge.
//   src - source block of the back edge (B in the comments below)
//   dst - target block of the back edge (A in the comments below)
//   F   - function that owns both blocks
// Two blocks named "choice" are created: bbC after src and bbCp after the
// translation of src. As visible here, bbC branches unconditionally to the
// translation of dst, while bbCp gets a conditional branch between dst and
// its translation with a constant-true placeholder condition. Only bbCp is
// recorded in ChoicePoints (the insertion of bbC is commented out).
443 void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) {
444 //given a backedge from B -> A, and translations A' and B',
446 //b: add branches in C to A and A' and in C' to A and A'
447 //c: mod terminators@B, replace A with C
448 //d: mod terminators@B', replace A' with C'
449 //e: mod phis@A for pred B to be pred C
450 // if multiple entries, simplify to one
451 //f: mod phis@A' for pred B' to be pred C'
452 // if multiple entries, simplify to one
453 //g: for all phis@A with pred C using x
454 // add in edge from C' using x'
455 // add in edge from C using x in A'
458 BasicBlock* bbC = new BasicBlock("choice", &F, src->getNext() );
459 //ChoicePoints.insert(bbC);
461 new BasicBlock("choice", &F, cast<BasicBlock>(Translate(src))->getNext() );
462 ChoicePoints.insert(bbCp);
// step b: terminators of the two choice blocks
465 new BranchInst(cast<BasicBlock>(Translate(dst)), bbC);
466 new BranchInst(dst, cast<BasicBlock>(Translate(dst)),
467 ConstantInt::get(Type::Int1Ty, true), bbCp);
// step c: every edge src -> dst now goes to bbC
470 TerminatorInst* iB = src->getTerminator();
471 for (unsigned x = 0; x < iB->getNumSuccessors(); ++x)
472 if (iB->getSuccessor(x) == dst)
473 iB->setSuccessor(x, bbC);
// step d: the same redirection in the translated code, towards bbCp
477 TerminatorInst* iBp = cast<TerminatorInst>(Translate(src->getTerminator()));
478 for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x)
479 if (iBp->getSuccessor(x) == cast<BasicBlock>(Translate(dst)))
480 iBp->setSuccessor(x, bbCp);
// step e: phis in dst name bbC instead of src
483 ReplacePhiPred(dst, src, bbC);
484 //src could be a switch, in which case we are replacing several edges with one
485 //thus collapse those edges in the Phi
486 CollapsePhi(dst, bbC);
// step f: the same phi fix-up in the translated dst, using bbCp
488 ReplacePhiPred(cast<BasicBlock>(Translate(dst)),
489 cast<BasicBlock>(Translate(src)),bbCp);
490 CollapsePhi(cast<BasicBlock>(Translate(dst)), bbCp);
// step g: for each phi in dst with an entry from bbC, add cross edges between
// the two versions, then drop the entry for bbC itself.
492 for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie;
494 if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
495 for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
496 if(bbC == phi->getIncomingBlock(x)) {
497 phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp);
498 cast<PHINode>(Translate(phi))->addIncoming(phi->getIncomingValue(x),
501 phi->removeIncomingValue(bbC);
// Applies the sampling framework to one function; declarations are left
// untouched. Steps visible below:
//   1. collect the back edges of F,
//   2. run ProcessBackEdge on each of them,
//   3. replace the entry block's terminator with a conditional branch
//      (constant-true placeholder condition) between its old successor and
//      that successor's translation,
//   4. add the entry block to ChoicePoints and let the Chooser `c` rewrite
//      every choice point,
//   5. clear ChoicePoints for the next function.
// NOTE(review): the call to Duplicate() and to the Chooser's PrepFunction are
// presumably made here too; they are not among the lines below.
505 bool ProfilerRS::runOnFunction(Function& F) {
506 if (!F.isDeclaration()) {
507 std::set<std::pair<BasicBlock*, BasicBlock*> > BackEdges;
508 RSProfilers& LI = getAnalysis<RSProfilers>();
510 getBackEdges(F, BackEdges);
512 //assume that stuff worked. now connect the duplicated basic blocks
513 //with the originals in such a way as to preserve ssa. yuk!
514 for (std::set<std::pair<BasicBlock*, BasicBlock*> >::iterator
515 ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib)
516 ProcessBackEdge(ib->first, ib->second, F);
518 //oh, and add the edge from the reg2mem created entry node to the
519 //duplicated second node
520 TerminatorInst* T = F.getEntryBlock().getTerminator();
521 ReplaceInstWithInst(T, new BranchInst(T->getSuccessor(0),
523 Translate(T->getSuccessor(0))),
524 ConstantInt::get(Type::Int1Ty, true)));
526 //do whatever is needed now that the function is duplicated
529 //add entry node to choice points
530 ChoicePoints.insert(&F.getEntryBlock());
532 for (std::set<BasicBlock*>::iterator
533 ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii)
534 c->ProcessChoicePoint(*ii);
536 ChoicePoints.clear();
// Creates the Chooser `c` according to the RandomMethod command-line option.
// Every policy is constructed with the hard-coded value (1 << 14) - 1 as its
// reset value / mask (see the file-header TODO about the hard-coded
// frequency); the counter-based policies use 32-bit counters.
544 bool ProfilerRS::doInitialization(Module &M) {
545 switch (RandomMethod) {
547 c = new GlobalRandomCounter(M, Type::Int32Ty, (1 << 14) - 1);
550 c = new GlobalRandomCounterOpt(M, Type::Int32Ty, (1 << 14) - 1);
553 c = new CycleCounter(M, (1 << 14) - 1);
// Declares what must run before this pass: the RSProfilers analysis group
// (queried through isProfiling) and the pass identified by
// DemoteRegisterToMemoryID (the reg2mem pass that the comments in Duplicate
// and runOnFunction refer to).
559 void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const {
560 AU.addRequired<RSProfilers>();
561 AU.addRequiredID(DemoteRegisterToMemoryID);
564 ///////////////////////////////////////
566 ///////////////////////////////////////
// In every PHI node of btarget, replaces each incoming-block entry equal to
// bold with bnew. The incoming values are left unchanged.
//   btarget - block whose PHI nodes are updated
//   bold    - predecessor to be replaced
//   bnew    - predecessor to put in its place
567 static void ReplacePhiPred(BasicBlock* btarget,
568 BasicBlock* bold, BasicBlock* bnew) {
569 for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
571 if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
572 for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
573 if(bold == phi->getIncomingBlock(x))
574 phi->setIncomingBlock(x, bnew);
// Removes duplicate incoming entries from the PHI nodes of btarget.
// For each PHI the first value seen per incoming block is remembered in
// `counter`; a later entry for the same block must carry the same value
// (asserted) and is removed. The `false` passed to removeIncomingValue is
// presumably the "delete PHI if it becomes empty" flag - confirm against the
// PHINode API.
// Note: bsrc is not referenced in the lines visible here; the de-duplication
// applies to every predecessor, not only bsrc.
578 static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) {
579 for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
581 if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
582 std::map<BasicBlock*, Value*> counter;
// No increment in the loop header: presumably the index only advances when
// no entry was removed.
583 for(unsigned i = 0; i < phi->getNumIncomingValues(); ) {
584 if (counter[phi->getIncomingBlock(i)]) {
585 assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]);
586 phi->removeIncomingValue(i, false);
588 counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i);
// Recursive worker for getBackEdges: walks the CFG depth-first from bb.
//   bb        - block currently being visited
//   BackEdges - output container receiving (source, target) block pairs
//   color     - per-block visit state; 0 is the state of a block that has not
//               been visited, and an edge to a block in state 1 is recorded as
//               a back edge
//   depth, finish - per-block bookkeeping passed along the recursion
// NOTE(review): state 1 presumably means "currently on the DFS stack"; the
// statements that assign colors are not among the lines below.
596 static void recBackEdge(BasicBlock* bb, T& BackEdges,
597 std::map<BasicBlock*, int>& color,
598 std::map<BasicBlock*, int>& depth,
599 std::map<BasicBlock*, int>& finish,
605 TerminatorInst* t= bb->getTerminator();
606 for(unsigned i = 0; i < t->getNumSuccessors(); ++i) {
607 BasicBlock* bbnew = t->getSuccessor(i);
608 if (color[bbnew] == 0)
609 recBackEdge(bbnew, BackEdges, color, depth, finish, time);
610 else if (color[bbnew] == 1) {
611 BackEdges.insert(std::make_pair(bb, bbnew));
622 //find the back edges and where they go to
// Starts the depth-first walk (recBackEdge) at F's entry block with empty
// bookkeeping maps, collecting the result in BackEdges, and prints the
// function name together with the number of back edges to the debug stream.
624 static void getBackEdges(Function& F, T& BackEdges) {
625 std::map<BasicBlock*, int> color;
626 std::map<BasicBlock*, int> depth;
627 std::map<BasicBlock*, int> finish;
629 recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time);
630 DOUT << F.getName() << " " << BackEdges.size() << "\n";
// Public factory: returns a newly allocated NullProfilerRS pass.
635 ModulePass* llvm::createNullProfilerRSPass() {
636 return new NullProfilerRS();
// Public factory: returns a newly allocated ProfilerRS pass (the random
// sampling framework inserter).
639 FunctionPass* llvm::createRSProfilingPass() {
640 return new ProfilerRS();