1 //===- RSProfiling.cpp - Various profiling using random sampling ----------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // These passes implement a random sampling based profiling. Different methods
11 // of choosing when to sample are supported, as well as different types of
12 // profiling. This is done as two passes. The first is a sequence of profiling
13 // passes which insert profiling into the program, and remember what they
16 // The second stage duplicates all instructions in a function, ignoring the
17 // profiling code, then connects the two versions together at the entry and at
18 // backedges. At each connection point a choice is made as to whether to jump
19 // to the profiled code (take a sample) or execute the unprofiled code.
21 // It is highly recommended that after this pass one runs mem2reg and adce
22 // (instcombine load-vn gdce dse also are good to run afterwards)
24 // This design is intended to make the profiling passes independent of the RS
25 // framework, but any profiling pass that implements the RSProfiling interface
26 // is compatible with the rs framework (and thus can be sampled)
28 // TODO: obviously the block and function profiling are almost identical to the
29 // existing ones, so they can be unified (esp since these passes are valid
30 // without the rs framework).
31 // TODO: Fix choice code so that frequency is not hard coded
33 //===----------------------------------------------------------------------===//
35 #include "llvm/Pass.h"
36 #include "llvm/Module.h"
37 #include "llvm/Instructions.h"
38 #include "llvm/Constants.h"
39 #include "llvm/DerivedTypes.h"
40 #include "llvm/Transforms/Scalar.h"
41 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
42 #include "llvm/ADT/Statistic.h"
43 #include "llvm/Support/CommandLine.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Transforms/Instrumentation.h"
46 //#include "ProfilingUtils.h"
47 #include "RSProfiling.h"
// Statistic registered under the name "bedge" ("Number of BackEdges").
// NOTE(review): the place where it is incremented is not visible in this
// listing.
58 Statistic<> NumBackEdges("bedge", "Number of BackEdges");
// Command line option "profile-randomness": selects how the framework decides
// when to take a sample. Accepted values, as listed below:
//   "global"    -> GBV    (global counter)
//   "ra_global" -> GBVO   (register allocated global counter)
//   "rdcc"      -> HOSTCC (cycle counter)
64 cl::opt<RandomMeth> RandomMethod("profile-randomness",
65 cl::desc("How to randomly choose to profile:"),
67 clEnumValN(GBV, "global", "global counter"),
68 clEnumValN(GBVO, "ra_global",
69 "register allocated global counter"),
70 clEnumValN(HOSTCC, "rdcc", "cycle counter"),
// RSProfilers implementation that is registered further down as
// "insert-null-profiling-rs" ("Measure profiling framework overhead").
// NOTE(review): the bodies of the three methods are not visible in this
// listing; presumably this profiler inserts no profiling code -- confirm.
73 class NullProfilerRS : public RSProfilers {
75 bool isProfiling(Value* v) {
78 bool runOnModule(Module &M) {
81 void getAnalysisUsage(AnalysisUsage &AU) const {
// Pass registration:
//  A   - declares the RSProfilers analysis group, named "Profiling passes".
//  NP  - registers NullProfilerRS under "insert-null-profiling-rs".
//  NPT - adds NullProfilerRS to the RSProfilers group. The trailing `true`
//        presumably marks it as the group's default implementation -- confirm
//        against RegisterAnalysisGroup.
86 static RegisterAnalysisGroup<RSProfilers> A("Profiling passes");
87 static RegisterOpt<NullProfilerRS> NP("insert-null-profiling-rs",
88 "Measure profiling framework overhead");
89 static RegisterAnalysisGroup<RSProfilers, NullProfilerRS, true> NPT;
91 //Something that chooses how to sample
// Interface implemented by the sampling policies in this file
// (GlobalRandomCounter, GlobalRandomCounterOpt, CycleCounter).
// ProcessChoicePoint receives a block selected as a choice point; the
// implementations in this file insert code in front of its branch terminator.
94 virtual void ProcessChoicePoint(BasicBlock*) = 0;
// PrepFunction is a per-function setup hook; two of the three implementations
// in this file leave it empty.
95 virtual void PrepFunction(Function*) = 0;
99 //Things that implement sampling policies
// Sampling policy that keeps a countdown in a module-level global variable
// and loads/stores that global directly at every choice point.
100 class GlobalRandomCounter : public Chooser {
// The global variable ("RandomSteeringCounter") created by the constructor.
101 GlobalVariable* Counter;
105 GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
106 virtual ~GlobalRandomCounter();
107 virtual void PrepFunction(Function* F);
108 virtual void ProcessChoicePoint(BasicBlock* bb);
// Variant of the global-counter policy: PrepFunction caches the global in a
// function-local alloca ("localcounter") and choice points operate on that
// local copy instead of on the global itself.
111 class GlobalRandomCounterOpt : public Chooser {
// The global variable ("RandomSteeringCounter") created by the constructor.
112 GlobalVariable* Counter;
117 GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
118 virtual ~GlobalRandomCounterOpt();
119 virtual void PrepFunction(Function* F);
120 virtual void ProcessChoicePoint(BasicBlock* bb);
// Sampling policy built on a call to "llvm.readcyclecounter": the value read
// is masked with the reset mask given to the constructor and compared
// against zero at each choice point.
123 class CycleCounter : public Chooser {
127 CycleCounter(Module& m, uint64_t resetmask);
128 virtual ~CycleCounter();
129 virtual void PrepFunction(Function* F);
130 virtual void ProcessChoicePoint(BasicBlock* bb);
// The framework pass: duplicates a function's code and wires the two copies
// together at the entry and at back edges (see the file header).
134 struct ProfilerRS : public FunctionPass {
// Maps an original value to its duplicate; filled in by Translate().
135 std::map<Value*, Value*> TransCache;
// Blocks that runOnFunction hands to the chooser's ProcessChoicePoint().
136 std::set<BasicBlock*> ChoicePoints;
139 Value* Translate(Value* v);
140 void Duplicate(Function& F, RSProfilers& LI);
141 void ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F);
142 bool runOnFunction(Function& F);
143 bool doInitialization(Module &M);
144 virtual void getAnalysisUsage(AnalysisUsage &AU) const;
// Registers ProfilerRS under the name "insert-rs-profiling-framework".
147 RegisterOpt<ProfilerRS> X("insert-rs-profiling-framework",
148 "Insert random sampling instrumentation framework");
// Forward declarations of the file-local helpers defined at the end of the
// file.

// Retargets PHI entries in btarget that come from bold so they come from bnew.
152 static void ReplacePhiPred(BasicBlock* btarget,
153 BasicBlock* bold, BasicBlock* bnew);
// Removes repeated incoming entries for the same predecessor from the PHIs of
// btarget.
155 static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc);
// Recursive worker used by getBackEdges.
158 static void recBackEdge(BasicBlock* bb, T& BackEdges,
159 std::map<BasicBlock*, int>& color,
160 std::map<BasicBlock*, int>& depth,
161 std::map<BasicBlock*, int>& finish,
164 //find the back edges and where they go to
166 static void getBackEdges(Function& F, T& BackEdges);
169 ///////////////////////////////////////
170 // Methods of choosing when to profile
171 ///////////////////////////////////////
// Creates the counter global in module M.
//   M        - module that receives the new global variable.
//   t        - type of the counter (stored in T).
//   resetval - initial value of the counter, also kept as ResetValue for the
//              code emitted by ProcessChoicePoint.
173 GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t,
174 uint64_t resetval) : T(t) {
// Internal-linkage global named "RandomSteeringCounter", initialized to
// resetval.
175 Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
176 ConstantUInt::get(T, resetval),
177 "RandomSteeringCounter", &M);
178 ResetValue = ConstantUInt::get(T, resetval);
// Empty destructor.
181 GlobalRandomCounter::~GlobalRandomCounter() {}
// This policy needs no per-function setup, so the hook is empty.
183 void GlobalRandomCounter::PrepFunction(Function* F) {}
// Emits the sampling decision at choice point bb, working directly on the
// global counter. bb must end in a BranchInst (enforced by the cast below).
// NOTE(review): the lines that consume the comparison `s` are not visible in
// this listing; presumably it becomes the branch condition -- confirm.
185 void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
186 BranchInst* t = cast<BranchInst>(bb->getTerminator());
// Read the counter just before the terminator.
189 LoadInst* l = new LoadInst(Counter, "counter", t);
// Compare the loaded value against zero.
191 SetCondInst* s = new SetCondInst(Instruction::SetEQ, l,
192 ConstantUInt::get(T, 0),
// Decrement the counter and write it back.
194 Value* nv = BinaryOperator::createSub(l, ConstantInt::get(T, 1),
196 new StoreInst(nv, Counter, t);
// Route successor 0 through a new "reset" block that stores ResetValue into
// the counter and then branches on to the old successor.
200 BasicBlock* oldnext = t->getSuccessor(0);
201 BasicBlock* resetblock = new BasicBlock("reset", oldnext->getParent(),
203 TerminatorInst* t2 = new BranchInst(oldnext, resetblock);
204 t->setSuccessor(0, resetblock);
205 new StoreInst(ResetValue, Counter, t2);
// oldnext is now entered from resetblock rather than from bb.
206 ReplacePhiPred(oldnext, bb, resetblock);
// Creates the counter global in module M.
//   M        - module that receives the new global variable.
//   t        - type of the counter.
//   resetval - initial value of the counter, also kept as ResetValue.
// NOTE(review): the remainder of the signature / initializer list is not
// visible in this listing.
209 GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t,
// Internal-linkage global named "RandomSteeringCounter", initialized to
// resetval.
212 Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage,
213 ConstantUInt::get(T, resetval),
214 "RandomSteeringCounter", &M);
215 ResetValue = ConstantUInt::get(T, resetval);
// Empty destructor.
218 GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {}
// Sets up function F so that choice points can work on a local copy of the
// counter: an alloca in the entry block is initialized from the global, and
// the local value is written back to the global before every call, invoke,
// unwind and return. After a call or invoke the local copy is reloaded from
// the global.
220 void GlobalRandomCounterOpt::PrepFunction(Function* F) {
221 //make a local temporary to cache the global
222 BasicBlock& bb = F->getEntryBlock();
223 AI = new AllocaInst(T, 0, "localcounter", bb.begin());
// Copy the global into the alloca directly after the alloca itself.
224 LoadInst* l = new LoadInst(Counter, "counterload", AI->getNext());
225 new StoreInst(l, AI, l->getNext());
227 //flush the local counter to the global at calls, invokes, unwinds and returns
228 for(Function::iterator fib = F->begin(), fie = F->end();
230 for(BasicBlock::iterator bib = fib->begin(), bie = fib->end();
232 if (isa<CallInst>(&*bib)) {
// Before the call: local -> global. After the call: global -> local.
233 LoadInst* l = new LoadInst(AI, "counter", bib);
234 new StoreInst(l, Counter, bib);
235 l = new LoadInst(Counter, "counter", bib->getNext());
236 new StoreInst(l, AI, l->getNext());
237 } else if (isa<InvokeInst>(&*bib)) {
// Before the invoke: local -> global.
238 LoadInst* l = new LoadInst(AI, "counter", bib);
239 new StoreInst(l, Counter, bib);
// Reload in the normal destination, after any leading PHI nodes.
241 BasicBlock* bb = cast<InvokeInst>(&*bib)->getNormalDest();
242 Instruction* i = bb->begin();
243 while (isa<PHINode>(i)) i = i->getNext();
244 l = new LoadInst(Counter, "counter", i);
// Reload in the unwind destination as well.
246 bb = cast<InvokeInst>(&*bib)->getUnwindDest();
248 while (isa<PHINode>(i)) i = i->getNext();
249 l = new LoadInst(Counter, "counter", i);
250 new StoreInst(l, AI, l->getNext());
251 } else if (isa<UnwindInst>(&*bib) || isa<ReturnInst>(&*bib)) {
// Leaving the function: local -> global.
252 LoadInst* l = new LoadInst(AI, "counter", bib);
253 new StoreInst(l, Counter, bib);
// Emits the sampling decision at choice point bb, working on the local copy
// of the counter (AI) set up by PrepFunction. bb must end in a BranchInst
// (enforced by the cast below).
// NOTE(review): the lines that consume the comparison `s` are not visible in
// this listing; presumably it becomes the branch condition -- confirm.
257 void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {
258 BranchInst* t = cast<BranchInst>(bb->getTerminator());
// Read the local counter just before the terminator.
261 LoadInst* l = new LoadInst(AI, "counter", t);
// Compare the loaded value against zero.
263 SetCondInst* s = new SetCondInst(Instruction::SetEQ, l,
264 ConstantUInt::get(T, 0),
// Decrement the counter and write it back to the local copy.
266 Value* nv = BinaryOperator::createSub(l, ConstantInt::get(T, 1),
268 new StoreInst(nv, AI, t);
// Route successor 0 through a new "reset" block that stores ResetValue into
// the local copy and then branches on to the old successor.
272 BasicBlock* oldnext = t->getSuccessor(0);
273 BasicBlock* resetblock = new BasicBlock("reset", oldnext->getParent(),
275 TerminatorInst* t2 = new BranchInst(oldnext, resetblock);
276 t->setSuccessor(0, resetblock);
277 new StoreInst(ResetValue, AI, t2);
// oldnext is now entered from resetblock rather than from bb.
278 ReplacePhiPred(oldnext, bb, resetblock);
// Remembers the mask in rm and looks up (or declares) the
// "llvm.readcyclecounter" function in module m, declared here as returning
// Type::ULongTy with no parameters.
282 CycleCounter::CycleCounter(Module& m, uint64_t resetmask) : rm(resetmask) {
283 F = m.getOrInsertFunction("llvm.readcyclecounter", Type::ULongTy, NULL);
// Empty destructor.
286 CycleCounter::~CycleCounter() {}
// This policy needs no per-function setup, so the hook is empty.
288 void CycleCounter::PrepFunction(Function* F) {}
// Emits the sampling decision at choice point bb: call the cycle counter,
// mask the result with rm and compare against zero. bb must end in a
// BranchInst (enforced by the cast below).
// NOTE(review): the declaration of `b` and the use of `s` are not visible in
// this listing; presumably `b` is the And result and `s` becomes the branch
// condition -- confirm.
290 void CycleCounter::ProcessChoicePoint(BasicBlock* bb) {
291 BranchInst* t = cast<BranchInst>(bb->getTerminator());
// Call llvm.readcyclecounter just before the terminator.
293 CallInst* c = new CallInst(F, "rdcc", t);
295 BinaryOperator::createAnd(c, ConstantUInt::get(Type::ULongTy, rm),
298 SetCondInst* s = new SetCondInst(Instruction::SetEQ, b,
299 ConstantUInt::get(Type::ULongTy, 0),
304 ///////////////////////////////////////
306 ///////////////////////////////////////
// Reports whether v is profiling code. Values recorded in profcode are
// checked first; anything else is delegated to the RSProfilers analysis this
// pass requires.
// NOTE(review): the statement guarded by the `if` is not visible in this
// listing; presumably it returns true -- confirm.
307 bool RSProfilers_std::isProfiling(Value* v) {
308 if (profcode.find(v) != profcode.end())
311 RSProfilers& LI = getAnalysis<RSProfilers>();
312 return LI.isProfiling(v);
// Inserts "CounterArray[0][CounterNum] += 1" into BB and records every
// instruction it creates in profcode, which isProfiling() consults.
//   BB           - block that receives the increment.
//   CounterNum   - second index into the counter array.
//   CounterArray - global addressed through a getelementptr constant
//                  expression.
315 void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
316 GlobalValue *CounterArray) {
317 // Insert the increment after any alloca or PHI instructions...
318 BasicBlock::iterator InsertPos = BB->begin();
319 while (isa<AllocaInst>(InsertPos) || isa<PHINode>(InsertPos))
322 // Create the getelementptr constant expression
323 std::vector<Constant*> Indices(2);
324 Indices[0] = Constant::getNullValue(Type::IntTy);
325 Indices[1] = ConstantSInt::get(Type::IntTy, CounterNum);
326 Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray, Indices);
328 // Load, increment and store the value back.
329 Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);
330 profcode.insert(OldVal);
// The increment is a Type::UIntTy constant 1.
331 Value *NewVal = BinaryOperator::createAdd(OldVal,
332 ConstantInt::get(Type::UIntTy, 1),
333 "NewCounter", InsertPos);
334 profcode.insert(NewVal);
335 profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos));
// Declares the RSProfilers analysis group as required; isProfiling()
// delegates to it for values this profiler did not insert.
338 void RSProfilers_std::getAnalysisUsage(AnalysisUsage &AU) const {
339 //grab any outstanding profiler, or get the null one
340 AU.addRequired<RSProfilers>();
343 ///////////////////////////////////////
345 ///////////////////////////////////////
// Returns the duplicate of v, creating it on demand, with TransCache used as
// the memo table.
//  - BasicBlock: the entry block maps to itself; any other block gets a new
//    block named "dup_<name>" in the same function.
//  - Instruction: cloned, renamed "dup_<name>", and each operand of the clone
//    is replaced by its own translation (recursively). Instructions in the
//    entry block get special handling.
//  - Function, Constant, Argument: handled by a dedicated branch.
//  - Anything else trips the assertion.
// NOTE(review): several lines (the cache lookup condition, the entry-block
// branch body, the returns) are not visible in this listing; presumably
// entry-block instructions and Function/Constant/Argument values are returned
// untranslated -- confirm.
347 Value* ProfilerRS::Translate(Value* v) {
349 return TransCache[v];
351 if (BasicBlock* bb = dyn_cast<BasicBlock>(v)) {
352 if (bb == &bb->getParent()->getEntryBlock())
353 TransCache[bb] = bb; //don't translate entry block
355 TransCache[bb] = new BasicBlock("dup_" + bb->getName(), bb->getParent(),
357 return TransCache[bb];
358 } else if (Instruction* i = dyn_cast<Instruction>(v)) {
359 //we have already translated this
360 //do not translate entry block allocas
361 if(&i->getParent()->getParent()->getEntryBlock() == i->getParent()) {
366 Instruction* i2 = i->clone();
368 i2->setName("dup_" + i->getName());
// Point the clone's operands at the duplicated values.
371 for (unsigned x = 0; x < i2->getNumOperands(); ++x)
372 i2->setOperand(x, Translate(i2->getOperand(x)));
375 } else if (isa<Function>(v) || isa<Constant>(v) || isa<Argument>(v)) {
379 assert(0 && "Value not handled");
// Builds the duplicate copy of F's body.
//   F  - function being duplicated.
//   LI - profiler analysis; instructions it reports as profiling code are
//        left out of the duplicate.
// The entry block is marked as seen up front and is never copied; the search
// starts at successor 0 of the entry block's terminator.
// NOTE(review): the body of the first loop and the worklist pop/push
// statements are not visible in this listing.
383 void ProfilerRS::Duplicate(Function& F, RSProfilers& LI)
385 //perform a breadth first search, building up a duplicate of the code
386 std::queue<BasicBlock*> worklist;
387 std::set<BasicBlock*> seen;
389 //This loop ensures proper BB order, to help performance
390 for (Function::iterator fib = F.begin(), fie = F.end(); fib != fie; ++fib)
392 while (!worklist.empty()) {
393 Translate(worklist.front());
397 //remember that reg2mem created a new entry block we don't want to duplicate
398 worklist.push(F.getEntryBlock().getTerminator()->getSuccessor(0));
399 seen.insert(&F.getEntryBlock());
401 while (!worklist.empty()) {
402 BasicBlock* bb = worklist.front();
404 if(seen.find(bb) == seen.end()) {
// Append the translation of every non-profiling instruction of bb to the
// end of bb's duplicate block.
405 BasicBlock* bbtarget = cast<BasicBlock>(Translate(bb));
406 BasicBlock::InstListType& instlist = bbtarget->getInstList();
407 for (BasicBlock::iterator iib = bb->begin(), iie = bb->end();
410 if (!LI.isProfiling(&*iib)) {
411 Instruction* i = cast<Instruction>(Translate(iib));
412 instlist.insert(bbtarget->end(), i);
415 //updated search state;
// Look at the successors of bb that have not been seen yet.
417 TerminatorInst* ti = bb->getTerminator();
418 for (unsigned x = 0; x < ti->getNumSuccessors(); ++x) {
419 BasicBlock* bbs = ti->getSuccessor(x);
420 if (seen.find(bbs) == seen.end()) {
// Splits the back edge src -> dst (B -> A in the step list below) and its
// duplicate by inserting a "choice" block on each, from which control can
// reach either copy of dst.
//   src - source block of the back edge (B).
//   dst - target block of the back edge (A).
//   F   - function that receives the new blocks.
// Only the choice block on the duplicated side (bbCp) is added to
// ChoicePoints; the insertion of bbC is commented out.
428 void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F) {
429 //given a backedge from B -> A, and translations A' and B',
431 //b: add branches in C to A and A' and in C' to A and A'
432 //c: mod terminators@B, replace A with C
433 //d: mod terminators@B', replace A' with C'
434 //e: mod phis@A for pred B to be pred C
435 // if multiple entries, simplify to one
436 //f: mod phis@A' for pred B' to be pred C'
437 // if multiple entries, simplify to one
438 //g: for all phis@A with pred C using x
439 // add in edge from C' using x'
440 // add in edge from C using x in A'
// Create C right after B and C' right after B'.
443 BasicBlock* bbC = new BasicBlock("choice", &F, src->getNext() );
444 //ChoicePoints.insert(bbC);
446 new BasicBlock("choice", &F, cast<BasicBlock>(Translate(src))->getNext() );
447 ChoicePoints.insert(bbCp);
// C branches unconditionally to A'; C' gets a conditional branch between A
// and A' whose condition is a constant-true placeholder.
450 new BranchInst(cast<BasicBlock>(Translate(dst)), bbC);
451 new BranchInst(dst, cast<BasicBlock>(Translate(dst)),
452 ConstantBool::get(true), bbCp);
// Step c: every edge B -> A now goes to C.
455 TerminatorInst* iB = src->getTerminator();
456 for (unsigned x = 0; x < iB->getNumSuccessors(); ++x)
457 if (iB->getSuccessor(x) == dst)
458 iB->setSuccessor(x, bbC);
// Step d: every edge B' -> A' now goes to C'.
462 TerminatorInst* iBp = cast<TerminatorInst>(Translate(src->getTerminator()));
463 for (unsigned x = 0; x < iBp->getNumSuccessors(); ++x)
464 if (iBp->getSuccessor(x) == cast<BasicBlock>(Translate(dst)))
465 iBp->setSuccessor(x, bbCp);
// Step e.
468 ReplacePhiPred(dst, src, bbC);
469 //src could be a switch, in which case we are replacing several edges with one
470 //thus collapse those edges into the Phi
471 CollapsePhi(dst, bbC);
// Step f.
473 ReplacePhiPred(cast<BasicBlock>(Translate(dst)),
474 cast<BasicBlock>(Translate(src)),bbCp);
475 CollapsePhi(cast<BasicBlock>(Translate(dst)), bbCp);
// Step g: for each PHI in A with an entry from C, add an entry from C' that
// carries the translated value, add the original value to the translated
// PHI, then drop the entry from C in the original PHI.
477 for(BasicBlock::iterator ib = dst->begin(), ie = dst->end(); ib != ie;
479 if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
480 for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
481 if(bbC == phi->getIncomingBlock(x)) {
482 phi->addIncoming(Translate(phi->getIncomingValue(x)), bbCp);
483 cast<PHINode>(Translate(phi))->addIncoming(phi->getIncomingValue(x),
486 phi->removeIncomingValue(bbC);
// Applies the sampling framework to F. Functions for which F.isExternal() is
// true are skipped. For the others: collect the back edges, connect the
// original and duplicated code at each back edge and at the entry, then let
// the chooser `c` emit the sampling decision at every choice point.
// NOTE(review): the call that duplicates the code, the chooser's per-function
// preparation and the return statements are not visible in this listing.
490 bool ProfilerRS::runOnFunction(Function& F) {
491 if (!F.isExternal()) {
492 std::set<std::pair<BasicBlock*, BasicBlock*> > BackEdges;
493 RSProfilers& LI = getAnalysis<RSProfilers>();
495 getBackEdges(F, BackEdges);
497 //assume that stuff worked. now connect the duplicated basic blocks
498 //with the originals in such a way as to preserve ssa. yuk!
499 for (std::set<std::pair<BasicBlock*, BasicBlock*> >::iterator
500 ib = BackEdges.begin(), ie = BackEdges.end(); ib != ie; ++ib)
501 ProcessBackEdge(ib->first, ib->second, F);
503 //oh, and add the edge from the reg2mem created entry node to the
504 //duplicated second node
// The entry terminator becomes a conditional branch between its old successor
// and that successor's duplicate; the constant-true condition is a
// placeholder.
505 TerminatorInst* T = F.getEntryBlock().getTerminator();
506 ReplaceInstWithInst(T, new BranchInst(T->getSuccessor(0),
507 cast<BasicBlock>(Translate(T->getSuccessor(0))),
508 ConstantBool::get(true)));
510 //do whatever is needed now that the function is duplicated
513 //add entry node to choice points
514 ChoicePoints.insert(&F.getEntryBlock());
516 for (std::set<BasicBlock*>::iterator
517 ii = ChoicePoints.begin(), ie = ChoicePoints.end(); ii != ie; ++ii)
518 c->ProcessChoicePoint(*ii);
// Clear the set of choice points once they have all been processed.
520 ChoicePoints.clear();
// Creates the chooser `c` selected by the RandomMethod command line option.
// All three choosers are built with (1 << 14) - 1 = 16383, as the reset value
// for the two counter policies and as the mask for CycleCounter. The file
// header lists making this frequency configurable as a TODO.
// NOTE(review): the case labels and the return statement are not visible in
// this listing.
528 bool ProfilerRS::doInitialization(Module &M) {
529 switch (RandomMethod) {
531 c = new GlobalRandomCounter(M, Type::UIntTy, (1 << 14) - 1);
534 c = new GlobalRandomCounterOpt(M, Type::UIntTy, (1 << 14) - 1);
537 c = new CycleCounter(M, (1 << 14) - 1);
// Requires a profiler from the RSProfilers group and the pass identified by
// DemoteRegisterToMemoryID (reg2mem, which the comments in Duplicate and
// runOnFunction say creates the entry block).
543 void ProfilerRS::getAnalysisUsage(AnalysisUsage &AU) const {
544 AU.addRequired<RSProfilers>();
545 AU.addRequiredID(DemoteRegisterToMemoryID);
548 ///////////////////////////////////////
550 ///////////////////////////////////////
// For every PHI node in btarget, rewrites each incoming entry whose
// predecessor block is bold so that it names bnew instead. Incoming values
// are left untouched.
551 static void ReplacePhiPred(BasicBlock* btarget,
552 BasicBlock* bold, BasicBlock* bnew) {
553 for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
555 if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
556 for(unsigned x = 0; x < phi->getNumIncomingValues(); ++x)
557 if(bold == phi->getIncomingBlock(x))
558 phi->setIncomingBlock(x, bnew);
// Removes repeated incoming entries from every PHI node in btarget, so each
// predecessor block appears once. Repeated entries for a block must carry the
// same value (asserted).
// NOTE(review): in the visible lines neither bsrc nor `total` is used, and
// the statement that advances `i` is not visible in this listing.
562 static void CollapsePhi(BasicBlock* btarget, BasicBlock* bsrc) {
563 for(BasicBlock::iterator ib = btarget->begin(), ie = btarget->end();
565 if (PHINode* phi = dyn_cast<PHINode>(&*ib)) {
566 unsigned total = phi->getNumIncomingValues();
// Value recorded for each predecessor block the first time it is met.
567 std::map<BasicBlock*, Value*> counter;
568 for(unsigned i = 0; i < phi->getNumIncomingValues(); ) {
569 if (counter[phi->getIncomingBlock(i)]) {
// Block already recorded: drop this duplicate entry.
570 assert(phi->getIncomingValue(i) == counter[phi->getIncomingBlock(i)]);
571 phi->removeIncomingValue(i, false);
573 counter[phi->getIncomingBlock(i)] = phi->getIncomingValue(i);
// Recursive depth-first walk over the successors of bb. A successor whose
// color is 0 is recursed into; an edge to a successor whose color is 1 is
// inserted into BackEdges as the pair (bb, successor).
// NOTE(review): the statements that assign color, depth and finish, and the
// `time` parameter, are not visible in this listing; presumably color 1 marks
// a block still on the DFS stack -- confirm.
581 static void recBackEdge(BasicBlock* bb, T& BackEdges,
582 std::map<BasicBlock*, int>& color,
583 std::map<BasicBlock*, int>& depth,
584 std::map<BasicBlock*, int>& finish,
590 TerminatorInst* t= bb->getTerminator();
591 for(unsigned i = 0; i < t->getNumSuccessors(); ++i) {
592 BasicBlock* bbnew = t->getSuccessor(i);
593 if (color[bbnew] == 0)
594 recBackEdge(bbnew, BackEdges, color, depth, finish, time);
595 else if (color[bbnew] == 1) {
596 BackEdges.insert(std::make_pair(bb, bbnew));
607 //find the back edges and where they go to
// Fills BackEdges by running recBackEdge from F's entry block with fresh
// bookkeeping maps, then prints the function name and the number of back
// edges as debug output.
// NOTE(review): the declaration of `time` is not visible in this listing.
609 static void getBackEdges(Function& F, T& BackEdges) {
610 std::map<BasicBlock*, int> color;
611 std::map<BasicBlock*, int> depth;
612 std::map<BasicBlock*, int> finish;
614 recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time);
615 DEBUG(std::cerr << F.getName() << " " << BackEdges.size() << "\n");
// Factory: returns a newly allocated NullProfilerRS pass.
620 ModulePass* llvm::createNullProfilerRSPass() {
621 return new NullProfilerRS();
// Factory: returns a newly allocated ProfilerRS pass.
624 FunctionPass* llvm::createRSProfilingPass() {
625 return new ProfilerRS();