//===- GVN.cpp - Eliminate redundant values and loads --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions. It also performs simple dead load elimination.
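//
// For example (illustrative IR, not taken from any particular test):
//
//   %a = add i32 %x, %y
//   %b = add i32 %x, %y   ; gets the same value number as %a, so uses of
//                         ; %b are rewritten to use %a and %b is deleted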
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));

//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers. It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
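///
/// For instance (illustrative), two distinct 'add i32 %x, %y' instructions
/// hash to equal Expressions and therefore receive the same value number.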
namespace {
  struct Expression {
    enum ExpressionOpcode {
      ADD = Instruction::Add,
      FADD = Instruction::FAdd,
      SUB = Instruction::Sub,
      FSUB = Instruction::FSub,
      MUL = Instruction::Mul,
      FMUL = Instruction::FMul,
      UDIV = Instruction::UDiv,
      SDIV = Instruction::SDiv,
      FDIV = Instruction::FDiv,
      UREM = Instruction::URem,
      SREM = Instruction::SRem,
      FREM = Instruction::FRem,
      SHL = Instruction::Shl,
      LSHR = Instruction::LShr,
      ASHR = Instruction::AShr,
      AND = Instruction::And,
      OR = Instruction::Or,
      XOR = Instruction::Xor,
      TRUNC = Instruction::Trunc,
      ZEXT = Instruction::ZExt,
      SEXT = Instruction::SExt,
      FPTOUI = Instruction::FPToUI,
      FPTOSI = Instruction::FPToSI,
      UITOFP = Instruction::UIToFP,
      SITOFP = Instruction::SIToFP,
      FPTRUNC = Instruction::FPTrunc,
      FPEXT = Instruction::FPExt,
      PTRTOINT = Instruction::PtrToInt,
      INTTOPTR = Instruction::IntToPtr,
      BITCAST = Instruction::BitCast,
      ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
      ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
      FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
      FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
      FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
      SHUFFLE, SELECT, GEP, CALL, CONSTANT,
      INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };

    ExpressionOpcode opcode;
    const Type *type;
    SmallVector<uint32_t, 4> varargs;
    Function *function;

    Expression() { }
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    /*bool operator!=(const Expression &other) const {
      return !(*this == other);
    }*/
  };

  class ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis *AA;
      MemoryDependenceAnalysis *MD;
      DominatorTree *DT;

      uint32_t nextValueNumber;

      Expression::ExpressionOpcode getOpcode(CmpInst* C);
      Expression create_expression(BinaryOperator* BO);
      Expression create_expression(CmpInst* C);
      Expression create_expression(ShuffleVectorInst* V);
      Expression create_expression(ExtractElementInst* C);
      Expression create_expression(InsertElementInst* V);
      Expression create_expression(SelectInst* V);
      Expression create_expression(CastInst* C);
      Expression create_expression(GetElementPtrInst* G);
      Expression create_expression(CallInst* C);
      Expression create_expression(ExtractValueInst* C);
      Expression create_expression(InsertValueInst* C);

      uint32_t lookup_or_add_call(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value *V);
      uint32_t lookup(Value *V) const;
      void add(Value *V, uint32_t num);
      void clear();
      void erase(Value *v);
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      AliasAnalysis *getAliasAnalysis() const { return AA; }
      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
      void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
      void verifyRemoved(const Value *) const;
  };
}

namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9));

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

template <>
struct isPodLike<Expression> { static const bool value = true; };

}

//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  } else {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
    }
  }
}

Expression ValueTable::create_expression(CallInst* C) {
  Expression e;

  e.type = C->getType();
  e.function = C->getCalledFunction();
  e.opcode = Expression::CALL;

  CallSite CS(C);
  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression e;
  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));

  e.type = BO->getType();
  e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());

  return e;
}

Expression ValueTable::create_expression(CmpInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.varargs.push_back(lookup_or_add(C->getOperand(1)));

  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(CastInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));

  e.type = C->getType();
  e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());

  return e;
}

Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression e;

  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
  e.varargs.push_back(lookup_or_add(S->getOperand(2)));

  e.type = S->getType();
  e.opcode = Expression::SHUFFLE;

  return e;
}

Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
  e.varargs.push_back(lookup_or_add(E->getOperand(1)));

  e.type = E->getType();
  e.opcode = Expression::EXTRACT;

  return e;
}

Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
  e.varargs.push_back(lookup_or_add(I->getOperand(2)));

  e.type = I->getType();
  e.opcode = Expression::INSERT;

  return e;
}

Expression ValueTable::create_expression(SelectInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getCondition()));
  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
  e.varargs.push_back(lookup_or_add(I->getFalseValue()));

  e.type = I->getType();
  e.opcode = Expression::SELECT;

  return e;
}

Expression ValueTable::create_expression(GetElementPtrInst* G) {
  Expression e;

  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));

  e.type = G->getType();
  e.opcode = Expression::GEP;

  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(ExtractValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);

  e.type = E->getType();
  e.opcode = Expression::EXTRACTVALUE;

  return e;
}

Expression ValueTable::create_expression(InsertValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);

  e.type = E->getType();
  e.opcode = Expression::INSERTVALUE;

  return e;
}

//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}

uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: call/call dependencies for readonly calls should return def, not
    // clobber! Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      // Ignore non-local dependencies.
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-dependencies. If we already have a call, reject
      // instruction dependencies.
      if (I->getResult().isClobber() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;
  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}

/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
      exp = create_expression(cast<BinaryOperator>(I));
      break;
    case Instruction::ICmp:
    case Instruction::FCmp:
      exp = create_expression(cast<CmpInst>(I));
      break;
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
      exp = create_expression(cast<CastInst>(I));
      break;
    case Instruction::Select:
      exp = create_expression(cast<SelectInst>(I));
      break;
    case Instruction::ExtractElement:
      exp = create_expression(cast<ExtractElementInst>(I));
      break;
    case Instruction::InsertElement:
      exp = create_expression(cast<InsertElementInst>(I));
      break;
    case Instruction::ShuffleVector:
      exp = create_expression(cast<ShuffleVectorInst>(I));
      break;
    case Instruction::ExtractValue:
      exp = create_expression(cast<ExtractValueInst>(I));
      break;
    case Instruction::InsertValue:
      exp = create_expression(cast<InsertValueInst>(I));
      break;
    case Instruction::GetElementPtr:
      exp = create_expression(cast<GetElementPtrInst>(I));
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}

/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}

//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {
  struct ValueNumberScope {
    ValueNumberScope* parent;
    DenseMap<uint32_t, Value*> table;

    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
  };

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
        : FunctionPass(ID), NoLoads(noloads), MD(0) {
      initializeGVNPass(*PassRegistry::getPassRegistry());
    }

  private:
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;
    const TargetData *TD;

    ValueTable VN;

    /// NumberTable - A mapping from value numbers to lists of Value*'s that
    /// have that value number. Use lookupNumber to query it.
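    /// Each entry is the head of an intrusive singly linked list: 'first'
    /// holds a Value with that number and 'second' points to the next
    /// (Value, next) node, allocated from TableAllocator.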
    DenseMap<uint32_t, std::pair<Value*, void*> > NumberTable;
    BumpPtrAllocator TableAllocator;

    /// insert_table - Push a new Value to the NumberTable onto the list for
    /// its value number.
    void insert_table(uint32_t N, Value *V) {
      std::pair<Value*, void*>& Curr = NumberTable[N];
      if (!Curr.first) {
        Curr.first = V;
        return;
      }

      std::pair<Value*, void*>* Node =
        TableAllocator.Allocate<std::pair<Value*, void*> >();
      Node->first = Curr.first;
      Node->second = Curr.second;
      Curr.first = V;
      Curr.second = Node;
    }

    /// erase_table - Scan the list of values corresponding to a given value
    /// number, and remove the given value if encountered.
    void erase_table(uint32_t N, Value *V) {
      std::pair<Value*, void*>* Prev = 0;
      std::pair<Value*, void*>* Curr = &NumberTable[N];

      while (Curr->first != V) {
        Prev = Curr;
        Curr = static_cast<std::pair<Value*, void*>*>(Curr->second);
      }

      if (Prev) {
        Prev->second = Curr->second;
      } else {
        if (!Curr->second) {
          Curr->first = 0;
        } else {
          std::pair<Value*, void*>* Next =
            static_cast<std::pair<Value*, void*>*>(Curr->second);
          Curr->first = Next->first;
          Curr->second = Next->second;
        }
      }
    }

    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator and postdominator info.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions.
    // FIXME: eliminate or document these better.
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    bool performPRE(Function& F);
    Value *lookupNumber(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}

/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks. This
/// map is actually a four-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
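///
/// The speculative states exist to break cycles: a block in a loop is
/// optimistically marked 2 so that the recursive query of its own
/// predecessors terminates, and it is promoted to 3 once other blocks'
/// availability has been concluded from it.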
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either. Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block. We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect. Walk the (transitive) successors of this block and mark them as
  // 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks. This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  } while (!BBWorklist.empty());

  return false;
}

/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
        TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}

/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory,
/// and then a load from a must-aliased pointer of a different type, try to
/// coerce the stored value. LoadedTy is the type of the load we want to
/// replace and InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
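///
/// For example (illustrative IR): an i32 store feeding an i16 load on a
/// little-endian target reduces to a trunc of the stored value, and a
/// stored i8* feeding an i64 load is first converted with ptrtoint:
///   store i32 %v, i32* %p
///   %x = load i16* %q   ; %q must-aliases %p -> %x becomes trunc i32 %v to i16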
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
      // Pointer to Pointer -> use bitcast.
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
    }

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->isPointerTy()) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->isPointerTy())
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it. If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPointerTy()) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}

/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove). This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
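///
/// For example (illustrative): a 4-byte store at offset 0 from some base
/// fully covers a 1-byte load at offset 2 from the same base, so this
/// returns 2: the load reads byte 2 of the stored value.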
static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first class array or struct, don't try
  // to transform them. We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD);
  Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias. AA must have gotten confused.
  // FIXME: Study to see if/when this happens. One case is forwarding a memset
  // to a load from the base of the memset.
#if 0
  if (LoadOffset == StoreOffset) {
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
    abort();
  }
#endif

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load. In this case, they really don't alias at all, AA
  // must have gotten confused.
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;

  bool isAAFailure = false;
  if (StoreOffset < LoadOffset)
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  else
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
           << "Base       = " << *StoreBase << "\n"
           << "Store Ptr  = " << *WritePtr << "\n"
           << "Store Offs = " << StoreOffset << "\n"
           << "Load Ptr   = " << *LoadPtr << "\n";
#endif
    return -1;
  }

  // If the load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it. We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to be
  // valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation. Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}

/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getValueOperand()->getType()->isStructTy() ||
      DepSI->getValueOperand()->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize =TD.getTypeSizeInBits(DepSI->getValueOperand()->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, TD);
}

static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory. In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src));
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}

/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
/// anything more we can do before we give up.
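///
/// Illustrative flow: the stored value is converted to an integer, shifted
/// right so that the bytes the load reads become least significant (a shift
/// of Offset*8 on little-endian targets), truncated to the load's width, and
/// finally coerced to the load's type.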
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
  uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load. Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                                   "tmp");

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                                 "tmp");

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}

/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
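    // e.g. (illustrative) for LoadSize == 4 and memset byte 0xAB:
    //   0x000000AB -> 0x0000ABAB -> 0xABABABAB, doubling the bytes set on
    //   each iteration.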
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}

namespace {

struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;

  enum ValType {
    SimpleVal,  // A simple offsetted value that is accessed.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 1, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(!isSimpleValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
  /// defined here to the specified type. This handles various coercion cases.
  Value *MaterializeAdjustedValue(const Type *LoadTy,
                                  const TargetData *TD) const {
    Value *Res;
    if (isSimpleValue()) {
      Res = getSimpleValue();
      if (Res->getType() != LoadTy) {
        assert(TD && "Need target data to handle type mismatch case");
        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                   *TD);

        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
                     << *getSimpleValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else {
      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                   LoadTy, BB->getTerminator(), *TD);
      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                   << "  " << *getMemIntrinValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
    return Res;
  }
};

}

/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI. This returns the value
/// that should be used at LI's definition site.
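///
/// For instance (illustrative): if the value is available as %v1 in one
/// predecessor and %v2 in another, SSAUpdater places
///   %merge = phi [ %v1, ... ], [ %v2, ... ]
/// in the load's block, and %merge is returned to replace the load.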
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     const DominatorTree &DT,
                                     AliasAnalysis *AA) {
  // Check for the fully redundant, dominating load case. In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->isPointerTy())
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  return V;
}

static bool isLifetimeStart(const Instruction *Inst) {
  if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}

/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
                              SmallVectorImpl<Instruction*> &toErase) {
  // Find the non-local dependencies of the load.
  SmallVector<NonLocalDepResult, 64> Deps;
  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about. Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block. Reject this early.
  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
    DEBUG(
      dbgs() << "GVN: non-local load ";
      WriteAsOperand(dbgs(), LI);
      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
    );
    return false;
  }

  // Filter out useless results (non-locals, etc). Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same
      // as the pointer operand of the load if PHI translation occurs. Make
      // sure to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from
      // the stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                      DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                      DepSI->getValueOperand(),
                                                                Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }

    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getValueOperand()->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                        S->getValueOperand()));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the load.
      if (LD->getType() != LI->getType()) {
        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value. Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(LI);
    toErase.push_back(LI);
    ++NumGVNLoad;
    return true;
  }

  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value. This means that the value
  // is available in some of our (transitive) predecessors. Let's think about
  // doing PRE of this load. This will involve inserting a new load into the
  // predecessor when it's not available. We could do this in general, but
  // prefer to not increase code size. As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).
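  //
  // Illustrative CFG (not from the source): if the value is available in
  // pred1 but not pred2, and both branch to the block containing the load,
  // we insert a copy of the load at the end of pred2 and merge the two
  // available values with a phi in the load's block.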

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Let's find the first basic block with more than one predecessor. Walk
  // backwards through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;

    // If any of these blocks has more than one successor (i.e. if the edge we
    // just traversed was critical), then there are other paths through this
    // block along which the load may not be anticipated. Hoisting the load
    // above this block would be adding the load to execution paths along
    // which it was not previously executed.
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      return false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions. We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }
1537 // Check to see how many predecessors have the loaded value fully
1539 DenseMap<BasicBlock*, Value*> PredLoads;
1540 DenseMap<BasicBlock*, char> FullyAvailableBlocks;
1541 for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
1542 FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
1543 for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
1544 FullyAvailableBlocks[UnavailableBlocks[i]] = false;
1546 SmallVector<std::pair<TerminatorInst*, unsigned>, 4> NeedToSplit;
1547 for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
1549 BasicBlock *Pred = *PI;
1550 if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) {
1553 PredLoads[Pred] = 0;
1555 if (Pred->getTerminator()->getNumSuccessors() != 1) {
1556 if (isa<IndirectBrInst>(Pred->getTerminator())) {
1557 DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
1558 << Pred->getName() << "': " << *LI << '\n');
1561 unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
1562 NeedToSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
1565 if (!NeedToSplit.empty()) {
1566 toSplit.append(NeedToSplit.begin(), NeedToSplit.end());

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should be eliminated above!");

  // If this load is unavailable in multiple predecessors, reject it.
  // FIXME: If we could restructure the CFG, we could make a common pred with
  // all the preds that don't have an available LI and insert a new load into
  // that block.
  if (NumUnavailablePreds != 1)
    return false;

  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  SmallVector<Instruction*, 8> NewInsts;
  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;

    // Do PHI translation to get its value in the predecessor if necessary. The
    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
    PHITransAddr Address(LI->getPointerOperand(), TD);
    Value *LoadPtr = 0;
    if (allSingleSucc) {
      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                  *DT, NewInsts);
    } else {
      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
      LoadPtr = Address.getAddr();
    }

    // If we couldn't find or insert a computation of this phi translated
    // value, we fail PRE.
    if (LoadPtr == 0) {
      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
            << *LI->getPointerOperand() << "\n");
      CanDoPRE = false;
      break;
    }

    // Make sure it is valid to move this load here. We have to watch out for:
    //  @1 = getelementptr (i8* p, ...
    //  test p and branch if == 0
    //  load @1
    // It is valid to have the getelementptr before the test, even if p can be
    // 0, as getelementptr only does address arithmetic.
    // If we are not pushing the value through any multiple-successor blocks
    // we do not have this case. Otherwise, check that the load is safe to
    // put anywhere; this can be improved, but should be conservatively safe.
    if (!allSingleSucc &&
        // FIXME: REEVALUATE THIS.
        !isSafeToLoadUnconditionally(LoadPtr,
                                     UnavailablePred->getTerminator(),
                                     LI->getAlignment(), TD)) {
      CanDoPRE = false;
      break;
    }

    I->second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty())
      NewInsts.pop_back_val()->eraseFromParent();
    return false;
  }

  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors, do
  // it.
  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  // Assign value numbers to the new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map. However, in doing so, we risk getting into
    // ordering issues. If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(NewInsts[i]);
  }

  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;
    Value *LoadPtr = I->second;

    Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                        LI->getAlignment(),
                                        UnavailablePred->getTerminator());

    // Transfer the old load's TBAA tag to the new load.
    if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa))
      NewLoad->setMetadata(LLVMContext::MD_tbaa, Tag);

    // Add the newly created load.
    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                        NewLoad));
    MD->invalidateCachedPointerInfo(LoadPtr);
    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                    VN.getAliasAnalysis());
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (V->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(V);
  VN.erase(LI);
  toErase.push_back(LI);
  ++NumPRELoad;
  return true;
}

/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
  if (!MD)
    return false;

  if (L->isVolatile())
    return false;

  // ... to a pointer that has been loaded from before...
  MemDepResult Dep = MD->getDependency(L);

  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could do that by recognizing if the clobber instructions are obviously
    // a common base + constant offset, and if the previous store (or memset)
    // completely covers this load. This sort of thing can happen in bitfield
    // access code.
    Value *AvailVal = 0;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                    L->getPointerOperand(),
                                                    DepSI, *TD);
        if (Offset != -1)
          AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset,
                                          L->getType(), L, *TD);
      }

    // If the clobbering value is a memset/memcpy/memmove, see if we can
    // forward a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      if (TD) {
        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                      L->getPointerOperand(),
                                                      DepMI, *TD);
        if (Offset != -1)
          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
      }
    }

    if (AvailVal) {
      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
                   << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (AvailVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(AvailVal);
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }

    DEBUG(
      // fast print dep, using operator<< on instruction would be too slow
      dbgs() << "GVN: load ";
      WriteAsOperand(dbgs(), L);
      Instruction *I = Dep.getInst();
      dbgs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }
1763 // If it is defined in another block, try harder.
1764 if (Dep.isNonLocal())
1765 return processNonLocalLoad(L, toErase);
1767 Instruction *DepInst = Dep.getInst();
1768 if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
1769 Value *StoredVal = DepSI->getValueOperand();
1771 // The store and load are to a must-aliased pointer, but they may not
1772 // actually have the same type. See if we know how to reuse the stored
1773 // value (depending on its type).
1774 if (StoredVal->getType() != L->getType()) {
1776 StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
1781 DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
1782 << '\n' << *L << "\n\n\n");
1789 L->replaceAllUsesWith(StoredVal);
1790 if (StoredVal->getType()->isPointerTy())
1791 MD->invalidateCachedPointerInfo(StoredVal);
1793 toErase.push_back(L);
1798 if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
1799 Value *AvailableVal = DepLI;
1801 // The loads are of a must-aliased pointer, but they may not actually have
1802 // the same type. See if we know how to reuse the previously loaded value
1803 // (depending on its type).
    if (DepLI->getType() != L->getType()) {
      if (TD) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
                     << "\n" << *L << "\n\n\n");
      } else
        return false;
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (DepLI->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(DepLI);
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }
  // If this load really doesn't depend on anything, then we must be loading an
  // undef value.  This can happen when loading from a fresh allocation with no
  // intervening stores, for example.
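  // Illustrative example (not from the original source):
  //   %P = alloca i32
  //   %v = load i32* %P     ; no intervening store: %v is undef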
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    VN.erase(L);
    toErase.push_back(L);
    ++NumGVNLoad;
    return true;
  }
  // If this load occurs right after a lifetime begin, then the loaded value
  // is undefined.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }
  }

  return false;
}
// lookupNumber - In order to find a leader for a given value number at a
// specific basic block, we first obtain the list of all Values for that number,
// and then scan the list to find one whose block dominates the block in
// question.  This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
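//
// Illustrative layout (not from the original source): each table cell is a
// (leader, next) pair forming a singly linked list, e.g.
//   NumberTable[n] = (%a, ptr) -> (%b, ptr) -> (%c, null)
// and a query from block BB returns the first leader whose defining block
// dominates BB.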
Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  std::pair<Value*, void*> Vals = NumberTable[num];
  if (!Vals.first) return 0;
  Instruction *Inst = dyn_cast<Instruction>(Vals.first);
  if (!Inst) return Vals.first;
  BasicBlock *Parent = Inst->getParent();
  if (DT->dominates(Parent, BB))
    return Inst;

  std::pair<Value*, void*> *Next =
    static_cast<std::pair<Value*, void*>*>(Vals.second);
  while (Next) {
    Instruction *CurrInst = dyn_cast<Instruction>(Next->first);
    if (!CurrInst) return Next->first;

    BasicBlock *Parent = CurrInst->getParent();
    if (DT->dominates(Parent, BB))
      return CurrInst;

    Next = static_cast<std::pair<Value*, void*>*>(Next->second);
  }

  return 0;
}
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase) {
  // Ignore dbg info intrinsics.
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  // If the instruction can be easily simplified then do so now in preference
  // to value numbering it.  Value numbering often exposes redundancies, for
  // example if it determines that %y is equal to %x then the instruction
  // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
  if (Value *V = SimplifyInstruction(I, TD, DT)) {
    I->replaceAllUsesWith(V);
    if (MD && V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(I);
    toErase.push_back(I);
    return true;
  }
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      insert_table(Num, LI);
    }

    return Changed;
  }
  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);
  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast failing them.
  if (isa<AllocaInst>(I) || isa<TerminatorInst>(I) || isa<PHINode>(I)) {
    insert_table(Num, I);
    return false;
  }
  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
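  // (Illustrative: if NextNum was 42 before the lookup_or_add call and Num is
  // 42 afterwards, this expression has never been numbered before, so there is
  // no dominating leader to eliminate against.)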
  if (Num == NextNum) {
    insert_table(Num, I);
    return false;
  }
  // Perform fast-path value-number based elimination of values inherited from
  // dominators.
  Value *repl = lookupNumber(I->getParent(), Num);
  if (repl == 0) {
    // Failure, just remember this instance for future use.
    insert_table(Num, I);
    return false;
  }

  // Remove it!
  I->replaceAllUsesWith(repl);
  if (MD && repl->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(repl);
  toErase.push_back(I);
  return true;
}
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function &F) {
  MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  TD = getAnalysisIfAvailable<TargetData>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);
  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) ++NumGVNBlocks;

    Changed |= removedBlock;
  }
  unsigned Iteration = 0;

  while (ShouldContinue) {
    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    if (splitCriticalEdges())
      ShouldContinue = true;
    Changed |= ShouldContinue;
    ++Iteration;
  }
  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }
  // FIXME: Should perform GVN again after PRE does something.  PRE can move
  // computations into blocks where they become fully redundant.  Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}
bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
  // incrementing BI before processing an instruction).
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE; ) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (SmallVectorImpl<Instruction*>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}
/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
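///
/// An illustrative diamond (not from the original source):
///
///          BB0
///         /   \
///       BB1   BB2
///         \   /
///      CurrentBlock
///
/// If "%t = add i32 %x, %y" is available in BB1 but not in BB2, we clone the
/// add into BB2 and merge the two predecessor values with a phi in
/// CurrentBlock, making the original computation fully redundant.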
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      // We don't currently value number ANY inline asm calls.
      if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
        if (CallI->isInlineAsm())
          continue;

      uint32_t ValNo = VN.lookup(CurInst);
      // Look for the predecessors for PRE opportunities.  We're
      // only trying to solve the basic diamond case, where
      // a value is computed in the successor and one predecessor,
      // but not the other.  We also explicitly disallow cases
      // where the successor is its own predecessor, because they're
      // more complicated to get right.
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (P == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!DT->dominates(&F.getEntryBlock(), P)) {
          NumWithout = 2;
          break;
        }

        Value *predV = lookupNumber(P, ValNo);
        if (predV == 0) {
          PREPred = P;
          ++NumWithout;
        } else if (predV == CurInst) {
          NumWithout = 2;
        } else {
          predMap[P] = predV;
          ++NumWith;
        }
      }
      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
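      // (Illustrative: with three predecessors of which two lack the value,
      // we would insert two new instructions to delete one, a net growth.)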
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across indirect branch.
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;
      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
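      // (Illustrative: if PREPred ends in a conditional branch, inserting the
      // expression there would also compute it on the path that bypasses
      // CurrentBlock; splitting the edge gives us a safe insertion point.)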
      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }
      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them.  Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }
      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor.  This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        DEBUG(verifyRemoved(PREInstr));
        delete PREInstr;
        continue;
      }
      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      ++NumGVNPRE;

      // Update the availability map to include the new instruction.
      insert_table(ValNo, PREInstr);
      // Create a PHI to make the value available in this block.
      PHINode *Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        BasicBlock *P = *PI;
        Phi->addIncoming(predMap[P], P);
      }
      VN.add(Phi, ValNo);
      insert_table(ValNo, Phi);

      CurInst->replaceAllUsesWith(Phi);
      if (MD && Phi->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(Phi);
      VN.erase(CurInst);
      erase_table(ValNo, CurInst);

      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }
  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}
/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;
  do {
    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, this);
  } while (!toSplit.empty());
  if (MD) MD->invalidateCachedPredecessors();
  return true;
}
/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Top-down walk of the dominator tree.
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}
void GVN::cleanupGlobalSets() {
  VN.clear();
  NumberTable.clear();
  TableAllocator.Reset();
}
/// verifyRemoved - Verify that the specified instruction does not occur in our
/// internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<uint32_t, std::pair<Value*, void*> >::const_iterator
       I = NumberTable.begin(), E = NumberTable.end(); I != E; ++I) {
    const std::pair<Value*, void*> *Node = &I->second;
    assert(Node->first != Inst && "Inst still in value numbering scope!");

    while (Node->second) {
      Node = static_cast<const std::pair<Value*, void*>*>(Node->second);
      assert(Node->first != Inst && "Inst still in value numbering scope!");
    }
  }
}