Fix my previous patch (r163164). It does now what it is supposed to do:
[oota-llvm.git] / lib / Transforms / Utils / BypassSlowDivision.cpp
1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #define DEBUG_TYPE "bypass-slow-division"
19 #include "llvm/Instructions.h"
20 #include "llvm/Function.h"
21 #include "llvm/IRBuilder.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
24
25 using namespace llvm;
26
27 namespace llvm {
28   struct DivOpInfo {
29     bool SignedOp;
30     Value *Dividend;
31     Value *Divisor;
32
33     DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
34       : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
35   };
36
37   struct DivPhiNodes {
38     PHINode *Quotient;
39     PHINode *Remainder;
40
41     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
42       : Quotient(InQuotient), Remainder(InRemainder) {}
43   };
44
45   template<>
46   struct DenseMapInfo<DivOpInfo> {
47     static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
48       return Val1.SignedOp == Val2.SignedOp &&
49              Val1.Dividend == Val2.Dividend &&
50              Val1.Divisor == Val2.Divisor;
51     }
52
53     static DivOpInfo getEmptyKey() {
54       return DivOpInfo(false, 0, 0);
55     }
56
57     static DivOpInfo getTombstoneKey() {
58       return DivOpInfo(true, 0, 0);
59     }
60
61     static unsigned getHashValue(const DivOpInfo &Val) {
62       return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
63                         reinterpret_cast<uintptr_t>(Val.Divisor)) ^
64                         (unsigned)Val.SignedOp;
65     }
66   };
67
68   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
69 }
70
71 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
72 // value of the operands and uses a shorter-faster div/rem instruction when
73 // possible and the longer-slower div/rem instruction otherwise.
74 static bool insertFastDiv(Function &F,
75                           Function::iterator &I,
76                           BasicBlock::iterator &J,
77                           IntegerType *BypassType,
78                           bool UseDivOp,
79                           bool UseSignedOp,
80                           DivCacheTy &PerBBDivCache) {
81   // Get instruction operands
82   Instruction *Instr = J;
83   Value *Dividend = Instr->getOperand(0);
84   Value *Divisor = Instr->getOperand(1);
85
86   if (isa<ConstantInt>(Divisor) ||
87       (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
88     // Operations with immediate values should have
89     // been solved and replaced during compile time.
90     return false;
91   }
92
93   // Basic Block is split before divide
94   BasicBlock *MainBB = I;
95   BasicBlock *SuccessorBB = I->splitBasicBlock(J);
96   ++I; //advance iterator I to successorBB
97
98   // Add new basic block for slow divide operation
99   BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
100                                           MainBB->getParent(), SuccessorBB);
101   SlowBB->moveBefore(SuccessorBB);
102   IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
103   Value *SlowQuotientV;
104   Value *SlowRemainderV;
105   if (UseSignedOp) {
106     SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
107     SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
108   } else {
109     SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
110     SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
111   }
112   SlowBuilder.CreateBr(SuccessorBB);
113
114   // Add new basic block for fast divide operation
115   BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
116                                           MainBB->getParent(), SuccessorBB);
117   FastBB->moveBefore(SlowBB);
118   IRBuilder<> FastBuilder(FastBB, FastBB->begin());
119   Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
120                                                 BypassType);
121   Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
122                                                  BypassType);
123
124   // udiv/urem because optimization only handles positive numbers
125   Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
126                                                       ShortDivisorV);
127   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
128                                                   ShortDivisorV);
129   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
130                                                 ShortQuotientV,
131                                                 Dividend->getType());
132   Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
133                                                  ShortRemainderV,
134                                                  Dividend->getType());
135   FastBuilder.CreateBr(SuccessorBB);
136
137   // Phi nodes for result of div and rem
138   IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
139   PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
140   QuoPhi->addIncoming(SlowQuotientV, SlowBB);
141   QuoPhi->addIncoming(FastQuotientV, FastBB);
142   PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
143   RemPhi->addIncoming(SlowRemainderV, SlowBB);
144   RemPhi->addIncoming(FastRemainderV, FastBB);
145
146   // Replace Instr with appropriate phi node
147   if (UseDivOp)
148     Instr->replaceAllUsesWith(QuoPhi);
149   else
150     Instr->replaceAllUsesWith(RemPhi);
151   Instr->eraseFromParent();
152
153   // Combine operands into a single value with OR for value testing below
154   MainBB->getInstList().back().eraseFromParent();
155   IRBuilder<> MainBuilder(MainBB, MainBB->end());
156   Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
157
158   // BitMask is inverted to check if the operands are
159   // larger than the bypass type
160   uint64_t BitMask = ~BypassType->getBitMask();
161   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
162
163   // Compare operand values and branch
164   Value *ZeroV = MainBuilder.getInt32(0);
165   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
166   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
167
168   // point iterator J at first instruction of successorBB
169   J = I->begin();
170
171   // Cache phi nodes to be used later in place of other instances
172   // of div or rem with the same sign, dividend, and divisor
173   DivOpInfo Key(UseSignedOp, Dividend, Divisor);
174   DivPhiNodes Value(QuoPhi, RemPhi);
175   PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
176   return true;
177 }
178
179 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
180 // operands and operation are identical. Otherwise call insertFastDiv to perform
181 // the optimization and cache the resulting dividend and remainder.
182 static bool reuseOrInsertFastDiv(Function &F,
183                                  Function::iterator &I,
184                                  BasicBlock::iterator &J,
185                                  IntegerType *BypassType,
186                                  bool UseDivOp,
187                                  bool UseSignedOp,
188                                  DivCacheTy &PerBBDivCache) {
189   // Get instruction operands
190   Instruction *Instr = J;
191   DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
192   DivCacheTy::const_iterator CacheI = PerBBDivCache.find(Key);
193
194   if (CacheI == PerBBDivCache.end()) {
195     // If previous instance does not exist, insert fast div
196     return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
197                          PerBBDivCache);
198   }
199
200   // Replace operation value with previously generated phi node
201   DivPhiNodes Value = CacheI->second;
202   if (UseDivOp) {
203     // Replace all uses of div instruction with quotient phi node
204     J->replaceAllUsesWith(Value.Quotient);
205   } else {
206     // Replace all uses of rem instruction with remainder phi node
207     J->replaceAllUsesWith(Value.Remainder);
208   }
209
210   // Advance to next operation
211   ++J;
212
213   // Remove redundant operation
214   Instr->eraseFromParent();
215   return true;
216 }
217
218 // bypassSlowDivision - This optimization identifies DIV instructions that can
219 // be profitably bypassed and carried out with a shorter, faster divide.
220 bool bypassSlowDivision(Function &F,
221                         Function::iterator &I,
222                         const llvm::DenseMap<Type *, Type *> &BypassTypeMap) {
223   DivCacheTy DivCache;
224
225   bool MadeChange = false;
226   for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
227
228     // Get instruction details
229     unsigned Opcode = J->getOpcode();
230     bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
231     bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
232     bool UseSignedOp = Opcode == Instruction::SDiv ||
233                        Opcode == Instruction::SRem;
234
235     // Only optimize div or rem ops
236     if (!UseDivOp && !UseRemOp)
237       continue;
238
239     // Continue if div/rem type is not bypassed
240     DenseMap<Type *, Type *>::const_iterator BT =
241       BypassTypeMap.find(J->getType());
242     if (BT == BypassTypeMap.end())
243       continue;
244
245     IntegerType *BypassType = cast<IntegerType>(BT->second);
246     MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp,
247                                        UseSignedOp, DivCache);
248   }
249
250   return MadeChange;
251 }