1f517d038d1951b6a77208d28b42a59ae17ab869
[oota-llvm.git] / lib / Transforms / Utils / BypassSlowDivision.cpp
1 //===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains an optimization for div and rem on architectures that
11 // execute short instructions significantly faster than longer instructions.
12 // For example, on Intel Atom 32-bit divides are slow enough that during
13 // runtime it is profitable to check the value of the operands, and if they are
14 // positive and less than 256 use an unsigned 8-bit divide.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #define DEBUG_TYPE "bypass-slow-division"
19 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instructions.h"
24
25 using namespace llvm;
26
27 namespace {
28   struct DivOpInfo {
29     bool SignedOp;
30     Value *Dividend;
31     Value *Divisor;
32
33     DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
34       : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
35   };
36
37   struct DivPhiNodes {
38     PHINode *Quotient;
39     PHINode *Remainder;
40
41     DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
42       : Quotient(InQuotient), Remainder(InRemainder) {}
43   };
44 }
45
46 namespace llvm {
47   template<>
48   struct DenseMapInfo<DivOpInfo> {
49     static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
50       return Val1.SignedOp == Val2.SignedOp &&
51              Val1.Dividend == Val2.Dividend &&
52              Val1.Divisor == Val2.Divisor;
53     }
54
55     static DivOpInfo getEmptyKey() {
56       return DivOpInfo(false, 0, 0);
57     }
58
59     static DivOpInfo getTombstoneKey() {
60       return DivOpInfo(true, 0, 0);
61     }
62
63     static unsigned getHashValue(const DivOpInfo &Val) {
64       return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
65                         reinterpret_cast<uintptr_t>(Val.Divisor)) ^
66                         (unsigned)Val.SignedOp;
67     }
68   };
69
70   typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
71 }
72
73 // insertFastDiv - Substitutes the div/rem instruction with code that checks the
74 // value of the operands and uses a shorter-faster div/rem instruction when
75 // possible and the longer-slower div/rem instruction otherwise.
76 static bool insertFastDiv(Function &F,
77                           Function::iterator &I,
78                           BasicBlock::iterator &J,
79                           IntegerType *BypassType,
80                           bool UseDivOp,
81                           bool UseSignedOp,
82                           DivCacheTy &PerBBDivCache) {
83   // Get instruction operands
84   Instruction *Instr = J;
85   Value *Dividend = Instr->getOperand(0);
86   Value *Divisor = Instr->getOperand(1);
87
88   if (isa<ConstantInt>(Divisor) ||
89       (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
90     // Operations with immediate values should have
91     // been solved and replaced during compile time.
92     return false;
93   }
94
95   // Basic Block is split before divide
96   BasicBlock *MainBB = I;
97   BasicBlock *SuccessorBB = I->splitBasicBlock(J);
98   ++I; //advance iterator I to successorBB
99
100   // Add new basic block for slow divide operation
101   BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
102                                           MainBB->getParent(), SuccessorBB);
103   SlowBB->moveBefore(SuccessorBB);
104   IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
105   Value *SlowQuotientV;
106   Value *SlowRemainderV;
107   if (UseSignedOp) {
108     SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
109     SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
110   } else {
111     SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
112     SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
113   }
114   SlowBuilder.CreateBr(SuccessorBB);
115
116   // Add new basic block for fast divide operation
117   BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
118                                           MainBB->getParent(), SuccessorBB);
119   FastBB->moveBefore(SlowBB);
120   IRBuilder<> FastBuilder(FastBB, FastBB->begin());
121   Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
122                                                 BypassType);
123   Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
124                                                  BypassType);
125
126   // udiv/urem because optimization only handles positive numbers
127   Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
128                                                       ShortDivisorV);
129   Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
130                                                   ShortDivisorV);
131   Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
132                                                 ShortQuotientV,
133                                                 Dividend->getType());
134   Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
135                                                  ShortRemainderV,
136                                                  Dividend->getType());
137   FastBuilder.CreateBr(SuccessorBB);
138
139   // Phi nodes for result of div and rem
140   IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
141   PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
142   QuoPhi->addIncoming(SlowQuotientV, SlowBB);
143   QuoPhi->addIncoming(FastQuotientV, FastBB);
144   PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
145   RemPhi->addIncoming(SlowRemainderV, SlowBB);
146   RemPhi->addIncoming(FastRemainderV, FastBB);
147
148   // Replace Instr with appropriate phi node
149   if (UseDivOp)
150     Instr->replaceAllUsesWith(QuoPhi);
151   else
152     Instr->replaceAllUsesWith(RemPhi);
153   Instr->eraseFromParent();
154
155   // Combine operands into a single value with OR for value testing below
156   MainBB->getInstList().back().eraseFromParent();
157   IRBuilder<> MainBuilder(MainBB, MainBB->end());
158   Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
159
160   // BitMask is inverted to check if the operands are
161   // larger than the bypass type
162   uint64_t BitMask = ~BypassType->getBitMask();
163   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
164
165   // Compare operand values and branch
166   Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
167   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
168   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
169
170   // point iterator J at first instruction of successorBB
171   J = I->begin();
172
173   // Cache phi nodes to be used later in place of other instances
174   // of div or rem with the same sign, dividend, and divisor
175   DivOpInfo Key(UseSignedOp, Dividend, Divisor);
176   DivPhiNodes Value(QuoPhi, RemPhi);
177   PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
178   return true;
179 }
180
181 // reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if
182 // operands and operation are identical. Otherwise call insertFastDiv to perform
183 // the optimization and cache the resulting dividend and remainder.
184 static bool reuseOrInsertFastDiv(Function &F,
185                                  Function::iterator &I,
186                                  BasicBlock::iterator &J,
187                                  IntegerType *BypassType,
188                                  bool UseDivOp,
189                                  bool UseSignedOp,
190                                  DivCacheTy &PerBBDivCache) {
191   // Get instruction operands
192   Instruction *Instr = J;
193   DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
194   DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
195
196   if (CacheI == PerBBDivCache.end()) {
197     // If previous instance does not exist, insert fast div
198     return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp,
199                          PerBBDivCache);
200   }
201
202   // Replace operation value with previously generated phi node
203   DivPhiNodes &Value = CacheI->second;
204   if (UseDivOp) {
205     // Replace all uses of div instruction with quotient phi node
206     J->replaceAllUsesWith(Value.Quotient);
207   } else {
208     // Replace all uses of rem instruction with remainder phi node
209     J->replaceAllUsesWith(Value.Remainder);
210   }
211
212   // Advance to next operation
213   ++J;
214
215   // Remove redundant operation
216   Instr->eraseFromParent();
217   return true;
218 }
219
220 // bypassSlowDivision - This optimization identifies DIV instructions that can
221 // be profitably bypassed and carried out with a shorter, faster divide.
222 bool llvm::bypassSlowDivision(Function &F,
223                               Function::iterator &I,
224                               const DenseMap<unsigned int, unsigned int> &BypassWidths) {
225   DivCacheTy DivCache;
226
227   bool MadeChange = false;
228   for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
229
230     // Get instruction details
231     unsigned Opcode = J->getOpcode();
232     bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
233     bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
234     bool UseSignedOp = Opcode == Instruction::SDiv ||
235                        Opcode == Instruction::SRem;
236
237     // Only optimize div or rem ops
238     if (!UseDivOp && !UseRemOp)
239       continue;
240
241     // Skip division on vector types, only optimize integer instructions
242     if (!J->getType()->isIntegerTy())
243       continue;
244
245     // Get bitwidth of div/rem instruction
246     IntegerType *T = cast<IntegerType>(J->getType());
247     unsigned int bitwidth = T->getBitWidth();
248
249     // Continue if bitwidth is not bypassed
250     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
251     if (BI == BypassWidths.end())
252       continue;
253
254     // Get type for div/rem instruction with bypass bitwidth
255     IntegerType *BT = IntegerType::get(J->getContext(), BI->second);
256
257     MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp,
258                                        UseSignedOp, DivCache);
259   }
260
261   return MadeChange;
262 }