lib/Transforms/Utils/IntegerDivision.cpp

   1 //===-- IntegerDivision.cpp - Expand integer division ---------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file contains an implementation of 32bit scalar integer division for
  11 // targets that don't have native support. It's largely derived from
  12 // compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the
  13 // amount of control flow
  14 //
  15 //===----------------------------------------------------------------------===//
  16
  17 #define DEBUG_TYPE "integer-division"
  18 #include "llvm/Transforms/Utils/IntegerDivision.h"
  19 #include "llvm/Function.h"
  20 #include "llvm/IRBuilder.h"
  21 #include "llvm/Instructions.h"
  22 #include "llvm/Intrinsics.h"
  23
  24 using namespace llvm;
  25
  26 /// Generate code to compute the remainder of two signed integers. Returns the
  27 /// remainder, which will have the sign of the dividend. Builder's insert point
  28 /// should be pointing where the caller wants code generated, e.g. at the srem
  29 /// instruction. This will generate a urem in the process, and Builder's insert
  30 /// point will be pointing at the uren (if present, i.e. not folded), ready to
  31 /// be expanded if the user wishes
  32 static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor,
  33                                           IRBuilder<> &Builder) {
  34   ConstantInt *ThirtyOne = Builder.getInt32(31);
  35
  36   // ;   %dividend_sgn = ashr i32 %dividend, 31
  37   // ;   %divisor_sgn  = ashr i32 %divisor, 31
  38   // ;   %dvd_xor      = xor i32 %dividend, %dividend_sgn
  39   // ;   %dvs_xor      = xor i32 %divisor, %divisor_sgn
  40   // ;   %u_dividend   = sub i32 %dvd_xor, %dividend_sgn
  41   // ;   %u_divisor    = sub i32 %dvs_xor, %divisor_sgn
  42   // ;   %urem         = urem i32 %dividend, %divisor
  43   // ;   %xored        = xor i32 %urem, %dividend_sgn
  44   // ;   %srem         = sub i32 %xored, %dividend_sgn
  45   Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne);
  46   Value *DivisorSign  = Builder.CreateAShr(Divisor, ThirtyOne);
  47   Value *DvdXor       = Builder.CreateXor(Dividend, DividendSign);
  48   Value *DvsXor       = Builder.CreateXor(Divisor, DivisorSign);
  49   Value *UDividend    = Builder.CreateSub(DvdXor, DividendSign);
  50   Value *UDivisor     = Builder.CreateSub(DvsXor, DivisorSign);
  51   Value *URem         = Builder.CreateURem(UDividend, UDivisor);
  52   Value *Xored        = Builder.CreateXor(URem, DividendSign);
  53   Value *SRem         = Builder.CreateSub(Xored, DividendSign);
  54
  55   if (Instruction *URemInst = dyn_cast<Instruction>(URem))
  56     Builder.SetInsertPoint(URemInst);
  57
  58   return SRem;
  59 }
  60
  61
  62 /// Generate code to compute the remainder of two unsigned integers. Returns the
  63 /// remainder. Builder's insert point should be pointing where the caller wants
  64 /// code generated, e.g. at the urem instruction. This will generate a udiv in
  65 /// the process, and Builder's insert point will be pointing at the udiv (if
  66 /// present, i.e. not folded), ready to be expanded if the user wishes
  67 static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor,
  68                                              IRBuilder<> &Builder) {
  69   // Remainder = Dividend - Quotient*Divisor
  70
  71   // ;   %quotient  = udiv i32 %dividend, %divisor
  72   // ;   %product   = mul i32 %divisor, %quotient
  73   // ;   %remainder = sub i32 %dividend, %product
  74   Value *Quotient  = Builder.CreateUDiv(Dividend, Divisor);
  75   Value *Product   = Builder.CreateMul(Divisor, Quotient);
  76   Value *Remainder = Builder.CreateSub(Dividend, Product);
  77
  78   if (Instruction *UDiv = dyn_cast<Instruction>(Quotient))
  79     Builder.SetInsertPoint(UDiv);
  80
  81   return Remainder;
  82 }
  83
  84 /// Generate code to divide two signed integers. Returns the quotient, rounded
  85 /// towards 0. Builder's insert point should be pointing where the caller wants
  86 /// code generated, e.g. at the sdiv instruction. This will generate a udiv in
  87 /// the process, and Builder's insert point will be pointing at the udiv (if
  88 /// present, i.e. not folded), ready to be expanded if the user wishes.
  89 static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor,
  90                                          IRBuilder<> &Builder) {
  91   // Implementation taken from compiler-rt's __divsi3
  92
  93   ConstantInt *ThirtyOne = Builder.getInt32(31);
  94
  95   // ;   %tmp    = ashr i32 %dividend, 31
  96   // ;   %tmp1   = ashr i32 %divisor, 31
  97   // ;   %tmp2   = xor i32 %tmp, %dividend
  98   // ;   %u_dvnd = sub nsw i32 %tmp2, %tmp
  99   // ;   %tmp3   = xor i32 %tmp1, %divisor
 100   // ;   %u_dvsr = sub nsw i32 %tmp3, %tmp1
 101   // ;   %q_sgn  = xor i32 %tmp1, %tmp
 102   // ;   %q_mag  = udiv i32 %u_dvnd, %u_dvsr
 103   // ;   %tmp4   = xor i32 %q_mag, %q_sgn
 104   // ;   %q      = sub i32 %tmp4, %q_sgn
 105   Value *Tmp    = Builder.CreateAShr(Dividend, ThirtyOne);
 106   Value *Tmp1   = Builder.CreateAShr(Divisor, ThirtyOne);
 107   Value *Tmp2   = Builder.CreateXor(Tmp, Dividend);
 108   Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp);
 109   Value *Tmp3   = Builder.CreateXor(Tmp1, Divisor);
 110   Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1);
 111   Value *Q_Sgn  = Builder.CreateXor(Tmp1, Tmp);
 112   Value *Q_Mag  = Builder.CreateUDiv(U_Dvnd, U_Dvsr);
 113   Value *Tmp4   = Builder.CreateXor(Q_Mag, Q_Sgn);
 114   Value *Q      = Builder.CreateSub(Tmp4, Q_Sgn);
 115
 116   if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag))
 117     Builder.SetInsertPoint(UDiv);
 118
 119   return Q;
 120 }
 121
 122 /// Generates code to divide two unsigned scalar 32-bit integers. Returns the
 123 /// quotient, rounded towards 0. Builder's insert point should be pointing where
 124 /// the caller wants code generated, e.g. at the udiv instruction.
 125 static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,
 126                                            IRBuilder<> &Builder) {
 127   // The basic algorithm can be found in the compiler-rt project's
 128   // implementation of __udivsi3.c. Here, we do a lower-level IR based approach
 129   // that's been hand-tuned to lessen the amount of control flow involved.
 130
 131   // Some helper values
 132   IntegerType *I32Ty = Builder.getInt32Ty();
 133
 134   ConstantInt *Zero      = Builder.getInt32(0);
 135   ConstantInt *One       = Builder.getInt32(1);
 136   ConstantInt *ThirtyOne = Builder.getInt32(31);
 137   ConstantInt *NegOne    = ConstantInt::getSigned(I32Ty, -1);
 138   ConstantInt *True      = Builder.getTrue();
 139
 140   BasicBlock *IBB = Builder.GetInsertBlock();
 141   Function *F = IBB->getParent();
 142   Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
 143                                                 I32Ty);
 144
 145   // Our CFG is going to look like:
 146   // +---------------------+
 147   // | special-cases       |
 148   // |   ...               |
 149   // +---------------------+
 150   //  |       |
 151   //  |   +----------+
 152   //  |   |  bb1     |
 153   //  |   |  ...     |
 154   //  |   +----------+
 155   //  |    |      |
 156   //  |    |  +------------+
 157   //  |    |  |  preheader |
 158   //  |    |  |  ...       |
 159   //  |    |  +------------+
 160   //  |    |      |
 161   //  |    |      |      +---+
 162   //  |    |      |      |   |
 163   //  |    |  +------------+ |
 164   //  |    |  |  do-while  | |
 165   //  |    |  |  ...       | |
 166   //  |    |  +------------+ |
 167   //  |    |      |      |   |
 168   //  |   +-----------+  +---+
 169   //  |   | loop-exit |
 170   //  |   |  ...      |
 171   //  |   +-----------+
 172   //  |     |
 173   // +-------+
 174   // | ...   |
 175   // | end   |
 176   // +-------+
 177   BasicBlock *SpecialCases = Builder.GetInsertBlock();
 178   SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases"));
 179   BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(),
 180                                                   "udiv-end");
 181   BasicBlock *LoopExit  = BasicBlock::Create(Builder.getContext(),
 182                                              "udiv-loop-exit", F, End);
 183   BasicBlock *DoWhile   = BasicBlock::Create(Builder.getContext(),
 184                                              "udiv-do-while", F, End);
 185   BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(),
 186                                              "udiv-preheader", F, End);
 187   BasicBlock *BB1       = BasicBlock::Create(Builder.getContext(),
 188                                              "udiv-bb1", F, End);
 189
 190   // We'll be overwriting the terminator to insert our extra blocks
 191   SpecialCases->getTerminator()->eraseFromParent();
 192
 193   // First off, check for special cases: dividend or divisor is zero, divisor
 194   // is greater than dividend, and divisor is 1.
 195   // ; special-cases:
 196   // ;   %ret0_1      = icmp eq i32 %divisor, 0
 197   // ;   %ret0_2      = icmp eq i32 %dividend, 0
 198   // ;   %ret0_3      = or i1 %ret0_1, %ret0_2
 199   // ;   %tmp0        = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true)
 200   // ;   %tmp1        = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true)
 201   // ;   %sr          = sub nsw i32 %tmp0, %tmp1
 202   // ;   %ret0_4      = icmp ugt i32 %sr, 31
 203   // ;   %ret0        = or i1 %ret0_3, %ret0_4
 204   // ;   %retDividend = icmp eq i32 %sr, 31
 205   // ;   %retVal      = select i1 %ret0, i32 0, i32 %dividend
 206   // ;   %earlyRet    = or i1 %ret0, %retDividend
 207   // ;   br i1 %earlyRet, label %end, label %bb1
 208   Builder.SetInsertPoint(SpecialCases);
 209   Value *Ret0_1      = Builder.CreateICmpEQ(Divisor, Zero);
 210   Value *Ret0_2      = Builder.CreateICmpEQ(Dividend, Zero);
 211   Value *Ret0_3      = Builder.CreateOr(Ret0_1, Ret0_2);
 212   Value *Tmp0        = Builder.CreateCall2(CTLZi32, Divisor, True);
 213   Value *Tmp1        = Builder.CreateCall2(CTLZi32, Dividend, True);
 214   Value *SR          = Builder.CreateSub(Tmp0, Tmp1);
 215   Value *Ret0_4      = Builder.CreateICmpUGT(SR, ThirtyOne);
 216   Value *Ret0        = Builder.CreateOr(Ret0_3, Ret0_4);
 217   Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne);
 218   Value *RetVal      = Builder.CreateSelect(Ret0, Zero, Dividend);
 219   Value *EarlyRet    = Builder.CreateOr(Ret0, RetDividend);
 220   Builder.CreateCondBr(EarlyRet, End, BB1);
 221
 222   // ; bb1:                                             ; preds = %special-cases
 223   // ;   %sr_1     = add i32 %sr, 1
 224   // ;   %tmp2     = sub i32 31, %sr
 225   // ;   %q        = shl i32 %dividend, %tmp2
 226   // ;   %skipLoop = icmp eq i32 %sr_1, 0
 227   // ;   br i1 %skipLoop, label %loop-exit, label %preheader
 228   Builder.SetInsertPoint(BB1);
 229   Value *SR_1     = Builder.CreateAdd(SR, One);
 230   Value *Tmp2     = Builder.CreateSub(ThirtyOne, SR);
 231   Value *Q        = Builder.CreateShl(Dividend, Tmp2);
 232   Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero);
 233   Builder.CreateCondBr(SkipLoop, LoopExit, Preheader);
 234
 235   // ; preheader:                                           ; preds = %bb1
 236   // ;   %tmp3 = lshr i32 %dividend, %sr_1
 237   // ;   %tmp4 = add i32 %divisor, -1
 238   // ;   br label %do-while
 239   Builder.SetInsertPoint(Preheader);
 240   Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1);
 241   Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne);
 242   Builder.CreateBr(DoWhile);
 243
 244   // ; do-while:                                 ; preds = %do-while, %preheader
 245   // ;   %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
 246   // ;   %sr_3    = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
 247   // ;   %r_1     = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
 248   // ;   %q_2     = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
 249   // ;   %tmp5  = shl i32 %r_1, 1
 250   // ;   %tmp6  = lshr i32 %q_2, 31
 251   // ;   %tmp7  = or i32 %tmp5, %tmp6
 252   // ;   %tmp8  = shl i32 %q_2, 1
 253   // ;   %q_1   = or i32 %carry_1, %tmp8
 254   // ;   %tmp9  = sub i32 %tmp4, %tmp7
 255   // ;   %tmp10 = ashr i32 %tmp9, 31
 256   // ;   %carry = and i32 %tmp10, 1
 257   // ;   %tmp11 = and i32 %tmp10, %divisor
 258   // ;   %r     = sub i32 %tmp7, %tmp11
 259   // ;   %sr_2  = add i32 %sr_3, -1
 260   // ;   %tmp12 = icmp eq i32 %sr_2, 0
 261   // ;   br i1 %tmp12, label %loop-exit, label %do-while
 262   Builder.SetInsertPoint(DoWhile);
 263   PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2);
 264   PHINode *SR_3    = Builder.CreatePHI(I32Ty, 2);
 265   PHINode *R_1     = Builder.CreatePHI(I32Ty, 2);
 266   PHINode *Q_2     = Builder.CreatePHI(I32Ty, 2);
 267   Value *Tmp5  = Builder.CreateShl(R_1, One);
 268   Value *Tmp6  = Builder.CreateLShr(Q_2, ThirtyOne);
 269   Value *Tmp7  = Builder.CreateOr(Tmp5, Tmp6);
 270   Value *Tmp8  = Builder.CreateShl(Q_2, One);
 271   Value *Q_1   = Builder.CreateOr(Carry_1, Tmp8);
 272   Value *Tmp9  = Builder.CreateSub(Tmp4, Tmp7);
 273   Value *Tmp10 = Builder.CreateAShr(Tmp9, 31);
 274   Value *Carry = Builder.CreateAnd(Tmp10, One);
 275   Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor);
 276   Value *R     = Builder.CreateSub(Tmp7, Tmp11);
 277   Value *SR_2  = Builder.CreateAdd(SR_3, NegOne);
 278   Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero);
 279   Builder.CreateCondBr(Tmp12, LoopExit, DoWhile);
 280
 281   // ; loop-exit:                                      ; preds = %do-while, %bb1
 282   // ;   %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
 283   // ;   %q_3     = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
 284   // ;   %tmp13 = shl i32 %q_3, 1
 285   // ;   %q_4   = or i32 %carry_2, %tmp13
 286   // ;   br label %end
 287   Builder.SetInsertPoint(LoopExit);
 288   PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2);
 289   PHINode *Q_3     = Builder.CreatePHI(I32Ty, 2);
 290   Value *Tmp13 = Builder.CreateShl(Q_3, One);
 291   Value *Q_4   = Builder.CreateOr(Carry_2, Tmp13);
 292   Builder.CreateBr(End);
 293
 294   // ; end:                                 ; preds = %loop-exit, %special-cases
 295   // ;   %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
 296   // ;   ret i32 %q_5
 297   Builder.SetInsertPoint(End, End->begin());
 298   PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2);
 299
 300   // Populate the Phis, since all values have now been created. Our Phis were:
 301   // ;   %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ]
 302   Carry_1->addIncoming(Zero, Preheader);
 303   Carry_1->addIncoming(Carry, DoWhile);
 304   // ;   %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ]
 305   SR_3->addIncoming(SR_1, Preheader);
 306   SR_3->addIncoming(SR_2, DoWhile);
 307   // ;   %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ]
 308   R_1->addIncoming(Tmp3, Preheader);
 309   R_1->addIncoming(R, DoWhile);
 310   // ;   %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ]
 311   Q_2->addIncoming(Q, Preheader);
 312   Q_2->addIncoming(Q_1, DoWhile);
 313   // ;   %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ]
 314   Carry_2->addIncoming(Zero, BB1);
 315   Carry_2->addIncoming(Carry, DoWhile);
 316   // ;   %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ]
 317   Q_3->addIncoming(Q, BB1);
 318   Q_3->addIncoming(Q_1, DoWhile);
 319   // ;   %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ]
 320   Q_5->addIncoming(Q_4, LoopExit);
 321   Q_5->addIncoming(RetVal, SpecialCases);
 322
 323   return Q_5;
 324 }
 325
 326 /// Generate code to calculate the remainder of two integers, replacing Rem with
 327 /// the generated code. This currently generates code using the udiv expansion,
 328 /// but future work includes generating more specialized code, e.g. when more
 329 /// information about the operands are known. Currently only implements 32bit
 330 /// scalar division (due to udiv's limitation), but future work is removing this
 331 /// limitation.
 332 ///
 333 /// @brief Replace Rem with generated code.
 334 bool llvm::expandRemainder(BinaryOperator *Rem) {
 335   assert((Rem->getOpcode() == Instruction::SRem ||
 336           Rem->getOpcode() == Instruction::URem) &&
 337          "Trying to expand remainder from a non-remainder function");
 338
 339   IRBuilder<> Builder(Rem);
 340
 341   // First prepare the sign if it's a signed remainder
 342   if (Rem->getOpcode() == Instruction::SRem) {
 343     Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0),
 344                                                    Rem->getOperand(1), Builder);
 345
 346     Rem->replaceAllUsesWith(Remainder);
 347     Rem->dropAllReferences();
 348     Rem->eraseFromParent();
 349
 350     // If we didn't actually generate a udiv instruction, we're done
 351     BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
 352     if (!BO || BO->getOpcode() != Instruction::URem)
 353       return true;
 354
 355     Rem = BO;
 356   }
 357
 358   Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0),
 359                                                     Rem->getOperand(1),
 360                                                     Builder);
 361
 362   Rem->replaceAllUsesWith(Remainder);
 363   Rem->dropAllReferences();
 364   Rem->eraseFromParent();
 365
 366   // Expand the udiv
 367   if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) {
 368     assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?");
 369     expandDivision(UDiv);
 370   }
 371
 372   return true;
 373 }
 374
 375
 376 /// Generate code to divide two integers, replacing Div with the generated
 377 /// code. This currently generates code similarly to compiler-rt's
 378 /// implementations, but future work includes generating more specialized code
 379 /// when more information about the operands are known. Currently only
 380 /// implements 32bit scalar division, but future work is removing this
 381 /// limitation.
 382 ///
 383 /// @brief Replace Div with generated code.
 384 bool llvm::expandDivision(BinaryOperator *Div) {
 385   assert((Div->getOpcode() == Instruction::SDiv ||
 386           Div->getOpcode() == Instruction::UDiv) &&
 387          "Trying to expand division from a non-division function");
 388
 389   IRBuilder<> Builder(Div);
 390
 391   if (Div->getType()->isVectorTy())
 392     llvm_unreachable("Div over vectors not supported");
 393
 394   // First prepare the sign if it's a signed division
 395   if (Div->getOpcode() == Instruction::SDiv) {
 396     // Lower the code to unsigned division, and reset Div to point to the udiv.
 397     Value *Quotient = generateSignedDivisionCode(Div->getOperand(0),
 398                                                  Div->getOperand(1), Builder);
 399     Div->replaceAllUsesWith(Quotient);
 400     Div->dropAllReferences();
 401     Div->eraseFromParent();
 402
 403     // If we didn't actually generate a udiv instruction, we're done
 404     BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint());
 405     if (!BO || BO->getOpcode() != Instruction::UDiv)
 406       return true;
 407
 408     Div = BO;
 409   }
 410
 411   // Insert the unsigned division code
 412   Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0),
 413                                                  Div->getOperand(1),
 414                                                  Builder);
 415   Div->replaceAllUsesWith(Quotient);
 416   Div->dropAllReferences();
 417   Div->eraseFromParent();
 418
 419   return true;
 420 }