From: David Majnemer <david.majnemer@gmail.com>
Date: Sat, 29 Jun 2013 08:40:07 +0000 (+0000)
Subject: InstCombine: Be more agressive optimizing 'udiv' instrs with 'select' denoms
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=f723e5d1c290a00378d3fc10c7ef502692e0710e;p=oota-llvm.git

InstCombine: Be more agressive optimizing 'udiv' instrs with 'select' denoms

Real world code sometimes has the denominator of a 'udiv' be a
'select'.  LLVM can handle such cases but only when the 'select'
operands are symmetric in structure (both select operands are a constant
power of two or a left shift, etc.).  This falls apart if we are dealt a
'udiv' where the code is not symetric or if the select operands lead us
to more select instructions.

Instead, we should treat the LHS and each select operand as a distinct
divide operation and try to optimize them independently.  If we can
to simplify each operation, then we can replace the 'udiv' with, say, a
'lshr' that has a new select with a bunch of new operands for the
select.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185257 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index d0d4f41d3b4..bc5d699e538 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -705,26 +705,27 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) {
   return 0;
 }
 
-Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
-  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
-  if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
-    return ReplaceInstUsesWith(I, V);
-
-  // Handle the integer div common cases
-  if (Instruction *Common = commonIDivTransforms(I))
-    return Common;
+const unsigned MaxDepth = 6;
 
+// \brief Recursively visits the possible right hand operands of a udiv
+// instruction, seeing through select instructions, to determine if we can
+// replace the udiv with something simpler.  If we find that an operand is not
+// able to simplify the udiv, we abort the entire transformation.
+//
+// Inserts any intermediate instructions used for the simplification into
+// NewInstrs and returns a new instruction that depends upon them.
+static Instruction *visitUDivOperand(Value *Op0, Value *Op1,
+                                     const BinaryOperator &I,
+                                     SmallVectorImpl<Instruction *> &NewInstrs,
+                                     unsigned Depth = 0) {
   {
     // X udiv 2^C -> X >> C
     // Check to see if this is an unsigned division with an exact power of 2,
     // if so, convert to a right shift.
     const APInt *C;
     if (match(Op1, m_Power2(C))) {
-      BinaryOperator *LShr =
-      BinaryOperator::CreateLShr(Op0,
-                                 ConstantInt::get(Op0->getType(),
-                                                  C->logBase2()));
+      BinaryOperator *LShr = BinaryOperator::CreateLShr(
+          Op0, ConstantInt::get(Op0->getType(), C->logBase2()));
       if (I.isExact()) LShr->setIsExact();
       return LShr;
     }
@@ -733,51 +734,68 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
     // X udiv C, where C >= signbit
     if (C->getValue().isNegative()) {
-      Value *IC = Builder->CreateICmpULT(Op0, C);
+      ICmpInst *IC = new ICmpInst(ICmpInst::ICMP_ULT, Op0, C);
+      NewInstrs.push_back(IC);
+
       return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
                                 ConstantInt::get(I.getType(), 1));
     }
   }
 
-  // (x lshr C1) udiv C2 --> x udiv (C2 << C1)
-  if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
-    Value *X;
-    ConstantInt *C1;
-    if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {
-      APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));
-      return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));
-    }
-  }
-
   // X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
   { const APInt *CI; Value *N;
     if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
         match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) {
-      if (*CI != 1)
-        N = Builder->CreateAdd(N,
-                               ConstantInt::get(N->getType(), CI->logBase2()));
-      if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
-        N = Builder->CreateZExt(N, Z->getDestTy());
-      if (I.isExact())
-        return BinaryOperator::CreateExactLShr(Op0, N);
-      return BinaryOperator::CreateLShr(Op0, N);
+      if (*CI != 1) {
+        N = BinaryOperator::CreateAdd(
+            N, ConstantInt::get(N->getType(), CI->logBase2()));
+        NewInstrs.push_back(cast<Instruction>(N));
+      }
+      if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1)) {
+        N = new ZExtInst(N, Z->getDestTy());
+        NewInstrs.push_back(cast<Instruction>(N));
+      }
+      BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);
+      if (I.isExact()) LShr->setIsExact();
+      return LShr;
     }
   }
 
-  // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
-  // where C1&C2 are powers of two.
-  { Value *Cond; const APInt *C1, *C2;
-    if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
-      // Construct the "on true" case of the select
-      Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
-                                       I.isExact());
+  // The remaining tests are all recursive, so bail out if we hit the limit.
+  if (Depth++ == MaxDepth)
+    return 0;
 
-      // Construct the "on false" case of the select
-      Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
-                                       I.isExact());
+  if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
+    if (Instruction *LHS =
+            visitUDivOperand(Op0, SI->getOperand(1), I, NewInstrs)) {
+      NewInstrs.push_back(LHS);
+      if (Instruction *RHS =
+              visitUDivOperand(Op0, SI->getOperand(2), I, NewInstrs)) {
+        NewInstrs.push_back(RHS);
+        return SelectInst::Create(SI->getCondition(), LHS, RHS);
+      }
+    }
 
-      // construct the select instruction and return it.
-      return SelectInst::Create(Cond, TSI, FSI);
+  return 0;
+}
+
+Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  if (Value *V = SimplifyUDivInst(Op0, Op1, TD))
+    return ReplaceInstUsesWith(I, V);
+
+  // Handle the integer div common cases
+  if (Instruction *Common = commonIDivTransforms(I))
+    return Common;
+
+  // (x lshr C1) udiv C2 --> x udiv (C2 << C1)
+  if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
+    Value *X;
+    ConstantInt *C1;
+    if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {
+      APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));
+      return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));
     }
   }
 
@@ -788,6 +806,21 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
                                               I.isExact()),
                           I.getType());
 
+  // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))
+  SmallVector<Instruction *, 4> NewInstrs;
+  Instruction *RetI = visitUDivOperand(Op0, Op1, I, NewInstrs);
+  for (unsigned i = 0, e = NewInstrs.size(); i != e; i++)
+    // If we managed to replace the UDiv completely, insert the new intermediate
+    // instructions before where the UDiv was.
+    // If we couldn't, we must clean up after ourselves by deleting the new
+    // instructions.
+    if (RetI)
+      NewInstrs[i]->insertBefore(&I);
+    else
+      delete NewInstrs[i];
+  if (RetI)
+    return RetI;
+
   return 0;
 }
 
diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll
index e0372ebac18..46d0f9afd70 100644
--- a/test/Transforms/InstCombine/div-shift.ll
+++ b/test/Transforms/InstCombine/div-shift.ll
@@ -35,3 +35,41 @@ define i64 @t3(i64 %x, i32 %y) nounwind  {
   %3 = udiv i64 %x, %2
   ret i64 %3
 }
+
+define i32 @t4(i32 %x, i32 %y) nounwind {
+; CHECK: t4
+; CHECK-NOT: udiv
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %y, 5
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 5, i32 %y
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 %x, [[SEL]]
+; CHECK-NEXT: ret i32 [[SHR]]
+  %1 = shl i32 1, %y
+  %2 = icmp ult i32 %1, 32
+  %3 = select i1 %2, i32 32, i32 %1
+  %4 = udiv i32 %x, %3
+  ret i32 %4
+}
+
+define i32 @t5(i1 %x, i1 %y, i32 %V) nounwind {
+; CHECK: t5
+; CHECK-NOT: udiv
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 %x, i32 5, i32 6
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 %y, i32 [[SEL1]], i32 %V
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 %V, [[SEL2]]
+; CHECK-NEXT: ret i32 [[LSHR]]
+  %1 = shl i32 1, %V
+  %2 = select i1 %x, i32 32, i32 64
+  %3 = select i1 %y, i32 %2, i32 %1
+  %4 = udiv i32 %V, %3
+  ret i32 %4
+}
+
+define i32 @t6(i32 %x, i32 %z) nounwind{
+; CHECK: t6
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 0
+; CHECK-NOT: udiv i32 %z, %x
+  %x_is_zero = icmp eq i32 %x, 0
+  %divisor = select i1 %x_is_zero, i32 1, i32 %x
+  %y = udiv i32 %z, %divisor
+  ret i32 %y
+}