From cb698b26a108b4aa42995d629fa72231eb238f24 Mon Sep 17 00:00:00 2001
From: David Majnemer <david.majnemer@gmail.com>
Date: Sat, 16 Aug 2014 08:55:06 +0000
Subject: [PATCH] InstCombine: Combine mul with div.

We can combne a mul with a div if one of the operands is a multiple of
the other:

%mul = mul nsw nuw %a, C1
%ret = udiv %mul, C2
  =>
%ret = mul nsw %a, (C1 / C2)

This can expose further optimization opportunities if we end up
multiplying or dividing by a power of 2.

Consider this small example:

define i32 @f(i32 %a) {
  %mul = mul nuw i32 %a, 14
  %div = udiv exact i32 %mul, 7
  ret i32 %div
}

which gets CodeGen'd to:

    imull       $14, %edi, %eax
    imulq       $613566757, %rax, %rcx
    shrq        $32, %rcx
    subl        %ecx, %eax
    shrl        %eax
    addl        %ecx, %eax
    shrl        $2, %eax
    retq

We can now transform this into:
define i32 @f(i32 %a) {
  %shl = shl nuw i32 %a, 1
  ret i32 %shl
}

which gets CodeGen'd to:

    leal        (%rdi,%rdi), %eax
    retq

This fixes PR20681.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215815 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineMulDivRem.cpp      | 77 ++++++++++++++++++-
 test/Transforms/InstCombine/div.ll            | 72 +++++++++++++++++
 2 files changed, 147 insertions(+), 2 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 6c6e7d81516..acfc96efa23 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -97,6 +97,21 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
   return MulExt.slt(Min) || MulExt.sgt(Max);
 }
 
+/// \brief True if C2 is a multiple of C1. Quotient contains C2/C1.
+static bool IsMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
+                       bool IsSigned) {
+  assert(C1.getBitWidth() == C2.getBitWidth() &&
+         "Inconsistent width of constants!");
+
+  APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned);
+  if (IsSigned)
+    APInt::sdivrem(C1, C2, Quotient, Remainder);
+  else
+    APInt::udivrem(C1, C2, Quotient, Remainder);
+
+  return Remainder.isMinValue();
+}
+
 /// \brief A helper routine of InstCombiner::visitMul().
 ///
 /// If C is a vector of known powers of 2, then this function returns
@@ -725,8 +740,8 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     return &I;
 
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
-    // (X / C1) / C2  -> X / (C1*C2)
-    if (Instruction *LHS = dyn_cast<Instruction>(Op0))
+    if (Instruction *LHS = dyn_cast<Instruction>(Op0)) {
+      // (X / C1) / C2  -> X / (C1*C2)
       if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
         if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
           if (MultiplyOverflows(RHS, LHSRHS,
@@ -736,6 +751,64 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
                                         ConstantExpr::getMul(RHS, LHSRHS));
         }
 
+      Value *X;
+      const APInt *C1, *C2;
+      if (match(RHS, m_APInt(C2))) {
+        bool IsSigned = I.getOpcode() == Instruction::SDiv;
+        if ((IsSigned && match(LHS, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
+            (!IsSigned && match(LHS, m_NUWMul(m_Value(X), m_APInt(C1))))) {
+          APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+
+          // (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
+          if (IsMultiple(*C2, *C1, Quotient, IsSigned)) {
+            BinaryOperator *BO = BinaryOperator::Create(
+                I.getOpcode(), X, ConstantInt::get(X->getType(), Quotient));
+            BO->setIsExact(I.isExact());
+            return BO;
+          }
+
+          // (X * C1) / C2 -> X * (C1 / C2) if C1 is a multiple of C2.
+          if (IsMultiple(*C1, *C2, Quotient, IsSigned)) {
+            BinaryOperator *BO = BinaryOperator::Create(
+                Instruction::Mul, X, ConstantInt::get(X->getType(), Quotient));
+            BO->setHasNoUnsignedWrap(
+                !IsSigned &&
+                cast<OverflowingBinaryOperator>(LHS)->hasNoUnsignedWrap());
+            BO->setHasNoSignedWrap(
+                cast<OverflowingBinaryOperator>(LHS)->hasNoSignedWrap());
+            return BO;
+          }
+        }
+
+        if ((IsSigned && match(LHS, m_NSWShl(m_Value(X), m_APInt(C1)))) ||
+            (!IsSigned && match(LHS, m_NUWShl(m_Value(X), m_APInt(C1))))) {
+          APInt Quotient(C1->getBitWidth(), /*Val=*/0ULL, IsSigned);
+          APInt C1Shifted = APInt::getOneBitSet(
+              C1->getBitWidth(), static_cast<unsigned>(C1->getLimitedValue()));
+
+          // (X << C1) / C2 -> X / (C2 >> C1) if C2 is a multiple of C1.
+          if (IsMultiple(*C2, C1Shifted, Quotient, IsSigned)) {
+            BinaryOperator *BO = BinaryOperator::Create(
+                I.getOpcode(), X, ConstantInt::get(X->getType(), Quotient));
+            BO->setIsExact(I.isExact());
+            return BO;
+          }
+
+          // (X << C1) / C2 -> X * (C2 >> C1) if C1 is a multiple of C2.
+          if (IsMultiple(C1Shifted, *C2, Quotient, IsSigned)) {
+            BinaryOperator *BO = BinaryOperator::Create(
+                Instruction::Mul, X, ConstantInt::get(X->getType(), Quotient));
+            BO->setHasNoUnsignedWrap(
+                !IsSigned &&
+                cast<OverflowingBinaryOperator>(LHS)->hasNoUnsignedWrap());
+            BO->setHasNoSignedWrap(
+                cast<OverflowingBinaryOperator>(LHS)->hasNoSignedWrap());
+            return BO;
+          }
+        }
+      }
+    }
+
     if (!RHS->isZero()) { // avoid X udiv 0
       if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
         if (Instruction *R = FoldOpIntoSelect(I, SI))
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 9c7ba9b0305..67c5c78cd83 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -175,3 +175,75 @@ define i32 @test20(i32 %x) {
 ; CHECK-NEXT: select i1 %{{.*}}, i32 %x, i32 {{.*}}
 ; CHECK-NEXT: ret i32
 }
+
+define i32 @test21(i32 %a) {
+  %shl = shl nsw i32 %a, 2
+  %div = sdiv i32 %shl, 12
+  ret i32 %div
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: %div = sdiv i32 %a, 3
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test22(i32 %a) {
+  %mul = mul nsw i32 %a, 3
+  %div = sdiv i32 %mul, 12
+  ret i32 %div
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: %div = sdiv i32 %a, 4
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test23(i32 %a) {
+  %shl = shl nuw i32 %a, 2
+  %div = udiv i32 %shl, 12
+  ret i32 %div
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: %div = udiv i32 %a, 3
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test24(i32 %a) {
+  %mul = mul nuw i32 %a, 3
+  %div = udiv i32 %mul, 12
+  ret i32 %div
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: %div = lshr i32 %a, 2
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test25(i32 %a) {
+  %shl = shl nsw i32 %a, 2
+  %div = sdiv i32 %shl, 2
+  ret i32 %div
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: %div = shl nsw i32 %a, 1
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test26(i32 %a) {
+  %mul = mul nsw i32 %a, 12
+  %div = sdiv i32 %mul, 3
+  ret i32 %div
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: %div = shl nsw i32 %a, 2
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test27(i32 %a) {
+  %shl = shl nuw i32 %a, 2
+  %div = udiv i32 %shl, 2
+  ret i32 %div
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: %div = shl nuw i32 %a, 1
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test28(i32 %a) {
+  %mul = mul nuw i32 %a, 36
+  %div = udiv i32 %mul, 3
+  ret i32 %div
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: %div = mul nuw i32 %a, 12
+; CHECK-NEXT: ret i32 %div
+}
-- 
2.34.1