From f99bcae76f160415bfae61e9b20ffde66d045b4a Mon Sep 17 00:00:00 2001
From: Chen Li <meloli87@gmail.com>
Date: Sat, 12 Dec 2015 01:04:15 +0000
Subject: [PATCH] [X86ISelLowering] Add additional support for
 multiplication-to-shift conversion.

Summary: This patch adds support for the conversions (mul x, 2^N + 1) => (add (shl x, N), x) and (mul x, 2^N - 1) => (sub (shl x, N), x) when the multiplication cannot be converted to LEA + SHL or LEA + LEA. LLVM already supports this on ARM, and it should also be useful on X86. Note that the patch currently only applies to cases where the constant operand is positive; I am planning to add another patch to support negative cases after this.

Reviewers: craig.topper, RKSimon

Subscribers: aemerson, llvm-commits

Differential Revision: http://reviews.llvm.org/D14603

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255415 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 28 +++++++++++++++++--
 test/CodeGen/X86/imul.ll           | 45 ++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b1b7f5d4586..bc06ab9dc06 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -24738,9 +24738,11 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
     MulAmt1 = 3;
     MulAmt2 = MulAmt / 3;
   }
+
+  SDLoc DL(N);
+  SDValue NewMul;
   if (MulAmt2 &&
       (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
-    SDLoc DL(N);
 
     if (isPowerOf2_64(MulAmt2) &&
         !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
@@ -24749,7 +24751,6 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
       // is an add.
       std::swap(MulAmt1, MulAmt2);
 
-    SDValue NewMul;
     if (isPowerOf2_64(MulAmt1))
       NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                            DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
@@ -24763,10 +24764,31 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
     else
       NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
                            DAG.getConstant(MulAmt2, DL, VT));
+  }
+
+  if (!NewMul) {
+    assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX)
+           && "Both cases that could cause potential overflows should have "
+              "already been handled.");
+    if (isPowerOf2_64(MulAmt - 1))
+      // (mul x, 2^N + 1) => (add (shl x, N), x)
+      NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+                                DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                DAG.getConstant(Log2_64(MulAmt - 1), DL,
+                                MVT::i8)));
 
+    else if (isPowerOf2_64(MulAmt + 1))
+      // (mul x, 2^N - 1) => (sub (shl x, N), x)
+      NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT,
+                                N->getOperand(0),
+                                DAG.getConstant(Log2_64(MulAmt + 1),
+                                DL, MVT::i8)), N->getOperand(0));
+  }
+
+  if (NewMul)
     // Do not add new nodes to DAG combiner worklist.
     DCI.CombineTo(N, NewMul, false);
-  }
+
   return SDValue();
 }
 
diff --git a/test/CodeGen/X86/imul.ll b/test/CodeGen/X86/imul.ll
index 99a4690a009..9d4d19332db 100644
--- a/test/CodeGen/X86/imul.ll
+++ b/test/CodeGen/X86/imul.ll
@@ -126,3 +126,48 @@ define i32 @mul40_32_minsize(i32 %A) minsize {
     %mul = mul i32 %A, 40
     ret i32 %mul
 }
+
+define i32 @mul33_32(i32 %A) {
+; X64-LABEL: mul33_32:
+; X64: shll
+; X64-NEXT: leal
+; X86-LABEL: mul33_32:
+; X86: shll
+; X86-NEXT: addl
+    %mul = mul i32 %A, 33
+    ret i32 %mul
+}
+
+define i32 @mul31_32(i32 %A) {
+; X64-LABEL: mul31_32:
+; X64: shll
+; X64-NEXT: subl
+; X86-LABEL: mul31_32:
+; X86: shll
+; X86-NEXT: subl
+    %mul = mul i32 %A, 31
+    ret i32 %mul
+}
+
+define i32 @mul0_32(i32 %A) {
+; X64-LABEL: mul0_32:
+; X64: xorl	%eax, %eax
+    %mul = mul i32 %A, 0
+    ret i32 %mul
+}
+
+define i32 @mul4294967295_32(i32 %A) {
+; X64-LABEL: mul4294967295_32:
+; X64: negl	%edi
+; X64-NEXT:	movl	%edi, %eax
+    %mul = mul i32 %A, 4294967295
+    ret i32 %mul
+}
+
+define i64 @mul18446744073709551615_64(i64 %A) {
+; X64-LABEL: mul18446744073709551615_64:
+; X64: negq	%rdi
+; X64-NEXT:	movq	%rdi, %rax
+    %mul = mul i64 %A, 18446744073709551615
+    ret i64 %mul
+}
-- 
2.34.1