Teach DAGCombine to fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))

author Benjamin Kramer <benny.kra@googlemail.com>

Sun, 30 Jan 2011 16:38:43 +0000 (16:38 +0000)

committer Benjamin Kramer <benny.kra@googlemail.com>

Sun, 30 Jan 2011 16:38:43 +0000 (16:38 +0000)
author Benjamin Kramer <benny.kra@googlemail.com>
Sun, 30 Jan 2011 16:38:43 +0000 (16:38 +0000)
committer Benjamin Kramer <benny.kra@googlemail.com>
Sun, 30 Jan 2011 16:38:43 +0000 (16:38 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index a5b2d9594d5ccc4e153d375f5d16c2015953745e..94487d40414562d8ab4da20bda37968ff62da978 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3154,6 +3154,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
      }
    }
  
+  // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+  //      if c1 is equal to the number of bits the trunc removes
+  if (N0.getOpcode() == ISD::TRUNCATE &&
+      (N0.getOperand(0).getOpcode() == ISD::SRL ||
+       N0.getOperand(0).getOpcode() == ISD::SRA) &&
+      N0.getOperand(0).hasOneUse() &&
+      N0.getOperand(0).getOperand(1).hasOneUse() &&
+      N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+    EVT LargeVT = N0.getOperand(0).getValueType();
+    ConstantSDNode *LargeShiftAmt =
+      cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+    if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+        LargeShiftAmt->getZExtValue()) {
+      SDValue Amt =
+        DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+                        getShiftAmountTy());
+      SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+                                N0.getOperand(0).getOperand(0), Amt);
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+    }
+  }
+
    // Simplify, based on bits shifted out of the LHS.
    if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
      return SDValue(N, 0);
diff --git a/lib/Target/README.txt b/lib/Target/README.txt

index 8b5c5ce137131dc1d25e3d41524167c03dab15dc..c0a2b760de78955f01fa24dcf905f014f78ceb76 100644 (file)
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2274,24 +2274,3 @@ llc time when it gets inlined, because we can use smaller transfers.  This also
  avoids partial register stalls in some important cases.
  
  //===---------------------------------------------------------------------===//
-
-We miss an optzn when lowering divide by some constants.  For example:
-  int test(int x) { return x/10; }
-
-We produce:
-
-_test:                                  ## @test
-## BB#0:                                ## %entry
-       movslq  %edi, %rax
-       imulq   $1717986919, %rax, %rax ## imm = 0x66666667
-       movq    %rax, %rcx
-       shrq    $63, %rcx
-**     shrq    $32, %rax
-**      sarl   $2, %eax
-       addl    %ecx, %eax
-       ret
-
-The two starred instructions could be replaced with a "sarl $34, %rax".  This
-occurs in 186.crafty very frequently.
-
-//===---------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll

index 545662fd0ffc82f4acf97ac5cd52f4a33ddf1bbd..7ceb972f61bbf07f3f1e541272197c4b69270092 100644 (file)
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -51,3 +51,12 @@ define i32 @test5(i32 %A) nounwind {
  ; CHECK: mull  4(%esp)
  }
  
+define signext i16 @test6(i16 signext %x) nounwind {
+entry:
+  %div = sdiv i16 %x, 10
+  ret i16 %div
+; CHECK: test6:
+; CHECK: imull $26215, %eax, %eax
+; CHECK: shrl  $31, %ecx
+; CHECK: sarl  $18, %eax
+}
author	Benjamin Kramer <benny.kra@googlemail.com>
	Sun, 30 Jan 2011 16:38:43 +0000 (16:38 +0000)
committer	Benjamin Kramer <benny.kra@googlemail.com>
	Sun, 30 Jan 2011 16:38:43 +0000 (16:38 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/Target/README.txt		patch \| blob \| history
test/CodeGen/X86/divide-by-constant.ll		patch \| blob \| history