Add a neat little two's complement hack for x86.

author Benjamin Kramer <benny.kra@googlemail.com>

Tue, 26 Jul 2011 22:42:13 +0000 (22:42 +0000)

committer Benjamin Kramer <benny.kra@googlemail.com>

Tue, 26 Jul 2011 22:42:13 +0000 (22:42 +0000)
author Benjamin Kramer <benny.kra@googlemail.com>
Tue, 26 Jul 2011 22:42:13 +0000 (22:42 +0000)
committer Benjamin Kramer <benny.kra@googlemail.com>
Tue, 26 Jul 2011 22:42:13 +0000 (22:42 +0000)
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt

index d6ceaa7ed4b614ba60f606cb22aa1e7e4abfe71a..8d9eabad0be0a046ca1a00827bbc273ac96ef314 100644 (file)
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -2076,12 +2076,11 @@ generates (x86_64):
         jb      LBB0_2
  ## BB#1:
         decl    %edi
-       movl    $63, %eax
-       bsrl    %edi, %ecx
-       cmovel  %eax, %ecx
-       xorl    $31, %ecx
-       movl    $32, %eax
-       subl    %ecx, %eax
+       movl    $63, %ecx
+       bsrl    %edi, %eax
+       cmovel  %ecx, %eax
+       xorl    $-32, %eax
+       addl    $33, %eax
  LBB0_2:
         ret
  
@@ -2091,26 +2090,10 @@ The cmov and the early test are redundant:
         jb      LBB0_2
  ## BB#1:
         decl    %edi
-       bsrl    %edi, %ecx
-       xorl    $31, %ecx
-       movl    $32, %eax
-       subl    %ecx, %eax
+       bsrl    %edi, %eax
+       xorl    $-32, %eax
+       addl    $33, %eax
  LBB0_2:
         ret
  
-If we want to get really fancy we could use some two's complement magic:
-       xorl    %eax, %eax
-       cmpl    $2, %edi
-       jb      LBB0_2
-## BB#1:
-       decl    %edi
-       bsrl    %edi, %ecx
-       xorl    $-32, %ecx
-       leal    33(%ecx), %eax
-LBB0_2:
-       ret
-
-This is only useful on targets that can't encode the first operand of a sub
-directly.  The rule is C1 - (X^C2) -> (C1+1) + (X^~C2).
-
  //===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 26c48a4d56a47e86bf9833f22a9f66dbddfd238c..f51a455b70305408329662bc37fdfe994773beb7 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12550,7 +12550,7 @@ static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
  //      (add Y, (setne X, 0)) -> sbb -1, Y
  //      (sub (sete  X, 0), Y) -> sbb  0, Y
  //      (sub (setne X, 0), Y) -> adc -1, Y
-static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
+static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
    DebugLoc DL = N->getDebugLoc();
  
    // Look through ZExts.
@@ -12586,6 +12586,33 @@ static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
                       DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
  }
  
+static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+
+  // X86 can't encode an immediate LHS of a sub. See if we can push the
+  // negation into a preceding instruction.
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
+    uint64_t Op0C = C->getSExtValue();
+
+    // If the RHS of the sub is a XOR with one use and a constant, invert the
+    // immediate. Then add one to the LHS of the sub so we can turn
+    // X-Y -> X+~Y+1, saving one register.
+    if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
+        isa<ConstantSDNode>(Op1.getOperand(1))) {
+      uint64_t XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getSExtValue();
+      EVT VT = Op0.getValueType();
+      SDValue NewXor = DAG.getNode(ISD::XOR, Op1.getDebugLoc(), VT,
+                                   Op1.getOperand(0),
+                                   DAG.getConstant(~XorC, VT));
+      return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, NewXor,
+                         DAG.getConstant(Op0C+1, VT));
+    }
+  }
+
+  return OptimizeConditionalInDecrement(N, DAG);
+}
+
  SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
    SelectionDAG &DAG = DCI.DAG;
@@ -12595,8 +12622,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
      return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
    case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
    case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
-  case ISD::ADD:
-  case ISD::SUB:            return OptimizeConditonalInDecrement(N, DAG);
+  case ISD::ADD:            return OptimizeConditionalInDecrement(N, DAG);
+  case ISD::SUB:            return PerformSubCombine(N, DAG);
    case X86ISD::ADC:         return PerformADCCombine(N, DAG, DCI);
    case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
    case ISD::SHL:
diff --git a/test/CodeGen/X86/sub.ll b/test/CodeGen/X86/sub.ll

new file mode 100644 (file)

index 0000000..2a4d2d6
--- /dev/null
+++ b/test/CodeGen/X86/sub.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+; Check that 32 - (x ^ 31) is emitted as (x ^ -32) + 33, i.e. xor then add.
+define i32 @test1(i32 %x) {
+  %xor = xor i32 %x, 31
+  %sub = sub i32 32, %xor
+  ret i32 %sub
+; CHECK: test1:
+; CHECK:      xorl $-32
+; CHECK-NEXT: addl $33
+; CHECK-NEXT: ret
+}
author	Benjamin Kramer <benny.kra@googlemail.com>
	Tue, 26 Jul 2011 22:42:13 +0000 (22:42 +0000)
committer	Benjamin Kramer <benny.kra@googlemail.com>
	Tue, 26 Jul 2011 22:42:13 +0000 (22:42 +0000)
lib/Target/X86/README.txt		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/sub.ll	[new file with mode: 0644]	patch \| blob