Improve the setcc -> setcc_carry optimization so that it happens in more cases

author Chris Lattner <sabre@nondot.org>

Sun, 19 Dec 2010 22:08:31 +0000 (22:08 +0000)

committer Chris Lattner <sabre@nondot.org>

Sun, 19 Dec 2010 22:08:31 +0000 (22:08 +0000)
author Chris Lattner <sabre@nondot.org>
Sun, 19 Dec 2010 22:08:31 +0000 (22:08 +0000)
committer Chris Lattner <sabre@nondot.org>
Sun, 19 Dec 2010 22:08:31 +0000 (22:08 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 8311d3373678e74846f777a9ff299cd8871888b7..48e418d57ceaa5d5f454b8c10828efd723015e10 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7053,17 +7053,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
    if (X86CC == X86::COND_INVALID)
      return SDValue();
  
-  SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
-
-  // Use sbb x, x to materialize carry bit into a GPR.
-  if (X86CC == X86::COND_B)
-    return DAG.getNode(ISD::AND, dl, MVT::i8,
-                       DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
-                                   DAG.getConstant(X86CC, MVT::i8), Cond),
-                       DAG.getConstant(1, MVT::i8));
-
+  SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
    return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
-                     DAG.getConstant(X86CC, MVT::i8), Cond);
+                     DAG.getConstant(X86CC, MVT::i8), EFLAGS);
  }
  
  SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
@@ -11430,13 +11422,31 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
    return SDValue();
  }
  
+// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT (only COND_B is handled).
+static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+  unsigned X86CC = N->getConstantOperandVal(0);
+  SDValue EFLAG = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+  
+  // Materialize "setb reg" as "sbb reg,reg" (SETCC_CARRY) masked with 1: the
+  // sbb result is all-ones or all-zeros, so it can be widened without a zext
+  // and is more useful than a plain 0/1 in some cases.
+  if (X86CC == X86::COND_B)
+    return DAG.getNode(ISD::AND, DL, MVT::i8,
+                       DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
+                                   DAG.getConstant(X86CC, MVT::i8), EFLAG),
+                       DAG.getConstant(1, MVT::i8));
+  
+  return SDValue();
+}
+
  SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
    SelectionDAG &DAG = DCI.DAG;
    switch (N->getOpcode()) {
    default: break;
    case ISD::EXTRACT_VECTOR_ELT:
-                        return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+    return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
    case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
    case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
    case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
@@ -11452,6 +11462,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
    case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
    case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
    case ISD::ZERO_EXTEND:    return PerformZExtCombine(N, DAG);
+  case X86ISD::SETCC:       return PerformSETCCCombine(N, DAG);
    case X86ISD::SHUFPS:      // Handle all target specific shuffles
    case X86ISD::SHUFPD:
    case X86ISD::PALIGN:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 5fea01de09df8a29545d1dcbebd20f79de4a5424..7566f989822076f3dcf313422b0f9161c96a7f45 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -92,7 +92,7 @@ namespace llvm {
  
        // Same as SETCC except it's materialized with a sbb and the value is all
        // one's or all zero's.
-      SETCC_CARRY,
+      SETCC_CARRY,  // R = carry_bit ? ~0 : 0
  
        /// X86 conditional moves. Operand 0 and operand 1 are the two values
        /// to select from. Operand 2 is the condition code, and operand 3 is the
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td

index 2b1ea9c6851070cef5faa1b6fd959ffae3170344..724e6b895e4fc77045268a1dc56728886cb8760e 100644 (file)
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -193,9 +193,20 @@ def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
  } // isCodeGenOnly
  
  
+def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C16r)>;
+def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C32r)>;
  def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
            (SETB_C64r)>;
  
+def : Pat<(i16 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C16r)>;
+def : Pat<(i32 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C32r)>;
+def : Pat<(i64 (sext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
+          (SETB_C64r)>;
+
  
  //===----------------------------------------------------------------------===//
  // String Pseudo Instructions
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll

index 4c2257494d2a44d9264a9f7a08cfd103ef0cdcad..f924ec8132ee40dc4bc08750251a65b0587c003e 100644 (file)
--- a/test/CodeGen/X86/add-of-carry.ll
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -1,8 +1,9 @@
  ; RUN: llc < %s -march=x86 | FileCheck %s
  ; <rdar://problem/8449754>
  
-define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp {
+define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
  entry:
+; CHECK: test1:
  ; CHECK:       sbbl    %ecx, %ecx
  ; CHECK-NOT: addl
  ; CHECK: subl  %ecx, %eax
@@ -12,3 +13,22 @@ entry:
    %z.0 = add i32 %add4, %inc
    ret i32 %z.0
  }
+
+; Instcombine transforms test1 into test2:
+; CHECK: test2:
+; CHECK: movl
+; CHECK-NEXT: addl
+; CHECK-NEXT: sbbl
+; CHECK-NEXT: subl
+; CHECK-NEXT: ret
+define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
+  %0 = extractvalue { i32, i1 } %uadd, 0
+  %cmp = extractvalue { i32, i1 } %uadd, 1
+  %inc = zext i1 %cmp to i32
+  %z.0 = add i32 %0, %inc
+  ret i32 %z.0
+}
+
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll

index ec5ed17ad84959d42fee8d688eb364caa9176c10..6c32396a4177adad322718ebabb8f9e21f36ac54 100644 (file)
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -114,8 +114,8 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno
  
  define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
    ; CHECK: vcomisd
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl    %eax, %eax
+  ; CHECK: andl    $1, %eax
    %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    ret i32 %res
  }
@@ -825,8 +825,7 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn
  
  define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
    ; CHECK: vucomisd
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
    %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    ret i32 %res
  }
@@ -1183,8 +1182,7 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
  
  define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
    ; CHECK: vptest 
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
    %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
    ret i32 %res
  }
@@ -1455,8 +1453,7 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
  
  define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
    ; CHECK: vcomiss
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbb
    %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
    ret i32 %res
  }
@@ -1697,8 +1694,7 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
  
  define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
    ; CHECK: vucomiss
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
    %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
    ret i32 %res
  }
@@ -2173,8 +2169,7 @@ declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
  
  define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
    ; CHECK: vptest
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
    %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
    ret i32 %res
  }
@@ -2451,8 +2446,7 @@ declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) noun
  
  define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
    ; CHECK: vtestpd
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
    %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    ret i32 %res
  }
@@ -2461,8 +2455,7 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnon
  
  define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
    ; CHECK: vtestpd
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
    %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
    ret i32 %res
  }
@@ -2471,8 +2464,7 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind rea
  
  define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
    ; CHECK: vtestps
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
    %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
    ret i32 %res
  }
@@ -2481,8 +2473,7 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
  
  define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
    ; CHECK: vtestps
-  ; CHECK: setb
-  ; CHECK: movzbl
+  ; CHECK: sbbl
    %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
    ret i32 %res
  }
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll

index 3a14fa26300c3dc86ca52412bae4d12e77ee9cc1..2ac4cb435a751ba6b2d4151e8fbd1ade992deaab 100644 (file)
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -200,11 +200,11 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
          ret i32 %tmp1
  ; X32: _ptestz_2:
  ; X32:    ptest        %xmm1, %xmm0
-; X32:    setb %al
+; X32:    sbbl %eax
  
  ; X64: _ptestz_2:
  ; X64:    ptest        %xmm1, %xmm0
-; X64:    setb %al
+; X64:    sbbl %eax
  }
  
  define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
author	Chris Lattner <sabre@nondot.org>
	Sun, 19 Dec 2010 22:08:31 +0000 (22:08 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Sun, 19 Dec 2010 22:08:31 +0000 (22:08 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
lib/Target/X86/X86InstrCompiler.td		patch \| blob \| history
test/CodeGen/X86/add-of-carry.ll		patch \| blob \| history
test/CodeGen/X86/avx-intrinsics-x86.ll		patch \| blob \| history
test/CodeGen/X86/sse41.ll		patch \| blob \| history