From: Rafael Espindola Date: Thu, 2 Jun 2011 19:57:47 +0000 (+0000) Subject: Revert 132424 to fix PR10068. X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=251b4a04057a8397791ad3924377888fe4f8a2ad;p=oota-llvm.git Revert 132424 to fix PR10068. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132479 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 2eee5012b16..bb4df270345 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1759,14 +1759,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint() && !Op.getOperand(0).getValueType().isVector()) { - bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); - bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if (OpVTLegal || i32Legal) { - EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; + if (isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32)) { + EVT Ty = (isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType())) ? + Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); - if (!OpVTLegal) + if (Ty != Op.getValueType()) Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); unsigned ShVal = Op.getValueType().getSizeInBits()-1; SDValue ShAmt = TLO.DAG.getConstant(ShVal, Op.getValueType()); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4d9869634d1..78205096790 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9402,8 +9402,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UCOMI: return "X86ISD::UCOMI"; case X86ISD::SETCC: return "X86ISD::SETCC"; case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; - case X86ISD::FSETCCsd: return "X86ISD::FSETCCsd"; - case X86ISD::FSETCCss: return "X86ISD::FSETCCss"; case X86ISD::CMOV: return "X86ISD::CMOV"; case X86ISD::BRCOND: return "X86ISD::BRCOND"; case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; @@ -11681,88 +11679,12 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } -// CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..)) -// where both setccs reference the same FP CMP, and rewrite for CMPEQSS -// and friends. Likewise for OR -> CMPNEQSS. -static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { - unsigned opcode; - - // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but - // we're requiring SSE2 for both. - if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDValue CMP = N0->getOperand(1); - SDValue CMP0 = CMP->getOperand(0); - SDValue CMP1 = CMP->getOperand(1); - EVT VT = CMP0.getValueType(); - DebugLoc DL = N->getDebugLoc(); - - if (VT == MVT::f32 || VT == MVT::f64) { - bool ExpectingFlags = false; - // Check for any users that want flags: - for (SDNode::use_iterator UI = N->use_begin(), - UE = N->use_end(); - !ExpectingFlags && UI != UE; ++UI) - switch (UI->getOpcode()) { - default: - case ISD::BR_CC: - case ISD::BRCOND: - case ISD::SELECT: - ExpectingFlags = true; - break; - case ISD::CopyToReg: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - break; - } - - if (!ExpectingFlags) { - enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0); - enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0); - - if (cc1 == X86::COND_E || cc1 == X86::COND_NE) { - X86::CondCode tmp = cc0; - cc0 = cc1; - cc1 = tmp; - } - - if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) || - (cc0 == X86::COND_NE && cc1 == X86::COND_P)) { - bool is64BitFP = (CMP0.getValueType() == MVT::f64); - X86ISD::NodeType NTOperator = is64BitFP ? - X86ISD::FSETCCsd : X86ISD::FSETCCss; - // FIXME: need symbolic constants for these magic numbers. - // See X86ATTInstPrinter.cpp:printSSECC(). - unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; - SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP0, CMP1, - DAG.getConstant(x86cc, MVT::i8)); - SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32, - OnesOrZeroesF); - SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI, - DAG.getConstant(1, MVT::i32)); - SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed); - return OneBitOfTruth; - } - } - } - } - return SDValue(); -} - static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); - SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget); - if (R.getNode()) - return R; - // Want to form PANDN nodes, in the hopes of then easily combining them with // OR and AND nodes to form PBLEND/PSIGN. EVT VT = N->getValueType(0); @@ -11792,10 +11714,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); - SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget); - if (R.getNode()) - return R; - EVT VT = N->getValueType(0); if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64) return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 9218730b0a4..394df62b930 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -94,11 +94,6 @@ namespace llvm { // one's or all zero's. SETCC_CARRY, // R = carry_bit ? ~0 : 0 - /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. - /// Operands are two FP values to compare; result is a mask of - /// 0s or 1s. Generally DTRT for C/C++ with NaNs. - FSETCCss, FSETCCsd, - /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values, /// result in an integer GPR. Needs masking for scalar result. FGETSIGNx86, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 7c9a9f7e8c5..7e9b7fd55a1 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -41,8 +41,6 @@ def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; -def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>; -def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8cab8082468..b8aeb65801b 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -23,9 +23,6 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3, def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>; -def SDTX86Cmpsd : SDTypeProfile<1, 3, [SDTCisVT<0, f64>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; -def SDTX86Cmpss : SDTypeProfile<1, 3, [SDTCisVT<0, f32>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; - def SDTX86Cmov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>, SDTCisVT<4, i32>]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b64c03a9b59..58c715a981b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1056,37 +1056,13 @@ let neverHasSideEffects = 1 in { XD, VEX_4V; } -let Constraints = "$src1 = $dst" in { -def CMPSSrr : SIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, SSECC:$cc), - "cmp${cc}ss\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), FR32:$src2, imm:$cc))]>, XS; -def CMPSSrm : SIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, SSECC:$cc), - "cmp${cc}ss\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86cmpss (f32 FR32:$src1), (loadf32 addr:$src2), imm:$cc))]>, XS; -def CMPSDrr : SIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, SSECC:$cc), - "cmp${cc}sd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), FR64:$src2, imm:$cc))]>, XD; -def CMPSDrm : SIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, SSECC:$cc), - "cmp${cc}sd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86cmpsd (f64 FR64:$src1), (loadf64 addr:$src2), imm:$cc))]>, XD; -} let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { -def CMPSSrr_alt : SIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS; -def CMPSSrm_alt : SIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XS; -def CMPSDrr_alt : SIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD; -def CMPSDrm_alt : SIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>, XD; + defm CMPSS : sse12_cmp_scalar, XS; + defm CMPSD : sse12_cmp_scalar, XD; } multiclass sse12_cmp_scalar_int %t +; RUN: not grep cmp %t +; RUN: not grep xor %t +; RUN: grep jne %t | count 1 +; RUN: grep jp %t | count 1 +; RUN: grep setnp %t | count 1 +; RUN: grep sete %t | count 1 +; RUN: grep and %t | count 1 +; RUN: grep cvt %t | count 4 define i32 @isint_return(double %d) nounwind { -; CHECK-NOT: xor -; CHECK: cvt %i = fptosi double %d to i32 -; CHECK-NEXT: cvt %e = sitofp i32 %i to double -; CHECK: cmpeqsd %c = fcmp oeq double %d, %e -; CHECK-NEXT: movd -; CHECK-NEXT: andl %z = zext i1 %c to i32 ret i32 %z } @@ -17,14 +19,9 @@ define i32 @isint_return(double %d) nounwind { declare void @foo() define void @isint_branch(double %d) nounwind { -; CHECK: cvt %i = fptosi double %d to i32 -; CHECK-NEXT: cvt %e = sitofp i32 %i to double -; CHECK: ucomisd %c = fcmp oeq double %d, %e -; CHECK-NEXT: jne -; CHECK-NEXT: jp br i1 %c, label %true, label %false true: call void @foo() diff --git a/test/CodeGen/X86/pr9127.ll b/test/CodeGen/X86/pr9127.ll index ba92c77e22b..9b251f57e0e 100644 --- a/test/CodeGen/X86/pr9127.ll +++ b/test/CodeGen/X86/pr9127.ll @@ -10,4 +10,4 @@ entry: } ; test that the load is folded. -; CHECK: cmpeqsd (%{{rdi|rdx}}), %xmm0 +; CHECK: ucomisd (%{{rdi|rdx}}), %xmm0 diff --git a/test/CodeGen/X86/setoeq.ll b/test/CodeGen/X86/setoeq.ll index aa2f0af55cc..4a9c1bacc5f 100644 --- a/test/CodeGen/X86/setoeq.ll +++ b/test/CodeGen/X86/setoeq.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -march=x86 | grep set | count 2 +; RUN: llc < %s -march=x86 | grep and define zeroext i8 @t(double %x) nounwind readnone { entry: @@ -6,16 +7,5 @@ entry: %1 = sitofp i32 %0 to double ; [#uses=1] %2 = fcmp oeq double %1, %x ; [#uses=1] %retval12 = zext i1 %2 to i8 ; [#uses=1] -; CHECK: cmpeqsd - ret i8 %retval12 -} - -define zeroext i8 @u(double %x) nounwind readnone { -entry: - %0 = fptosi double %x to i32 ; [#uses=1] - %1 = sitofp i32 %0 to double ; [#uses=1] - %2 = fcmp une double %1, %x ; [#uses=1] - %retval12 = zext i1 %2 to i8 ; [#uses=1] -; CHECK: cmpneqsd ret i8 %retval12 }