From: Hans Wennborg Date: Thu, 19 Nov 2015 16:35:08 +0000 (+0000) Subject: X86: More efficient legalization of wide integer compares X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=086b179985b193f50250faf475a47d7f3e6e4783 X86: More efficient legalization of wide integer compares In particular, this makes the code for 64-bit compares on 32-bit targets much more efficient. Example: define i32 @test_slt(i64 %a, i64 %b) { entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 bb1: ret i32 1 bb2: ret i32 2 } Before this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax setae %al cmpl 16(%esp), %ecx setge %cl je .LBB2_2 movb %cl, %al .LBB2_2: testb %al, %al jne .LBB2_4 movl $1, %eax retl .LBB2_4: movl $2, %eax retl After this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax sbbl 16(%esp), %ecx jge .LBB1_2 movl $1, %eax retl .LBB1_2: movl $2, %eax retl Differential Revision: http://reviews.llvm.org/D14496 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253572 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 975bade748c..aaf08e14f57 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -372,6 +372,12 @@ namespace ISD { /// then the result type must also be a vector type. SETCC, + /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but + /// op #2 is a *carry value*. This operator checks the result of + /// "LHS - RHS - Carry", and can be used to compare two wide integers: + /// (setcce lhshi rhshi (subc lhslo rhslo) cc). Only valid for integers. + SETCCE, + /// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded /// integer shift operations. 
The operation ordering is: /// [Lo,Hi] = op [LoLHS,HiLHS], Amt diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 939eb5f54cf..af73b62546e 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -267,6 +267,7 @@ namespace { SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); + SDValue visitSETCCE(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); @@ -1396,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); + case ISD::SETCCE: return visitSETCCE(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); @@ -5720,6 +5722,19 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } +SDValue DAGCombiner::visitSETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + + // If Carry is false, fold to a regular SETCC. + if (Carry.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + + return SDValue(); +} + /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e3a11cd27e1..b465ffb5d1b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1242,7 +1242,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 
4 : - Node->getOpcode() == ISD::SETCC ? 2 : 1; + Node->getOpcode() == ISD::SETCC ? 2 : + Node->getOpcode() == ISD::SETCCE ? 3 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 3c7586db0ab..ea537fff168 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2634,6 +2634,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; @@ -2761,6 +2762,47 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } + if (LHSHi == RHSHi) { + // Comparing the low bits is enough. + NewLHS = Tmp1; + NewRHS = SDValue(); + return; + } + + // Lower with SETCCE if the target supports it. + // FIXME: Make all targets support this, then remove the other lowering. + if (TLI.getOperationAction( + ISD::SETCCE, + TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) == + TargetLowering::Custom) { + // SETCCE can detect < and >= directly. For > and <=, flip operands and + // condition code. 
+ bool FlipOperands = false; + switch (CCCode) { + case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break; + case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break; + case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break; + case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break; + default: break; + } + if (FlipOperands) { + std::swap(LHSLo, RHSLo); + std::swap(LHSHi, RHSHi); + } + // Perform a wide subtraction, feeding the carry from the low part into + // SETCCE. The SETCCE operation is essentially looking at the high part of + // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or + // positive iff LHS >= RHS. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo); + SDValue Res = + DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()), + LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode)); + NewLHS = Res; + NewRHS = SDValue(); + return; + } + NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ, false, DagCombineInfo, dl); @@ -2825,6 +2867,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { DAG.getCondCode(CCCode)), 0); } +SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + SDLoc dl = SDLoc(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(LHS, LHSLo, LHSHi); + GetExpandedInteger(RHS, RHSLo, RHSHi); + + // Expand to a SUBE for the low part and a smaller SETCCE for the high. 
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry); + return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi, + LowCmp.getValue(1), Cond); +} + SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // The value being shifted is legal, but the shift amount is too big. // It follows that either the result of the shift is undefined, or the diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 7f4501e4b67..4e4740f1f9c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -356,6 +356,7 @@ private: SDValue ExpandIntOp_BR_CC(SDNode *N); SDValue ExpandIntOp_SELECT_CC(SDNode *N); SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_SETCCE(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 541cb367902..195b4849860 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -209,6 +209,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; + case ISD::SETCCE: return "setcce"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 31401f2fb8f..bbd857c0811 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -421,9 +421,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); setOperationAction(ISD::SETCC , 
MVT::f80 , Custom); + setOperationAction(ISD::SETCCE , MVT::i8 , Custom); + setOperationAction(ISD::SETCCE , MVT::i16 , Custom); + setOperationAction(ISD::SETCCE , MVT::i32 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::SELECT , MVT::i64 , Custom); setOperationAction(ISD::SETCC , MVT::i64 , Custom); + setOperationAction(ISD::SETCCE , MVT::i64 , Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support @@ -3957,6 +3961,22 @@ static bool isX86CCUnsigned(unsigned X86CC) { } } +static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { + switch (SetCCOpcode) { + default: llvm_unreachable("Invalid integer condition!"); + case ISD::SETEQ: return X86::COND_E; + case ISD::SETGT: return X86::COND_G; + case ISD::SETGE: return X86::COND_GE; + case ISD::SETLT: return X86::COND_L; + case ISD::SETLE: return X86::COND_LE; + case ISD::SETNE: return X86::COND_NE; + case ISD::SETULT: return X86::COND_B; + case ISD::SETUGT: return X86::COND_A; + case ISD::SETULE: return X86::COND_BE; + case ISD::SETUGE: return X86::COND_AE; + } +} + /// Do a one-to-one translation of a ISD::CondCode to the X86-specific /// condition code, returning the condition code and the LHS/RHS of the /// comparison to make. 
@@ -3980,19 +4000,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP, } } - switch (SetCCOpcode) { - default: llvm_unreachable("Invalid integer condition!"); - case ISD::SETEQ: return X86::COND_E; - case ISD::SETGT: return X86::COND_G; - case ISD::SETGE: return X86::COND_GE; - case ISD::SETLT: return X86::COND_L; - case ISD::SETLE: return X86::COND_LE; - case ISD::SETNE: return X86::COND_NE; - case ISD::SETULT: return X86::COND_B; - case ISD::SETUGT: return X86::COND_A; - case ISD::SETULE: return X86::COND_BE; - case ISD::SETUGE: return X86::COND_AE; - } + return TranslateIntegerX86CC(SetCCOpcode); } // First determine if it is required or is profitable to flip the operands. @@ -14576,6 +14584,23 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SetCC; } +SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Carry = Op.getOperand(2); + SDValue Cond = Op.getOperand(3); + SDLoc DL(Op); + + assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); + X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get()); + + assert(Carry.getOpcode() != ISD::CARRY_FALSE); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry); + return DAG.getNode(X86ISD::SETCC, DL, Op.getValueType(), + DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1)); +} + // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. 
static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); @@ -19685,6 +19710,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::SETCCE: return LowerSETCCE(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index c1b6328c712..4deb256d75d 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1023,6 +1023,7 @@ namespace llvm { SDValue LowerToBT(SDValue And, ISD::CondCode CC, SDLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll index a19aa52f302..816577be15e 100644 --- a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll +++ b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll @@ -26,6 +26,5 @@ if.end: ; preds = %if.then, %entry ret void ; CHECK-LABEL: fn1: -; CHECK: shrq $32, [[REG:%.*]] -; CHECK: sete +; CHECK: jb } diff --git a/test/CodeGen/X86/atomic-minmax-i6432.ll b/test/CodeGen/X86/atomic-minmax-i6432.ll index 4989bc14ef8..d5d3fa6db5e 100644 --- a/test/CodeGen/X86/atomic-minmax-i6432.ll +++ b/test/CodeGen/X86/atomic-minmax-i6432.ll @@ -8,7 +8,7 @@ define void @atomic_maxmin_i6432() { %1 = atomicrmw max i64* @sc64, i64 5 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: seta +; LINUX: sbbl ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock 
cmpxchg8b @@ -16,7 +16,7 @@ define void @atomic_maxmin_i6432() { %2 = atomicrmw min i64* @sc64, i64 6 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: setb +; LINUX: sbbl ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock cmpxchg8b @@ -24,7 +24,7 @@ define void @atomic_maxmin_i6432() { %3 = atomicrmw umax i64* @sc64, i64 7 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: seta +; LINUX: sbbl ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock cmpxchg8b @@ -32,7 +32,7 @@ define void @atomic_maxmin_i6432() { %4 = atomicrmw umin i64* @sc64, i64 8 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: setb +; LINUX: sbbl ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock cmpxchg8b diff --git a/test/CodeGen/X86/atomic128.ll b/test/CodeGen/X86/atomic128.ll index dea7d482f98..c41269b0b60 100644 --- a/test/CodeGen/X86/atomic128.ll +++ b/test/CodeGen/X86/atomic128.ll @@ -119,16 +119,9 @@ define void @fetch_and_min(i128* %p, i128 %bits) { ; CHECK-DAG: movq 8(%rdi), %rdx ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq %rsi, %rax -; CHECK: setbe [[CMP:%[a-z0-9]+]] -; CHECK: cmpq [[INCHI]], %rdx -; CHECK: setle [[HICMP:%[a-z0-9]+]] -; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] - -; CHECK: movb [[HICMP]], [[CMP]] -; CHECK: [[USE_LO]]: -; CHECK: testb [[CMP]], [[CMP]] -; CHECK: movq %rsi, %rbx +; CHECK: cmpq +; CHECK: sbbq +; CHECK: setg ; CHECK: cmovneq %rax, %rbx ; CHECK: movq [[INCHI]], %rcx ; CHECK: cmovneq %rdx, %rcx @@ -151,16 +144,9 @@ define void @fetch_and_max(i128* %p, i128 %bits) { ; CHECK-DAG: movq 8(%rdi), %rdx ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq %rsi, %rax -; CHECK: setae [[CMP:%[a-z0-9]+]] -; CHECK: cmpq [[INCHI]], %rdx -; CHECK: setge [[HICMP:%[a-z0-9]+]] -; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] - -; CHECK: movb [[HICMP]], [[CMP]] -; CHECK: [[USE_LO]]: -; CHECK: testb [[CMP]], [[CMP]] -; CHECK: movq %rsi, %rbx +; CHECK: cmpq +; CHECK: sbbq +; CHECK: setge ; CHECK: cmovneq %rax, %rbx ; CHECK: movq [[INCHI]], 
%rcx ; CHECK: cmovneq %rdx, %rcx @@ -183,16 +169,9 @@ define void @fetch_and_umin(i128* %p, i128 %bits) { ; CHECK-DAG: movq 8(%rdi), %rdx ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq %rsi, %rax -; CHECK: setbe [[CMP:%[a-z0-9]+]] -; CHECK: cmpq [[INCHI]], %rdx -; CHECK: setbe [[HICMP:%[a-z0-9]+]] -; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] - -; CHECK: movb [[HICMP]], [[CMP]] -; CHECK: [[USE_LO]]: -; CHECK: testb [[CMP]], [[CMP]] -; CHECK: movq %rsi, %rbx +; CHECK: cmpq +; CHECK: sbbq +; CHECK: seta ; CHECK: cmovneq %rax, %rbx ; CHECK: movq [[INCHI]], %rcx ; CHECK: cmovneq %rdx, %rcx @@ -215,16 +194,9 @@ define void @fetch_and_umax(i128* %p, i128 %bits) { ; CHECK-DAG: movq 8(%rdi), %rdx ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq %rax, %rsi -; CHECK: setb [[CMP:%[a-z0-9]+]] -; CHECK: cmpq [[INCHI]], %rdx -; CHECK: seta [[HICMP:%[a-z0-9]+]] -; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] - -; CHECK: movb [[HICMP]], [[CMP]] -; CHECK: [[USE_LO]]: -; CHECK: testb [[CMP]], [[CMP]] -; CHECK: movq %rsi, %rbx +; CHECK: cmpq +; CHECK: sbbq +; CHECK: setb ; CHECK: cmovneq %rax, %rbx ; CHECK: movq [[INCHI]], %rcx ; CHECK: cmovneq %rdx, %rcx diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll index f6ea29123f1..6e0d18558c5 100644 --- a/test/CodeGen/X86/avx512-cmp.ll +++ b/test/CodeGen/X86/avx512-cmp.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix AVX512-32 ; CHECK-LABEL: test1 ; CHECK: vucomisd {{.*}}encoding: [0x62 @@ -100,27 +99,3 @@ A: B: ret i32 7 } - -; AVX512-32-LABEL: test10 -; AVX512-32: movl 4(%esp), %ecx -; AVX512-32: cmpl $9, (%ecx) -; AVX512-32: seta %al -; AVX512-32: cmpl $0, 4(%ecx) -; AVX512-32: setg %cl -; AVX512-32: je -; AVX512-32: movb %cl, %al -; AVX512-32: testb $1, %al - -define void @test10(i64* %i.addr) { - - %x = load i64, i64* %i.addr, align 8 - %cmp = icmp slt i64 %x, 10 - br 
i1 %cmp, label %true, label %false - -true: - ret void - -false: - ret void -} - diff --git a/test/CodeGen/X86/wide-integer-cmp.ll b/test/CodeGen/X86/wide-integer-cmp.ll new file mode 100644 index 00000000000..c45a0541e6a --- /dev/null +++ b/test/CodeGen/X86/wide-integer-cmp.ll @@ -0,0 +1,130 @@ +; RUN: llc -mtriple=i686-linux-gnu %s -o - | FileCheck %s + + +define i32 @branch_eq(i64 %a, i64 %b) { +entry: + %cmp = icmp eq i64 %a, %b + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 2 + +; CHECK-LABEL: branch_eq: +; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]] +; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]] +; CHECK: xorl 16(%esp), [[LHSHi]] +; CHECK: xorl 12(%esp), [[LHSLo]] +; CHECK: orl [[LHSHi]], [[LHSLo]] +; CHECK: jne [[FALSE:.LBB[0-9_]+]] +; CHECK: movl $1, %eax +; CHECK: retl +; CHECK: [[FALSE]]: +; CHECK: movl $2, %eax +; CHECK: retl +} + +define i32 @branch_slt(i64 %a, i64 %b) { +entry: + %cmp = icmp slt i64 %a, %b + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 2 + +; CHECK-LABEL: branch_slt: +; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]] +; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]] +; CHECK: cmpl 12(%esp), [[LHSLo]] +; CHECK: sbbl 16(%esp), [[LHSHi]] +; CHECK: jge [[FALSE:.LBB[0-9_]+]] +; CHECK: movl $1, %eax +; CHECK: retl +; CHECK: [[FALSE]]: +; CHECK: movl $2, %eax +; CHECK: retl +} + +define i32 @branch_ule(i64 %a, i64 %b) { +entry: + %cmp = icmp ule i64 %a, %b + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 2 + +; CHECK-LABEL: branch_ule: +; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]] +; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]] +; CHECK: cmpl 4(%esp), [[RHSLo]] +; CHECK: sbbl 8(%esp), [[RHSHi]] +; CHECK: jb [[FALSE:.LBB[0-9_]+]] +; CHECK: movl $1, %eax +; CHECK: retl +; CHECK: [[FALSE]]: +; CHECK: movl $2, %eax +; CHECK: retl +} + +define i32 @set_gt(i64 %a, i64 %b) { +entry: + %cmp = icmp sgt i64 %a, %b + %res = select i1 %cmp, i32 1, i32 0 + ret i32 %res + +; CHECK-LABEL: set_gt: +; CHECK: movl 12(%esp), 
[[RHSLo:%[a-z]+]] +; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]] +; CHECK: cmpl 4(%esp), [[RHSLo]] +; CHECK: sbbl 8(%esp), [[RHSHi]] +; CHECK: setl %al +; CHECK: retl +} + +define i32 @test_wide(i128 %a, i128 %b) { +entry: + %cmp = icmp slt i128 %a, %b + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 2 + +; CHECK-LABEL: test_wide: +; CHECK: cmpl 24(%esp) +; CHECK: sbbl 28(%esp) +; CHECK: sbbl 32(%esp) +; CHECK: sbbl 36(%esp) +; CHECK: jge [[FALSE:.LBB[0-9_]+]] +; CHECK: movl $1, %eax +; CHECK: retl +; CHECK: [[FALSE]]: +; CHECK: movl $2, %eax +; CHECK: retl +} + +define i32 @test_carry_false(i64 %a, i64 %b) { +entry: + %x = and i64 %a, -4294967296 ;0xffffffff00000000 + %y = and i64 %b, -4294967296 + %cmp = icmp slt i64 %x, %y + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 2 + +; The comparison of the low bits will be folded to a CARRY_FALSE node. Make +; sure the code can handle that. +; CHECK-LABEL: carry_false: +; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]] +; CHECK: cmpl 16(%esp), [[LHSHi]] +; CHECK: jge [[FALSE:.LBB[0-9_]+]] +; CHECK: movl $1, %eax +; CHECK: retl +; CHECK: [[FALSE]]: +; CHECK: movl $2, %eax +; CHECK: retl +} diff --git a/test/CodeGen/X86/win32-pic-jumptable.ll b/test/CodeGen/X86/win32-pic-jumptable.ll index cabd36ae395..1a90b6238f2 100644 --- a/test/CodeGen/X86/win32-pic-jumptable.ll +++ b/test/CodeGen/X86/win32-pic-jumptable.ll @@ -7,10 +7,10 @@ ; CHECK-NEXT: jmpl *%eax ; CHECK: LJTI0_0: +; CHECK-NEXT: .long LBB0_2-L0$pb +; CHECK-NEXT: .long LBB0_3-L0$pb ; CHECK-NEXT: .long LBB0_4-L0$pb ; CHECK-NEXT: .long LBB0_5-L0$pb -; CHECK-NEXT: .long LBB0_6-L0$pb -; CHECK-NEXT: .long LBB0_7-L0$pb target triple = "i686--windows-itanium"