From b9d6b8449d245ee1607f6f197b29befbf8c41a1e Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 18 Aug 2012 17:53:03 +0000 Subject: [PATCH] Reapply r162160 with a fix: Optimize Arith->Trunc->SETCC sequence to allow better compare/branch code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162172 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 75 ++++++++++++++++++++++------ test/CodeGen/X86/2012-08-16-setcc.ll | 45 +++++++++++++++++ test/CodeGen/X86/fold-load.ll | 4 +- 3 files changed, 107 insertions(+), 17 deletions(-) create mode 100644 test/CodeGen/X86/2012-08-16-setcc.ll diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 077185c4b53..e59ab804960 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8283,7 +8283,33 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, unsigned Opcode = 0; unsigned NumOperands = 0; - switch (Op.getNode()->getOpcode()) { + + // Truncate operations may prevent the merge of the SETCC instruction + // and the arithmetic intruction before it. Attempt to truncate the operands + // of the arithmetic instruction and use a reduced bit-width instruction. + bool NeedTruncation = false; + SDValue ArithOp = Op; + if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) { + SDValue Arith = Op->getOperand(0); + // Both the trunc and the arithmetic op need to have one user each. + if (Arith->hasOneUse()) + switch (Arith.getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + NeedTruncation = true; + ArithOp = Arith; + } + } + } + + // NOTICE: In the code below we use ArithOp to hold the arithmetic operation + // which may be the result of a CAST. We use the variable 'Op', which is the + // non-casted variable when we check for possible users. + switch (ArithOp.getOpcode()) { case ISD::ADD: // Due to an isel shortcoming, be conservative if this add is likely to be // selected as part of a load-modify-store instruction. When the root node @@ -8303,7 +8329,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, goto default_case; if (ConstantSDNode *C = - dyn_cast(Op.getNode()->getOperand(1))) { + dyn_cast(ArithOp.getNode()->getOperand(1))) { // An add of one will be selected as an INC. if (C->getAPIntValue() == 1) { Opcode = X86ISD::INC; @@ -8339,7 +8365,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC && - (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { + !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) { NonFlagUse = true; break; } @@ -8360,11 +8386,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, goto default_case; // Otherwise use a regular EFLAGS-setting instruction. - switch (Op.getNode()->getOpcode()) { + switch (ArithOp.getOpcode()) { default: llvm_unreachable("unexpected operator!"); - case ISD::SUB: - Opcode = X86ISD::SUB; - break; + case ISD::SUB: Opcode = X86ISD::SUB; break; case ISD::OR: Opcode = X86ISD::OR; break; case ISD::XOR: Opcode = X86ISD::XOR; break; case ISD::AND: Opcode = X86ISD::AND; break; @@ -8385,19 +8409,40 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, break; } + // If we found that truncation is beneficial, perform the truncation and + // update 'Op'. + if (NeedTruncation) { + EVT VT = Op.getValueType(); + SDValue WideVal = Op->getOperand(0); + EVT WideVT = WideVal.getValueType(); + unsigned ConvertedOp = 0; + // Use a target machine opcode to prevent further DAGCombine + // optimizations that may separate the arithmetic operations + // from the setcc node. + switch (WideVal.getOpcode()) { + default: break; + case ISD::ADD: ConvertedOp = X86ISD::ADD; break; + case ISD::SUB: ConvertedOp = X86ISD::SUB; break; + case ISD::AND: ConvertedOp = X86ISD::AND; break; + case ISD::OR: ConvertedOp = X86ISD::OR; break; + case ISD::XOR: ConvertedOp = X86ISD::XOR; break; + } + + if (ConvertedOp) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) { + SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0)); + SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1)); + Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1); + } + } + } + if (Opcode == 0) // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, Op.getValueType())); - if (Opcode == X86ISD::CMP) { - SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0), - Op.getOperand(1)); - // We can't replace usage of SUB with CMP. - // The SUB node will be removed later because there is no use of it. - return SDValue(New.getNode(), 0); - } - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SmallVector Ops; for (unsigned i = 0; i != NumOperands; ++i) diff --git a/test/CodeGen/X86/2012-08-16-setcc.ll b/test/CodeGen/X86/2012-08-16-setcc.ll new file mode 100644 index 00000000000..ed511567c32 --- /dev/null +++ b/test/CodeGen/X86/2012-08-16-setcc.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s + +; rdar://12081007 + +; CHECK: and_1: +; CHECK: andb +; CHECK-NEXT: cmovnel +; CHECK: ret +define i32 @and_1(i8 zeroext %a, i8 zeroext %b, i32 %x) { + %1 = and i8 %b, %a + %2 = icmp ne i8 %1, 0 + %3 = select i1 %2, i32 %x, i32 0 + ret i32 %3 +} + +; CHECK: and_2: +; CHECK: andb +; CHECK-NEXT: setne +; CHECK: ret +define zeroext i1 @and_2(i8 zeroext %a, i8 zeroext %b) { + %1 = and i8 %b, %a + %2 = icmp ne i8 %1, 0 + ret i1 %2 +} + +; CHECK: xor_1: +; CHECK: xorb +; CHECK-NEXT: cmovnel +; CHECK: ret +define i32 @xor_1(i8 zeroext %a, i8 zeroext %b, i32 %x) { + %1 = xor i8 %b, %a + %2 = icmp ne i8 %1, 0 + %3 = select i1 %2, i32 %x, i32 0 + ret i32 %3 +} + +; CHECK: xor_2: +; CHECK: xorb +; CHECK-NEXT: setne +; CHECK: ret +define zeroext i1 @xor_2(i8 zeroext %a, i8 zeroext %b) { + %1 = xor i8 %b, %a + %2 = icmp ne i8 %1, 0 + ret i1 %2 +} diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll index c961f7576f9..d8366654c01 100644 --- a/test/CodeGen/X86/fold-load.ll +++ b/test/CodeGen/X86/fold-load.ll @@ -57,13 +57,13 @@ entry: %0 = load i32* %P, align 4 %1 = load i32* %Q, align 4 %2 = xor i32 %0, %1 - %3 = and i32 %2, 65535 + %3 = and i32 %2, 89947 %4 = icmp eq i32 %3, 0 br i1 %4, label %exit, label %land.end exit: %shr.i.i19 = xor i32 %1, %0 - %5 = and i32 %shr.i.i19, 2147418112 + %5 = and i32 %shr.i.i19, 3456789123 %6 = icmp eq i32 %5, 0 br label %land.end -- 2.34.1