From: Chris Lattner Date: Sun, 5 Dec 2010 07:30:36 +0000 (+0000) Subject: it turns out that when ".with.overflow" intrinsics were added to the X86 X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=b20e0b1fddfd9099e12b84a71fbc8ccff5a12b10;p=oota-llvm.git it turns out that when ".with.overflow" intrinsics were added to the X86 backend that they were all implemented except umul. This one fell back to the default implementation that did a hi/lo multiply and compared the top. Fix this to check the overflow flag that the 'mul' instruction sets, so we can avoid an explicit test. Now we compile: void *func(long count) { return new int[count]; } into: __Z4funcl: ## @_Z4funcl movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00] movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] mulq %rcx ## encoding: [0x48,0xf7,0xe1] seto %cl ## encoding: [0x0f,0x90,0xc1] testb %cl, %cl ## encoding: [0x84,0xc9] movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff] cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8] jmp __Znam ## TAILCALL instead of: __Z4funcl: ## @_Z4funcl movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00] movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] mulq %rcx ## encoding: [0x48,0xf7,0xe1] testq %rdx, %rdx ## encoding: [0x48,0x85,0xd2] movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff] cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8] jmp __Znam ## TAILCALL Other than the silly seto+test, this is using the o bit directly, so it's going in the right direction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120935 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 0226278327c..1311dbabb54 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1600,7 +1600,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return RetVal; break; } - + case X86ISD::UMUL: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + unsigned LoReg, HiReg; + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; Opc = X86::MUL8r; break; + case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; Opc = X86::MUL16r; break; + case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; Opc = X86::MUL32r; break; + case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; Opc = X86::MUL64r; break; + } + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + N0, SDValue()).getValue(1); + + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); + SDValue Ops[] = {N1, InFlag}; + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); + ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); + return NULL; + } + case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue N0 = Node->getOperand(0); @@ -1653,11 +1678,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, array_lengthof(Ops)); InFlag = SDValue(CNode, 1); + // Update the chain. ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); } else { - InFlag = - SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag); + InFlag = SDValue(CNode, 0); } // Prevent use of AH in a REX instruction by referencing AX instead. @@ -1696,7 +1722,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - + return NULL; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1df53b99472..58b8cb11121 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -948,6 +948,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SSUBO, MVT::i32, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); setOperationAction(ISD::SMULO, MVT::i32, Custom); + setOperationAction(ISD::UMULO, MVT::i32, Custom); // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -961,6 +962,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SSUBO, MVT::i64, Custom); setOperationAction(ISD::USUBO, MVT::i64, Custom); setOperationAction(ISD::SMULO, MVT::i64, Custom); + setOperationAction(ISD::UMULO, MVT::i64, Custom); } if (!Subtarget->is64Bit()) { @@ -7042,7 +7044,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return NewSetCC; } - // Look for "(setcc) == / != 1" to avoid unncessary setcc. + // Look for "(setcc) == / != 1" to avoid unnecessary setcc. if (Op0.getOpcode() == X86ISD::SETCC && Op1.getOpcode() == ISD::Constant && (cast(Op1)->getZExtValue() == 1 || @@ -8446,8 +8448,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { SDValue RHS = N->getOperand(1); unsigned BaseOp = 0; unsigned Cond = 0; - DebugLoc dl = Op.getDebugLoc(); - + DebugLoc DL = Op.getDebugLoc(); switch (Op.getOpcode()) { default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: @@ -8486,19 +8487,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { BaseOp = X86ISD::SMUL; Cond = X86::COND_O; break; - case ISD::UMULO: - BaseOp = X86ISD::UMUL; - Cond = X86::COND_B; - break; + case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs + SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0), + MVT::i32); + SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS); + + SDValue SetCC = + DAG.getNode(X86ISD::SETCC, DL, MVT::i8, + DAG.getConstant(X86::COND_O, MVT::i32), + SDValue(Sum.getNode(), 2)); + + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); + return Sum; + } } // Also sets EFLAGS. SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); - SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS); + SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); SDValue SetCC = - DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1), - DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1)); + DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1), + DAG.getConstant(Cond, MVT::i32), + SDValue(Sum.getNode(), 1)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); return Sum; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index edcdceb39a5..35f63b26df6 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -200,9 +200,11 @@ namespace llvm { PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ, PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ, - // ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results. - ADD, SUB, SMUL, UMUL, + // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. + ADD, SUB, SMUL, INC, DEC, OR, XOR, AND, + + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. MUL_IMM, diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index f82e1c666ed..e813ec04e78 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -60,11 +60,12 @@ def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), - "mul{l}\t$src", - []>; // EAX,EDX = EAX*GR32 + "mul{l}\t$src", // EAX,EDX = EAX*GR32 + [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), - "mul{q}\t$src", []>; // RAX,RDX = RAX*GR64 + "mul{q}\t$src", // RAX,RDX = RAX*GR64 + [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>; let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 560cafe0816..d6e7f580320 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -35,6 +35,12 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>]>; +// RES1, RES2, FLAGS = op LHS, RHS +def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; def SDTX86BrCond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; @@ -188,7 +194,7 @@ def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags, [SDNPCommutative]>; -def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags, +def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags, [SDNPCommutative]>; def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll index d522bd80acf..c9976617a24 100644 --- a/test/CodeGen/X86/umul-with-overflow.ll +++ b/test/CodeGen/X86/umul-with-overflow.ll @@ -1,8 +1,14 @@ -; RUN: llc < %s -march=x86 | grep "\\\\\\\