From b20e0b1fddfd9099e12b84a71fbc8ccff5a12b10 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 5 Dec 2010 07:30:36 +0000 Subject: [PATCH] It turns out that when the ".with.overflow" intrinsics were added to the X86 backend, they were all implemented except umul. This one fell back to the default implementation that did a hi/lo multiply and compared the top. Fix this to check the overflow flag that the 'mul' instruction sets, so we can avoid an explicit test. Now we compile: void *func(long count) { return new int[count]; } into: __Z4funcl: ## @_Z4funcl movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00] movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] mulq %rcx ## encoding: [0x48,0xf7,0xe1] seto %cl ## encoding: [0x0f,0x90,0xc1] testb %cl, %cl ## encoding: [0x84,0xc9] movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff] cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8] jmp __Znam ## TAILCALL instead of: __Z4funcl: ## @_Z4funcl movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00] movq %rdi, %rax ## encoding: [0x48,0x89,0xf8] mulq %rcx ## encoding: [0x48,0xf7,0xe1] testq %rdx, %rdx ## encoding: [0x48,0x85,0xd2] movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff] cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8] jmp __Znam ## TAILCALL Other than the silly seto+test, this is using the o bit directly, so it's going in the right direction. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@120935 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 34 +++++++++++++++++++++++--- lib/Target/X86/X86ISelLowering.cpp | 31 +++++++++++++++-------- lib/Target/X86/X86ISelLowering.h | 6 +++-- lib/Target/X86/X86InstrArithmetic.td | 7 +++--- lib/Target/X86/X86InstrInfo.td | 8 +++++- test/CodeGen/X86/umul-with-overflow.ll | 8 +++++- 6 files changed, 73 insertions(+), 21 deletions(-) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 0226278327c..1311dbabb54 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1600,7 +1600,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return RetVal; break; } - + case X86ISD::UMUL: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + unsigned LoReg, HiReg; + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; Opc = X86::MUL8r; break; + case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; Opc = X86::MUL16r; break; + case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; Opc = X86::MUL32r; break; + case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; Opc = X86::MUL64r; break; + } + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, + N0, SDValue()).getValue(1); + + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); + SDValue Ops[] = {N1, InFlag}; + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); + + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); + ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); + ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); + return NULL; + } + case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue N0 = Node->getOperand(0); @@ -1653,11 +1678,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, array_lengthof(Ops)); InFlag = SDValue(CNode, 1); + 
// Update the chain. ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); } else { - InFlag = - SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); + SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag); + InFlag = SDValue(CNode, 0); } // Prevent use of AH in a REX instruction by referencing AX instead. @@ -1696,7 +1722,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - + return NULL; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1df53b99472..58b8cb11121 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -948,6 +948,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SSUBO, MVT::i32, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); setOperationAction(ISD::SMULO, MVT::i32, Custom); + setOperationAction(ISD::UMULO, MVT::i32, Custom); // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -961,6 +962,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SSUBO, MVT::i64, Custom); setOperationAction(ISD::USUBO, MVT::i64, Custom); setOperationAction(ISD::SMULO, MVT::i64, Custom); + setOperationAction(ISD::UMULO, MVT::i64, Custom); } if (!Subtarget->is64Bit()) { @@ -7042,7 +7044,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return NewSetCC; } - // Look for "(setcc) == / != 1" to avoid unncessary setcc. + // Look for "(setcc) == / != 1" to avoid unnecessary setcc. 
if (Op0.getOpcode() == X86ISD::SETCC && Op1.getOpcode() == ISD::Constant && (cast(Op1)->getZExtValue() == 1 || @@ -8446,8 +8448,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { SDValue RHS = N->getOperand(1); unsigned BaseOp = 0; unsigned Cond = 0; - DebugLoc dl = Op.getDebugLoc(); - + DebugLoc DL = Op.getDebugLoc(); switch (Op.getOpcode()) { default: llvm_unreachable("Unknown ovf instruction!"); case ISD::SADDO: @@ -8486,19 +8487,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { BaseOp = X86ISD::SMUL; Cond = X86::COND_O; break; - case ISD::UMULO: - BaseOp = X86ISD::UMUL; - Cond = X86::COND_B; - break; + case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs + SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0), + MVT::i32); + SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS); + + SDValue SetCC = + DAG.getNode(X86ISD::SETCC, DL, MVT::i8, + DAG.getConstant(X86::COND_O, MVT::i32), + SDValue(Sum.getNode(), 2)); + + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); + return Sum; + } } // Also sets EFLAGS. SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); - SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS); + SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); SDValue SetCC = - DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1), - DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1)); + DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1), + DAG.getConstant(Cond, MVT::i32), + SDValue(Sum.getNode(), 1)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); return Sum; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index edcdceb39a5..35f63b26df6 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -200,9 +200,11 @@ namespace llvm { PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ, PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ, - // ADD, SUB, SMUL, UMUL, etc. 
- Arithmetic operations with FLAGS results. - ADD, SUB, SMUL, UMUL, + // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. + ADD, SUB, SMUL, INC, DEC, OR, XOR, AND, + + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. MUL_IMM, diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index f82e1c666ed..e813ec04e78 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -60,11 +60,12 @@ def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), - "mul{l}\t$src", - []>; // EAX,EDX = EAX*GR32 + "mul{l}\t$src", // EAX,EDX = EAX*GR32 + [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), - "mul{q}\t$src", []>; // RAX,RDX = RAX*GR64 + "mul{q}\t$src", // RAX,RDX = RAX*GR64 + [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>; let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 560cafe0816..d6e7f580320 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -35,6 +35,12 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>, SDTCisVT<1, i32>]>; +// RES1, RES2, FLAGS = op LHS, RHS +def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; def SDTX86BrCond : SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>, SDTCisVT<2, i32>]>; @@ -188,7 +194,7 @@ def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; def 
X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags, [SDNPCommutative]>; -def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags, +def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags, [SDNPCommutative]>; def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>; diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll index d522bd80acf..c9976617a24 100644 --- a/test/CodeGen/X86/umul-with-overflow.ll +++ b/test/CodeGen/X86/umul-with-overflow.ll @@ -1,8 +1,14 @@ -; RUN: llc < %s -march=x86 | grep "\\\\\\\