From: Bill Wendling Date: Tue, 9 Dec 2008 22:08:41 +0000 (+0000) Subject: Add sub/mul overflow intrinsics. This currently doesn't have a X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=74c376529101acbe141a256d0bf23a44eb454c84;p=oota-llvm.git Add sub/mul overflow intrinsics. This currently doesn't have a target-independent way of determining overflow on multiplication. It's very tricky. Patch by Zoltan Varga! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60800 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 2498e9f9b67..2fabe16cf83 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -259,6 +259,12 @@ namespace ISD { // These nodes are generated from the llvm.[su]add.with.overflow intrinsics. SADDO, UADDO, + // Same for subtraction + SSUBO, USUBO, + + // Same for multiplication + SMULO, UMULO, + // Simple binary floating point operators. 
FADD, FSUB, FMUL, FDIV, FREM, diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 3037fbca2b7..154106eba21 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -305,6 +305,16 @@ def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; +def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], + [LLVMMatchType<0>, LLVMMatchType<0>]>; +def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + +def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], + [LLVMMatchType<0>, LLVMMatchType<0>]>; +def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + //===------------------------- Atomic Intrinsics --------------------------===// // def int_memory_barrier : Intrinsic<[llvm_void_ty], diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 698014d4eae..71a1d893f13 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1487,7 +1487,7 @@ private: MVT TransformToType[MVT::LAST_VALUETYPE]; // Defines the capacity of the TargetLowering::OpActions table - static const int OpActionsCapacity = 218; + static const int OpActionsCapacity = 222; /// OpActions - For each operation and each value type, keep a LegalizeAction /// that indicates how instruction selection should deal with the operation. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index c13e84b0b36..c33a8fffea9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4234,7 +4234,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } - case ISD::SADDO: { + case ISD::SADDO: + case ISD::SSUBO: { MVT VT = Node->getValueType(0); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: assert(0 && "This action not supported for this op yet!"); @@ -4246,7 +4247,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { SDValue LHS = LegalizeOp(Node->getOperand(0)); SDValue RHS = LegalizeOp(Node->getOperand(1)); - SDValue Sum = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, LHS.getValueType(), + LHS, RHS); MVT OType = Node->getValueType(1); SDValue Zero = DAG.getConstant(0, LHS.getValueType()); @@ -4255,16 +4258,21 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // RHSSign -> RHS >= 0 // SumSign -> Sum >= 0 // + // Add: // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) // SDValue LHSSign = DAG.getSetCC(OType, LHS, Zero, ISD::SETGE); SDValue RHSSign = DAG.getSetCC(OType, RHS, Zero, ISD::SETGE); - SDValue SignsEq = DAG.getSetCC(OType, LHSSign, RHSSign, ISD::SETEQ); + SDValue SignsMatch = DAG.getSetCC(OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? 
+ ISD::SETEQ : ISD::SETNE); SDValue SumSign = DAG.getSetCC(OType, Sum, Zero, ISD::SETGE); SDValue SumSignNE = DAG.getSetCC(OType, LHSSign, SumSign, ISD::SETNE); - SDValue Cmp = DAG.getNode(ISD::AND, OType, SignsEq, SumSignNE); + SDValue Cmp = DAG.getNode(ISD::AND, OType, SignsMatch, SumSignNE); MVT ValueVTs[] = { LHS.getValueType(), OType }; SDValue Ops[] = { Sum, Cmp }; @@ -4280,7 +4288,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } - case ISD::UADDO: { + case ISD::UADDO: + case ISD::USUBO: { MVT VT = Node->getValueType(0); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: assert(0 && "This action not supported for this op yet!"); @@ -4292,9 +4301,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { SDValue LHS = LegalizeOp(Node->getOperand(0)); SDValue RHS = LegalizeOp(Node->getOperand(1)); - SDValue Sum = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, LHS.getValueType(), + LHS, RHS); MVT OType = Node->getValueType(1); - SDValue Cmp = DAG.getSetCC(OType, Sum, LHS, ISD::SETULT); + SDValue Cmp = DAG.getSetCC(OType, Sum, LHS, + Node->getOpcode () == ISD::UADDO ? 
+ ISD::SETULT : ISD::SETUGT); MVT ValueVTs[] = { LHS.getValueType(), OType }; SDValue Ops[] = { Sum, Cmp }; @@ -4310,6 +4323,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } + case ISD::SMULO: + case ISD::UMULO: { + MVT VT = Node->getValueType(0); + switch (TLI.getOperationAction(Node->getOpcode(), VT)) { + default: assert(0 && "This action is not supported at all!"); + case TargetLowering::Custom: + Result = TLI.LowerOperation(Op, DAG); + if (Result.getNode()) break; + // Fall Thru + case TargetLowering::Legal: + // FIXME: According to Hacker's Delight, this can be implemented in + // target independent lowering, but it would be inefficient, since it + // requires a division + a branch + assert(0 && "Target independent lowering is not supported for SMULO/UMULO!"); + break; + } + break; + } + } assert(Result.getValueType() == Op.getValueType() && diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 62fcff392ee..10cfdc634bd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -92,7 +92,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UREM: Result = PromoteIntRes_UDIV(N); break; case ISD::SADDO: - case ISD::UADDO: Result = PromoteIntRes_XADDO(N, ResNo); break; + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: Result = PromoteIntRes_XALUO(N, ResNo); break; case ISD::ATOMIC_LOAD_ADD_8: case ISD::ATOMIC_LOAD_SUB_8: @@ -518,7 +522,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { return DAG.getNode(N->getOpcode(), LHS.getValueType(), LHS, RHS); } -SDValue DAGTypeLegalizer::PromoteIntRes_XADDO(SDNode *N, unsigned ResNo) { +SDValue DAGTypeLegalizer::PromoteIntRes_XALUO(SDNode *N, unsigned ResNo) { assert(ResNo == 1 && "Only boolean result promotion currently supported!"); // Simply change the return type of the boolean 
result. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index d2365d262e4..d46fccd0f84 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -270,7 +270,7 @@ private: SDValue PromoteIntRes_UDIV(SDNode *N); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); - SDValue PromoteIntRes_XADDO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_XALUO(SDNode *N, unsigned ResNo); // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 979dea0d7f0..bb3b42c383f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1511,6 +1511,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; case ISD::SADDO: case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: if (Op.getResNo() != 1) return; // The boolean result conforms to getBooleanContents. Fall through. @@ -1919,6 +1923,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ case ISD::SADDO: case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: if (Op.getResNo() != 1) break; // The boolean result conforms to getBooleanContents. Fall through. 
@@ -5216,6 +5224,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ADDE: return "adde"; case ISD::SADDO: return "saddo"; case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; case ISD::SUBC: return "subc"; case ISD::SUBE: return "sube"; case ISD::SHL_PARTS: return "shl_parts"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index e8987c54af5..5d81d224c88 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -2968,6 +2968,23 @@ SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { return 0; } +// implVisitAluOverflow - Lower an overflow intrinsic +const char * +SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { + SDValue Op1 = getValue(I.getOperand(1)); + SDValue Op2 = getValue(I.getOperand(2)); + + MVT ValueVTs[] = { Op1.getValueType(), MVT::i1 }; + SDValue Ops[] = { Op1, Op2 }; + + SDValue Result = + DAG.getNode(Op, + DAG.getVTList(&ValueVTs[0], 2), &Ops[0], 2); + + setValue(&I, Result); + return 0; + } + /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void @@ -4097,21 +4114,17 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { } case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: { - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - - MVT ValueVTs[] = { Op1.getValueType(), MVT::i1 }; - SDValue Ops[] = { Op1, Op2 }; - - SDValue Result = - DAG.getNode((Intrinsic == Intrinsic::sadd_with_overflow) ? 
- ISD::SADDO : ISD::UADDO, - DAG.getVTList(&ValueVTs[0], 2), &Ops[0], 2); - - setValue(&I, Result); - return 0; - } + return implVisitAluOverflow(I, ISD::UADDO); + case Intrinsic::sadd_with_overflow: + return implVisitAluOverflow(I, ISD::SADDO); + case Intrinsic::usub_with_overflow: + return implVisitAluOverflow(I, ISD::USUBO); + case Intrinsic::ssub_with_overflow: + return implVisitAluOverflow(I, ISD::SSUBO); + case Intrinsic::umul_with_overflow: + return implVisitAluOverflow(I, ISD::UMULO); + case Intrinsic::smul_with_overflow: + return implVisitAluOverflow(I, ISD::SMULO); case Intrinsic::prefetch: { SDValue Ops[4]; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h index e614c3049d8..db70f169020 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -530,6 +530,7 @@ private: } const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op); + const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op); }; /// AddCatchInfo - Extract the personality and type infos from an eh.selector diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 44f28d3d770..7e6ad092044 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -780,11 +780,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - // Add with overflow operations are custom lowered. + // Add/Sub/Mul with overflow operations are custom lowered. 
setOperationAction(ISD::SADDO, MVT::i32, Custom); setOperationAction(ISD::SADDO, MVT::i64, Custom); setOperationAction(ISD::UADDO, MVT::i32, Custom); setOperationAction(ISD::UADDO, MVT::i64, Custom); + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + setOperationAction(ISD::SSUBO, MVT::i64, Custom); + setOperationAction(ISD::USUBO, MVT::i32, Custom); + setOperationAction(ISD::USUBO, MVT::i64, Custom); + setOperationAction(ISD::SMULO, MVT::i32, Custom); + setOperationAction(ISD::SMULO, MVT::i64, Custom); + setOperationAction(ISD::UMULO, MVT::i32, Custom); + setOperationAction(ISD::UMULO, MVT::i64, Custom); // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); @@ -5202,8 +5210,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { if (Cond.getOpcode() == ISD::SETCC) Cond = LowerSETCC(Cond, DAG); - else if (Cond.getOpcode() == ISD::SADDO || Cond.getOpcode() == ISD::UADDO) - Cond = LowerXADDO(Cond, DAG); + else if (Cond.getOpcode() == ISD::SADDO || Cond.getOpcode() == ISD::UADDO || + Cond.getOpcode() == ISD::SSUBO || Cond.getOpcode() == ISD::USUBO || + Cond.getOpcode() == ISD::SMULO || Cond.getOpcode() == ISD::UMULO) + Cond = LowerXALUO(Cond, DAG); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. @@ -6118,23 +6128,52 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { return Op; } -SDValue X86TargetLowering::LowerXADDO(SDValue Op, SelectionDAG &DAG) { - // Lower the "add with overflow" instruction into a regular "add" plus a - // "setcc" instruction that checks the overflow flag. The "brcond" lowering +SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { + // Lower the "add/sub/mul with overflow" instruction into a regular ins plus + // a "setcc" instruction that checks the overflow flag. 
The "brcond" lowering // looks for this combo and may remove the "setcc" instruction if the "setcc" // has only one use. SDNode *N = Op.getNode(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + unsigned BaseOp = 0; + unsigned Cond = 0; + + switch (Op.getOpcode()) { + default: assert(0 && "Unknown ovf instruction!"); + case ISD::SADDO: + BaseOp = ISD::ADD; + Cond = X86::COND_O; + break; + case ISD::UADDO: + BaseOp = ISD::ADD; + Cond = X86::COND_C; + break; + case ISD::SSUBO: + BaseOp = ISD::SUB; + Cond = X86::COND_O; + break; + case ISD::USUBO: + BaseOp = ISD::SUB; + Cond = X86::COND_C; + break; + case ISD::SMULO: + BaseOp = ISD::MUL; + Cond = X86::COND_O; + break; + case ISD::UMULO: + BaseOp = ISD::MUL; + Cond = X86::COND_C; + break; + } // Also sets EFLAGS. SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); - SDValue Sum = DAG.getNode(ISD::ADD, VTs, LHS, RHS); + SDValue Sum = DAG.getNode(BaseOp, VTs, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, N->getValueType(1), - DAG.getConstant((Op.getOpcode() == ISD::SADDO) ? 
- X86::COND_O : X86::COND_C, + DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC); @@ -6259,8 +6298,12 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::CTLZ: return LowerCTLZ(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); - case ISD::SADDO: return LowerXADDO(Op, DAG); - case ISD::UADDO: return LowerXADDO(Op, DAG); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: return LowerXALUO(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 4a854443284..6eb78f692fb 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -593,7 +593,7 @@ namespace llvm { SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG); SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG); SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG); - SDValue LowerXADDO(SDValue Op, SelectionDAG &DAG); + SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG); SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG); SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 5f63e56e6f9..d6d08b9f0ab 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -379,29 +379,36 @@ def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2), let isTwoAddress = 1 in { def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>; + [(set GR64:$dst, (sub GR64:$src1, GR64:$src2)), + (implicit EFLAGS)]>; def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set 
GR64:$dst, (sub GR64:$src1, (load addr:$src2)))]>; + [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2))]>; + [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2))]>; + [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; } // isTwoAddress def SUB64mr : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), GR64:$src2), addr:$dst)]>; + [(store (sub (load addr:$dst), GR64:$src2), addr:$dst), + (implicit EFLAGS)]>; def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; + [(store (sub (load addr:$dst), i64immSExt32:$src2), addr:$dst), + (implicit EFLAGS)]>; def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2), "sub{q}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>; + [(store (sub (load addr:$dst), i64immSExt8:$src2), addr:$dst), + (implicit EFLAGS)]>; let Uses = [EFLAGS] in { let isTwoAddress = 1 in { @@ -454,30 +461,36 @@ let isTwoAddress = 1 in { let isCommutable = 1 in def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>, TB; + [(set GR64:$dst, (mul GR64:$src1, GR64:$src2)), + (implicit EFLAGS)]>, TB; def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", - [(set GR64:$dst, (mul 
GR64:$src1, (load addr:$src2)))]>, TB; + [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, TB; } // isTwoAddress // Suprisingly enough, these are not two address instructions! def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32 (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>; + [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2)), + (implicit EFLAGS)]>; def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2))]>; + [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2)), + (implicit EFLAGS)]>; def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32 (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, (mul (load addr:$src1), i64immSExt32:$src2))]>; + [(set GR64:$dst, (mul (load addr:$src1), i64immSExt32:$src2)), + (implicit EFLAGS)]>; def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR64:$dst, (mul (load addr:$src1), i64immSExt8:$src2))]>; + [(set GR64:$dst, (mul (load addr:$src1), i64immSExt8:$src2)), + (implicit EFLAGS)]>; } // Defs = [EFLAGS] // Unsigned division / remainder diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 37579e8d57b..3834f843749 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -709,10 +709,10 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. 
- [(set AL, (mul AL, GR8:$src))]>; // AL,AH = AL*GR8 + [(set AL, (mul AL, GR8:$src))]>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in -def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", []>, - OpSize; // AX,DX = AX*GR16 +def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", + []>, OpSize; // AX,DX = AX*GR16 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", []>; // EAX,EDX = EAX*GR32 @@ -2054,67 +2054,82 @@ let isTwoAddress = 0 in { def SUB8rr : I<0x28, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sub GR8:$src1, GR8:$src2))]>; + [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)), + (implicit EFLAGS)]>; def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sub GR16:$src1, GR16:$src2))]>, OpSize; + [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)), + (implicit EFLAGS)]>, OpSize; def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>; + [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)), + (implicit EFLAGS)]>; def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2)))]>; + [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2)))]>, OpSize; + [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, OpSize; def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set 
GR32:$dst, (sub GR32:$src1, (load addr:$src2)))]>; + [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2))), + (implicit EFLAGS)]>; def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", - [(set GR8:$dst, (sub GR8:$src1, imm:$src2))]>; + [(set GR8:$dst, (sub GR8:$src1, imm:$src2)), + (implicit EFLAGS)]>; def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sub GR16:$src1, imm:$src2))]>, OpSize; + [(set GR16:$dst, (sub GR16:$src1, imm:$src2)), + (implicit EFLAGS)]>, OpSize; def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sub GR32:$src1, imm:$src2))]>; + [(set GR32:$dst, (sub GR32:$src1, imm:$src2)), + (implicit EFLAGS)]>; def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2))]>, - OpSize; + [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2)), + (implicit EFLAGS)]>, OpSize; def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2))]>; + [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2)), + (implicit EFLAGS)]>; let isTwoAddress = 0 in { def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), GR8:$src2), addr:$dst)]>; + [(store (sub (load addr:$dst), GR8:$src2), addr:$dst), + (implicit EFLAGS)]>; def SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), GR16:$src2), addr:$dst)]>, - OpSize; + [(store (sub (load addr:$dst), GR16:$src2), addr:$dst), + (implicit EFLAGS)]>, OpSize; def SUB32mr : I<0x29, MRMDestMem, (outs), (ins 
i32mem:$dst, GR32:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), GR32:$src2), addr:$dst)]>; + [(store (sub (load addr:$dst), GR32:$src2), addr:$dst), + (implicit EFLAGS)]>; def SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", - [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst)]>; + [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst), + (implicit EFLAGS)]>; def SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(store (sub (loadi16 addr:$dst), imm:$src2), addr:$dst)]>, - OpSize; + [(store (sub (loadi16 addr:$dst), imm:$src2), addr:$dst), + (implicit EFLAGS)]>, OpSize; def SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(store (sub (loadi32 addr:$dst), imm:$src2), addr:$dst)]>; + [(store (sub (loadi32 addr:$dst), imm:$src2), addr:$dst), + (implicit EFLAGS)]>; def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), "sub{w}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>, - OpSize; + [(store (sub (load addr:$dst), i16immSExt8:$src2), addr:$dst), + (implicit EFLAGS)]>, OpSize; def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), "sub{l}\t{$src2, $dst|$dst, $src2}", - [(store (sub (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>; + [(store (sub (load addr:$dst), i32immSExt8:$src2), addr:$dst), + (implicit EFLAGS)]>; } let Uses = [EFLAGS] in { @@ -2152,18 +2167,22 @@ let Defs = [EFLAGS] in { let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (mul GR16:$src1, GR16:$src2))]>, TB, OpSize; + [(set GR16:$dst, (mul GR16:$src1, GR16:$src2)), + (implicit EFLAGS)]>, TB, OpSize; def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins 
GR32:$src1, GR32:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>, TB; + [(set GR32:$dst, (mul GR32:$src1, GR32:$src2)), + (implicit EFLAGS)]>, TB; } def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", - [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2)))]>, + [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, TB, OpSize; def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2)))]>, TB; + [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))), + (implicit EFLAGS)]>, TB; } // Defs = [EFLAGS] } // end Two Address instructions @@ -2172,39 +2191,44 @@ let Defs = [EFLAGS] in { def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, (mul GR16:$src1, imm:$src2))]>, OpSize; + [(set GR16:$dst, (mul GR16:$src1, imm:$src2)), + (implicit EFLAGS)]>, OpSize; def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32 (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>; + [(set GR32:$dst, (mul GR32:$src1, imm:$src2)), + (implicit EFLAGS)]>; def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2))]>, - OpSize; + [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2)), + (implicit EFLAGS)]>, OpSize; def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8 (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2))]>; + [(set GR32:$dst, (mul GR32:$src1, 
i32immSExt8:$src2)), + (implicit EFLAGS)]>; def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, (mul (load addr:$src1), imm:$src2))]>, - OpSize; + [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)), + (implicit EFLAGS)]>, OpSize; def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32 (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (mul (load addr:$src1), imm:$src2))]>; + [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)), + (implicit EFLAGS)]>; def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8 (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR16:$dst, (mul (load addr:$src1), i16immSExt8:$src2))]>, - OpSize; + [(set GR16:$dst, (mul (load addr:$src1), i16immSExt8:$src2)), + (implicit EFLAGS)]>, OpSize; def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8 (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (mul (load addr:$src1), i32immSExt8:$src2))]>; + [(set GR32:$dst, (mul (load addr:$src1), i32immSExt8:$src2)), + (implicit EFLAGS)]>; } // Defs = [EFLAGS] //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/X86/mul-with-overflow.ll b/test/CodeGen/X86/mul-with-overflow.ll new file mode 100644 index 00000000000..107d54714e2 --- /dev/null +++ b/test/CodeGen/X86/mul-with-overflow.ll @@ -0,0 +1,41 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1 +; RUN: llvm-as < %s | llc -march=x86 | grep {jc} | count 1 + +@ok = internal constant [4 x i8] c"%d\0A\00" +@no = internal constant [4 x i8] c"no\0A\00" + +define i1 @func1(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) + %sum = 
extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %overflow, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true + +overflow: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false +} + +define i1 @func2(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %carry, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true + +carry: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false +} + +declare i32 @printf(i8*, ...) nounwind +declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) +declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll new file mode 100644 index 00000000000..f51fc210819 --- /dev/null +++ b/test/CodeGen/X86/sub-with-overflow.ll @@ -0,0 +1,41 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1 +; RUN: llvm-as < %s | llc -march=x86 | grep {jc} | count 1 + +@ok = internal constant [4 x i8] c"%d\0A\00" +@no = internal constant [4 x i8] c"no\0A\00" + +define i1 @func1(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %overflow, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true + +overflow: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 
false +} + +define i1 @func2(i32 %v1, i32 %v2) nounwind { +entry: + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) + %sum = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %carry, label %normal + +normal: + %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind + ret i1 true + +carry: + %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind + ret i1 false +} + +declare i32 @printf(i8*, ...) nounwind +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)