From: Bill Wendling <isanbard@gmail.com>
Date: Tue, 9 Dec 2008 22:08:41 +0000 (+0000)
Subject: Add sub/mul overflow intrinsics. This currently doesn't have a
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=74c376529101acbe141a256d0bf23a44eb454c84;p=oota-llvm.git

Add sub/mul overflow intrinsics. There is currently no target-independent
way of determining overflow on multiplication (doing so is very tricky), so
SMULO/UMULO must be custom lowered by the target. Patch by Zoltan Varga!


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@60800 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 2498e9f9b67..2fabe16cf83 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -259,6 +259,12 @@ namespace ISD {
     // These nodes are generated from the llvm.[su]add.with.overflow intrinsics.
     SADDO, UADDO,
 
+    // Same for subtraction
+    SSUBO, USUBO,
+
+    // Same for multiplication
+    SMULO, UMULO,
+
     // Simple binary floating point operators.
     FADD, FSUB, FMUL, FDIV, FREM,
 
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 3037fbca2b7..154106eba21 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -305,6 +305,16 @@ def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
 def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
                                        [LLVMMatchType<0>, LLVMMatchType<0>]>;
 
+def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>]>;
+def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>]>;
+
+def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>]>;
+def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>]>;
+
 //===------------------------- Atomic Intrinsics --------------------------===//
 //
 def int_memory_barrier : Intrinsic<[llvm_void_ty],
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 698014d4eae..71a1d893f13 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1487,7 +1487,7 @@ private:
   MVT TransformToType[MVT::LAST_VALUETYPE];
 
   // Defines the capacity of the TargetLowering::OpActions table
-  static const int OpActionsCapacity = 218;
+  static const int OpActionsCapacity = 222;
 
   /// OpActions - For each operation and each value type, keep a LegalizeAction
   /// that indicates how instruction selection should deal with the operation.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c13e84b0b36..c33a8fffea9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4234,7 +4234,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     break;
   }
 
-  case ISD::SADDO: {
+  case ISD::SADDO:
+  case ISD::SSUBO: {
     MVT VT = Node->getValueType(0);
     switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
     default: assert(0 && "This action not supported for this op yet!");
@@ -4246,7 +4247,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       SDValue LHS = LegalizeOp(Node->getOperand(0));
       SDValue RHS = LegalizeOp(Node->getOperand(1));
 
-      SDValue Sum = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS);
+      SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? 
+                                ISD::ADD : ISD::SUB, LHS.getValueType(),
+                                LHS, RHS);
       MVT OType = Node->getValueType(1);
 
       SDValue Zero = DAG.getConstant(0, LHS.getValueType());
@@ -4255,16 +4258,21 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       //   RHSSign -> RHS >= 0
       //   SumSign -> Sum >= 0
       //
+      //   Add:
       //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+      //   Sub:
+      //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
       //
       SDValue LHSSign = DAG.getSetCC(OType, LHS, Zero, ISD::SETGE);
       SDValue RHSSign = DAG.getSetCC(OType, RHS, Zero, ISD::SETGE);
-      SDValue SignsEq = DAG.getSetCC(OType, LHSSign, RHSSign, ISD::SETEQ);
+      SDValue SignsMatch = DAG.getSetCC(OType, LHSSign, RHSSign, 
+                                        Node->getOpcode() == ISD::SADDO ? 
+                                        ISD::SETEQ : ISD::SETNE);
 
       SDValue SumSign = DAG.getSetCC(OType, Sum, Zero, ISD::SETGE);
       SDValue SumSignNE = DAG.getSetCC(OType, LHSSign, SumSign, ISD::SETNE);
 
-      SDValue Cmp = DAG.getNode(ISD::AND, OType, SignsEq, SumSignNE);
+      SDValue Cmp = DAG.getNode(ISD::AND, OType, SignsMatch, SumSignNE);
 
       MVT ValueVTs[] = { LHS.getValueType(), OType };
       SDValue Ops[] = { Sum, Cmp };
@@ -4280,7 +4288,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
 
     break;
   }
-  case ISD::UADDO: {
+  case ISD::UADDO:
+  case ISD::USUBO: {
     MVT VT = Node->getValueType(0);
     switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
     default: assert(0 && "This action not supported for this op yet!");
@@ -4292,9 +4301,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       SDValue LHS = LegalizeOp(Node->getOperand(0));
       SDValue RHS = LegalizeOp(Node->getOperand(1));
 
-      SDValue Sum = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS);
+      SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ?
+                                ISD::ADD : ISD::SUB, LHS.getValueType(),
+                                LHS, RHS);
       MVT OType = Node->getValueType(1);
-      SDValue Cmp = DAG.getSetCC(OType, Sum, LHS, ISD::SETULT);
+      SDValue Cmp = DAG.getSetCC(OType, Sum, LHS,
+                                 Node->getOpcode () == ISD::UADDO ? 
+                                 ISD::SETULT : ISD::SETUGT);
 
       MVT ValueVTs[] = { LHS.getValueType(), OType };
       SDValue Ops[] = { Sum, Cmp };
@@ -4310,6 +4323,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
 
     break;
   }
+  case ISD::SMULO:
+  case ISD::UMULO: {
+    MVT VT = Node->getValueType(0);
+    switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+    default: assert(0 && "This action is not supported at all!");
+    case TargetLowering::Custom:
+      Result = TLI.LowerOperation(Op, DAG);
+      if (Result.getNode()) break;
+      // Fall Thru
+    case TargetLowering::Legal:
+      // FIXME: According to Hacker's Delight, this can be implemented in
+      // target independent lowering, but it would be inefficient, since it
+      // requires a division + a branch
+      assert(0 && "Target independent lowering is not supported for SMULO/UMULO!");	
+    break;
+    }
+    break;
+  }
+
   }
   
   assert(Result.getValueType() == Op.getValueType() &&
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 62fcff392ee..10cfdc634bd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -92,7 +92,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::UREM:        Result = PromoteIntRes_UDIV(N); break;
 
   case ISD::SADDO:
-  case ISD::UADDO:       Result = PromoteIntRes_XADDO(N, ResNo); break;
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:       Result = PromoteIntRes_XALUO(N, ResNo); break;
 
   case ISD::ATOMIC_LOAD_ADD_8:
   case ISD::ATOMIC_LOAD_SUB_8:
@@ -518,7 +522,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
   return DAG.getNode(N->getOpcode(), LHS.getValueType(), LHS, RHS);
 }
 
-SDValue DAGTypeLegalizer::PromoteIntRes_XADDO(SDNode *N, unsigned ResNo) {
+SDValue DAGTypeLegalizer::PromoteIntRes_XALUO(SDNode *N, unsigned ResNo) {
   assert(ResNo == 1 && "Only boolean result promotion currently supported!");
 
   // Simply change the return type of the boolean result.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d2365d262e4..d46fccd0f84 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -270,7 +270,7 @@ private:
   SDValue PromoteIntRes_UDIV(SDNode *N);
   SDValue PromoteIntRes_UNDEF(SDNode *N);
   SDValue PromoteIntRes_VAARG(SDNode *N);
-  SDValue PromoteIntRes_XADDO(SDNode *N, unsigned ResNo);
+  SDValue PromoteIntRes_XALUO(SDNode *N, unsigned ResNo);
 
   // Integer Operand Promotion.
   bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 979dea0d7f0..bb3b42c383f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1511,6 +1511,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     return;
   case ISD::SADDO:
   case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
     if (Op.getResNo() != 1)
       return;
     // The boolean result conforms to getBooleanContents.  Fall through.
@@ -1919,6 +1923,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
 
   case ISD::SADDO:
   case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
     if (Op.getResNo() != 1)
       break;
     // The boolean result conforms to getBooleanContents.  Fall through.
@@ -5216,6 +5224,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::ADDE:        return "adde";
   case ISD::SADDO:       return "saddo";
   case ISD::UADDO:       return "uaddo";
+  case ISD::SSUBO:       return "ssubo";
+  case ISD::USUBO:       return "usubo";
+  case ISD::SMULO:       return "smulo";
+  case ISD::UMULO:       return "umulo";
   case ISD::SUBC:        return "subc";
   case ISD::SUBE:        return "sube";
   case ISD::SHL_PARTS:   return "shl_parts";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index e8987c54af5..5d81d224c88 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -2968,6 +2968,23 @@ SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
   return 0;
 }
 
+/// implVisitAluOverflow - Lower a *.with.overflow intrinsic call to opcode Op.
+const char *
+SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
+    SDValue Op1 = getValue(I.getOperand(1));
+    SDValue Op2 = getValue(I.getOperand(2));
+    // The node has two results: one of the operand's type, plus an i1 flag.
+    MVT ValueVTs[] = { Op1.getValueType(), MVT::i1 };
+    SDValue Ops[] = { Op1, Op2 };
+    // Build a single two-result node with opcode Op over both call operands.
+    SDValue Result =
+      DAG.getNode(Op,
+                  DAG.getVTList(&ValueVTs[0], 2), &Ops[0], 2);
+    // Record the node as the value of the call; this helper always returns 0.
+    setValue(&I, Result);
+    return 0;
+  }
+
 /// visitExp - Lower an exp intrinsic. Handles the special sequences for
 /// limited-precision mode.
 void
@@ -4097,21 +4114,17 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
   }
 
   case Intrinsic::uadd_with_overflow:
-  case Intrinsic::sadd_with_overflow: {
-    SDValue Op1 = getValue(I.getOperand(1));
-    SDValue Op2 = getValue(I.getOperand(2));
-
-    MVT ValueVTs[] = { Op1.getValueType(), MVT::i1 };
-    SDValue Ops[] = { Op1, Op2 };
-
-    SDValue Result =
-      DAG.getNode((Intrinsic == Intrinsic::sadd_with_overflow) ?
-                    ISD::SADDO : ISD::UADDO,
-                  DAG.getVTList(&ValueVTs[0], 2), &Ops[0], 2);
-
-    setValue(&I, Result);
-    return 0;
-  }
+    return implVisitAluOverflow(I, ISD::UADDO);
+  case Intrinsic::sadd_with_overflow:
+    return implVisitAluOverflow(I, ISD::SADDO);
+  case Intrinsic::usub_with_overflow:
+    return implVisitAluOverflow(I, ISD::USUBO);
+  case Intrinsic::ssub_with_overflow:
+    return implVisitAluOverflow(I, ISD::SSUBO);
+  case Intrinsic::umul_with_overflow:
+    return implVisitAluOverflow(I, ISD::UMULO);
+  case Intrinsic::smul_with_overflow:
+    return implVisitAluOverflow(I, ISD::SMULO);
 
   case Intrinsic::prefetch: {
     SDValue Ops[4];
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
index e614c3049d8..db70f169020 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -530,6 +530,7 @@ private:
   }
   
   const char *implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op);
+  const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op);
 };
 
 /// AddCatchInfo - Extract the personality and type infos from an eh.selector
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 44f28d3d770..7e6ad092044 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -780,11 +780,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
-  // Add with overflow operations are custom lowered.
+  // Add/Sub/Mul with overflow operations are custom lowered.
   setOperationAction(ISD::SADDO, MVT::i32, Custom);
   setOperationAction(ISD::SADDO, MVT::i64, Custom);
   setOperationAction(ISD::UADDO, MVT::i32, Custom);
   setOperationAction(ISD::UADDO, MVT::i64, Custom);
+  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+  setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+  setOperationAction(ISD::USUBO, MVT::i32, Custom);
+  setOperationAction(ISD::USUBO, MVT::i64, Custom);
+  setOperationAction(ISD::SMULO, MVT::i32, Custom);
+  setOperationAction(ISD::SMULO, MVT::i64, Custom);
+  setOperationAction(ISD::UMULO, MVT::i32, Custom);
+  setOperationAction(ISD::UMULO, MVT::i64, Custom);
 
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
@@ -5202,8 +5210,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
 
   if (Cond.getOpcode() == ISD::SETCC)
     Cond = LowerSETCC(Cond, DAG);
-  else if (Cond.getOpcode() == ISD::SADDO || Cond.getOpcode() == ISD::UADDO)
-    Cond = LowerXADDO(Cond, DAG);
+  else if (Cond.getOpcode() == ISD::SADDO || Cond.getOpcode() == ISD::UADDO ||
+           Cond.getOpcode() == ISD::SSUBO || Cond.getOpcode() == ISD::USUBO ||
+           Cond.getOpcode() == ISD::SMULO || Cond.getOpcode() == ISD::UMULO)
+    Cond = LowerXALUO(Cond, DAG);
 
   // If condition flag is set by a X86ISD::CMP, then use it as the condition
   // setting operand in place of the X86ISD::SETCC.
@@ -6118,23 +6128,52 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
   return Op;
 }
 
-SDValue X86TargetLowering::LowerXADDO(SDValue Op, SelectionDAG &DAG) {
-  // Lower the "add with overflow" instruction into a regular "add" plus a
-  // "setcc" instruction that checks the overflow flag. The "brcond" lowering
+SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+  // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
+  // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
   // looks for this combo and may remove the "setcc" instruction if the "setcc"
   // has only one use.
   SDNode *N = Op.getNode();
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
+  unsigned BaseOp = 0;
+  unsigned Cond = 0;
+
+  switch (Op.getOpcode()) {
+  default: assert(0 && "Unknown ovf instruction!");
+  case ISD::SADDO:
+    BaseOp = ISD::ADD;
+    Cond = X86::COND_O;
+    break;
+  case ISD::UADDO:
+    BaseOp = ISD::ADD;
+    Cond = X86::COND_C;
+    break;
+  case ISD::SSUBO:
+    BaseOp = ISD::SUB;
+    Cond = X86::COND_O;
+    break;
+  case ISD::USUBO:
+    BaseOp = ISD::SUB;
+    Cond = X86::COND_C;
+    break;
+  case ISD::SMULO:
+    BaseOp = ISD::MUL;
+    Cond = X86::COND_O;
+    break;
+  case ISD::UMULO:
+    BaseOp = ISD::MUL;
+    Cond = X86::COND_C;
+    break;
+  }
 
   // Also sets EFLAGS.
   SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
-  SDValue Sum = DAG.getNode(ISD::ADD, VTs, LHS, RHS);
+  SDValue Sum = DAG.getNode(BaseOp, VTs, LHS, RHS);
 
   SDValue SetCC =
     DAG.getNode(X86ISD::SETCC, N->getValueType(1),
-                DAG.getConstant((Op.getOpcode() == ISD::SADDO) ?
-                                  X86::COND_O : X86::COND_C,
+                DAG.getConstant(Cond,
                                 MVT::i32), SDValue(Sum.getNode(), 1));
 
   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
@@ -6259,8 +6298,12 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
   case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
   case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
-  case ISD::SADDO:              return LowerXADDO(Op, DAG);
-  case ISD::UADDO:              return LowerXADDO(Op, DAG);
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:              return LowerXALUO(Op, DAG);
   case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, DAG);
   }
 }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 4a854443284..6eb78f692fb 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -593,7 +593,7 @@ namespace llvm {
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
     SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
     SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
-    SDValue LowerXADDO(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
 
     SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
     SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 5f63e56e6f9..d6d08b9f0ab 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -379,29 +379,36 @@ def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
 let isTwoAddress = 1 in {
 def SUB64rr  : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
                   "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
+                  [(set GR64:$dst, (sub GR64:$src1, GR64:$src2)),
+                   (implicit EFLAGS)]>;
 
 def SUB64rm  : RI<0x2B, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
                   "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2)))]>;
+                  [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2))),
+                   (implicit EFLAGS)]>;
 
 def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
                       "sub{q}\t{$src2, $dst|$dst, $src2}",
-                      [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2))]>;
+                      [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2)),
+                   (implicit EFLAGS)]>;
 def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
                     "sub{q}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2))]>;
+                    [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2)),
+                   (implicit EFLAGS)]>;
 } // isTwoAddress
 
 def SUB64mr  : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
                   "sub{q}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sub (load addr:$dst), GR64:$src2), addr:$dst)]>;
+                  [(store (sub (load addr:$dst), GR64:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
 def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2), 
                       "sub{q}\t{$src2, $dst|$dst, $src2}",
-               [(store (sub (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>;
+               [(store (sub (load addr:$dst), i64immSExt32:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
 def SUB64mi8 : RIi8<0x83, MRM5m, (outs), (ins i64mem:$dst, i64i8imm :$src2), 
                     "sub{q}\t{$src2, $dst|$dst, $src2}",
-                [(store (sub (load addr:$dst), i64immSExt8:$src2), addr:$dst)]>;
+                [(store (sub (load addr:$dst), i64immSExt8:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
 
 let Uses = [EFLAGS] in {
 let isTwoAddress = 1 in {
@@ -454,30 +461,36 @@ let isTwoAddress = 1 in {
 let isCommutable = 1 in
 def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
                   "imul{q}\t{$src2, $dst|$dst, $src2}",
-                  [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>, TB;
+                  [(set GR64:$dst, (mul GR64:$src1, GR64:$src2)),
+                   (implicit EFLAGS)]>, TB;
 
 def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
                   "imul{q}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2)))]>, TB;
+                 [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2))),
+                  (implicit EFLAGS)]>, TB;
 } // isTwoAddress
 
 // Suprisingly enough, these are not two address instructions!
 def IMUL64rri32 : RIi32<0x69, MRMSrcReg,                    // GR64 = GR64*I32
                         (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
                         "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                       [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>;
+                       [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2)),
+                        (implicit EFLAGS)]>;
 def IMUL64rri8 : RIi8<0x6B, MRMSrcReg,                      // GR64 = GR64*I8
                       (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
                       "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2))]>;
+                      [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2)),
+                       (implicit EFLAGS)]>;
 def IMUL64rmi32 : RIi32<0x69, MRMSrcMem,                   // GR64 = [mem64]*I32
                         (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
                         "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                [(set GR64:$dst, (mul (load addr:$src1), i64immSExt32:$src2))]>;
+                [(set GR64:$dst, (mul (load addr:$src1), i64immSExt32:$src2)),
+                 (implicit EFLAGS)]>;
 def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem,                      // GR64 = [mem64]*I8
                       (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
                       "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                 [(set GR64:$dst, (mul (load addr:$src1), i64immSExt8:$src2))]>;
+                 [(set GR64:$dst, (mul (load addr:$src1), i64immSExt8:$src2)),
+                  (implicit EFLAGS)]>;
 } // Defs = [EFLAGS]
 
 // Unsigned division / remainder
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 37579e8d57b..3834f843749 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -709,10 +709,10 @@ def MUL8r  : I<0xF6, MRM4r, (outs),  (ins GR8:$src), "mul{b}\t$src",
                // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
                // This probably ought to be moved to a def : Pat<> if the
                // syntax can be accepted.
-               [(set AL, (mul AL, GR8:$src))]>;               // AL,AH = AL*GR8
+               [(set AL, (mul AL, GR8:$src))]>;     // AL,AH = AL*GR8
 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
-def MUL16r : I<0xF7, MRM4r, (outs),  (ins GR16:$src), "mul{w}\t$src", []>,
-             OpSize;    // AX,DX = AX*GR16
+def MUL16r : I<0xF7, MRM4r, (outs),  (ins GR16:$src), "mul{w}\t$src", 
+             []>, OpSize;    // AX,DX = AX*GR16
 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
 def MUL32r : I<0xF7, MRM4r, (outs),  (ins GR32:$src), "mul{l}\t$src", []>;
                        // EAX,EDX = EAX*GR32
@@ -2054,67 +2054,82 @@ let isTwoAddress = 0 in {
 
 def SUB8rr   : I<0x28, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
                  "sub{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (sub GR8:$src1, GR8:$src2))]>;
+                 [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
+                  (implicit EFLAGS)]>;
 def SUB16rr  : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
                  "sub{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (sub GR16:$src1, GR16:$src2))]>, OpSize;
+                 [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
+                  (implicit EFLAGS)]>, OpSize;
 def SUB32rr  : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
                  "sub{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
+                 [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)),
+                  (implicit EFLAGS)]>;
 def SUB8rm   : I<0x2A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
                  "sub{b}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2)))]>;
+                 [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
+                  (implicit EFLAGS)]>;
 def SUB16rm  : I<0x2B, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
                  "sub{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2)))]>, OpSize;
+                 [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
+                  (implicit EFLAGS)]>, OpSize;
 def SUB32rm  : I<0x2B, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
                  "sub{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2)))]>;
+                 [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2))),
+                  (implicit EFLAGS)]>;
 
 def SUB8ri   : Ii8 <0x80, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
                     "sub{b}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR8:$dst, (sub GR8:$src1, imm:$src2))]>;
+                    [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
+                    (implicit EFLAGS)]>;
 def SUB16ri  : Ii16<0x81, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
                     "sub{w}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR16:$dst, (sub GR16:$src1, imm:$src2))]>, OpSize;
+                    [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
+                    (implicit EFLAGS)]>, OpSize;
 def SUB32ri  : Ii32<0x81, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
                     "sub{l}\t{$src2, $dst|$dst, $src2}",
-                    [(set GR32:$dst, (sub GR32:$src1, imm:$src2))]>;
+                    [(set GR32:$dst, (sub GR32:$src1, imm:$src2)),
+                    (implicit EFLAGS)]>;
 def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
                    "sub{w}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2))]>,
-               OpSize;
+                   [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2)),
+                   (implicit EFLAGS)]>, OpSize;
 def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
                    "sub{l}\t{$src2, $dst|$dst, $src2}",
-                   [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2))]>;
+                   [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2)),
+                   (implicit EFLAGS)]>;
 let isTwoAddress = 0 in {
   def SUB8mr   : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
                    "sub{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR8:$src2), addr:$dst)]>;
+                   [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
   def SUB16mr  : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
                    "sub{w}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR16:$src2), addr:$dst)]>,
-                 OpSize;
+                   [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
+                   (implicit EFLAGS)]>, OpSize;
   def SUB32mr  : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
                    "sub{l}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (load addr:$dst), GR32:$src2), addr:$dst)]>;
+                   [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
   def SUB8mi   : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2), 
                      "sub{b}\t{$src2, $dst|$dst, $src2}",
-                   [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+                   [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
+                   (implicit EFLAGS)]>;
   def SUB16mi  : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2), 
                       "sub{w}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sub (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
-                 OpSize;
+                  [(store (sub (loadi16 addr:$dst), imm:$src2), addr:$dst),
+                  (implicit EFLAGS)]>, OpSize;
   def SUB32mi  : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2), 
                       "sub{l}\t{$src2, $dst|$dst, $src2}",
-                  [(store (sub (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+                  [(store (sub (loadi32 addr:$dst), imm:$src2), addr:$dst),
+                  (implicit EFLAGS)]>;
   def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2), 
                      "sub{w}\t{$src2, $dst|$dst, $src2}",
-                [(store (sub (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
-                 OpSize;
+                [(store (sub (load addr:$dst), i16immSExt8:$src2), addr:$dst),
+                  (implicit EFLAGS)]>, OpSize;
   def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2), 
                      "sub{l}\t{$src2, $dst|$dst, $src2}",
-                [(store (sub (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+                [(store (sub (load addr:$dst), i32immSExt8:$src2), addr:$dst),
+                (implicit EFLAGS)]>;
 }
 
 let Uses = [EFLAGS] in {
@@ -2152,18 +2167,22 @@ let Defs = [EFLAGS] in {
 let isCommutable = 1 in {  // X = IMUL Y, Z --> X = IMUL Z, Y
 def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
                  "imul{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (mul GR16:$src1, GR16:$src2))]>, TB, OpSize;
+                 [(set GR16:$dst, (mul GR16:$src1, GR16:$src2)),
+                 (implicit EFLAGS)]>, TB, OpSize; // GR16 = GR16*GR16; EFLAGS added as an implicit def.
 def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
                  "imul{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>, TB;
+                 [(set GR32:$dst, (mul GR32:$src1, GR32:$src2)),
+                 (implicit EFLAGS)]>, TB; // GR32 = GR32*GR32; EFLAGS added as an implicit def.
 }
 def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
                  "imul{w}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2)))]>,
+                 [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))),
+                 (implicit EFLAGS)]>, // GR16 = GR16*[mem16]; EFLAGS added as an implicit def.
                  TB, OpSize;
 def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
                  "imul{l}\t{$src2, $dst|$dst, $src2}",
-                 [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2)))]>, TB;
+                 [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))),
+                 (implicit EFLAGS)]>, TB; // GR32 = GR32*[mem32]; EFLAGS added as an implicit def.
 } // Defs = [EFLAGS]
 } // end Two Address instructions
 
@@ -2172,39 +2191,44 @@ let Defs = [EFLAGS] in {
 def IMUL16rri  : Ii16<0x69, MRMSrcReg,                      // GR16 = GR16*I16
                       (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
                       "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR16:$dst, (mul GR16:$src1, imm:$src2))]>, OpSize;
+                      [(set GR16:$dst, (mul GR16:$src1, imm:$src2)),
+                      (implicit EFLAGS)]>, OpSize; // EFLAGS added as an implicit def of the pattern.
 def IMUL32rri  : Ii32<0x69, MRMSrcReg,                      // GR32 = GR32*I32
                       (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
                       "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>;
+                      [(set GR32:$dst, (mul GR32:$src1, imm:$src2)),
+                      (implicit EFLAGS)]>; // EFLAGS added as an implicit def of the pattern.
 def IMUL16rri8 : Ii8<0x6B, MRMSrcReg,                       // GR16 = GR16*I8
                      (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2))]>,
-                     OpSize;
+                     [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2)),
+                     (implicit EFLAGS)]>, OpSize; // EFLAGS added as an implicit def of the pattern.
 def IMUL32rri8 : Ii8<0x6B, MRMSrcReg,                       // GR32 = GR32*I8
                      (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2))]>;
+                     [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2)),
+                     (implicit EFLAGS)]>; // EFLAGS added as an implicit def of the pattern.
 
 def IMUL16rmi  : Ii16<0x69, MRMSrcMem,                      // GR16 = [mem16]*I16
                       (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
                       "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR16:$dst, (mul (load addr:$src1), imm:$src2))]>,
-                      OpSize;
+                      [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)),
+                      (implicit EFLAGS)]>, OpSize; // EFLAGS added as an implicit def of the pattern.
 def IMUL32rmi  : Ii32<0x69, MRMSrcMem,                      // GR32 = [mem32]*I32
                       (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
                       "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                      [(set GR32:$dst, (mul (load addr:$src1), imm:$src2))]>;
+                      [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)),
+                      (implicit EFLAGS)]>; // EFLAGS added as an implicit def of the pattern.
 def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR16 = [mem16]*I8
                      (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
                      "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                  [(set GR16:$dst, (mul (load addr:$src1), i16immSExt8:$src2))]>,
-                     OpSize;
+                  [(set GR16:$dst, (mul (load addr:$src1), i16immSExt8:$src2)),
+                  (implicit EFLAGS)]>, OpSize; // EFLAGS added as an implicit def of the pattern.
 def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem,                       // GR32 = [mem32]*I8
                      (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
                      "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                  [(set GR32:$dst, (mul (load addr:$src1), i32immSExt8:$src2))]>;
+                  [(set GR32:$dst, (mul (load addr:$src1), i32immSExt8:$src2)),
+                  (implicit EFLAGS)]>; // EFLAGS added as an implicit def of the pattern.
 } // Defs = [EFLAGS]
 
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/mul-with-overflow.ll b/test/CodeGen/X86/mul-with-overflow.ll
new file mode 100644
index 00000000000..107d54714e2
--- /dev/null
+++ b/test/CodeGen/X86/mul-with-overflow.ll
@@ -0,0 +1,41 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1
+; RUN: llvm-as < %s | llc -march=x86 | grep {jc} | count 1
+
+@ok = internal constant [4 x i8] c"%d\0A\00" ; "%d\n" format string, used on the non-overflow paths
+@no = internal constant [4 x i8] c"no\0A\00" ; "no\n", printed on the overflow/carry paths
+
+define i1 @func1(i32 %v1, i32 %v2) nounwind { ; signed multiply: returns true unless the overflow bit is set
+entry:
+  %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0 ; field 0 is the multiply result (the name %sum notwithstanding)
+  %obit = extractvalue {i32, i1} %t, 1 ; field 1 is the i1 overflow bit
+  br i1 %obit, label %overflow, label %normal
+
+normal: ; overflow bit clear: print the result through @ok
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+overflow: ; overflow bit set: print @no
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+define i1 @func2(i32 %v1, i32 %v2) nounwind { ; unsigned multiply: returns true unless the overflow bit is set
+entry:
+  %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0 ; field 0 is the multiply result (the name %sum notwithstanding)
+  %obit = extractvalue {i32, i1} %t, 1 ; field 1 is the i1 overflow bit
+  br i1 %obit, label %carry, label %normal
+
+normal: ; overflow bit clear: print the result through @ok
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+carry: ; overflow bit set: print @no
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+declare i32 @printf(i8*, ...) nounwind
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) ; signed multiply; yields {i32 result, i1 overflow bit}
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) ; unsigned multiply; yields {i32 result, i1 overflow bit}
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
new file mode 100644
index 00000000000..f51fc210819
--- /dev/null
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -0,0 +1,41 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep {jo} | count 1
+; RUN: llvm-as < %s | llc -march=x86 | grep {jc} | count 1
+
+@ok = internal constant [4 x i8] c"%d\0A\00" ; "%d\n" format string, used on the non-overflow paths
+@no = internal constant [4 x i8] c"no\0A\00" ; "no\n", printed on the overflow/carry paths
+
+define i1 @func1(i32 %v1, i32 %v2) nounwind { ; signed subtract: returns true unless the overflow bit is set
+entry:
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0 ; field 0 is the subtraction result (the name %sum notwithstanding)
+  %obit = extractvalue {i32, i1} %t, 1 ; field 1 is the i1 overflow bit
+  br i1 %obit, label %overflow, label %normal
+
+normal: ; overflow bit clear: print the result through @ok
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+overflow: ; overflow bit set: print @no
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+define i1 @func2(i32 %v1, i32 %v2) nounwind { ; unsigned subtract: returns true unless the overflow bit is set
+entry:
+  %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
+  %sum = extractvalue {i32, i1} %t, 0 ; field 0 is the subtraction result (the name %sum notwithstanding)
+  %obit = extractvalue {i32, i1} %t, 1 ; field 1 is the i1 overflow bit
+  br i1 %obit, label %carry, label %normal
+
+normal: ; overflow bit clear: print the result through @ok
+  %t1 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
+  ret i1 true
+
+carry: ; overflow bit set: print @no
+  %t2 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @no, i32 0, i32 0) ) nounwind
+  ret i1 false
+}
+
+declare i32 @printf(i8*, ...) nounwind
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) ; signed subtract; yields {i32 result, i1 overflow bit}
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) ; unsigned subtract; yields {i32 result, i1 overflow bit}