Invert the TargetLowering flag that controls divide by constant expansion.

[oota-llvm.git] / lib / CodeGen / SelectionDAG / DAGCombiner.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 69478e3b8460c1804826e9ede43b867e70204369..6c1d22c2bab7ec7189d58e7c1ad7de1e759f6154 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22,10 +22,8 @@
  //
  // FIXME: select C, pow2, pow2 -> something smart
  // FIXME: trunc(select X, Y, Z) -> select X, trunc(Y), trunc(Z)
-// FIXME: (select C, load A, load B) -> load (select C, A, B)
  // FIXME: Dead stores -> nuke
-// FIXME: shr X, (and Y,31) -> shr X, Y
-// FIXME: TRUNC (LOAD)   -> EXT_LOAD/LOAD(smaller)
+// FIXME: shr X, (and Y,31) -> shr X, Y   (TRICKY!)
  // FIXME: mul (x, const) -> shifts + adds
  // FIXME: undef values
  // FIXME: make truncate see through SIGN_EXTEND and AND
@@ -176,11 +174,15 @@ namespace {
      SDOperand visitLOAD(SDNode *N);
      SDOperand visitSTORE(SDNode *N);
  
+    bool SimplifySelectOps(SDNode *SELECT, SDOperand LHS, SDOperand RHS);
      SDOperand SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2);
      SDOperand SimplifySelectCC(SDOperand N0, SDOperand N1, SDOperand N2, 
                                 SDOperand N3, ISD::CondCode CC);
      SDOperand SimplifySetCC(MVT::ValueType VT, SDOperand N0, SDOperand N1,
                              ISD::CondCode Cond, bool foldBooleans = true);
+    
+    SDOperand BuildSDIV(SDNode *N);
+    SDOperand BuildUDIV(SDNode *N);    
  public:
      DAGCombiner(SelectionDAG &D)
        : DAG(D), TLI(D.getTargetLoweringInfo()), AfterLegalize(false) {}
@@ -190,6 +192,178 @@ public:
    };
  }
  
+struct ms {  // magic32/magic64 result: constants for a signed divide by constant
+  int64_t m;  // magic number (the multiplier BuildSDIV feeds to MULHS)
+  int64_t s;  // shift amount (BuildSDIV's SRA count, applied when > 0)
+};
+
+struct mu {  // magicu32/magicu64 result: constants for an unsigned divide
+  uint64_t m; // magic number (the multiplier BuildUDIV feeds to MULHU)
+  int64_t a;  // add indicator: nonzero makes BuildUDIV emit its sub/srl/add fixup
+  int64_t s;  // shift amount (count of the final SRL; s-1 when a is set)
+};
+
+/// magic32 - calculate the magic numbers required to codegen a 32-bit integer
+/// sdiv by the constant d as a sequence of multiply and shifts.  The returned
+/// ms holds the multiplier in 'm' (sign extended to 64 bits) and the shift
+/// amount in 's'.  Requires that the divisor not be 0, 1, or -1.
+static ms magic32(int32_t d) {
+  int32_t p;
+  uint32_t ad, anc, delta, q1, r1, q2, r2, t;
+  const uint32_t two31 = 0x80000000U;
+  struct ms mag;
+  
+  // Take |d| in unsigned arithmetic: abs(INT32_MIN) overflows a signed int,
+  // whereas 0 - (uint32_t)d is well defined and yields 2^31.
+  ad = d < 0 ? 0U - (uint32_t)d : (uint32_t)d;
+  t = two31 + ((uint32_t)d >> 31);
+  anc = t - 1 - t%ad;   // absolute value of nc
+  p = 31;               // initialize p
+  q1 = two31/anc;       // initialize q1 = 2^p/abs(nc)
+  r1 = two31 - q1*anc;  // initialize r1 = rem(2^p,abs(nc))
+  q2 = two31/ad;        // initialize q2 = 2^p/abs(d)
+  r2 = two31 - q2*ad;   // initialize r2 = rem(2^p,abs(d))
+  do {
+    p = p + 1;
+    q1 = 2*q1;        // update q1 = 2^p/abs(nc)
+    r1 = 2*r1;        // update r1 = rem(2^p,abs(nc))
+    if (r1 >= anc) {  // must be unsigned comparison
+      q1 = q1 + 1;
+      r1 = r1 - anc;
+    }
+    q2 = 2*q2;        // update q2 = 2^p/abs(d)
+    r2 = 2*r2;        // update r2 = rem(2^p,abs(d))
+    if (r2 >= ad) {   // must be unsigned comparison
+      q2 = q2 + 1;
+      r2 = r2 - ad;
+    }
+    delta = ad - r2;
+  } while (q1 < delta || (q1 == delta && r1 == 0));
+  
+  mag.m = (int32_t)(q2 + 1); // make sure to sign extend
+  if (d < 0) mag.m = -mag.m; // resulting magic number
+  mag.s = p - 32;            // resulting shift
+  return mag;
+}
+
+/// magicu32 - derive the constants used to codegen a 32-bit integer udiv by
+/// the constant d as a sequence of multiply, add and shifts.  The returned mu
+/// holds the multiplier ('m'), the "add" indicator ('a') and the shift amount
+/// ('s').  Requires that the divisor not be 0.
+static mu magicu32(uint32_t d) {
+  struct mu result;
+  result.a = 0;                        // "add" indicator starts cleared
+  const uint32_t nc = - 1 - (-d)%d;
+  int32_t p = 31;
+  uint32_t q1 = 0x80000000/nc;         // q1 = 2^p/nc
+  uint32_t r1 = 0x80000000 - q1*nc;    // r1 = rem(2^p,nc)
+  uint32_t q2 = 0x7FFFFFFF/d;          // q2 = (2^p-1)/d
+  uint32_t r2 = 0x7FFFFFFF - q2*d;     // r2 = rem((2^p-1),d)
+  uint32_t delta;
+  do {
+    ++p;
+    // Double q1/r1, carrying one into the quotient when the doubled
+    // remainder would reach nc.
+    const bool carry1 = r1 >= nc - r1;
+    q1 = 2*q1 + (carry1 ? 1U : 0U);
+    r1 = 2*r1 - (carry1 ? nc : 0U);
+    // Same doubling step for q2/r2 against d.  The "add" indicator is set
+    // once q2 has reached the threshold that applies to the branch taken.
+    const bool carry2 = r2 + 1 >= d - r2;
+    if (q2 >= (carry2 ? 0x7FFFFFFFU : 0x80000000U))
+      result.a = 1;
+    q2 = 2*q2 + (carry2 ? 1U : 0U);
+    r2 = 2*r2 + 1 - (carry2 ? d : 0U);
+    delta = d - 1 - r2;
+  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
+  result.m = q2 + 1;  // resulting magic number
+  result.s = p - 32;  // resulting shift
+  return result;
+}
+
+/// magic64 - calculate the magic numbers required to codegen a 64-bit integer
+/// sdiv by the constant d as a sequence of multiply and shifts.  The returned
+/// ms holds the multiplier in 'm' and the shift amount in 's'.  Requires that
+/// the divisor not be 0, 1, or -1.
+static ms magic64(int64_t d) {
+  int64_t p;
+  uint64_t ad, anc, delta, q1, r1, q2, r2, t;
+  const uint64_t two63 = 9223372036854775808ULL; // 2^63
+  struct ms mag;
+  
+  // Take |d| in unsigned arithmetic: negating INT64_MIN overflows a signed
+  // 64-bit value, whereas 0 - (uint64_t)d is well defined and yields 2^63.
+  ad = d < 0 ? 0ULL - (uint64_t)d : (uint64_t)d;
+  t = two63 + ((uint64_t)d >> 63);
+  anc = t - 1 - t%ad;   // absolute value of nc
+  p = 63;               // initialize p
+  q1 = two63/anc;       // initialize q1 = 2^p/abs(nc)
+  r1 = two63 - q1*anc;  // initialize r1 = rem(2^p,abs(nc))
+  q2 = two63/ad;        // initialize q2 = 2^p/abs(d)
+  r2 = two63 - q2*ad;   // initialize r2 = rem(2^p,abs(d))
+  do {
+    p = p + 1;
+    q1 = 2*q1;        // update q1 = 2^p/abs(nc)
+    r1 = 2*r1;        // update r1 = rem(2^p,abs(nc))
+    if (r1 >= anc) {  // must be unsigned comparison
+      q1 = q1 + 1;
+      r1 = r1 - anc;
+    }
+    q2 = 2*q2;        // update q2 = 2^p/abs(d)
+    r2 = 2*r2;        // update r2 = rem(2^p,abs(d))
+    if (r2 >= ad) {   // must be unsigned comparison
+      q2 = q2 + 1;
+      r2 = r2 - ad;
+    }
+    delta = ad - r2;
+  } while (q1 < delta || (q1 == delta && r1 == 0));
+  
+  mag.m = q2 + 1;
+  if (d < 0) mag.m = -mag.m; // resulting magic number
+  mag.s = p - 64;            // resulting shift
+  return mag;
+}
+
+/// magicu64 - calculate the magic numbers required to codegen a 64-bit integer
+/// udiv by the constant d as a sequence of multiply, add and shifts.  The
+/// returned mu holds the multiplier ('m'), the "add" indicator ('a') and the
+/// shift amount ('s').  Requires that the divisor not be 0.
+static mu magicu64(uint64_t d)
+{
+  int64_t p;
+  uint64_t nc, delta, q1, r1, q2, r2;
+  struct mu magu;
+  magu.a = 0;               // initialize "add" indicator
+  nc = - 1 - (-d)%d;
+  p = 63;                   // initialize p
+  q1 = 0x8000000000000000ull/nc;       // initialize q1 = 2^p/nc
+  r1 = 0x8000000000000000ull - q1*nc;  // initialize r1 = rem(2^p,nc)
+  q2 = 0x7FFFFFFFFFFFFFFFull/d;        // initialize q2 = (2^p-1)/d
+  r2 = 0x7FFFFFFFFFFFFFFFull - q2*d;   // initialize r2 = rem((2^p-1),d)
+  do {
+    p = p + 1;
+    if (r1 >= nc - r1 ) {
+      q1 = 2*q1 + 1;  // update q1
+      r1 = 2*r1 - nc; // update r1
+    }
+    else {
+      q1 = 2*q1; // update q1
+      r1 = 2*r1; // update r1
+    }
+    if (r2 + 1 >= d - r2) {
+      if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
+      q2 = 2*q2 + 1;     // update q2
+      r2 = 2*r2 + 1 - d; // update r2
+    }
+    else {
+      if (q2 >= 0x8000000000000000ull) magu.a = 1;
+      q2 = 2*q2;     // update q2
+      r2 = 2*r2 + 1; // update r2
+    }
+    delta = d - 1 - r2;
+    // p starts at 63 here, so the iteration cap must be twice the word size
+    // (128), mirroring the 32-bit routine's cap of 64.  A cap of 64 would stop
+    // the search after a single iteration.
+  } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
+  magu.m = q2 + 1; // resulting magic number
+  magu.s = p - 64;  // resulting shift
+  return magu;
+}
+
  /// MaskedValueIsZero - Return true if 'Op & Mask' is known to be zero.  We use
  /// this predicate to simplify operations downstream.  Op and Mask are known to
  /// be the same type.
@@ -494,10 +668,8 @@ SDOperand DAGCombiner::visitADD(SDNode *N) {
    if (N0C && N1C)
      return DAG.getConstant(N0C->getValue() + N1C->getValue(), VT);
    // canonicalize constant to RHS
-  if (N0C && !N1C) {
-    std::swap(N0, N1);
-    std::swap(N0C, N1C);
-  }
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADD, VT, N1, N0);
    // fold (add x, 0) -> x
    if (N1C && N1C->isNullValue())
      return N0;
@@ -566,17 +738,14 @@ SDOperand DAGCombiner::visitMUL(SDNode *N) {
      return DAG.getConstant(N0C->getValue() * N1C->getValue(),
                             N->getValueType(0));
    // canonicalize constant to RHS
-  if (N0C && !N1C) {
-    std::swap(N0, N1);
-    std::swap(N0C, N1C);
-  }
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::MUL, VT, N1, N0);
    // fold (mul x, 0) -> 0
    if (N1C && N1C->isNullValue())
      return N1;
    // fold (mul x, -1) -> 0-x
    if (N1C && N1C->isAllOnesValue())
-    return DAG.getNode(ISD::SUB, N->getValueType(0), 
-                       DAG.getConstant(0, N->getValueType(0)), N0);
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
    // fold (mul x, (1 << c)) -> x << c
    if (N1C && isPowerOf2_64(N1C->getValue()))
      return DAG.getNode(ISD::SHL, N->getValueType(0), N0,
@@ -607,18 +776,58 @@ SDOperand DAGCombiner::visitSDIV(SDNode *N) {
    if (N0C && N1C && !N1C->isNullValue())
      return DAG.getConstant(N0C->getSignExtended() / N1C->getSignExtended(),
                             N->getValueType(0));
+  // fold (sdiv X, 1) -> X
+  if (N1C && N1C->getSignExtended() == 1LL)
+    return N0;
+  // fold (sdiv X, -1) -> 0-X
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), N0);
    // If we know the sign bits of both operands are zero, strength reduce to a
    // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
    uint64_t SignBit = 1ULL << (MVT::getSizeInBits(VT)-1);
    if (MaskedValueIsZero(N1, SignBit, TLI) &&
        MaskedValueIsZero(N0, SignBit, TLI))
      return DAG.getNode(ISD::UDIV, N1.getValueType(), N0, N1);
+  // fold (sdiv X, pow2) -> (add (sra X, log(pow2)), (srl X, sizeof(X)-1))
+  if (N1C && N1C->getValue() && !TLI.isIntDivCheap() && 
+      (isPowerOf2_64(N1C->getSignExtended()) || 
+       isPowerOf2_64(-N1C->getSignExtended()))) {
+    // If dividing by powers of two is cheap, then don't perform the following
+    // fold.
+    if (TLI.isPow2DivCheap())
+      return SDOperand();
+    int64_t pow2 = N1C->getSignExtended();
+    int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+    SDOperand SRL = DAG.getNode(ISD::SRL, VT, N0,
+                                DAG.getConstant(MVT::getSizeInBits(VT)-1,
+                                                TLI.getShiftAmountTy()));
+    WorkList.push_back(SRL.Val);
+    SDOperand SGN = DAG.getNode(ISD::ADD, VT, N0, SRL);
+    WorkList.push_back(SGN.Val);
+    SDOperand SRA = DAG.getNode(ISD::SRA, VT, SGN, 
+                                DAG.getConstant(Log2_64(abs2),
+                                                TLI.getShiftAmountTy()));
+    // If we're dividing by a positive value, we're done.  Otherwise, we must
+    // negate the result.
+    if (pow2 > 0)
+      return SRA;
+    WorkList.push_back(SRA.Val);
+    return DAG.getNode(ISD::SUB, VT, DAG.getConstant(0, VT), SRA);
+  }
+  // if integer divide is expensive and we satisfy the requirements, emit an
+  // alternate sequence.
+  if (N1C && (N1C->getSignExtended() < -1 || N1C->getSignExtended() > 1) && 
+      !TLI.isIntDivCheap() &&
+      TLI.isOperationLegal(ISD::MULHS, VT) && TLI.isTypeLegal(VT)) {
+    return BuildSDIV(N);
+  }
    return SDOperand();
  }
  
  SDOperand DAGCombiner::visitUDIV(SDNode *N) {
    SDOperand N0 = N->getOperand(0);
    SDOperand N1 = N->getOperand(1);
+  MVT::ValueType VT = N->getValueType(0);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.Val);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val);
    
@@ -631,6 +840,10 @@ SDOperand DAGCombiner::visitUDIV(SDNode *N) {
      return DAG.getNode(ISD::SRL, N->getValueType(0), N0,
                         DAG.getConstant(Log2_64(N1C->getValue()),
                                         TLI.getShiftAmountTy()));
+  // fold (udiv x, c) -> alternate
+  if (N1C && N1C->getValue() && TLI.isOperationLegal(ISD::MULHU, VT) &&
+      TLI.isTypeLegal(VT) && !TLI.isIntDivCheap())
+    return BuildUDIV(N);
    return SDOperand();
  }
  
@@ -714,10 +927,8 @@ SDOperand DAGCombiner::visitAND(SDNode *N) {
    if (N0C && N1C)
      return DAG.getConstant(N0C->getValue() & N1C->getValue(), VT);
    // canonicalize constant to RHS
-  if (N0C && !N1C) {
-    std::swap(N0, N1);
-    std::swap(N0C, N1C);
-  }
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::AND, VT, N1, N0);
    // fold (and x, -1) -> x
    if (N1C && N1C->isAllOnesValue())
      return N0;
@@ -837,7 +1048,7 @@ SDOperand DAGCombiner::visitAND(SDNode *N) {
      }
    }
    // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
-  if (N0.getOpcode() == ISD::SEXTLOAD && N0.Val->hasNUsesOfValue(1, 0)) {
+  if (N0.getOpcode() == ISD::SEXTLOAD && N0.hasOneUse()) {
      MVT::ValueType EVT = cast<VTSDNode>(N0.getOperand(3))->getVT();
      // If we zero all the possible extended bits, then we can turn this into
      // a zextload if we are running before legalize or the operation is legal.
@@ -868,10 +1079,8 @@ SDOperand DAGCombiner::visitOR(SDNode *N) {
      return DAG.getConstant(N0C->getValue() | N1C->getValue(),
                             N->getValueType(0));
    // canonicalize constant to RHS
-  if (N0C && !N1C) {
-    std::swap(N0, N1);
-    std::swap(N0C, N1C);
-  }
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::OR, VT, N1, N0);
    // fold (or x, 0) -> x
    if (N1C && N1C->isNullValue())
      return N0;
@@ -953,10 +1162,8 @@ SDOperand DAGCombiner::visitXOR(SDNode *N) {
    if (N0C && N1C)
      return DAG.getConstant(N0C->getValue() ^ N1C->getValue(), VT);
    // canonicalize constant to RHS
-  if (N0C && !N1C) {
-    std::swap(N0, N1);
-    std::swap(N0C, N1C);
-  }
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::XOR, VT, N1, N0);
    // fold (xor x, 0) -> x
    if (N1C && N1C->isNullValue())
      return N0;
@@ -1219,6 +1426,11 @@ SDOperand DAGCombiner::visitSELECT(SDNode *N) {
    // fold X ? Y : X --> X ? Y : 0 --> X & Y
    if (MVT::i1 == VT && N0 == N2)
      return DAG.getNode(ISD::AND, VT, N0, N1);
+  
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N1, N2))
+    return SDOperand();
+  
    // fold selects based on a setcc into other things, such as min/max/abs
    if (N0.getOpcode() == ISD::SETCC)
      return SimplifySelect(N0, N1, N2);
@@ -1243,6 +1455,11 @@ SDOperand DAGCombiner::visitSELECT_CC(SDNode *N) {
    // fold select_cc lhs, rhs, x, x, cc -> x
    if (N2 == N3)
      return N2;
+  
+  // If we can fold this based on the true/false value, do so.
+  if (SimplifySelectOps(N, N2, N3))
+    return SDOperand();
+  
    // fold select_cc into other things, such as min/max/abs
    return SimplifySelectCC(N0, N1, N2, N3, CC);
  }
@@ -1307,7 +1524,7 @@ SDOperand DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
    if (N0.getOpcode() == ISD::SEXTLOAD && VT == N0.getValueType())
      return N0;
    // fold (sext (load x)) -> (sextload x)
-  if (N0.getOpcode() == ISD::LOAD && N0.Val->hasNUsesOfValue(1, 0)) {
+  if (N0.getOpcode() == ISD::LOAD && N0.hasOneUse()) {
      SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N0.getOperand(0),
                                         N0.getOperand(1), N0.getOperand(2),
                                         N0.getValueType());
@@ -1394,7 +1611,7 @@ SDOperand DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
      return SDOperand();
    }
    // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
-  if (N0.getOpcode() == ISD::ZEXTLOAD && N0.Val->hasNUsesOfValue(1, 0) &&
+  if (N0.getOpcode() == ISD::ZEXTLOAD && N0.hasOneUse() &&
        EVT == cast<VTSDNode>(N0.getOperand(3))->getVT() &&
        (!AfterLegalize || TLI.isOperationLegal(ISD::SEXTLOAD, EVT))) {
      SDOperand ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N0.getOperand(0),
@@ -1435,7 +1652,7 @@ SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) {
        return N0.getOperand(0);
    }
    // fold (truncate (load x)) -> (smaller load x)
-  if (N0.getOpcode() == ISD::LOAD && N0.Val->hasNUsesOfValue(1, 0)) {
+  if (N0.getOpcode() == ISD::LOAD && N0.hasOneUse()) {
      assert(MVT::getSizeInBits(N0.getValueType()) > MVT::getSizeInBits(VT) &&
             "Cannot truncate to larger type!");
      MVT::ValueType PtrType = N0.getOperand(1).getValueType();
@@ -1459,54 +1676,57 @@ SDOperand DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDOperand DAGCombiner::visitFADD(SDNode *N) {
    SDOperand N0 = N->getOperand(0);
    SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
    MVT::ValueType VT = N->getValueType(0);
-
-  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0))
-    if (ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
-      // fold floating point (fadd c1, c2)
-      return DAG.getConstantFP(N0CFP->getValue() + N1CFP->getValue(),
-                               N->getValueType(0));
-    }
+  
+  // fold (fadd c1, c2) -> c1+c2
+  if (N0CFP && N1CFP)
+    return DAG.getConstantFP(N0CFP->getValue() + N1CFP->getValue(), VT);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FADD, VT, N1, N0);
    // fold (A + (-B)) -> A-B
    if (N1.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::FSUB, VT, N0, N1.getOperand(0));
-  
    // fold ((-A) + B) -> B-A
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::FSUB, VT, N1, N0.getOperand(0));
-  
    return SDOperand();
  }
  
  SDOperand DAGCombiner::visitFSUB(SDNode *N) {
    SDOperand N0 = N->getOperand(0);
    SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
    MVT::ValueType VT = N->getValueType(0);
-
-  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0))
-    if (ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
-      // fold floating point (fsub c1, c2)
-      return DAG.getConstantFP(N0CFP->getValue() - N1CFP->getValue(),
-                               N->getValueType(0));
-    }
+  
+  // fold (fsub c1, c2) -> c1-c2
+  if (N0CFP && N1CFP)
+    return DAG.getConstantFP(N0CFP->getValue() - N1CFP->getValue(), VT);
    // fold (A-(-B)) -> A+B
    if (N1.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::FADD, N0.getValueType(), N0, N1.getOperand(0));
-  
    return SDOperand();
  }
  
  SDOperand DAGCombiner::visitFMUL(SDNode *N) {
    SDOperand N0 = N->getOperand(0);
    SDOperand N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
    MVT::ValueType VT = N->getValueType(0);
  
-  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0))
-    if (ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
-      // fold floating point (fmul c1, c2)
-      return DAG.getConstantFP(N0CFP->getValue() * N1CFP->getValue(),
-                               N->getValueType(0));
-    }
+  // fold (fmul c1, c2) -> c1*c2
+  if (N0CFP && N1CFP)
+    return DAG.getConstantFP(N0CFP->getValue() * N1CFP->getValue(), VT);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FMUL, VT, N1, N0);
+  // fold (fmul X, 2.0) -> (fadd X, X)
+  if (N1CFP && N1CFP->isExactlyValue(+2.0))
+    return DAG.getNode(ISD::FADD, VT, N0, N0);
    return SDOperand();
  }
  
@@ -1518,8 +1738,7 @@ SDOperand DAGCombiner::visitFDIV(SDNode *N) {
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0))
      if (ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
        // fold floating point (fdiv c1, c2)
-      return DAG.getConstantFP(N0CFP->getValue() / N1CFP->getValue(),
-                               N->getValueType(0));
+      return DAG.getConstantFP(N0CFP->getValue() / N1CFP->getValue(), VT);
      }
    return SDOperand();
  }
@@ -1532,8 +1751,7 @@ SDOperand DAGCombiner::visitFREM(SDNode *N) {
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0))
      if (ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
        // fold floating point (frem c1, c2) -> fmod(c1, c2)
-      return DAG.getConstantFP(fmod(N0CFP->getValue(),N1CFP->getValue()),
-                               N->getValueType(0));
+      return DAG.getConstantFP(fmod(N0CFP->getValue(),N1CFP->getValue()), VT);
      }
    return SDOperand();
  }
@@ -1787,6 +2005,72 @@ SDOperand DAGCombiner::SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2){
    return SDOperand();
  }
  
+/// SimplifySelectOps - Try to simplify a SELECT or SELECT_CC node (TheSelect)
+/// by looking at the two values it chooses between, LHS and RHS.  Returns true
+/// when the select has been replaced through CombineTo, and false when nothing
+/// was changed.
+///
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDOperand LHS, 
+                                    SDOperand RHS) {
+  // Only a select between two single-use values produced by the same kind of
+  // operation is a candidate for pulling the operation through the select.
+  if (LHS.getOpcode() != RHS.getOpcode() || !LHS.hasOneUse() ||
+      !RHS.hasOneUse())
+    return false;
+
+#if 0
+  std::cerr << "SELECT: ["; LHS.Val->dump();
+  std::cerr << "] ["; RHS.Val->dump();
+  std::cerr << "]\n";
+#endif
+
+  // The only operation handled is a load: a select of two loads becomes one
+  // load through a select of the two addresses.  This triggers in things like
+  // "select bool X, 10.0, 123.0" after the FP constants have been dropped into
+  // the constant pool.
+  const bool IsPlainLoad = LHS.getOpcode() == ISD::LOAD;
+  const bool IsExtLoad = LHS.getOpcode() == ISD::EXTLOAD ||
+                         LHS.getOpcode() == ISD::ZEXTLOAD ||
+                         LHS.getOpcode() == ISD::SEXTLOAD;
+  if (!IsPlainLoad && !IsExtLoad)
+    return false;
+  // Token chains must be identical.
+  if (!(LHS.getOperand(0) == RHS.getOperand(0)))
+    return false;
+  // If this is an EXTLOAD, the VT's must match.
+  if (!IsPlainLoad && !(LHS.getOperand(3) == RHS.getOperand(3)))
+    return false;
+
+  // FIXME: this conflates two src values, discarding one.  This is not
+  // the right thing to do, but nothing uses srcvalues now.  When they do,
+  // turn SrcValue into a list of locations.
+  MVT::ValueType PtrVT = LHS.getOperand(1).getValueType();
+  SDOperand Addr;
+  if (TheSelect->getOpcode() == ISD::SELECT) {
+    Addr = DAG.getNode(ISD::SELECT, PtrVT, TheSelect->getOperand(0),
+                       LHS.getOperand(1), RHS.getOperand(1));
+  } else {
+    Addr = DAG.getNode(ISD::SELECT_CC, PtrVT, TheSelect->getOperand(0),
+                       TheSelect->getOperand(1), 
+                       LHS.getOperand(1), RHS.getOperand(1),
+                       TheSelect->getOperand(4));
+  }
+
+  // Rebuild the load (same chain and source value) on the selected address.
+  SDOperand Load;
+  if (IsPlainLoad) {
+    Load = DAG.getLoad(TheSelect->getValueType(0), LHS.getOperand(0),
+                       Addr, LHS.getOperand(2));
+  } else {
+    Load = DAG.getExtLoad(LHS.getOpcode(), TheSelect->getValueType(0),
+                          LHS.getOperand(0), Addr, LHS.getOperand(2),
+                          cast<VTSDNode>(LHS.getOperand(3))->getVT());
+  }
+
+  // Users of the select now use the result of the load.
+  CombineTo(TheSelect, Load);
+
+  // Users of the old loads now use the new load's chain.  We know the
+  // old-load value is dead now.
+  CombineTo(LHS.Val, Load.getValue(0), Load.getValue(1));
+  CombineTo(RHS.Val, Load.getValue(0), Load.getValue(1));
+  return true;
+}
+
  SDOperand DAGCombiner::SimplifySelectCC(SDOperand N0, SDOperand N1, 
                                          SDOperand N2, SDOperand N3,
                                          ISD::CondCode CC) {
@@ -2290,6 +2574,79 @@ SDOperand DAGCombiner::SimplifySetCC(MVT::ValueType VT, SDOperand N0,
    return SDOperand();
  }
  
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  Only i32 and i64 are supported (asserted),
+/// and the divisor must satisfy the requirements of magic32/magic64 (not 0, 1,
+/// or -1).  Intermediate nodes are pushed onto the combiner worklist.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildSDIV(SDNode *N) {
+  MVT::ValueType VT = N->getValueType(0);
+  assert((VT == MVT::i32 || VT == MVT::i64) && 
+         "BuildSDIV only operates on i32 or i64!");
+  
+  int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended();
+  ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);
+  
+  // Multiply the numerator (operand 0) by the magic value
+  SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
+                            DAG.getConstant(magics.m, VT));
+  // If d > 0 and m < 0, add the numerator
+  if (d > 0 && magics.m < 0) { 
+    Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
+    WorkList.push_back(Q.Val);
+  }
+  // If d < 0 and m > 0, subtract the numerator.
+  if (d < 0 && magics.m > 0) {
+    Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0));
+    WorkList.push_back(Q.Val);
+  }
+  // Shift right algebraic if shift value is nonzero
+  if (magics.s > 0) {
+    Q = DAG.getNode(ISD::SRA, VT, Q, 
+                    DAG.getConstant(magics.s, TLI.getShiftAmountTy()));
+    WorkList.push_back(Q.Val);
+  }
+  // Extract the sign bit and add it to the quotient.  The shift must be built
+  // in VT (not a hard-coded i32) so that an i64 divide yields an i64 sign bit
+  // that can legally be added to Q.
+  SDOperand T =
+    DAG.getNode(ISD::SRL, VT, Q,
+                DAG.getConstant(MVT::getSizeInBits(VT)-1,
+                                TLI.getShiftAmountTy()));
+  WorkList.push_back(T.Val);
+  return DAG.getNode(ISD::ADD, VT, Q, T);
+}
+
+/// BuildUDIV - Given an ISD::UDIV node whose divisor is a constant, build and
+/// return a DAG expression that produces the same value by multiplying by a
+/// magic number.  Only i32 and i64 are supported (asserted).  Intermediate
+/// nodes are pushed onto the combiner worklist.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDOperand DAGCombiner::BuildUDIV(SDNode *N) {
+  MVT::ValueType VT = N->getValueType(0);
+  assert((VT == MVT::i32 || VT == MVT::i64) && 
+         "BuildUDIV only operates on i32 or i64!");
+
+  uint64_t Divisor = cast<ConstantSDNode>(N->getOperand(1))->getValue();
+  mu Magics = (VT == MVT::i32) ? magicu32(Divisor) : magicu64(Divisor);
+
+  // Q = high half of (numerator * magic).
+  SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
+                            DAG.getConstant(Magics.m, VT));
+  WorkList.push_back(Q.Val);
+
+  // With the add indicator clear, one logical shift finishes the job.
+  if (Magics.a == 0)
+    return DAG.getNode(ISD::SRL, VT, Q, 
+                       DAG.getConstant(Magics.s, TLI.getShiftAmountTy()));
+
+  // Otherwise compute (((numerator - Q) >> 1) + Q) >> (s - 1).
+  SDOperand Diff = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q);
+  WorkList.push_back(Diff.Val);
+  SDOperand Half = DAG.getNode(ISD::SRL, VT, Diff, 
+                               DAG.getConstant(1, TLI.getShiftAmountTy()));
+  WorkList.push_back(Half.Val);
+  SDOperand Sum = DAG.getNode(ISD::ADD, VT, Half, Q);
+  WorkList.push_back(Sum.Val);
+  return DAG.getNode(ISD::SRL, VT, Sum, 
+                     DAG.getConstant(Magics.s-1, TLI.getShiftAmountTy()));
+}
+
  // SelectionDAG::Combine - This is the entry point for the file.
  //
  void SelectionDAG::Combine(bool RunningAfterLegalize) {