SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitRotate(SDNode *N);
+ SDValue visitBSWAP(SDNode *N);
SDValue visitCTLZ(SDNode *N);
SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+ SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue visitMLOAD(SDNode *N);
SDValue visitMSTORE(SDNode *N);
+ SDValue visitMGATHER(SDNode *N);
+ SDValue visitMSCATTER(SDNode *N);
+ SDValue visitFP_TO_FP16(SDNode *N);
+
+ SDValue visitFADDForFMACombine(SDNode *N);
+ SDValue visitFSUBForFMACombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                   unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
+ SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
unsigned SequenceNum;
};
+ /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
+ /// constant build_vector of the stored constant values in Stores.
+ SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
+ SDLoc SL,
+ ArrayRef<MemOpLink> Stores,
+ EVT Ty) const;
+
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                     EVT MemVT, unsigned NumElem,
                                     bool IsConstantSrc, bool UseVector);
+ /// This is a helper function for MergeConsecutiveStores.
+ /// Stores that may be merged are placed in StoreNodes.
+ /// Loads that may alias with those stores are placed in AliasLoadNodes.
+ void getStoreMergeAndAliasCandidates(
+ StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
+ SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
+
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if some memory operations were changed.
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
- return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
- : TLI.getPointerTy();
+ auto &DL = DAG.getDataLayout();
+ return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
+ : TLI.getPointerTy(DL);
}
/// This method returns true if we are running before type legalization or
/// if the specified VT is legal.
/// Convenience wrapper around TargetLowering::getSetCCResultType
EVT getSetCCResultType(EVT VT) const {
- return TLI.getSetCCResultType(*DAG.getContext(), VT);
+ return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
};
}
case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();
- return DAG.getConstantFP(V, Op.getValueType());
+ return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
- if (N0CFP->getValueAPF().isZero())
+ if (N0CFP->isZero())
return Op.getOperand(1);
// fold (fneg (fsub A, B)) -> (fsub B, A)
if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
// reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
return SDValue();
}
if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
// reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
return SDValue();
}
continue;
if (N->use_empty()) {
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Nodes.insert(N->getOperand(i).getNode());
+ for (const SDValue &ChildN : N->op_values())
+ Nodes.insert(ChildN.getNode());
removeFromWorklist(N);
DAG.DeleteNode(N);
LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = DAG.allnodes_end(); I != E; ++I)
- AddToWorklist(I);
+ for (SDNode &Node : DAG.allnodes())
+ AddToWorklist(&Node);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
// changes of the root.
// Add any operands of the new node which have not yet been combined to the
// worklist as well. Because the worklist uniques things already, this
// won't repeatedly process the same operand.
CombinedNodes.insert(N);
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- if (!CombinedNodes.count(N->getOperand(i).getNode()))
- AddToWorklist(N->getOperand(i).getNode());
+ for (const SDValue &ChildN : N->op_values())
+ if (!CombinedNodes.count(ChildN.getNode()))
+ AddToWorklist(ChildN.getNode());
SDValue RV = combine(N);
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::BSWAP: return visitBSWAP(N);
case ISD::CTLZ: return visitCTLZ(N);
case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
+ case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
+ case ISD::MGATHER: return visitMGATHER(N);
case ISD::MLOAD: return visitMLOAD(N);
+ case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
}
return SDValue();
}
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
SDValue Ops[] = {N1, N0};
SDNode *CSENode;
- if (const BinaryWithFlagsSDNode *BinNode =
- dyn_cast<BinaryWithFlagsSDNode>(N)) {
- CSENode = DAG.getNodeIfExists(
- N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
- BinNode->hasNoSignedWrap(), BinNode->isExact());
+ if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) {
+ CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
+ &BinNode->Flags);
} else {
CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
}
SDNode *TF = TFs[i];
// Check each of the operands.
- for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
- SDValue Op = TF->getOperand(i);
+ for (const SDValue &Op : TF->op_values()) {
switch (Op.getOpcode()) {
case ISD::EntryToken:
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+static bool isNullConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isNullValue();
+}
+
+static bool isNullFPConstant(SDValue V) {
+ ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+ return Const != nullptr && Const->isZero() && !Const->isNegative();
+}
+
+static bool isAllOnesConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isAllOnesValue();
+}
+
+static bool isOneConstant(SDValue V) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isOne();
+}
+
+/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
+/// ConstantSDNode pointer; otherwise return nullptr.
+static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
+ return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
+}
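+// Note: getAsNonOpaqueConstant deliberately rejects opaque constants. A
+// constant is marked opaque (e.g. by ConstantHoisting) precisely so that
+// later folds do not rematerialize an expensive immediate, so it must not
+// be treated as a foldable constant here.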
+
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (add c1, c2) -> c1+c2
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
if (isConstantIntBuildVectorOrConstantInt(N0) &&
!isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
// fold (add x, 0) -> x
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N0;
// fold (add Sym, c) -> Sym+c
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
(uint64_t)N1C->getSExtValue());
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N1C && N0.getOpcode() == ISD::SUB)
- if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
+ if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(N1C->getAPIntValue()+
- N0C->getAPIntValue(), VT),
+ N0C->getAPIntValue(), DL, VT),
N0.getOperand(1));
+ }
// reassociate add
if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
return RADD;
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
- cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+ if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
- cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+ if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
}
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
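// (in two's complement, (0 - y) << n == 0 - (y << n), so the shift can
// absorb the negation)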
- if (N1.getOpcode() == ISD::SHL &&
- N1.getOperand(0).getOpcode() == ISD::SUB)
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
- if (C->getAPIntValue() == 0)
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
- N1.getOperand(0).getOperand(1),
- N1.getOperand(1)));
- if (N0.getOpcode() == ISD::SHL &&
- N0.getOperand(0).getOpcode() == ISD::SUB)
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
- if (C->getAPIntValue() == 0)
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
- N0.getOperand(0).getOperand(1),
- N0.getOperand(1)));
+ if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstant(N1.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
+ DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ N1.getOperand(0).getOperand(1),
+ N1.getOperand(1)));
+ if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
+ isNullConstant(N0.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
+ DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ N0.getOperand(0).getOperand(1),
+ N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
- ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
unsigned DestBits = VT.getScalarType().getSizeInBits();
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
}
if (TN->getVT() == MVT::i1) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
}
}
return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
SDLoc(N), MVT::Glue));
SelectionDAG &DAG,
bool LegalOperations, bool LegalTypes) {
if (!VT.isVector())
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
return SDValue();
}
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
// fold (sub c1, c2) -> c1-c2
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
// fold (sub x, c) -> (add x, -c)
- if (N1C)
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
- DAG.getConstant(-N1C->getAPIntValue(), VT));
+ if (N1C) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ADD, DL, VT, N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
- if (N0C && N0C->isAllOnesValue())
+ if (isAllOnesConstant(N0))
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDLoc DL(N);
SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
- VT);
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
+ DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, NewC,
N1.getOperand(0));
}
// fold ((A+(B+or-C))-B) -> A+or-C
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
- VT);
+ SDLoc(N), VT);
}
// sub X, (sextinreg Y i1) -> add X, (and Y 1)
if (TN->getVT() == MVT::i1) {
SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
- DAG.getConstant(1, VT));
+ DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
}
}
MVT::Glue));
// fold (subc x, x) -> 0 + no borrow
- if (N0 == N1)
- return CombineTo(N, DAG.getConstant(0, VT),
- DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
+ if (N0 == N1) {
+ SDLoc DL(N);
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::CARRY_FALSE, DL,
MVT::Glue));
+ }
// fold (subc x, 0) -> x + no borrow
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
- if (N0C && N0C->isAllOnesValue())
+ if (isAllOnesConstant(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
MVT::Glue));
// fold (mul x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
bool N0IsConst = false;
bool N1IsConst = false;
+ bool N1IsOpaqueConst = false;
+ bool N0IsOpaqueConst = false;
APInt ConstValue0, ConstValue1;
// fold vector ops
if (VT.isVector()) {
N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
- N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
- ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
- : APInt();
- N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
- ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
- : APInt();
+ N0IsConst = isa<ConstantSDNode>(N0);
+ if (N0IsConst) {
+ ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
+ N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
+ }
+ N1IsConst = isa<ConstantSDNode>(N1);
+ if (N1IsConst) {
+ ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
+ N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
+ }
}
// fold (mul c1, c2) -> c1*c2
- if (N0IsConst && N1IsConst)
- return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
+ if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
+ return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
+ N0.getNode(), N1.getNode());
// canonicalize constant to RHS (vector doesn't have to splat)
if (isConstantIntBuildVectorOrConstantInt(N0) &&
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnesValue())
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT), N0);
+ if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+ }
// fold (mul x, (1 << c)) -> x << c
- if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat)
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getConstant(ConstValue1.logBase2(),
+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
+ IsFullSplat) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(ConstValue1.logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
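// e.g. (mul x, -8) -> (sub 0, (shl x, 3))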
- if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
+ if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
+ IsFullSplat) {
unsigned Log2Val = (-ConstValue1).logBase2();
+ SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT),
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getConstant(Log2Val,
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT),
+ DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(Log2Val, DL,
getShiftAmountTy(N0.getValueType()))));
}
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+ if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
// fold (sdiv X, 1) -> X
- if (N1C && N1C->getAPIntValue() == 1LL)
+ if (N1C && N1C->isOne())
return N0;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue())
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT), N0);
+ if (N1C && N1C->isAllOnesValue()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, DL, VT), N0);
+ }
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
if (!VT.isVector()) {
}
// fold (sdiv X, pow2) -> simple ops after legalize
- if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
- (-N1C->getAPIntValue()).isPowerOf2())) {
+ // FIXME: We check for the exact bit here because the generic lowering gives
+ // better results in that case. The target-specific lowering should learn how
+ // to handle exact sdivs efficiently.
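+ // (an exact sdiv by a power of two is just an sra, which is cheaper than
+ // the sign-bias sequence built below)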
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
+ (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2SDivCheap())
return SDValue();
// Target-specific implementation of sdiv x, pow2.
- SDValue Res = BuildSDIVPow2(N);
- if (Res.getNode())
+ if (SDValue Res = BuildSDIVPow2(N))
return Res;
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+ SDLoc DL(N);
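// The sequence below computes, e.g. for (sdiv i32 X, 8) with lg2 == 3:
//   SGN = X >>s 31          ; 0 or -1
//   SRL = SGN >>u (32 - 3)  ; (X < 0) ? 7 : 0
//   ADD = X + SRL           ; bias negative dividends toward zero
//   SRA = ADD >>s 3         ; the quotient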
// Splat the sign bit into the register
SDValue SGN =
- DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
- DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ DAG.getNode(ISD::SRA, DL, VT, N0,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
AddToWorklist(SGN.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
SDValue SRL =
- DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
- DAG.getConstant(VT.getScalarSizeInBits() - lg2,
+ DAG.getNode(ISD::SRL, DL, VT, SGN,
+ DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
getShiftAmountTy(SGN.getValueType())));
- SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
+ SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
AddToWorklist(SRL.getNode());
AddToWorklist(ADD.getNode()); // Divide by pow2
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
- DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
+ DAG.getConstant(lg2, DL,
+ getShiftAmountTy(ADD.getValueType())));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
return SRA;
AddToWorklist(SRA.getNode());
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
// If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
- if (N1C && !TLI.isIntDivCheap()) {
- SDValue Op = BuildSDIV(N);
- if (Op.getNode()) return Op;
- }
+ if (N1C && !TLI.isIntDivCheap())
+ if (SDValue Op = BuildSDIV(N))
+ return Op;
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
+ N0C, N1C))
+ return Folded;
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
- DAG.getConstant(N1C->getAPIntValue().logBase2(),
+ if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRL, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
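// e.g. (udiv x, (shl 4, y)) -> (srl x, (add y, 2))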
if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
EVT ADDVT = N1.getOperand(1).getValueType();
- SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
+ SDLoc DL(N);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
N1.getOperand(1),
DAG.getConstant(SHC->getAPIntValue()
.logBase2(),
- ADDVT));
+ DL, ADDVT));
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
}
}
}
// fold (udiv x, c) -> alternate
- if (N1C && !TLI.isIntDivCheap()) {
- SDValue Op = BuildUDIV(N);
- if (Op.getNode()) return Op;
- }
+ if (N1C && !TLI.isIntDivCheap())
+ if (SDValue Op = BuildUDIV(N))
+ return Op;
// undef / X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X / undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (srem c1, c2) -> c1%c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
+ N0C, N1C))
+ return Folded;
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (!VT.isVector()) {
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (urem c1, c2) -> c1%c2
ConstantSDNode *N0C = isConstOrConstSplat(N0);
ConstantSDNode *N1C = isConstOrConstSplat(N1);
- if (N0C && N1C && !N1C->isNullValue())
- return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+ if (N0C && N1C)
+ if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
+ N0C, N1C))
+ return Folded;
// fold (urem x, pow2) -> (and x, pow2-1)
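// e.g. (urem x, 16) -> (and x, 15)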
- if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
- DAG.getConstant(N1C->getAPIntValue()-1,VT));
+ if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+ N1C->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N0,
+ DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ }
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+ if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
if (SHC->getAPIntValue().isPowerOf2()) {
+ SDLoc DL(N);
SDValue Add =
- DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+ DAG.getNode(ISD::ADD, DL, VT, N1,
+ DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
VT));
AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
}
// undef % X -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// X % undef -> undef
if (N1.getOpcode() == ISD::UNDEF)
return N1;
SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// fold (mulhs x, 0) -> 0
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N1;
// fold (mulhs x, 1) -> (sra x, size(x)-1)
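// (the high half of the widened x * 1 is just the sign extension of x)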
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0,
+ if (isOneConstant(N1)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+ DL,
getShiftAmountTy(N0.getValueType())));
+ }
// fold (mulhs x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
- DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
SDLoc DL(N);
// fold (mulhu x, 0) -> 0
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N1;
// fold (mulhu x, 1) -> 0
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getConstant(0, N0.getValueType());
+ if (isOneConstant(N1))
+ return DAG.getConstant(0, DL, N0.getValueType());
// fold (mulhu x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, DL, VT);
// If the type twice as wide is legal, transform the mulhu to a wider multiply
// plus a shift.
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
- DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(N1.getValueType())));
return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
}
}
}
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
- if (Res.getNode()) return Res;
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
+ return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
- DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
}
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
- if (Res.getNode()) return Res;
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
+ return Res;
EVT VT = N->getValueType(0);
SDLoc DL(N);
Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
// Compute the high part as N1.
Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
- DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+ DAG.getConstant(SimpleSize, DL,
+ getShiftAmountTy(Lo.getValueType())));
Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
// Compute the low part as N0.
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
}
SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
- if (Res.getNode()) return Res;
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM))
+ return Res;
return SDValue();
}
SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
- SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
- if (Res.getNode()) return Res;
+ if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM))
+ return Res;
return SDValue();
}
// build vector of all zeros that might be illegal at this stage.
if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
if (!LegalTypes)
- ShOp = DAG.getConstant(0, VT);
+ ShOp = DAG.getConstant(0, SDLoc(N), VT);
else
ShOp = SDValue();
}
ShOp = N0->getOperand(0);
if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
if (!LegalTypes)
- ShOp = DAG.getConstant(0, VT);
+ ShOp = DAG.getConstant(0, SDLoc(N), VT);
else
ShOp = SDValue();
}
// fold (and x, undef) -> 0
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(LocReference), VT);
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
SDValue LL, LR, RL, RR, CC0, CC1;
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
LL.getValueType().isInteger()) {
// fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
- if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+ if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
LR.getValueType(), LL, RL);
AddToWorklist(ORNode.getNode());
return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
}
- // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
- }
- // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ if (isAllOnesConstant(LR)) {
+ // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+ if (Op1 == ISD::SETEQ) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
+ // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+ if (Op1 == ISD::SETGT) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
}
}
// Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
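// (X + 1 wraps 0 to 1 and -1 to 0, so X avoids both values exactly when
// the unsigned sum is >= 2)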
if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
Op0 == Op1 && LL.getValueType().isInteger() &&
- Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() &&
- cast<ConstantSDNode>(RR)->isAllOnesValue()) ||
- (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
- cast<ConstantSDNode>(RR)->isNullValue()))) {
- SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
- LL, DAG.getConstant(1, LL.getValueType()));
+ Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
+ (isAllOnesConstant(LR) && isNullConstant(RR)))) {
+ SDLoc DL(N0);
+ SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
+ LL, DAG.getConstant(1, DL,
+ LL.getValueType()));
AddToWorklist(ADDNode.getNode());
return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
- DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
+ DAG.getConstant(2, DL, LL.getValueType()),
+ ISD::SETUGE);
}
// canonicalize equivalent to ll == rl
if (LL == RR && LR == RL) {
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
ADDC |= Mask;
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
+ SDLoc DL(N0);
SDValue NewAdd =
- DAG.getNode(ISD::ADD, SDLoc(N0), VT,
- N0.getOperand(0), DAG.getConstant(ADDC, VT));
+ DAG.getNode(ISD::ADD, DL, VT,
+ N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
CombineTo(N0.getNode(), NewAdd);
// Return N so it doesn't get rechecked!
return SDValue(LocReference, 0);
return DAG.getConstant(
APInt::getNullValue(
N0.getValueType().getScalarType().getSizeInBits()),
- N0.getValueType());
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(
APInt::getNullValue(
N1.getValueType().getScalarType().getSizeInBits()),
- N1.getValueType());
+ SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
}
// fold (and c1, c2) -> c1&c2
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
if (isConstantIntBuildVectorOrConstantInt(N0) &&
!isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
// fold (and x, -1) -> x
- if (N1C && N1C->isAllOnesValue())
+ if (isAllOnesConstant(N1))
return N0;
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarType().getSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// reassociate and
if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
// For big endian targets, we need to add an offset to the pointer
// to load the correct bytes. For little endian systems, we merely
// need to read fewer bytes from the same pointer.
- if (TLI.isBigEndian()) {
+ if (DAG.getDataLayout().isBigEndian()) {
unsigned LVTStoreBytes = LoadedVT.getStoreSize();
unsigned EVTStoreBytes = ExtVT.getStoreSize();
unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType,
- NewPtr, DAG.getConstant(PtrOff, PtrType));
+ SDLoc DL(LN0);
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
+ NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
Alignment = MinAlign(Alignment, PtrOff);
}
return Combined;
// Simplify: (and (op x...), (op y...)) -> (op (and x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
}
SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
- if (OpSizeInBits > 16)
- Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
- DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+ if (OpSizeInBits > 16) {
+ SDLoc DL(N);
+ Res = DAG.getNode(ISD::SRL, DL, VT, Res,
+ DAG.getConstant(OpSizeInBits - 16, DL,
+ getShiftAmountTy(VT)));
+ }
return Res;
}
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
return SDValue();
- SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
- SDValue(Parts[0],0));
+ SDLoc DL(N);
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
+ SDValue(Parts[0], 0));
// Result of the bswap should be rotated by 16. If it's not legal, then
// do (x << 16) | (x >> 16).
- SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+ SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
- return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
- return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
- return DAG.getNode(ISD::OR, SDLoc(N), VT,
- DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
- DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
+ return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value.
if (!LegalOperations &&
(N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
- return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
+ SDLoc(LocReference), VT);
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
SDValue LL, LR, RL, RR, CC0, CC1;
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
+ if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
// fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
// fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
- if (cast<ConstantSDNode>(LR)->isNullValue() &&
- (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+ if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
LR.getValueType(), LL, RL);
AddToWorklist(ORNode.getNode());
}
// fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
// fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
- if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
- (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+ if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
LR.getValueType(), LL, RL);
AddToWorklist(ANDNode.getNode());
}
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
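// e.g. with C1 = 0xFF00 and C2 = 0x00FF, if the low byte of X and the high
// byte of Y are known zero, this becomes (and (or X, Y), 0xFFFF).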
- if (N0.getOpcode() == ISD::AND &&
- N1.getOpcode() == ISD::AND &&
- N0.getOperand(1).getOpcode() == ISD::Constant &&
- N1.getOperand(1).getOpcode() == ISD::Constant &&
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
// Don't increase # computations.
(N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
// We can only do this xform if we know that bits from X that are set in C2
// but not in C1 are already zero. Likewise for Y.
- const APInt &LHSMask =
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- const APInt &RHSMask =
- cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
-
- if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
- DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
- SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
- N0.getOperand(0), N1.getOperand(0));
- return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, X,
- DAG.getConstant(LHSMask | RHSMask, VT));
+ if (const ConstantSDNode *N0O1C =
+ getAsNonOpaqueConstant(N0.getOperand(1))) {
+ if (const ConstantSDNode *N1O1C =
+ getAsNonOpaqueConstant(N1.getOperand(1))) {
+ // We can only do this xform if we know that bits from X that are set in
+ // C2 but not in C1 are already zero. Likewise for Y.
+ const APInt &LHSMask = N0O1C->getAPIntValue();
+ const APInt &RHSMask = N1O1C->getAPIntValue();
+
+ if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+ DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+ SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ SDLoc DL(LocReference);
+ return DAG.getNode(ISD::AND, DL, VT, X,
+ DAG.getConstant(LHSMask | RHSMask, DL, VT));
+ }
+ }
}
}
return DAG.getConstant(
APInt::getAllOnesValue(
N0.getValueType().getScalarType().getSizeInBits()),
- N0.getValueType());
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(
APInt::getAllOnesValue(
N1.getValueType().getScalarType().getSizeInBits()),
- N1.getValueType());
+ SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
}
// fold (or c1, c2) -> c1|c2
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
if (isConstantIntBuildVectorOrConstantInt(N0) &&
!isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
// fold (or x, 0) -> x
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N0;
// fold (or x, -1) -> -1
- if (N1C && N1C->isAllOnesValue())
+ if (isAllOnesConstant(N1))
return N1;
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return Combined;
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
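// e.g. for i32 x = B3B2B1B0 the halfword-swapped value B2B3B0B1 is
// (rotl (bswap x), 16).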
- SDValue BSwap = MatchBSwapHWord(N, N0, N1);
- if (BSwap.getNode())
+ if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
return BSwap;
- BSwap = MatchBSwapHWordLow(N, N0, N1);
- if (BSwap.getNode())
+ if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
return BSwap;
// reassociate or
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
- if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1))
+ if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
+ N1C, C1))
return DAG.getNode(
ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
}
}
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// See if this is some rotate idiom.
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
}
- Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+ Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
}
return Rot.getNode();
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (xor x, undef) -> undef
if (N0.getOpcode() == ISD::UNDEF)
return N0;
if (N1.getOpcode() == ISD::UNDEF)
return N1;
// fold (xor c1, c2) -> c1^c2
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+ return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
if (isConstantIntBuildVectorOrConstantInt(N0) &&
!isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
// fold (xor x, 0) -> x
- if (N1C && N1C->isNullValue())
+ if (isNullConstant(N1))
return N0;
// reassociate xor
if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
}
// fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
- if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+ if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getNode()->hasOneUse() &&
isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
SDValue V = N0.getOperand(0);
- V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
- DAG.getConstant(1, V.getValueType()));
+ SDLoc DL(N0);
+ V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
+ DAG.getConstant(1, DL, V.getValueType()));
AddToWorklist(V.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
- if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+ if (isOneConstant(N1) && VT == MVT::i1 &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
- if (N1C && N1C->isAllOnesValue() &&
+ if (isAllOnesConstant(N1) &&
(N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
}
// fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
if (N1C && N0.getOpcode() == ISD::XOR) {
- ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
- ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (N00C)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
+ if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
DAG.getConstant(N1C->getAPIntValue() ^
- N00C->getAPIntValue(), VT));
- if (N01C)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
+ N00C->getAPIntValue(), DL, VT));
+ }
+ if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getConstant(N1C->getAPIntValue() ^
- N01C->getAPIntValue(), VT));
+ N01C->getAPIntValue(), DL, VT));
+ }
}
// fold (xor x, x) -> 0
if (N0 == N1)
// consistent result.
// - Pushing the zero left requires shifting one bits in from the right.
// A rotate left of ~1 is a nice way of achieving the desired result.
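// e.g. (xor (shl 1, y), -1) becomes (rotl ~1, y).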
- if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
- if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode()))
- if (N0.getOpcode() == ISD::SHL)
- if (auto *ShlLHS = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
- if (N1C->isAllOnesValue() && ShlLHS->isOne())
- return DAG.getNode(ISD::ROTL, SDLoc(N), VT, DAG.getConstant(~1, VT),
- N0.getOperand(1));
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
+ && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
+ N0.getOperand(1));
+ }
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
- if (N0.getOpcode() == N1.getOpcode()) {
- SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
- if (Tmp.getNode()) return Tmp;
- }
+ if (N0.getOpcode() == N1.getOpcode())
+ if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+ return Tmp;
// Simplify the expression using non-local knowledge.
if (!VT.isVector() &&
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
- // We can't and shouldn't fold opaque constants.
- if (Amt->isOpaque())
- return SDValue();
-
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
}
// We require the RHS of the binop to be a constant and not opaque as well.
- ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
- if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();
+ ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
+ if (!BinOpCst) return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant.
// If it is not a shift, it pessimizes some common cases like:
SDValue N01 = N->getOperand(0).getOperand(1);
if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
- EVT TruncVT = N->getValueType(0);
- SDValue N00 = N->getOperand(0).getOperand(0);
- APInt TruncC = N01C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+ if (!N01C->isOpaque()) {
+ EVT TruncVT = N->getValueType(0);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ APInt TruncC = N01C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+ SDLoc DL(N);
- return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
- DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
- DAG.getConstant(TruncC, TruncVT));
+ return DAG.getNode(ISD::AND, DL, TruncVT,
+ DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
+ DAG.getConstant(TruncC, DL, TruncVT));
+ }
}
}
if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
- if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV))
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
+ N01CV, N1CV))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
} else {
}
// fold (shl c1, c2) -> c1<<c2
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
// fold (shl 0, x) -> 0
- if (N0C && N0C->isNullValue())
+ if (isNullConstant(N0))
return N0;
// fold (shl x, c >= size(x)) -> undef
- if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
return DAG.getUNDEF(VT);
// fold (shl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
// fold (shl undef, x) -> 0
if (N0.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
+ SDLoc DL(N);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
EVT InnerShiftVT = N0Op0.getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
if (c2 >= OpSizeInBits - InnerShiftSize) {
+ SDLoc DL(N0);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT,
+ DAG.getNode(N0.getOpcode(), DL, VT,
N0Op0->getOperand(0)),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
}
if (c1 == c2) {
SDValue NewOp0 = N0.getOperand(0);
EVT CountVT = NewOp0.getOperand(1).getValueType();
- SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
- NewOp0, DAG.getConstant(c2, CountVT));
+ SDLoc DL(N);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
+ NewOp0,
+ DAG.getConstant(c2, DL, CountVT));
AddToWorklist(NewSHL.getNode());
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
}
}
+ // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
+ // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C1-C2)) if C1 > C2
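+ // e.g. (shl (srl exact X, 3), 5) -> (shl X, 2); the exact flag guarantees
+ // that the low 3 bits of X are zero, so no information is lost.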
+ if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
+ cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t C1 = N0C1->getZExtValue();
+ uint64_t C2 = N1C->getZExtValue();
+ SDLoc DL(N);
+ if (C1 <= C2)
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(C2 - C1, DL, N1.getValueType()));
+ return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
+ DAG.getConstant(C1 - C2, DL, N1.getValueType()));
+ }
+ }
+
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
//                               (and (srl x, (sub c1, c2)), MASK)
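// e.g. for i8: (shl (srl x, 2), 3) -> (and (shl x, 1), 0xF8)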
// Only fold this if the inner shift has no other uses -- if it does, folding
SDValue Shift;
if (c2 > c1) {
Mask = Mask.shl(c2 - c1);
- Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c2 - c1, N1.getValueType()));
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c2 - c1, DL, N1.getValueType()));
} else {
Mask = Mask.lshr(c1 - c2);
- Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 - c2, N1.getValueType()));
+ SDLoc DL(N);
+ Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 - c2, DL, N1.getValueType()));
}
- return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
- DAG.getConstant(Mask, VT));
+ SDLoc DL(N0);
+ return DAG.getNode(ISD::AND, DL, VT, Shift,
+ DAG.getConstant(Mask, DL, VT));
}
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
unsigned BitSize = VT.getScalarSizeInBits();
+ SDLoc DL(N);
SDValue HiBitsMask =
DAG.getConstant(APInt::getHighBitsSet(BitSize,
- BitSize - N1C->getZExtValue()), VT);
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
+ BitSize - N1C->getZExtValue()),
+ DL, VT);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
HiBitsMask);
}
return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
}
- if (N1C) {
- SDValue NewSHL = visitShiftByConstant(N, N1C);
- if (NewSHL.getNode())
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSHL = visitShiftByConstant(N, N1C))
return NewSHL;
- }
return SDValue();
}
}
// fold (sra c1, c2) -> c1 >>s c2
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
// fold (sra 0, x) -> 0
- if (N0C && N0C->isNullValue())
+ if (isNullConstant(N0))
return N0;
// fold (sra -1, x) -> -1
- if (N0C && N0C->isAllOnesValue())
+ if (isAllOnesConstant(N0))
return N0;
// fold (sra x, (setge c, size(x))) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
if (Sum >= OpSizeInBits)
Sum = OpSizeInBits - 1;
- return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(Sum, N1.getValueType()));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum, DL, N1.getValueType()));
}
}
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
- SDValue Amt = DAG.getConstant(ShiftAmt,
- getShiftAmountTy(N0.getOperand(0).getValueType()));
- SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
- N0.getOperand(0), Amt);
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
- Shift);
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
- N->getValueType(0), Trunc);
+ SDLoc DL(N);
+ SDValue Amt = DAG.getConstant(ShiftAmt, DL,
+ getShiftAmountTy(N0.getOperand(0).getValueType()));
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
+ N0.getOperand(0), Amt);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
+ Shift);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL,
+ N->getValueType(0), Trunc);
}
}
}
EVT LargeVT = N0Op0.getValueType();
if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ SDLoc DL(N);
SDValue Amt =
- DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
+ DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
+ SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
N0Op0.getOperand(0), Amt);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
}
}
}
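// If the sign bit is known to be zero, SRA and SRL compute the same result,
// so switch this to a SRL.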
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
- if (N1C) {
- SDValue NewSRA = visitShiftByConstant(N, N1C);
- if (NewSRA.getNode())
+ if (N1C && !N1C->isOpaque())
+ if (SDValue NewSRA = visitShiftByConstant(N, N1C))
return NewSRA;
- }
return SDValue();
}
}
// fold (srl c1, c2) -> c1 >>u c2
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- if (N0C && N1C)
- return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+ if (N0C && N1C && !N1C->isOpaque())
+ return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
// fold (srl 0, x) -> 0
- if (N0C && N0C->isNullValue())
+ if (isNullConstant(N0))
return N0;
// fold (srl x, c >= size(x)) -> undef
if (N1C && N1C->getZExtValue() >= OpSizeInBits)
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
- return DAG.getConstant(0, VT);
+ return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL) {
if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t c1 = N01C->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
+ SDLoc DL(N);
if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, DL, N1.getValueType()));
}
}
uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
// This is only valid if the OpSizeInBits + c1 = size of inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
+ SDLoc DL(N0);
if (c1 + c2 >= InnerShiftSize)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
- DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
+ return DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(ISD::SRL, DL, InnerShiftVT,
N0.getOperand(0)->getOperand(0),
- DAG.getConstant(c1 + c2, ShiftCountVT)));
+ DAG.getConstant(c1 + c2, DL,
+ ShiftCountVT)));
}
}
unsigned BitSize = N0.getScalarValueSizeInBits();
if (BitSize <= 64) {
uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL >> ShAmt, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, DL, VT));
}
}
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
uint64_t ShiftAmt = N1C->getZExtValue();
- SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
+ SDLoc DL0(N0);
+ SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
N0.getOperand(0),
- DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
+ DAG.getConstant(ShiftAmt, DL0,
+ getShiftAmountTy(SmallVT)));
AddToWorklist(SmallShift.getNode());
APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
- DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
+ DAG.getConstant(Mask, DL, VT));
}
}
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
- if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
// If all of the bits input to the ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
APInt UnknownBits = ~KnownZero;
- if (UnknownBits == 0) return DAG.getConstant(1, VT);
+ if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
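// e.g. if only bit 4 of x can be nonzero, (ctlz x) is either 27 (bit set)
// or 32 (x == 0), so (srl (ctlz x), 5) computes (xor (srl x, 4), 1).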
if ((UnknownBits & (UnknownBits - 1)) == 0) {
SDValue Op = N0.getOperand(0);
if (ShAmt) {
- Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
- DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
+ SDLoc DL(N0);
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getConstant(ShAmt, DL,
+ getShiftAmountTy(Op.getValueType())));
AddToWorklist(Op.getNode());
}
- return DAG.getNode(ISD::XOR, SDLoc(N), VT,
- Op, DAG.getConstant(1, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT,
+ Op, DAG.getConstant(1, DL, VT));
}
}
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
- if (N1C) {
+ if (N1C && !N1C->isOpaque()) {
SDValue NewSRL = visitShiftByConstant(N, N1C);
if (NewSRL.getNode())
return NewSRL;
return SDValue();
}
+SDValue DAGCombiner::visitBSWAP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (bswap c1) -> c2
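+ // e.g. (bswap i32 0x11223344) constant-folds to 0x44332211.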
+ if (isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
+ // fold (bswap (bswap x)) -> x
+ if (N0.getOpcode() == ISD::BSWAP)
+ return N0->getOperand(0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fold (ctlz c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
return SDValue();
}
EVT VT = N->getValueType(0);
// fold (ctlz_zero_undef c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
EVT VT = N->getValueType(0);
// fold (cttz c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
return SDValue();
}
EVT VT = N->getValueType(0);
// fold (cttz_zero_undef c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
return SDValue();
}
EVT VT = N->getValueType(0);
// fold (ctpop c1) -> c2
- if (isa<ConstantSDNode>(N0))
+ if (isConstantIntBuildVectorOrConstantInt(N0))
return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
return SDValue();
}
// fold (select C, X, X) -> X
if (N1 == N2)
return N1;
- // fold (select true, X, Y) -> X
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- if (N0C && !N0C->isNullValue())
- return N1;
- // fold (select false, X, Y) -> Y
- if (N0C && N0C->isNullValue())
- return N2;
+ if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
+ // fold (select true, X, Y) -> X
+ // fold (select false, X, Y) -> Y
+ return !N0C->isNullValue() ? N1 : N2;
+ }
// fold (select C, 1, X) -> (or C, X)
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ if (VT == MVT::i1 && isOneConstant(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
// We can't do this reliably if integer based booleans have different contents
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
if (VT.isInteger() &&
(VT0 == MVT::i1 || (VT0.isInteger() &&
TLI.getBooleanContents(false, false) ==
TLI.getBooleanContents(false, true) &&
TLI.getBooleanContents(false, false) ==
TargetLowering::ZeroOrOneBooleanContent)) &&
- N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ isNullConstant(N1) && isOneConstant(N2)) {
SDValue XORNode;
- if (VT == VT0)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
- N0, DAG.getConstant(1, VT0));
- XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
- N0, DAG.getConstant(1, VT0));
+ if (VT == VT0) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::XOR, DL, VT0,
+ N0, DAG.getConstant(1, DL, VT0));
+ }
+ SDLoc DL0(N0);
+ XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
+ N0, DAG.getConstant(1, DL0, VT0));
AddToWorklist(XORNode.getNode());
if (VT.bitsGT(VT0))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
}
// fold (select C, 0, X) -> (and (not C), X)
- if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
}
// fold (select C, X, 1) -> (or (not C), X)
- if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
AddToWorklist(NOTNode.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
}
// fold (select C, X, 0) -> (and C, X)
- if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ if (VT == MVT::i1 && isNullConstant(N2))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
// fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or X, Y)
- if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select X, Y, X) -> (and X, Y)
// fold (select X, Y, 0) -> (and X, Y)
- if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
// If we can fold this based on the true/false value, do so.
SDValue N1_0 = N1->getOperand(0);
SDValue N1_1 = N1->getOperand(1);
SDValue N1_2 = N1->getOperand(2);
- if (N1_2 == N2) {
+ if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
// Create the actual and node if we can generate good code for it.
if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
SDValue N2_0 = N2->getOperand(0);
SDValue N2_1 = N2->getOperand(1);
SDValue N2_2 = N2->getOperand(2);
- if (N2_1 == N1) {
+ if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
// Create the actual or node if we can generate good code for it.
if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
+SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
+ SDValue Mask = MSC->getMask();
+ SDValue Data = MSC->getValue();
+ SDLoc DL(N);
+
+ // If the MSCATTER data type requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
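+ // For example, on a target where v16i32 must be split, a v16i32 scatter
+ // with a setcc-produced v16i1 mask becomes two v8i32 scatters whose chains
+ // are rejoined below with a TokenFactor.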
+ if (Mask.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
+
+ SDValue Chain = MSC->getChain();
+
+ EVT MemoryVT = MSC->getMemoryVT();
+ unsigned Alignment = MSC->getOriginalAlignment();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ SDValue BasePtr = MSC->getBasePtr();
+ SDValue IndexLo, IndexHi;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MSC->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, MSC->getAAInfo(), MSC->getRanges());
+
+ SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
+ Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
+ DL, OpsLo, MMO);
+
+ SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
+ Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (Level >= AfterLegalizeTypes)
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
getMachineMemOperand(MST->getPointerInfo(),
return SDValue();
}
+SDValue DAGCombiner::visitMGATHER(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
+ SDValue Mask = MGT->getMask();
+ SDLoc DL(N);
+
+ // If the MGATHER result requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
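+ // The split halves are recombined below: CONCAT_VECTORS for the gathered
+ // value and a TokenFactor for the chains.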
+
+ if (Mask.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ SDValue Src0 = MGT->getValue();
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+
+ SDValue Chain = MGT->getChain();
+ EVT MemoryVT = MGT->getMemoryVT();
+ unsigned Alignment = MGT->getOriginalAlignment();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue BasePtr = MGT->getBasePtr();
+ SDValue Index = MGT->getIndex();
+ SDValue IndexLo, IndexHi;
+ std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MGT->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MGT->getAAInfo(), MGT->getRanges());
+
+ SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
+ Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
+ MMO);
+
+ SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
+ Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
+ MMO);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
+
+ SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+
+ SDValue RetOps[] = { GatherRes, Chain };
+ return DAG.getMergeValues(RetOps, DL);
+}
+
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (Level >= AfterLegalizeTypes)
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
+ DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
EVT VT = LHS.getValueType();
SDValue Shift = DAG.getNode(
ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
+ DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
}
}
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
// If the VSELECT result requires splitting and the mask is provided by a
// SETCC, then split both nodes and their operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
SDLoc(N));
}
-// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext
-// dag node into a ConstantSDNode or a build_vector of constants.
-// This function is called by the DAGCombiner when visiting sext/zext/aext
-// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
-// Vector extends are not folded if operations are legal; this is to
-// avoid introducing illegal build_vector dag nodes.
+/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
+/// a build_vector of constants.
+/// This function is called by the DAGCombiner when visiting sext/zext/aext
+/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+/// Vector extends are not folded if operations are legal; this is to
+/// avoid introducing illegal build_vector dag nodes.
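+/// For example, (zext (v2i16 build_vector 1, 2) to v2i32) can be folded
+/// into (v2i32 build_vector 1, 2) without creating illegal nodes.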
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
SelectionDAG &DAG, bool LegalTypes,
bool LegalOperations) {
EVT VT = N->getValueType(0);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
- Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
+ Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
+ && "Expected EXTEND dag node in input!");
// fold (sext c1) -> c1
// fold (zext c1) -> c1
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
- unsigned ShAmt = VTBits - EVTBits;
SmallVector<SDValue, 8> Elts;
- unsigned NumElts = N0->getNumOperands();
+ unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);
for (unsigned i=0; i != NumElts; ++i) {
continue;
}
- ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
- const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
- if (Opcode == ISD::SIGN_EXTEND)
- Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- SVT));
+ SDLoc DL(Op);
+ // Get the constant value and, if needed, truncate it to the size of the type.
+ // Nodes like build_vector might have constants wider than the scalar type.
+ APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
+ if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
+ Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
else
- Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
- SVT));
+ Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
}
return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
Align, LN0->getAAInfo());
BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Stride, BasePtr.getValueType()));
+ DAG.getConstant(Stride, DL, BasePtr.getValueType()));
Loads.push_back(SplitLoad.getValue(0));
Chains.push_back(SplitLoad.getValue(1));
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
- SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ExtLoad, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ SDLoc DL(N);
SDValue NegOne =
- DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- NegOne, DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ NegOne, DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
N0.getOperand(0), N0.getOperand(1), CC);
return DAG.getSelect(DL, VT, SetCC,
- NegOne, DAG.getConstant(0, VT));
+ NegOne, DAG.getConstant(0, DL, VT));
}
}
}
SDValue Op1 = N->getOperand(1);
assert(Op0.getValueType() == Op1.getValueType());
- ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
- ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
- if (COp0 && COp0->isNullValue())
+ if (isNullConstant(Op0))
Op = Op1;
- else if (COp1 && COp1->isNullValue())
+ else if (isNullConstant(Op1))
Op = Op0;
else
return false;
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
- SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
- if (NarrowLoad.getNode()) {
+ if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
SDNode* oye = N0.getNode()->getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- X, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
}
// fold (zext (load x)) -> (zext (truncate (zextload x)))
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
- ExtLoad, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
+ ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
ISD::ZERO_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
// zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
// Only do this before legalize for now.
EVT EltVT = VT.getVectorElementType();
+ SDLoc DL(N);
SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
- DAG.getConstant(1, EltVT));
+ DAG.getConstant(1, DL, EltVT));
if (VT.getSizeInBits() == N0VT.getSizeInBits())
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
// for that matter). Check to see that they are the same size. If so,
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getSetCC(DL, VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
- DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
OneOps));
// If the desired elements are smaller or larger than the source
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
SDValue VsetCC =
- DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
+ DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
- DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
+ return DAG.getNode(ISD::AND, DL, VT,
+ DAG.getSExtOrTrunc(VsetCC, DL, VT),
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps));
}
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
}
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- X, DAG.getConstant(Mask, VT));
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT,
+ X, DAG.getConstant(Mask, DL, VT));
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
}
// aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDLoc DL(N);
SDValue SCC =
- SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode())
return SCC;
const APInt &CVal = CV->getAPIntValue();
APInt NewVal = CVal & Mask;
if (NewVal != CVal)
- return DAG.getConstant(NewVal, V.getValueType());
+ return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
break;
}
case ISD::OR:
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
break;
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
// See if we can recursively simplify the LHS.
unsigned Amt = RHSC->getZExtValue();
// Watch out for shift count overflow though.
if (Amt >= Mask.getBitWidth()) break;
APInt NewMask = Mask << Amt;
- SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
- if (SimplifyLHS.getNode())
+ if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
SimplifyLHS, V.getOperand(1));
}
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
- if (TLI.isBigEndian()) {
+ if (DAG.getDataLayout().isBigEndian()) {
unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
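// e.g. narrowing an i32 load to i8 with ShAmt == 0 must read the last byte
// (PtrOff == 3) on a big-endian target: ShAmt becomes 32 - 8 - 0 == 24.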
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
- SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
+ SDLoc DL(LN0);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, PtrType));
+ DAG.getConstant(PtrOff, DL, PtrType));
AddToWorklist(NewPtr.getNode());
SDValue Load;
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
+ SDLoc DL(N0);
if (ShLeftAmt >= VT.getSizeInBits())
- Result = DAG.getConstant(0, VT);
+ Result = DAG.getConstant(0, DL, VT);
else
- Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ Result = DAG.getNode(ISD::SHL, DL, VT,
+ Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
}
// Return the new loaded value.
// fold (sext_in_reg (load x)) -> (smaller sextload x)
// fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
- SDValue NarrowLoad = ReduceLoadWidth(N);
- if (NarrowLoad.getNode())
+ if (SDValue NarrowLoad = ReduceLoadWidth(N))
return NarrowLoad;
// fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
- Op.getValueType()));
+ SDLoc(Op), Op.getValueType()));
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
return SDValue();
}
+SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- bool isLE = TLI.isLittleEndian();
+ bool isLE = DAG.getDataLayout().isLittleEndian();
// noop truncate
if (N0.getValueType() == N->getValueType(0))
SDValue EltNo = N0->getOperand(1);
if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- EVT IndexTy = TLI.getVectorIdxTy();
+ EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
NVT, N0.getOperand(0));
+ SDLoc DL(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
- SDLoc(N), TrTy, V,
- DAG.getConstant(Index, IndexTy));
+ DL, TrTy, V,
+ DAG.getConstant(Index, DL, IndexTy));
}
}
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
- SDValue Reduced = ReduceLoadWidth(N);
- if (Reduced.getNode())
+ if (SDValue Reduced = ReduceLoadWidth(N))
return Reduced;
+
// Handle the case where the load remains an extending load even
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
!LD2->isVolatile() &&
DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
unsigned Align = LD1->getAlignment();
- unsigned NewAlign = TLI.getDataLayout()->
- getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile() &&
// Do not remove the cast if the types differ in endian layout.
- TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
- TLI.hasBigEndianPartOrdering(VT) &&
+ TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
+ TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- unsigned Align = TLI.getDataLayout()->
- getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ unsigned Align = DAG.getDataLayout().getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
unsigned OrigAlign = LN0->getAlignment();
if (Align <= OrigAlign) {
N0.getOperand(0));
AddToWorklist(NewConv.getNode());
+ SDLoc DL(N);
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
if (N0.getOpcode() == ISD::FNEG)
- return DAG.getNode(ISD::XOR, SDLoc(N), VT,
- NewConv, DAG.getConstant(SignBit, VT));
+ return DAG.getNode(ISD::XOR, DL, VT,
+ NewConv, DAG.getConstant(SignBit, DL, VT));
assert(N0.getOpcode() == ISD::FABS);
- return DAG.getNode(ISD::AND, SDLoc(N), VT,
- NewConv, DAG.getConstant(~SignBit, VT));
+ return DAG.getNode(ISD::AND, DL, VT,
+ NewConv, DAG.getConstant(~SignBit, DL, VT));
}
// fold (bitconvert (fcopysign cst, x)) ->
} else if (OrigXWidth > VTWidth) {
// To get the sign bit in the right place, we have to shift it right
// before truncating.
- X = DAG.getNode(ISD::SRL, SDLoc(X),
+ SDLoc DL(X);
+ X = DAG.getNode(ISD::SRL, DL,
X.getValueType(), X,
- DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ DAG.getConstant(OrigXWidth-VTWidth, DL,
+ X.getValueType()));
AddToWorklist(X.getNode());
X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
AddToWorklist(X.getNode());
APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
X = DAG.getNode(ISD::AND, SDLoc(X), VT,
- X, DAG.getConstant(SignBit, VT));
+ X, DAG.getConstant(SignBit, SDLoc(X), VT));
AddToWorklist(X.getNode());
SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
- Cst, DAG.getConstant(~SignBit, VT));
+ Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
AddToWorklist(Cst.getNode());
return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
}
// bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
- if (N0.getOpcode() == ISD::BUILD_PAIR) {
- SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
- if (CombineLD.getNode())
+ if (N0.getOpcode() == ISD::BUILD_PAIR)
+ if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
return CombineLD;
- }
- return SDValue();
+ // Remove double bitcasts from shuffles - this is often a legacy of
+ // XformToShuffleWithZero being used to combine bitmaskings (of
+ // float vectors bitcast to integer vectors) into shuffles.
+ // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
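+ // e.g. (v4i32 bitcast (v2i64 shuffle (bitcast V0), (bitcast V1), <0,3>))
+ // -> (v4i32 shuffle V0, V1, <0,1,6,7>) when the widened mask is legal.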
+ if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
+ N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
+ !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
+
+ // If operands are a bitcast, peek through if it casts the original VT.
+ // If operands are a constant, just bitcast back to original VT.
+ auto PeekThroughBitcast = [&](SDValue Op) {
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getOperand(0).getValueType() == VT)
+ return SDValue(Op.getOperand(0));
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
+ return SDValue();
+ };
+
+ SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
+ SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
+ if (!(SV0 && SV1))
+ return SDValue();
+
+ int MaskScale =
+ VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
+ SmallVector<int, 8> NewMask;
+ for (int M : SVN->getMask())
+ for (int i = 0; i != MaskScale; ++i)
+ NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
+
+ bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
+ if (!LegalMask) {
+ std::swap(SV0, SV1);
+ ShuffleVectorSDNode::commuteMask(NewMask);
+ LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
+ }
+
+ if (LegalMask)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
+ }
+
+ return SDValue();
}
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
DstEltVT, BV->getOperand(0)));
SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
- SDValue Op = BV->getOperand(i);
+ for (SDValue Op : BV->op_values()) {
// If the vector element type is not legal, the BUILD_VECTOR operands
// are promoted and implicitly truncated. Make that explicit here.
if (Op.getValueType() != SrcEltVT)
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
+ SDLoc DL(BV);
+
// Okay, we know the src/dst types are both integers of differing types.
// Handling growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e;
i += NumInputsPerOutput) {
- bool isLE = TLI.isLittleEndian();
+ bool isLE = DAG.getDataLayout().isLittleEndian();
APInt NewBits = APInt(DstBitSize, 0);
bool EltIsUndef = true;
for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
if (EltIsUndef)
Ops.push_back(DAG.getUNDEF(DstEltVT));
else
- Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+ Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
// Finally, this must be the case where we are shrinking elements: each input
NumOutputsPerInput*BV->getNumOperands());
SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
- if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+ for (const SDValue &Op : BV->op_values()) {
+ if (Op.getOpcode() == ISD::UNDEF) {
Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
continue;
}
- APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+ APInt OpVal = cast<ConstantSDNode>(Op)->
getAPIntValue().zextOrTrunc(SrcBitSize);
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
APInt ThisVal = OpVal.trunc(DstBitSize);
- Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+ Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
OpVal = OpVal.lshr(DstBitSize);
}
// For big endian targets, swap the order of the pieces of each element.
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
-// Attempt different variants of (fadd (fmul a, b), c) -> fma or fmad
-static SDValue performFaddFmulCombines(unsigned FusedOpcode,
- bool Aggressive,
- SDNode *N,
- const TargetLowering &TLI,
- SelectionDAG &DAG) {
+/// Try to perform FMA combining on a given FADD node.
+SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ SDLoc SL(N);
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations &&
+ TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = ((!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ UnsafeFPMath);
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
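+
+ // The folds below proceed from the plain (fadd (fmul x, y), z) patterns,
+ // through the FP_EXTEND look-through variants, to the nested forms that
+ // require aggressive fusion.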
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL &&
(Aggressive || N0->hasOneUse())) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1), N1);
}
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FMUL &&
(Aggressive || N1->hasOneUse())) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1), N0);
}
+ // Look through FP_EXTEND nodes to do more combining.
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)), N1);
+ }
+
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)), N0);
+ }
+ }
+
// More folding opportunities when target permits.
- if (Aggressive) {
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) {
// fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
- if (N0.getOpcode() == ISD::FMA &&
+ if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
N1));
}
// fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
- if (N1->getOpcode() == ISD::FMA &&
+ if (N1->getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(2).getOperand(0),
N1.getOperand(2).getOperand(1),
N0));
}
+
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1);
+ }
+ }
+
+ // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL)
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0);
+ }
+ }
+
+ // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == PreferredFusedOpcode) {
+ SDValue N102 = N10.getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL)
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0);
+ }
+ }
+ }
}
return SDValue();
}
-static SDValue performFsubFmulCombines(unsigned FusedOpcode,
- bool Aggressive,
- SDNode *N,
- const TargetLowering &TLI,
- SelectionDAG &DAG) {
+/// Try to perform FMA combining on a given FSUB node.
+SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
-
SDLoc SL(N);
+ const TargetOptions &Options = DAG.getTarget().Options;
+ bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath);
+
+ // Floating-point multiply-add with intermediate rounding.
+ bool HasFMAD = (LegalOperations &&
+ TLI.isOperationLegal(ISD::FMAD, VT));
+
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = ((!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ UnsafeFPMath);
+
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return SDValue();
+
+ // Always prefer FMAD to FMA for precision.
+ unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
+ bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ bool LookThroughFPExt = TLI.isFPExtFree(VT);
+
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (N0.getOpcode() == ISD::FMUL &&
(Aggressive || N0->hasOneUse())) {
- return DAG.getNode(FusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1));
}
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FMUL &&
(Aggressive || N1->hasOneUse()))
- return DAG.getNode(FusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(FusedOpcode, SL, VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
DAG.getNode(ISD::FNEG, SL, VT, N1));
}
+ // Look through FP_EXTEND nodes to do more combining.
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
+ }
+
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)),
+ N0);
+ }
+
+ // fold (fsub (fpext (fneg (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // us from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ // fold (fsub (fneg (fpext (fmul x, y))), z)
+ // -> (fneg (fma (fpext x), (fpext y), z))
+ // Note: This could be removed with appropriate canonicalization of the
+ // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+ // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
+ // us from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (N000.getOpcode() == ISD::FMUL) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
+ }
+ }
+ }
+
+ }
+
// More folding opportunities when target permits.
- if (Aggressive) {
+ if ((UnsafeFPMath || HasFMAD) && Aggressive) {
// fold (fsub (fma x, y, (fmul u, v)), z)
+ // -> (fma x, y, (fma u, v, (fneg z)))
- if (N0.getOpcode() == FusedOpcode &&
+ if (N0.getOpcode() == PreferredFusedOpcode &&
N0.getOperand(2).getOpcode() == ISD::FMUL) {
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(2).getOperand(0),
N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
N1)));
}
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (N1.getOpcode() == FusedOpcode &&
+ if (N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FMUL) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(FusedOpcode, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
N1.getOperand(1),
- DAG.getNode(FusedOpcode, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N20),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N20),
N21, N0));
}
+
+ if (UnsafeFPMath && LookThroughFPExt) {
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (N020.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(0), N0.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (N002.getOpcode() == ISD::FMUL)
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
+ }
+ }
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (N120.getOpcode() == ISD::FMUL) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1201),
+ N0));
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ SDValue N102 = N1.getOperand(0).getOperand(2);
+ if (N102.getOpcode() == ISD::FMUL) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1021),
+ N0));
+ }
+ }
+ }
}
return SDValue();
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
// fold (fadd c1, c2) -> c1 + c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
// canonicalize constant to RHS
if (N0CFP && !N1CFP)
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
+ return DAG.getNode(ISD::FSUB, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
+ return DAG.getNode(ISD::FSUB, DL, VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
// If 'unsafe math' is enabled, fold lots of things.
bool AllowNewConst = (Level < AfterLegalizeDAG);
// fold (fadd A, 0) -> A
- if (N1CFP && N1CFP->getValueAPF().isZero())
+ if (N1CFP && N1CFP->isZero())
return N0;
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(1)))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
- DAG.getNode(ISD::FADD, SDLoc(N), VT,
- N0.getOperand(1), N1));
+ return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
- return DAG.getConstantFP(0.0, VT);
+ return DAG.getConstantFP(0.0, DL, VT);
// If allowed, fold (fadd x, (fneg x)) -> 0.0
if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
- return DAG.getConstantFP(0.0, VT);
+ return DAG.getConstantFP(0.0, DL, VT);
// We can fold chains of FADD's of the same value into multiplications.
// This transform is not safe in general because we are reducing the number
// (fadd (fmul x, c), x) -> (fmul x, c+1)
if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP01, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
}
// (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
N1.getOperand(0) == N1.getOperand(1) &&
N0.getOperand(0) == N1.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP01, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
}
}
// (fadd x, (fmul x, c)) -> (fmul x, c+1)
if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP11, 0),
- DAG.getConstantFP(1.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ DAG.getConstantFP(1.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
}
// (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N0.getOperand(0)) {
- SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
- SDValue(CFP11, 0),
- DAG.getConstantFP(2.0, VT));
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
+ SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
+ DAG.getConstantFP(2.0, DL, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
}
}
ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
// (fadd (fadd x, x), x) -> (fmul x, 3.0)
if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
- (N0.getOperand(0) == N1))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N1, DAG.getConstantFP(3.0, VT));
+ (N0.getOperand(0) == N1)) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N1, DAG.getConstantFP(3.0, DL, VT));
+ }
}
if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
// (fadd x, (fadd x, x)) -> (fmul x, 3.0)
if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
- N1.getOperand(0) == N0)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0, DAG.getConstantFP(3.0, VT));
+ N1.getOperand(0) == N0) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N0, DAG.getConstantFP(3.0, DL, VT));
+ }
}
// (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
N0.getOperand(0) == N0.getOperand(1) &&
N1.getOperand(0) == N1.getOperand(1) &&
- N0.getOperand(0) == N1.getOperand(0))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0), DAG.getConstantFP(4.0, VT));
+ N0.getOperand(0) == N1.getOperand(0)) {
+ return DAG.getNode(ISD::FMUL, DL, VT,
+ N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
+ }
}
} // enable-unsafe-fp-math
- if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Assume if there is an fmad instruction that it should be aggressively
- // used.
- if (SDValue Fused = performFaddFmulCombines(ISD::FMAD, true, N, TLI, DAG))
- return Fused;
- }
-
// FADD -> FMA combines:
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
-
- if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Don't form FMA if we are preferring FMAD.
- if (SDValue Fused
- = performFaddFmulCombines(ISD::FMA,
- TLI.enableAggressiveFMAFusion(VT),
- N, TLI, DAG)) {
- return Fused;
- }
- }
-
- // When FP_EXTEND nodes are free on the target, and there is an opportunity
- // to combine into FMA, arrange such nodes accordingly.
- if (TLI.isFPExtFree(VT)) {
-
- // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(1)), N1);
- }
-
- // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(1)), N0);
- }
- }
+ SDValue Fused = visitFADDForFMACombine(N);
+ if (Fused) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
}
return SDValue();
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
// (fsub A, 0) -> A
- if (N1CFP && N1CFP->getValueAPF().isZero())
+ if (N1CFP && N1CFP->isZero())
return N0;
// (fsub 0, B) -> -B
- if (N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (N0CFP && N0CFP->isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
// (fsub x, x) -> 0.0
if (N0 == N1)
- return DAG.getConstantFP(0.0f, VT);
+ return DAG.getConstantFP(0.0f, dl, VT);
// (fsub x, (fadd x, y)) -> (fneg y)
// (fsub x, (fadd y, x)) -> (fneg y)
}
}
- if (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Assume if there is an fmad instruction that it should be aggressively
- // used.
- if (SDValue Fused = performFsubFmulCombines(ISD::FMAD, true, N, TLI, DAG))
- return Fused;
- }
-
// FSUB -> FMA combines:
- if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
- TLI.isFMAFasterThanFMulAndFAdd(VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
-
- if (!TLI.isOperationLegal(ISD::FMAD, VT)) {
- // Don't form FMA if we are preferring FMAD.
-
- if (SDValue Fused
- = performFsubFmulCombines(ISD::FMA,
- TLI.enableAggressiveFMAFusion(VT),
- N, TLI, DAG)) {
- return Fused;
- }
- }
-
- // When FP_EXTEND nodes are free on the target, and there is an opportunity
- // to combine into FMA, arrange such nodes accordingly.
- if (TLI.isFPExtFree(VT)) {
- // fold (fsub (fpext (fmul x, y)), z)
- // -> (fma (fpext x), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1));
- }
-
- // fold (fsub x, (fpext (fmul y, z)))
- // -> (fma (fneg (fpext y)), (fpext z), x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
- return DAG.getNode(ISD::FMA, SDLoc(N), VT,
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N10.getOperand(1)),
- N0);
- }
-
- // fold (fsub (fpext (fneg (fmul, x, y))), z)
- // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FNEG) {
- SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N000.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N000.getOperand(1)),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
- }
- }
-
- // fold (fsub (fneg (fpext (fmul, x, y))), z)
- // -> (fma (fneg (fpext x)), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FNEG) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FP_EXTEND) {
- SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
- return DAG.getNode(ISD::FMA, dl, VT,
- DAG.getNode(ISD::FNEG, dl, VT,
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
- VT, N000.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT,
- N000.getOperand(1)),
- DAG.getNode(ISD::FNEG, dl, VT, N1));
- }
- }
- }
- }
+ SDValue Fused = visitFSUBForFMACombine(N);
+ if (Fused) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
}
return SDValue();
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// fold vector ops
// fold (fmul c1, c2) -> c1*c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
// canonicalize constant to RHS
if (isConstantFPBuildVectorOrConstantFP(N0) &&
!isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
// fold (fmul A, 1.0) -> A
if (N1CFP && N1CFP->isExactlyValue(1.0))
if (Options.UnsafeFPMath) {
// fold (fmul A, 0) -> 0
- if (N1CFP && N1CFP->getValueAPF().isZero())
+ if (N1CFP && N1CFP->isZero())
return N1;
// fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
-
+
// Check 1: Make sure that the first operand of the inner multiply is NOT
// a constant. Otherwise, we may induce infinite looping.
if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
// the second operand of the outer multiply are constants.
if ((N1CFP && isConstOrConstSplatFP(N01)) ||
(BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
- SDLoc SL(N);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
- return DAG.getNode(ISD::FMUL, SL, VT, N00, MulConsts);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
}
}
}
  // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
  // during an early run of DAGCombiner it can prevent folding with fmuls
  // inserted during lowering.
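+  // The fadd must also have a single use, so the rewrite retires the
+  // (fadd x, x) node instead of leaving it alive next to the new fmul chain.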
- if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
- SDLoc SL(N);
- const SDValue Two = DAG.getConstantFP(2.0, VT);
- SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
+ if (N0.getOpcode() == ISD::FADD &&
+ (N0.getOperand(0) == N0.getOperand(1)) &&
+ N0.hasOneUse()) {
+ const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
+ SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}
}
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
// fold (fmul X, -1.0) -> (fneg X)
if (N1CFP && N1CFP->isExactlyValue(-1.0))
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::FNEG, DL, VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+ return DAG.getNode(ISD::FMUL, DL, VT,
GetNegatedExpression(N0, DAG, LegalOperations),
GetNegatedExpression(N1, DAG, LegalOperations));
}
if (Options.UnsafeFPMath && N1CFP && N0 == N2)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, VT)));
+ N1, DAG.getConstantFP(1.0, dl, VT)));
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (Options.UnsafeFPMath && N1CFP &&
N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
return DAG.getNode(ISD::FMUL, dl, VT, N0,
DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, VT)));
+ N1, DAG.getConstantFP(-1.0, dl, VT)));
return SDValue();
}
+// Combine multiple FDIVs with the same divisor into multiple FMULs by the
+// reciprocal.
+// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+// Notice that this is not always beneficial. One reason is that different
+// targets may have different costs for FDIV and FMUL, so sometimes the cost
+// of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
+// reason is that the critical path is increased from "one FDIV" to
+// "one FDIV + one FMUL".
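+// In SelectionDAG terms (illustrative): every (fdiv x, d) user of the same d
+// is rewritten to (fmul x, recip) around one shared (fdiv 1.0, d) node,
+// provided the target's combineRepeatedFPDivisors() hook deems that many
+// combinable divisions profitable.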
+SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+ // Skip if current node is a reciprocal.
+ if (N0CFP && N0CFP->isExactlyValue(1.0))
+ return SDValue();
+
+ SDValue N1 = N->getOperand(1);
+ SmallVector<SDNode *, 4> Users;
+
+ // Find all FDIV users of the same divisor.
+ for (auto *U : N1->uses()) {
+ if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
+ Users.push_back(U);
+ }
+
+ if (!TLI.combineRepeatedFPDivisors(Users.size()))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+ // FIXME: This optimization requires some level of fast-math, so the
+ // created reciprocal node should at least have the 'allowReciprocal'
+ // fast-math-flag set.
+ SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
+
+ // Dividend / Divisor -> Dividend * Reciprocal
+ for (auto *U : Users) {
+ SDValue Dividend = U->getOperand(0);
+ if (Dividend != FPOne) {
+ SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+ Reciprocal);
+ CombineTo(U, NewNode);
+ } else if (U != Reciprocal.getNode()) {
+ // In the absence of fast-math-flags, this user node is always the
+ // same node as Reciprocal, but with FMF they may be different nodes.
+ CombineTo(U, Reciprocal);
+ }
+ }
+ return SDValue(N, 0); // N was replaced.
+}
+
SDValue DAGCombiner::visitFDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
- DAG.getConstantFP(Recip, VT));
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getConstantFP(Recip, DL, VT));
}
// If this FDIV is part of a reciprocal square root, it may be folded
}
}
- // Combine multiple FDIVs with the same divisor into multiple FMULs by the
- // reciprocal.
- // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
- // Notice that this is not always beneficial. One reason is different target
- // may have different costs for FDIV and FMUL, so sometimes the cost of two
- // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
- // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
- if (Options.UnsafeFPMath) {
- // Skip if current node is a reciprocal.
- if (N0CFP && N0CFP->isExactlyValue(1.0))
- return SDValue();
-
- SmallVector<SDNode *, 4> Users;
- // Find all FDIV users of the same divisor.
- for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
- UE = N1.getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = UI.getUse().getUser();
- if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
- Users.push_back(User);
- }
-
- if (TLI.combineRepeatedFPDivisors(Users.size())) {
- SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
- SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);
-
- // Dividend / Divisor -> Dividend * Reciprocal
- for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
- if ((*I)->getOperand(0) != FPOne) {
- SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
- (*I)->getOperand(0), Reciprocal);
- DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
- }
- }
- return SDValue();
- }
- }
+ if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
+ return CombineRepeatedDivisors;
return SDValue();
}
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (DAG.getTarget().Options.UnsafeFPMath &&
- !TLI.isFsqrtCheap()) {
- // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
- if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
- EVT VT = RV.getValueType();
- RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
- AddToWorklist(RV.getNode());
+ if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
+ return SDValue();
- // Unfortunately, RV is now NaN if the input was exactly 0.
- // Select out this case and force the answer to 0.
- SDValue Zero = DAG.getConstantFP(0.0, VT);
- SDValue ZeroCmp =
- DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
- N->getOperand(0), Zero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
- AddToWorklist(RV.getNode());
+ // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
+ SDValue RV = BuildRsqrtEstimate(N->getOperand(0));
+ if (!RV)
+ return SDValue();
+
+ EVT VT = RV.getValueType();
+ SDLoc DL(N);
+ RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
+ AddToWorklist(RV.getNode());
- RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
- SDLoc(N), VT, ZeroCmp, Zero, RV);
- return RV;
- }
- }
- return SDValue();
+ // Unfortunately, RV is now NaN if the input was exactly 0.
+ // Select out this case and force the answer to 0.
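+  // (rsqrt(0) is +inf and 0 * +inf is NaN, while sqrt(0) must be 0, hence
+  // the compare-and-select.)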
+ SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
+ EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+ AddToWorklist(RV.getNode());
+
+ return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+ ZeroCmp, Zero, RV);
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
// fold (sint_to_fp (zext (setcc x, y, cc))) ->
N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
- DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(0).getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
}
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
- DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
+ DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
}
}
SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_to_sint c1fp) -> c1
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fp_to_uint c1fp) -> c1
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
return FoldIntToFPToInt(N, DAG);
// single-step fp_round we want to fold to.
// In other words, double rounding isn't the same as rounding.
// Also, this is a value preserving truncation iff both fp_round's are.
- if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc)
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc));
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
+ DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
+ }
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
// fold (fp_round_inreg c1fp) -> c1fp
if (N0CFP && isTypeLegal(EVT)) {
- SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
- return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round);
+ SDLoc DL(N);
+ SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
+ return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
}
return SDValue();
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
+ // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
+ if (N0.getOpcode() == ISD::FP16_TO_FP &&
+ TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
+ return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
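+  // (Every f16 value is exactly representable in the wider type, so the
+  // direct conversion preserves the value.)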
+
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// value of X.
if (N0.getOpcode() == ISD::FP_ROUND
CombineTo(N, ExtLoad);
CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
- N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
+ N0.getValueType(), ExtLoad,
+ DAG.getIntPtrConstant(1, SDLoc(N0))),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (fceil c1) -> fceil(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
return SDValue();
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ftrunc c1) -> ftrunc(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
return SDValue();
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
EVT VT = N->getValueType(0);
// fold (ffloor c1) -> ffloor(c1)
- if (N0CFP)
+ if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
return SDValue();
// For a scalar, just generate 0x80...
SignMask = APInt::getSignBit(IntVT.getSizeInBits());
}
- Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
- DAG.getConstant(SignMask, IntVT));
+ SDLoc DL0(N0);
+ Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
+ DAG.getConstant(SignMask, DL0, IntVT));
AddToWorklist(Int.getNode());
return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
}
}
// (fneg (fmul c, x)) -> (fmul -c, x)
- if (N0.getOpcode() == ISD::FMUL) {
+ if (N0.getOpcode() == ISD::FMUL &&
+ (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
if (CFP1) {
APFloat CVal = CFP1->getValueAPF();
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0));
+ return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
}
if (N0CFP) {
if (N0CFP && N1CFP) {
const APFloat &C0 = N0CFP->getValueAPF();
const APFloat &C1 = N1CFP->getValueAPF();
- return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0));
+ return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
}
if (N0CFP) {
// For a scalar, just generate 0x7f...
SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
}
- Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
- DAG.getConstant(SignMask, IntVT));
+ SDLoc DL(N0);
+ Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
+ DAG.getConstant(SignMask, DL, IntVT));
AddToWorklist(Int.getNode());
return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
}
if (AndConst.isPowerOf2() &&
cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+ SDLoc DL(N);
SDValue SetCC =
- DAG.getSetCC(SDLoc(N),
+ DAG.getSetCC(DL,
getSetCCResultType(Op0.getValueType()),
- Op0, DAG.getConstant(0, Op0.getValueType()),
+ Op0, DAG.getConstant(0, DL, Op0.getValueType()),
ISD::SETNE);
- SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N),
+ SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
MVT::Other, Chain, SetCC, N2);
// Don't add the new BRCond into the worklist or else SimplifySelectCC
// will convert it back to (X & C1) >> C2.
if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
bool Equal = false;
- if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
- if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
- Op0.getOpcode() == ISD::XOR) {
- TheXor = Op0.getNode();
- Equal = true;
- }
+ if (isOneConstant(Op0) && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
EVT SetCCVT = N1.getValueType();
if (LegalTypes)
SelectionDAG &DAG,
const TargetLowering &TLI) {
EVT VT;
+ unsigned AS;
+
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
return false;
- VT = Use->getValueType(0);
+ VT = LD->getMemoryVT();
+ AS = LD->getAddressSpace();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
return false;
- VT = ST->getValue().getValueType();
+ VT = ST->getMemoryVT();
+ AS = ST->getAddressSpace();
} else
return false;
} else
return false;
- return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+ return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
+ VT.getTypeForEVT(*DAG.getContext()), AS);
}
/// Try turning a load/store into a pre-indexed load/store when the base
}
  // Don't create an indexed load / store with zero offset.
- if (isa<ConstantSDNode>(Offset) &&
- cast<ConstantSDNode>(Offset)->isNullValue())
+ if (isNullConstant(Offset))
return false;
// Try turning it into a pre-indexed load / store except when:
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
if (isa<ConstantSDNode>(Offset))
- for (SDNode *Use : BasePtr.getNode()->uses()) {
- if (Use == Ptr.getNode())
+ for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
+ UE = BasePtr.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ // Skip the use that is Ptr and uses of other results from BasePtr's
+ // node (important for nodes that return multiple results).
+ if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
continue;
- if (Use->isPredecessorOf(N))
+ if (Use.getUser()->isPredecessorOf(N))
continue;
- if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
+ if (Use.getUser()->getOpcode() != ISD::ADD &&
+ Use.getUser()->getOpcode() != ISD::SUB) {
OtherUses.clear();
break;
}
- SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
- if (Op1.getNode() == BasePtr.getNode())
- std::swap(Op0, Op1);
- assert(Op0.getNode() == BasePtr.getNode() &&
- "Use of ADD/SUB but not an operand");
-
+ SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
if (!isa<ConstantSDNode>(Op1)) {
OtherUses.clear();
break;
break;
}
- OtherUses.push_back(Use);
+ OtherUses.push_back(Use.getUser());
}
if (Swapped)
if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
else CNV = CNV - Offset1;
+ SDLoc DL(OtherUses[i]);
+
// We can now generate the new expression.
- SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
+ SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
SDValue NewUse = DAG.getNode(Opcode,
- SDLoc(OtherUses[i]),
+ DL,
OtherUses[i]->getValueType(0), NewOp1, NewOp2);
DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
deleteAndRecombine(OtherUses[i]);
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
    // Don't create an indexed load / store with zero offset.
- if (isa<ConstantSDNode>(Offset) &&
- cast<ConstantSDNode>(Offset)->isNullValue())
+ if (isNullConstant(Offset))
continue;
// Try turning it into a post-indexed load / store except when
"Cannot split out indexing using opaque target constants");
if (Inc.getOpcode() == ISD::TargetConstant) {
ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
- Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
+ Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
ConstInc->getValueType(0));
}
/// \pre DAG != nullptr.
uint64_t getOffsetFromBase() const {
assert(DAG && "Missing context.");
- bool IsBigEndian =
- DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
+ bool IsBigEndian = DAG->getDataLayout().isBigEndian();
assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
uint64_t Offset = Shift / 8;
unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
if (Offset) {
// BaseAddr = BaseAddr + Offset.
EVT ArithType = BaseAddr.getValueType();
- BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
- DAG->getConstant(Offset, ArithType));
+ SDLoc DL(Origin);
+ BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
+ DAG->getConstant(Offset, DL, ArithType));
}
// Create the type of the loaded slice according to its size.
// Check if it will be merged with the load.
// 1. Check the alignment constraint.
- unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
+ unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
ResVT.getTypeForEVT(*DAG->getContext()));
if (RequiredAlignment > getAlignment())
return Result; // Fail.
else {
bool isOk = false;
- for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
- if (Chain->getOperand(i).getNode() == LD) {
+ for (const SDValue &ChainOp : Chain->op_values())
+ if (ChainOp.getNode() == LD) {
isOk = true;
break;
}
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
- if (ByteShift)
- IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
- DAG.getConstant(ByteShift*8,
+ if (ByteShift) {
+ SDLoc DL(IVal);
+ IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
+ DAG.getConstant(ByteShift*8, DL,
DC->getShiftAmountTy(IVal.getValueType())));
+ }
// Figure out the offset for the store and the alignment of the access.
unsigned StOffset;
unsigned NewAlign = St->getAlignment();
- if (DAG.getTargetLoweringInfo().isLittleEndian())
+ if (DAG.getDataLayout().isLittleEndian())
StOffset = ByteShift;
else
StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
SDValue Ptr = St->getBasePtr();
if (StOffset) {
- Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
- Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+ SDLoc DL(IVal);
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
+ Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
NewAlign = MinAlign(NewAlign, StOffset);
}
uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
- if (TLI.isBigEndian())
+ if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
+ if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
return SDValue();
SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
Ptr.getValueType(), Ptr,
- DAG.getConstant(PtrOff, Ptr.getValueType()));
+ DAG.getConstant(PtrOff, SDLoc(LD),
+ Ptr.getValueType()));
SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isInvariant(), NewAlign,
LD->getAAInfo());
SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
- DAG.getConstant(NewImm, NewVT));
+ DAG.getConstant(NewImm, SDLoc(Value),
+ NewVT));
SDValue NewST = DAG.getStore(Chain, SDLoc(N),
NewVal, NewPtr,
ST->getPointerInfo().getWithOffset(PtrOff),
unsigned LDAlign = LD->getAlignment();
unsigned STAlign = ST->getAlignment();
Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
+ unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
if (LDAlign < ABIAlign || STAlign < ABIAlign)
return SDValue();
};
} // namespace
+SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
+ SDLoc SL,
+ ArrayRef<MemOpLink> Stores,
+ EVT Ty) const {
+ SmallVector<SDValue, 8> BuildVector;
+
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I)
+ BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue());
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
+}
+
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
unsigned NumElem, bool IsConstantSrc, bool UseVector) {
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned EarliestNodeUsed = 0;
+ unsigned LatestNodeUsed = 0;
for (unsigned i=0; i < NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
}
- // The earliest Node in the DAG.
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ // The latest Node in the DAG.
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
if (IsConstantSrc) {
- // A vector store with a constant source implies that the constant is
- // zero; we only handle merging stores of constant zeros because the zero
- // can be materialized without a load.
- // It may be beneficial to loosen this restriction to allow non-zero
- // store merging.
- StoredVal = DAG.getConstant(0, Ty);
+ StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
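+    // E.g. (illustrative): four consecutive i32 constant stores become one
+    // v4i32 store of (build_vector c0, c1, c2, c3); the constants no longer
+    // have to be zero, unlike the code this replaces.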
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElem ; ++i) {
// elements, so this path implies a store of constants.
assert(IsConstantSrc && "Merged vector elements should use vector store");
- unsigned StoreBW = NumElem * ElementSizeBytes * 8;
- APInt StoreInt(StoreBW, 0);
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ APInt StoreInt(SizeInBits, 0);
// Construct a single integer constant which is made of the smaller
// constant inputs.
- bool IsLE = TLI.isLittleEndian();
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
for (unsigned i = 0; i < NumElem ; ++i) {
unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
- StoreInt <<= ElementSizeBytes*8;
+ StoreInt <<= ElementSizeBytes * 8;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt |= C->getAPIntValue().zext(StoreBW);
+ StoreInt |= C->getAPIntValue().zext(SizeInBits);
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+ StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
} else {
llvm_unreachable("Invalid constant element type");
}
}
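+    // (Illustrative: on a little-endian target, i16 stores of 0xAAAA then
+    // 0xBBBB pack into the i32 constant 0xBBBBAAAA, preserving the byte
+    // layout in memory.)
+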
// Create the new Load and Store operations.
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- StoredVal = DAG.getConstant(StoreInt, StoreTy);
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
+ SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
false, false,
FirstInChain->getAlignment());
- // Replace the first store with the new store
- CombineTo(EarliestOp, NewStore);
+ // Replace the last store with the new store
+ CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
- if (StoreNodes[i].MemNode == EarliestOp)
+ if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
// ReplaceAllUsesWith will replace all uses that existed when it was
return true;
}
-bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
- if (OptLevel == CodeGenOpt::None)
- return false;
-
- EVT MemVT = St->getMemoryVT();
- int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
- bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
- Attribute::NoImplicitFloat);
-
- // Don't merge vectors into wider inputs.
- if (MemVT.isVector() || !MemVT.isSimple())
- return false;
-
- // Perform an early exit check. Do not bother looking at stored values that
- // are not constants, loads, or extracted vector elements.
- SDValue StoredVal = St->getValue();
- bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
- bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
- isa<ConstantFPSDNode>(StoredVal);
- bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
-
- if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
- return false;
+static bool allowableAlignment(const SelectionDAG &DAG,
+ const TargetLowering &TLI, EVT EVTTy,
+ unsigned AS, unsigned Align) {
+ if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align))
+ return true;
- // Only look at ends of store sequences.
- SDValue Chain = SDValue(St, 0);
- if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
- return false;
+ Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
+  unsigned PrefAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty);
+  return (Align >= PrefAlignment);
+}
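+
+// (Illustrative: allowableAlignment lets a merged v4i32 store whose first
+// store is only align 4 through only if the target reports misaligned
+// v4i32 accesses as supported for that address space; otherwise the
+// alignment must reach the type's preferred alignment queried above.)
+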
+void DAGCombiner::getStoreMergeAndAliasCandidates(
+ StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
+ SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
// We must have a base and an offset.
if (!BasePtr.Base.getNode())
- return false;
+ return;
// Do not handle stores to undef base pointers.
if (BasePtr.Base.getOpcode() == ISD::UNDEF)
- return false;
-
- // Save the LoadSDNodes that we find in the chain.
- // We need to make sure that these nodes do not interfere with
- // any of the store nodes.
- SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
-
- // Save the StoreSDNodes that we find in the chain.
- SmallVector<MemOpLink, 8> StoreNodes;
+ return;
// Walk up the chain and look for nodes with offsets from the same
// base pointer. Stop when reaching an instruction with a different kind
// or instruction which has a different base pointer.
+ EVT MemVT = St->getMemoryVT();
unsigned Seq = 0;
StoreSDNode *Index = St;
while (Index) {
if (!Ptr.equalBaseIndex(BasePtr))
break;
- // Check that the alignment is the same.
- if (Index->getAlignment() != St->getAlignment())
- break;
-
// The memory operands must not be volatile.
if (Index->isVolatile() || Index->isIndexed())
break;
if (Index->getMemoryVT() != MemVT)
break;
- // We do not allow unaligned stores because we want to prevent overriding
- // stores.
- if (Index->getAlignment()*8 != MemVT.getSizeInBits())
- break;
-
// We found a potential memory operand to merge.
StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
}
}
}
+}
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ EVT MemVT = St->getMemoryVT();
+ int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ // This function cannot currently deal with non-byte-sized memory sizes.
+ if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+ return false;
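+  // (E.g. an i1 store: its bit width truncates to 0 bytes, 0 * 8 != 1,
+  // so we bail out.)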
+
+ // Don't merge vectors into wider inputs.
+ if (MemVT.isVector() || !MemVT.isSimple())
+ return false;
+
+ // Perform an early exit check. Do not bother looking at stored values that
+ // are not constants, loads, or extracted vector elements.
+ SDValue StoredVal = St->getValue();
+ bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+ bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+ isa<ConstantFPSDNode>(StoredVal);
+ bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+
+ if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
+ return false;
+
+ // Only look at ends of store sequences.
+ SDValue Chain = SDValue(St, 0);
+ if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+ return false;
+
+ // Save the LoadSDNodes that we find in the chain.
+ // We need to make sure that these nodes do not interfere with
+ // any of the store nodes.
+ SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
+
+ // Save the StoreSDNodes that we find in the chain.
+ SmallVector<MemOpLink, 8> StoreNodes;
+
+ getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
+
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
// The node with the lowest store address.
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
// Store the constants into memory as one consecutive store.
if (IsConstantSrc) {
}
// Find a legal type for the constant store.
- unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- if (TLI.isTypeLegal(StoreTy))
+ unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
+ FirstStoreAlign)) {
LastLegalType = i+1;
// Or check whether a truncstore is legal.
- else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
- TargetLowering::TypePromoteInteger) {
+ } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValueTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
- LastLegalType = i+1;
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
+ FirstStoreAlign)) {
+ LastLegalType = i + 1;
+ }
}
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(Ty))
+ if (TLI.isTypeLegal(Ty) &&
+ allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) {
LastLegalVectorType = i + 1;
+ }
}
- // We only use vectors if the constant is known to be zero and the
- // function is not marked with the noimplicitfloat attribute.
- if (NonZero || NoVectors)
+
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if (NoVectors) {
+ LastLegalVectorType = 0;
+ } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT,
+ LastLegalVectorType,
+ FirstStoreAS)) {
LastLegalVectorType = 0;
+ }
// Check if we found a legal integer type to store.
if (LastLegalType == 0 && LastLegalVectorType == 0)
// Find a legal type for the vector store.
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(Ty))
+ if (TLI.isTypeLegal(Ty) &&
+ allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign))
NumElem = i + 1;
}
if (!Ld->hasNUsesOfValue(1, 0))
break;
- // Check that the alignment is the same as the stores.
- if (Ld->getAlignment() != St->getAlignment())
- break;
-
// The memory operands must not be volatile.
if (Ld->isVolatile() || Ld->isIndexed())
break;
St->getAlignment() >= RequiredAlignment)
return false;
+ LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
+ unsigned FirstLoadAS = FirstLoad->getAddressSpace();
+ unsigned FirstLoadAlign = FirstLoad->getAlignment();
+
// Scan the memory operations on the chain and find the first non-consecutive
// load memory address. These variables hold the index in the store node
// array.
unsigned LastLegalVectorType = 0;
unsigned LastLegalIntegerType = 0;
StartAddress = LoadNodes[0].OffsetFromBase;
- SDValue FirstChain = LoadNodes[0].MemNode->getChain();
+ SDValue FirstChain = FirstLoad->getChain();
for (unsigned i = 1; i < LoadNodes.size(); ++i) {
    // All loads must share the same chain.
if (LoadNodes[i].MemNode->getChain() != FirstChain)
// Find a legal type for the vector store.
EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
- if (TLI.isTypeLegal(StoreTy))
+ if (TLI.isTypeLegal(StoreTy) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) {
LastLegalVectorType = i + 1;
+ }
// Find a legal type for the integer store.
- unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
- StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
- if (TLI.isTypeLegal(StoreTy))
+ unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
+ StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ if (TLI.isTypeLegal(StoreTy) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
+ allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
LastLegalIntegerType = i + 1;
// Or check whether a truncstore and extload is legal.
else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy))
+ TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
+ allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
+ FirstStoreAlign) &&
+ allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS,
+ FirstLoadAlign))
LastLegalIntegerType = i+1;
}
}
if (NumElem < 2)
return false;
- // The earliest Node in the DAG.
- unsigned EarliestNodeUsed = 0;
- LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
+ // The latest Node in the DAG.
+ unsigned LatestNodeUsed = 0;
for (unsigned i=1; i<NumElem; ++i) {
// Find a chain for the new wide-store operand. Notice that some
// of the store nodes that we found may not be selected for inclusion
// in the wide store. The chain we use needs to be the chain of the
- // earliest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
- EarliestNodeUsed = i;
+ // latest store node which is *used* and replaced by the wide store.
+ if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
+ LatestNodeUsed = i;
}
+ LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
+
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
} else {
- unsigned StoreBW = NumElem * ElementSizeBytes * 8;
- JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+ unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+ JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
}
SDLoc LoadDL(LoadNodes[0].MemNode);
SDLoc StoreDL(StoreNodes[0].MemNode);
- LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
- FirstLoad->getChain(),
- FirstLoad->getBasePtr(),
- FirstLoad->getPointerInfo(),
- false, false, false,
- FirstLoad->getAlignment());
-
- SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad,
- FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), false, false,
- FirstInChain->getAlignment());
+ SDValue NewLoad = DAG.getLoad(
+ JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
+
+ SDValue NewStore = DAG.getStore(
+ LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
// Replace one of the loads with the new load.
LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
}
- // Replace the first store with the new store.
- CombineTo(EarliestOp, NewStore);
+ // Replace the last store with the new store.
+ CombineTo(LatestOp, NewStore);
// Erase all other stores.
for (unsigned i = 0; i < NumElem ; ++i) {
// Remove all Store nodes.
- if (StoreNodes[i].MemNode == EarliestOp)
+ if (StoreNodes[i].MemNode == LatestOp)
continue;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
- unsigned Align = TLI.getDataLayout()->
- getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
+ unsigned Align = DAG.getDataLayout().getABITypeAlignment(
+ SVT.getTypeForEVT(*DAG.getContext()));
if (Align <= OrigAlign &&
((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
case MVT::f32:
if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
- bitcastToAPInt().getZExtValue(), MVT::i32);
+ bitcastToAPInt().getZExtValue(), SDLoc(CFP),
+ MVT::i32);
return DAG.getStore(Chain, SDLoc(N), Tmp,
Ptr, ST->getMemOperand());
}
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
!ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
- getZExtValue(), MVT::i64);
+ getZExtValue(), SDLoc(CFP), MVT::i64);
return DAG.getStore(Chain, SDLoc(N), Tmp,
Ptr, ST->getMemOperand());
}
// argument passing. Since this is so common, custom legalize the
// 64-bit integer store into two 32-bit stores.
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
- SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
- if (TLI.isBigEndian()) std::swap(Lo, Hi);
+ SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
+ SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
AAMDNodes AAInfo = ST->getAAInfo();
+ SDLoc DL(N);
+
SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
ST->getAlignment(), AAInfo);
- Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
- DAG.getConstant(4, Ptr.getValueType()));
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4, DL, Ptr.getValueType()));
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
Ptr, ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal,
Alignment, AAInfo);
- return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
St0, St1);
}
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
unsigned Align = OriginalLoad->getAlignment();
- unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
VecEltVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
SDValue Offset;
EVT PtrType = NewPtr.getValueType();
MachinePointerInfo MPI;
+ SDLoc DL(EVE);
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
- if (TLI.isBigEndian())
- PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
- Offset = DAG.getConstant(PtrOff, PtrType);
+ Offset = DAG.getConstant(PtrOff, DL, PtrType);
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
} else {
+ Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
Offset = DAG.getNode(
- ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
- DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
- if (TLI.isBigEndian())
- Offset = DAG.getNode(
- ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
- DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
+ ISD::MUL, DL, PtrType, Offset,
+ DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
MPI = OriginalLoad->getPointerInfo();
}
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+ NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
// The replacement we need to do here is a little tricky: we need to
// replace an extractelement of a load with a load.
// scalar_to_vector here as well.
if (!LegalOperations) {
- EVT IndexTy = TLI.getVectorIdxTy();
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
- SVInVec, DAG.getConstant(OrigElt, IndexTy));
+ EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
+ DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
}
}
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
- EltNo = DAG.getConstant(Elt, EltNo.getValueType());
+ EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
}
}
if (!ValidTypes)
return SDValue();
- bool isLE = TLI.isLittleEndian();
+ bool isLE = DAG.getDataLayout().isLittleEndian();
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
- DAG.getConstant(0, SourceType);
+ DAG.getConstant(0, SDLoc(N), SourceType);
unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
if (Op.getOpcode() == ISD::UNDEF) continue;
// See if we can combine this build_vector into a blend with a zero vector.
- if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op.getNode())->isNullValue()) ||
- (Op.getOpcode() == ISD::ConstantFP &&
- cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
+ if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
UsesZeroVector = true;
continue;
}
// Try to replace VecIn1 with two extract_subvectors
// No need to update the masks, they should still be correct.
- VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
- VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
+ VecIn2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(VT.getVectorNumElements(), dl,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ VecIn1 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
} else
return SDValue();
}
if (UsesZeroVector)
- VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) :
- DAG.getConstantFP(0.0, VT);
+ VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
+ DAG.getConstantFP(0.0, dl, VT);
else
// If VecIn2 is unused then change it to undef.
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
return SDValue();
}
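+/// Fold a CONCAT_VECTORS whose operands are all bitcasts from scalars (or
+/// undef) into a BUILD_VECTOR of those scalars, bitcast to the result type.
+/// e.g., assuming v2i32 is not a legal type on the target:
+///   concat_vectors (v2i32 (bitcast i64:a)), (v2i32 (bitcast i64:b))
+///     -> (v4i32 (bitcast (v2i64 build_vector i64:a, i64:b)))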
+static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OpVT = N->getOperand(0).getValueType();
+
+ // If the operands are legal vectors, leave them alone.
+ if (TLI.isTypeLegal(OpVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SmallVector<SDValue, 8> Ops;
+
+ EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
+ SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+
+ // Keep track of what we encounter.
+ bool AnyInteger = false;
+ bool AnyFP = false;
+ for (const SDValue &Op : N->ops()) {
+ if (ISD::BITCAST == Op.getOpcode() &&
+ !Op.getOperand(0).getValueType().isVector())
+ Ops.push_back(Op.getOperand(0));
+ else if (ISD::UNDEF == Op.getOpcode())
+ Ops.push_back(ScalarUndef);
+ else
+ return SDValue();
+
+ // Note whether we encounter an integer or floating point scalar.
+ // If it's neither, bail out; it could be something weird like x86mmx.
+ EVT LastOpVT = Ops.back().getValueType();
+ if (LastOpVT.isFloatingPoint())
+ AnyFP = true;
+ else if (LastOpVT.isInteger())
+ AnyInteger = true;
+ else
+ return SDValue();
+ }
+
+ // If any of the operands is a floating point scalar bitcast to a vector,
+ // use floating point types throughout, and bitcast everything.
+ // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
+ if (AnyFP) {
+ SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
+ ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
+ if (AnyInteger) {
+ for (SDValue &Op : Ops) {
+ if (Op.getValueType() == SVT)
+ continue;
+ if (Op.getOpcode() == ISD::UNDEF)
+ Op = ScalarUndef;
+ else
+ Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
+ }
+ }
+ }
+
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
+ VT.getSizeInBits() / SVT.getSizeInBits());
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
+}
+
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
// EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
- // Optimize concat_vectors where one of the vectors is undef.
- if (N->getNumOperands() == 2 &&
- N->getOperand(1)->getOpcode() == ISD::UNDEF) {
+ // Optimize concat_vectors where all but the first of the vectors are undef.
+ if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
+ return Op.getOpcode() == ISD::UNDEF;
+ })) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
if (In->getOpcode() == ISD::BITCAST &&
!In->getOperand(0)->getValueType(0).isVector()) {
SDValue Scalar = In->getOperand(0);
+
+ // If the bitcast type isn't legal, it might be a trunc of a legal type;
+ // look through the trunc so we can still do the transform:
+ // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
+ if (Scalar->getOpcode() == ISD::TRUNCATE &&
+ !TLI.isTypeLegal(Scalar.getValueType()) &&
+ TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
+ Scalar = Scalar->getOperand(0);
+
EVT SclTy = Scalar->getValueType(0);
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
+ // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ if (SDValue V = combineConcatVectorOfScalars(N, DAG))
+ return V;
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for CONCAT operations that
// type.
if (V->getOperand(0).getValueType() != NVT)
return SDValue();
- unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
"IDX in concat is not a multiple of the result vector length.");
return SDValue();
}
+SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (fp_to_fp16 (fp16_to_fp op)) -> op
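+ // This is exact: fp16_to_fp widens without loss, so truncating straight
+ // back to fp16 recovers the original operand.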
+ if (N0->getOpcode() == ISD::FP16_TO_FP)
+ return N0->getOperand(0);
+
+ return SDValue();
+}
+
/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
SDValue RHS = N->getOperand(1);
SDLoc dl(N);
// Make sure we're not running after operation legalization where it
// may have custom lowered the vector shuffles.
if (LegalOperations)
return SDValue();
for (unsigned i = 0; i != NumElts; ++i) {
SDValue Elt = RHS.getOperand(i);
- if (!isa<ConstantSDNode>(Elt))
- return SDValue();
-
- if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+ if (isAllOnesConstant(Elt))
Indices.push_back(i);
- else if (cast<ConstantSDNode>(Elt)->isNullValue())
+ else if (isNullConstant(Elt))
Indices.push_back(NumElts+i);
else
return SDValue();
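// A shuffle index i selects lane i of LHS (mask element all-ones), while
// NumElts+i selects lane i of the all-zero vector built below (mask element
// zero), so the shuffle reproduces the AND with the constant mask.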
// Return the new VECTOR_SHUFFLE node.
EVT EltVT = RVT.getVectorElementType();
SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
- DAG.getConstant(0, EltVT));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
+ DAG.getConstant(0, dl, EltVT));
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps);
LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
// Can't fold divide by zero.
if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
N->getOpcode() == ISD::FDIV) {
- if ((RHSOp.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
- (RHSOp.getOpcode() == ISD::ConstantFP &&
- cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+ if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
+ cast<ConstantFPSDNode>(RHSOp.getNode())->isZero()))
break;
}
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
+ // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ // The select + setcc is redundant, because fsqrt returns NaN for X < -0.
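+ // e.g. (x < -0.0 ? NaN : sqrt(x)) collapses to plain sqrt(x).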
+ if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
+ if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
+ // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
+ SDValue Sqrt = RHS;
+ ISD::CondCode CC;
+ SDValue CmpLHS;
+ const ConstantFPSDNode *NegZero = nullptr;
+
+ if (TheSelect->getOpcode() == ISD::SELECT_CC) {
+ CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
+ CmpLHS = TheSelect->getOperand(0);
+ NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
+ } else {
+ // SELECT or VSELECT
+ SDValue Cmp = TheSelect->getOperand(0);
+ if (Cmp.getOpcode() == ISD::SETCC) {
+ CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
+ CmpLHS = Cmp.getOperand(0);
+ NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
+ }
+ }
+ if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
+ Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
+ CC == ISD::SETULT || CC == ISD::SETLT)) {
+ // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
+ CombineTo(TheSelect, Sqrt);
+ return true;
+ }
+ }
+ }
// Cannot simplify select with vector condition
if (TheSelect->getOperand(0).getValueType().isVector()) return false;
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
LLD->isVolatile() || RLD->isVolatile() ||
+ // FIXME: If either is a pre/post inc/dec load,
+ // we'd need to split out the address adjustment.
+ LLD->isIndexed() || RLD->isIndexed() ||
// If this is an EXTLOAD, the VT's must match.
LLD->getMemoryVT() != RLD->getMemoryVT() ||
// If this is an EXTLOAD, the kind of extension must match.
EVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
- ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
// Determine if the condition we're dealing with is constant
SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
N0, N1, CC, DL, false);
if (SCC.getNode()) AddToWorklist(SCC.getNode());
- ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
- // fold select_cc true, x, y -> x
- if (SCCC && !SCCC->isNullValue())
- return N2;
- // fold select_cc false, x, y -> y
- if (SCCC && SCCC->isNullValue())
- return N3;
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ // fold select_cc true, x, y -> x
+ // fold select_cc false, x, y -> y
+ return !SCCC->isNullValue() ? N2 : N3;
+ }
// Check to see if we can simplify the select into an fabs node
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
// Allow either -0.0 or 0.0
- if (CFP->getValueAPF().isZero()) {
+ if (CFP->isZero()) {
// select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
N0 == N2 && N3.getOpcode() == ISD::FNEG &&
const_cast<ConstantFP*>(TV->getConstantFPValue())
};
Type *FPTy = Elts[0]->getType();
- const DataLayout &TD = *TLI.getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
- SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
- TD.getPrefTypeAlignment(FPTy));
+ SDValue CPIdx =
+ DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
+ TD.getPrefTypeAlignment(FPTy));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
// Get the offsets to elements 0 and 1 of the array so that we can
// select between them.
- SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
- SDValue One = DAG.getIntPtrConstant(EltSize);
+ SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
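// The condition below then selects between these two byte offsets, the
// chosen offset is added to CPIdx, and the result is loaded from the
// constant pool.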
SDValue Cond = DAG.getSetCC(DL,
getSetCCResultType(N0.getValueType()),
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
false, false, Alignment);
-
}
}
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
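// On i32, (sra X, 31) is all-ones exactly when X < 0 and zero otherwise,
// so the AND yields A or 0 without a branch.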
- if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
- (N1C->isNullValue() || // (a < 0) ? b : 0
- (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
+ if (isNullConstant(N3) && CC == ISD::SETLT &&
+ (isNullConstant(N1) || // (a < 0) ? b : 0
+ (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0
EVT XType = N0.getValueType();
EVT AType = N2.getValueType();
if (XType.bitsGE(AType)) {
// (and (sra X, size(X)-1), A) -> (and (srl X, C2), A) iff A is a
// single-bit constant.
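// e.g. i32 with A == 4 (bit 2): ShCtV = 32 - 2 - 1 = 29, and
// (and (srl X, 29), 4) drops the sign bit of X straight into bit 2.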
- if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
unsigned ShCtV = N2C->getAPIntValue().logBase2();
- ShCtV = XType.getSizeInBits()-ShCtV-1;
- SDValue ShCt = DAG.getConstant(ShCtV,
+ ShCtV = XType.getSizeInBits() - ShCtV - 1;
+ SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
getShiftAmountTy(N0.getValueType()));
SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
XType, N0, ShCt);
SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
XType, N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1,
+ SDLoc(N0),
getShiftAmountTy(N0.getValueType())));
AddToWorklist(Shift.getNode());
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
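// e.g. i32 mask 0x10: shl by countLeadingZeros(0x10) == 27 moves bit 4 into
// the sign bit, and sra by 31 then smears it, giving all-ones iff it was set.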
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
- N0->getValueType(0) == VT &&
- N1C && N1C->isNullValue() &&
- N2C && N2C->isNullValue()) {
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
// Shift the tested bit over the sign bit.
APInt AndMask = ConstAndRHS->getAPIntValue();
SDValue ShlAmt =
- DAG.getConstant(AndMask.countLeadingZeros(),
+ DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
// Now arithmetic right shift it all the way over, so the result is either
// all-ones, or zero.
SDValue ShrAmt =
- DAG.getConstant(AndMask.getBitWidth()-1,
+ DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
getShiftAmountTy(Shl.getValueType()));
SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
}
// fold select C, 16, 0 -> shl C, 4
- if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+ if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
TLI.getBooleanContents(N0.getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent) {
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
- if (NotExtCompare && N2C->getAPIntValue() == 1)
+ if (NotExtCompare && N2C->isOne())
return SDValue();
// Get a SetCC of the condition
AddToWorklist(SCC.getNode());
AddToWorklist(Temp.getNode());
- if (N2C->getAPIntValue() == 1)
+ if (N2C->isOne())
return Temp;
// shl setcc result by log2 n2c
return DAG.getNode(
ISD::SHL, DL, N2.getValueType(), Temp,
- DAG.getConstant(N2C->getAPIntValue().logBase2(),
+ DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
getShiftAmountTy(Temp.getValueType())));
}
}
// Check to see if this is the equivalent of setcc
// FIXME: Turn all of these into setcc if setcc is legal
// otherwise, go ahead with the folds.
- if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+ if (0 && isNullConstant(N3) && isOneConstant(N2)) {
EVT XType = N0.getValueType();
if (!LegalOperations ||
TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) {
}
// fold (seteq X, 0) -> (srl (ctlz X), log2(size(X)))
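// ctlz(X) == size(X) iff X == 0, and only then is the log2(size(X)) bit of
// the count set, so the srl yields 1 exactly when X == 0.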
- if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+ if (isNullConstant(N1) && CC == ISD::SETEQ &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::CTLZ, XType))) {
SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
DAG.getConstant(Log2_32(XType.getSizeInBits()),
+ SDLoc(Ctlz),
getShiftAmountTy(Ctlz.getValueType())));
}
// fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
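// The sign bit of ((-X) & ~X) is set iff X > 0: -X clears it when X == 0
// and ~X clears it when X < 0, so the final srl extracts exactly (X > 0).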
- if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
- SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0),
- XType, DAG.getConstant(0, XType), N0);
- SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType);
+ if (isNullConstant(N1) && CC == ISD::SETGT) {
+ SDLoc DL(N0);
+ SDValue NegN0 = DAG.getNode(ISD::SUB, DL,
+ XType, DAG.getConstant(0, DL, XType), N0);
+ SDValue NotN0 = DAG.getNOT(DL, N0, XType);
return DAG.getNode(ISD::SRL, DL, XType,
DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(XType)));
}
// fold (setgt X, -1) -> (xor (srl X, size(X)-1), 1)
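// X > -1 just tests that the sign bit of X is clear, so shift the sign bit
// down to bit 0 and invert it.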
- if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
- SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ if (isAllOnesConstant(N1) && CC == ISD::SETGT) {
+ SDLoc DL(N0);
+ SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
- return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+ return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL,
+ XType));
}
}
EVT XType = N0.getValueType();
if (SubC && SubC->isNullValue() && XType.isInteger()) {
- SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType,
+ SDLoc DL(N0);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
N0,
- DAG.getConstant(XType.getSizeInBits()-1,
+ DAG.getConstant(XType.getSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
- SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
+ SDValue Add = DAG.getNode(ISD::ADD, DL,
XType, N0, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
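// Shift is the sign mask of N0 (all-ones iff N0 < 0); the identity
// abs(X) == (X + mask) ^ mask is the classic branchless abs sequence this
// builds toward.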
return SDValue();
// Avoid division by zero.
- if (!C->getAPIntValue())
+ if (C->isNullValue())
return SDValue();
std::vector<SDNode*> Built;
return SDValue();
// Avoid division by zero.
- if (!C->getAPIntValue())
+ if (C->isNullValue())
return SDValue();
std::vector<SDNode *> Built;
return SDValue();
// Avoid division by zero.
- if (!C->getAPIntValue())
+ if (C->isNullValue())
return SDValue();
std::vector<SDNode*> Built;
// does not require additional intermediate precision]
EVT VT = Op.getValueType();
SDLoc DL(Op);
- SDValue FPOne = DAG.getConstantFP(1.0, VT);
+ SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
AddToWorklist(Est.getNode());
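// Each Newton step refines Est via Est' = Est + Est * (1 - Arg * Est),
// roughly doubling the number of correct bits; FPOne feeds the
// (1 - Arg * Est) term.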
unsigned Iterations) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
- SDValue ThreeHalves = DAG.getConstantFP(1.5, VT);
+ SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
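// The refinement computed is Est' = Est * (1.5 - (0.5 * Arg) * Est * Est),
// the standard Newton step for 1/sqrt(Arg).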
unsigned Iterations) {
EVT VT = Arg.getValueType();
SDLoc DL(Arg);
- SDValue MinusThree = DAG.getConstantFP(-3.0, VT);
- SDValue MinusHalf = DAG.getConstantFP(-0.5, VT);
+ SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
+ SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
// Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
// If they are both volatile then they cannot be reordered.
if (Op0->isVolatile() && Op1->isVolatile()) return true;
+ // If one operation reads from invariant memory and the other may store, they
+ // cannot alias. This should really check the equivalent of mayWrite, but it
+ // only matters for memory nodes other than load/store.
+ if (Op0->isInvariant() && Op1->writeMem())
+ return false;
+
+ if (Op1->isInvariant() && Op0->writeMem())
+ return false;
+
// Gather base node and offset information.
SDValue Base1, Base2;
int64_t Offset1, Offset2;
Op0->getSrcValueOffset() - MinOffset;
int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
Op1->getSrcValueOffset() - MinOffset;
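// e.g. two 8-byte accesses at offsets 8 and 12 share MinOffset == 8, so the
// queried sizes become 8 and 12 bytes measured from the common minimum
// offset, conservatively covering both accesses.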
- AliasAnalysis::AliasResult AAResult =
- AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(),
- Overlap1,
- UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
- AliasAnalysis::Location(Op1->getMemOperand()->getValue(),
- Overlap2,
- UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
- if (AAResult == AliasAnalysis::NoAlias)
+ AliasResult AAResult =
+ AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
+ UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
+ MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
+ UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
+ if (AAResult == NoAlias)
return false;
}
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
- SDValue Chain = Chains.back();
- Chains.pop_back();
+ SDValue Chain = Chains.pop_back_val();
// For TokenFactor nodes, look at each operand and only continue up the
// chain until we find two aliases. If we've seen two aliases, assume we'll
UIE = M->use_end(); UI != UIE; ++UI)
if (UI.getUse().getValueType() == MVT::Other &&
Visited.insert(*UI).second) {
- if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
+ if (isa<MemSDNode>(*UI)) {
// We've not visited this use, and we care about it (it could have an
// ordering dependency with the original node).
Aliases.clear();