move combineRepeatedFPDivisors logic into a helper function; NFCI

[oota-llvm.git] / lib / CodeGen / SelectionDAG / DAGCombiner.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 20d78b87154ee4a367d21a612746ff06e895e4bc..b06d53310bbb67ef4ea67dc07ba6048a9b68e461 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -255,6 +255,7 @@ namespace {
      SDValue visitSRA(SDNode *N);
      SDValue visitSRL(SDNode *N);
      SDValue visitRotate(SDNode *N);
+    SDValue visitBSWAP(SDNode *N);
      SDValue visitCTLZ(SDNode *N);
      SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
      SDValue visitCTTZ(SDNode *N);
@@ -268,6 +269,7 @@ namespace {
      SDValue visitZERO_EXTEND(SDNode *N);
      SDValue visitANY_EXTEND(SDNode *N);
      SDValue visitSIGN_EXTEND_INREG(SDNode *N);
+    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
      SDValue visitTRUNCATE(SDNode *N);
      SDValue visitBITCAST(SDNode *N);
      SDValue visitBUILD_PAIR(SDNode *N);
@@ -336,6 +338,7 @@ namespace {
                                           unsigned HiOp);
      SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
      SDValue CombineExtLoad(SDNode *N);
+    SDValue combineRepeatedFPDivisors(SDNode *N);
      SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
      SDValue BuildSDIV(SDNode *N);
      SDValue BuildSDIVPow2(SDNode *N);
@@ -386,6 +389,13 @@ namespace {
        unsigned SequenceNum;
      };
  
+    /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
+    /// constant build_vector of the stored constant values in Stores.
+    SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
+                                         SDLoc SL,
+                                         ArrayRef<MemOpLink> Stores,
+                                         EVT Ty) const;
+
      /// This is a helper function for MergeConsecutiveStores. When the source
      /// elements of the consecutive stores are all constants or all extracted
      /// vector elements, try to merge them into one larger store.
@@ -394,6 +404,13 @@ namespace {
                                           EVT MemVT, unsigned NumElem,
                                           bool IsConstantSrc, bool UseVector);
  
+    /// This is a helper function for MergeConsecutiveStores.
+    /// Stores that may be merged are placed in StoreNodes.
+    /// Loads that may alias with those stores are placed in AliasLoadNodes.
+    void getStoreMergeAndAliasCandidates(
+        StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
+        SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
+    
      /// Merge consecutive store operations into a wide store.
      /// This optimization uses wide integers or vectors when possible.
      /// \return True if some memory operations were changed.
@@ -427,8 +444,9 @@ namespace {
        assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
        if (LHSTy.isVector())
          return LHSTy;
-      return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
-                        : TLI.getPointerTy();
+      auto &DL = DAG.getDataLayout();
+      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
+                        : TLI.getPointerTy(DL);
      }
  
      /// This method returns true if we are running before type legalization or
@@ -440,7 +458,7 @@ namespace {
  
      /// Convenience wrapper around TargetLowering::getSetCCResultType
      EVT getSetCCResultType(EVT VT) const {
-      return TLI.getSetCCResultType(*DAG.getContext(), VT);
+      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
      }
    };
  }
@@ -618,7 +636,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
  
      // fold (fneg (fsub 0, B)) -> B
      if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
-      if (N0CFP->getValueAPF().isZero())
+      if (N0CFP->isZero())
          return Op.getOperand(1);
  
      // fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -1176,8 +1194,8 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
        continue;
  
      if (N->use_empty()) {
-      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-        Nodes.insert(N->getOperand(i).getNode());
+      for (const SDValue &ChildN : N->op_values())
+        Nodes.insert(ChildN.getNode());
  
        removeFromWorklist(N);
        DAG.DeleteNode(N);
@@ -1199,9 +1217,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
    LegalTypes = Level >= AfterLegalizeTypes;
  
    // Add all the dag nodes to the worklist.
-  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
-       E = DAG.allnodes_end(); I != E; ++I)
-    AddToWorklist(I);
+  for (SDNode &Node : DAG.allnodes())
+    AddToWorklist(&Node);
  
    // Create a dummy node (which is not added to allnodes), that adds a reference
    // to the root node, preventing it from being deleted, and tracking any
@@ -1250,9 +1267,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
      // worklist as well. Because the worklist uniques things already, this
      // won't repeatedly process the same operand.
      CombinedNodes.insert(N);
-    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-      if (!CombinedNodes.count(N->getOperand(i).getNode()))
-        AddToWorklist(N->getOperand(i).getNode());
+    for (const SDValue &ChildN : N->op_values())
+      if (!CombinedNodes.count(ChildN.getNode()))
+        AddToWorklist(ChildN.getNode());
  
      SDValue RV = combine(N);
  
@@ -1334,6 +1351,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
    case ISD::SRL:                return visitSRL(N);
    case ISD::ROTR:
    case ISD::ROTL:               return visitRotate(N);
+  case ISD::BSWAP:              return visitBSWAP(N);
    case ISD::CTLZ:               return visitCTLZ(N);
    case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
    case ISD::CTTZ:               return visitCTTZ(N);
@@ -1347,6 +1365,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
    case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
    case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
    case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
+  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
    case ISD::TRUNCATE:           return visitTRUNCATE(N);
    case ISD::BITCAST:            return visitBITCAST(N);
    case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
@@ -1452,12 +1471,9 @@ SDValue DAGCombiner::combine(SDNode *N) {
      if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
        SDValue Ops[] = {N1, N0};
        SDNode *CSENode;
-      if (const BinaryWithFlagsSDNode *BinNode =
-              dyn_cast<BinaryWithFlagsSDNode>(N)) {
+      if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) {
          CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
-                                      BinNode->Flags.hasNoUnsignedWrap(),
-                                      BinNode->Flags.hasNoSignedWrap(),
-                                      BinNode->Flags.hasExact());
+                                      &BinNode->Flags);
        } else {
          CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
        }
@@ -1508,8 +1524,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
      SDNode *TF = TFs[i];
  
      // Check each of the operands.
-    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
-      SDValue Op = TF->getOperand(i);
+    for (const SDValue &Op : TF->op_values()) {
  
        switch (Op.getOpcode()) {
        case ISD::EntryToken:
@@ -1585,6 +1600,11 @@ static bool isNullConstant(SDValue V) {
    return Const != nullptr && Const->isNullValue();
  }
  
+static bool isNullFPConstant(SDValue V) {
+  ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V);
+  return Const != nullptr && Const->isZero() && !Const->isNegative();
+}
+
  static bool isAllOnesConstant(SDValue V) {
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
    return Const != nullptr && Const->isAllOnesValue();
@@ -1595,6 +1615,13 @@ static bool isOneConstant(SDValue V) {
    return Const != nullptr && Const->isOne();
  }
  
+/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
+/// ConstantSDNode pointer; otherwise return nullptr.
+static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
+  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
+  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
+}
+
  SDValue DAGCombiner::visitADD(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -1618,8 +1645,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
    if (N1.getOpcode() == ISD::UNDEF)
      return N1;
    // fold (add c1, c2) -> c1+c2
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
    if (N0C && N1C)
      return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);
    // canonicalize constant to RHS
@@ -1638,7 +1665,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
                                      (uint64_t)N1C->getSExtValue());
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N1C && N0.getOpcode() == ISD::SUB)
-    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+    if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
        SDLoc DL(N);
        return DAG.getNode(ISD::SUB, DL, VT,
                           DAG.getConstant(N1C->getAPIntValue()+
@@ -1853,8 +1880,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
    if (N0 == N1)
      return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
    // fold (sub c1, c2) -> c1-c2
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
    if (N0C && N1C)
      return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
    // fold (sub x, c) -> (add x, -c)
@@ -1996,6 +2023,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
  
    bool N0IsConst = false;
    bool N1IsConst = false;
+  bool N1IsOpaqueConst = false;
+  bool N0IsOpaqueConst = false;
    APInt ConstValue0, ConstValue1;
    // fold vector ops
    if (VT.isVector()) {
@@ -2006,15 +2035,19 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
      N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
    } else {
      N0IsConst = isa<ConstantSDNode>(N0);
-    if (N0IsConst)
+    if (N0IsConst) {
        ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
+      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
+    }
      N1IsConst = isa<ConstantSDNode>(N1);
-    if (N1IsConst)
+    if (N1IsConst) {
        ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
+      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
+    }
    }
  
    // fold (mul c1, c2) -> c1*c2
-  if (N0IsConst && N1IsConst)
+  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
      return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                        N0.getNode(), N1.getNode());
  
@@ -2039,14 +2072,16 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
                         DAG.getConstant(0, DL, VT), N0);
    }
    // fold (mul x, (1 << c)) -> x << c
-  if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) {
+  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
+      IsFullSplat) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SHL, DL, VT, N0,
                         DAG.getConstant(ConstValue1.logBase2(), DL,
                                         getShiftAmountTy(N0.getValueType())));
    }
    // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
-  if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) {
+  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
+      IsFullSplat) {
      unsigned Log2Val = (-ConstValue1).logBase2();
      SDLoc DL(N);
      // FIXME: If the input is something that is easily negated (e.g. a
@@ -2124,7 +2159,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
    // fold (sdiv c1, c2) -> c1/c2
    ConstantSDNode *N0C = isConstOrConstSplat(N0);
    ConstantSDNode *N1C = isConstOrConstSplat(N1);
-  if (N0C && N1C && !N1C->isNullValue())
+  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
      return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
    // fold (sdiv X, 1) -> X
    if (N1C && N1C->isOne())
@@ -2144,16 +2179,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
    }
  
    // fold (sdiv X, pow2) -> simple ops after legalize
-  if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
-                                     (-N1C->getAPIntValue()).isPowerOf2())) {
+  // FIXME: We check for the exact bit here because the generic lowering gives
+  // better results in that case. The target-specific lowering should learn how
+  // to handle exact sdivs efficiently.
+  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+      !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
+      (N1C->getAPIntValue().isPowerOf2() ||
+       (-N1C->getAPIntValue()).isPowerOf2())) {
      // If dividing by powers of two is cheap, then don't perform the following
      // fold.
      if (TLI.isPow2SDivCheap())
        return SDValue();
  
      // Target-specific implementation of sdiv x, pow2.
-    SDValue Res = BuildSDIVPow2(N);
-    if (Res.getNode())
+    if (SDValue Res = BuildSDIVPow2(N))
        return Res;
  
      unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
@@ -2189,10 +2228,9 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
  
    // If integer divide is expensive and we satisfy the requirements, emit an
    // alternate sequence.
-  if (N1C && !TLI.isIntDivCheap()) {
-    SDValue Op = BuildSDIV(N);
-    if (Op.getNode()) return Op;
-  }
+  if (N1C && !TLI.isIntDivCheap())
+    if (SDValue Op = BuildSDIV(N))
+      return Op;
  
    // undef / X -> 0
    if (N0.getOpcode() == ISD::UNDEF)
@@ -2217,10 +2255,12 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
    // fold (udiv c1, c2) -> c1/c2
    ConstantSDNode *N0C = isConstOrConstSplat(N0);
    ConstantSDNode *N1C = isConstOrConstSplat(N1);
-  if (N0C && N1C && !N1C->isNullValue())
-    return DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, N0C, N1C);
+  if (N0C && N1C)
+    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
+                                                    N0C, N1C))
+      return Folded;
    // fold (udiv x, (1 << c)) -> x >>u c
-  if (N1C && N1C->getAPIntValue().isPowerOf2()) {
+  if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SRL, DL, VT, N0,
                         DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
@@ -2228,7 +2268,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
    }
    // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
    if (N1.getOpcode() == ISD::SHL) {
-    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
        if (SHC->getAPIntValue().isPowerOf2()) {
          EVT ADDVT = N1.getOperand(1).getValueType();
          SDLoc DL(N);
@@ -2243,10 +2283,9 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
      }
    }
    // fold (udiv x, c) -> alternate
-  if (N1C && !TLI.isIntDivCheap()) {
-    SDValue Op = BuildUDIV(N);
-    if (Op.getNode()) return Op;
-  }
+  if (N1C && !TLI.isIntDivCheap())
+    if (SDValue Op = BuildUDIV(N))
+      return Op;
  
    // undef / X -> 0
    if (N0.getOpcode() == ISD::UNDEF)
@@ -2266,8 +2305,10 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
    // fold (srem c1, c2) -> c1%c2
    ConstantSDNode *N0C = isConstOrConstSplat(N0);
    ConstantSDNode *N1C = isConstOrConstSplat(N1);
-  if (N0C && N1C && !N1C->isNullValue())
-    return DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, N0C, N1C);
+  if (N0C && N1C)
+    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
+                                                    N0C, N1C))
+      return Folded;
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (!VT.isVector()) {
@@ -2308,17 +2349,20 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
    // fold (urem c1, c2) -> c1%c2
    ConstantSDNode *N0C = isConstOrConstSplat(N0);
    ConstantSDNode *N1C = isConstOrConstSplat(N1);
-  if (N0C && N1C && !N1C->isNullValue())
-    return DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, N0C, N1C);
+  if (N0C && N1C)
+    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
+                                                    N0C, N1C))
+      return Folded;
    // fold (urem x, pow2) -> (and x, pow2-1)
-  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) {
+  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
+      N1C->getAPIntValue().isPowerOf2()) {
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT, N0,
                         DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
    }
    // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
    if (N1.getOpcode() == ISD::SHL) {
-    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
        if (SHC->getAPIntValue().isPowerOf2()) {
          SDLoc DL(N);
          SDValue Add =
@@ -2485,8 +2529,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
  }
  
  SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
-  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
-  if (Res.getNode()) return Res;
+  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
+    return Res;
  
    EVT VT = N->getValueType(0);
    SDLoc DL(N);
@@ -2516,8 +2560,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  }
  
  SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
-  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
-  if (Res.getNode()) return Res;
+  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
+    return Res;
  
    EVT VT = N->getValueType(0);
    SDLoc DL(N);
@@ -2567,15 +2611,15 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) {
  }
  
  SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
-  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
-  if (Res.getNode()) return Res;
+  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM))
+    return Res;
  
    return SDValue();
  }
  
  SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
-  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
-  if (Res.getNode()) return Res;
+  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM))
+    return Res;
  
    return SDValue();
  }
@@ -2872,9 +2916,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    }
  
    // fold (and c1, c2) -> c1&c2
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  if (N0C && N1C)
+  if (N0C && N1C && !N1C->isOpaque())
      return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
    // canonicalize constant to RHS
    if (isConstantIntBuildVectorOrConstantInt(N0) &&
@@ -3065,7 +3109,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
            // For big endian targets, we need to add an offset to the pointer
            // to load the correct bytes.  For little endian systems, we merely
            // need to read fewer bytes from the same pointer.
-          if (TLI.isBigEndian()) {
+          if (DAG.getDataLayout().isBigEndian()) {
              unsigned LVTStoreBytes = LoadedVT.getStoreSize();
              unsigned EVTStoreBytes = ExtVT.getStoreSize();
              unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
@@ -3095,10 +3139,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
      return Combined;
  
    // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
-  if (N0.getOpcode() == N1.getOpcode()) {
-    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
-    if (Tmp.getNode()) return Tmp;
-  }
+  if (N0.getOpcode() == N1.getOpcode())
+    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+      return Tmp;
  
    // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
    // fold (and (sra)) -> (and (srl)) when possible.
@@ -3468,26 +3511,29 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
    }
  
    // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
-  if (N0.getOpcode() == ISD::AND &&
-      N1.getOpcode() == ISD::AND &&
-      N0.getOperand(1).getOpcode() == ISD::Constant &&
-      N1.getOperand(1).getOpcode() == ISD::Constant &&
+  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
        // Don't increase # computations.
        (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
      // We can only do this xform if we know that bits from X that are set in C2
      // but not in C1 are already zero.  Likewise for Y.
-    const APInt &LHSMask =
-      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
-    const APInt &RHSMask =
-      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
-
-    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
-        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
-      SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
-                              N0.getOperand(0), N1.getOperand(0));
-      SDLoc DL(LocReference);
-      return DAG.getNode(ISD::AND, DL, VT, X,
-                         DAG.getConstant(LHSMask | RHSMask, DL, VT));
+    if (const ConstantSDNode *N0O1C =
+        getAsNonOpaqueConstant(N0.getOperand(1))) {
+      if (const ConstantSDNode *N1O1C =
+          getAsNonOpaqueConstant(N1.getOperand(1))) {
+        // We can only do this xform if we know that bits from X that are set in
+        // C2 but not in C1 are already zero.  Likewise for Y.
+        const APInt &LHSMask = N0O1C->getAPIntValue();
+        const APInt &RHSMask = N1O1C->getAPIntValue();
+
+        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
+                                  N0.getOperand(0), N1.getOperand(0));
+          SDLoc DL(LocReference);
+          return DAG.getNode(ISD::AND, DL, VT, X,
+                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
+        }
+      }
      }
    }
  
@@ -3593,9 +3639,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
    }
  
    // fold (or c1, c2) -> c1|c2
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  if (N0C && N1C)
+  if (N0C && N1C && !N1C->isOpaque())
      return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
    // canonicalize constant to RHS
    if (isConstantIntBuildVectorOrConstantInt(N0) &&
@@ -3615,11 +3661,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
      return Combined;
  
    // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
-  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
-  if (BSwap.getNode())
+  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
      return BSwap;
-  BSwap = MatchBSwapHWordLow(N, N0, N1);
-  if (BSwap.getNode())
+  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
      return BSwap;
  
    // reassociate or
@@ -3640,10 +3684,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
      }
    }
    // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
-  if (N0.getOpcode() == N1.getOpcode()) {
-    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
-    if (Tmp.getNode()) return Tmp;
-  }
+  if (N0.getOpcode() == N1.getOpcode())
+    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+      return Tmp;
  
    // See if this is some rotate idiom.
    if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
@@ -3937,8 +3980,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
    if (N1.getOpcode() == ISD::UNDEF)
      return N1;
    // fold (xor c1, c2) -> c1^c2
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
    if (N0C && N1C)
      return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
    // canonicalize constant to RHS
@@ -4019,15 +4062,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
    }
    // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
    if (N1C && N0.getOpcode() == ISD::XOR) {
-    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
-    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-    if (N00C) {
+    if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
        SDLoc DL(N);
        return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                           DAG.getConstant(N1C->getAPIntValue() ^
                                           N00C->getAPIntValue(), DL, VT));
      }
-    if (N01C) {
+    if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
        SDLoc DL(N);
        return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                           DAG.getConstant(N1C->getAPIntValue() ^
@@ -4064,10 +4105,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
    }
  
    // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
-  if (N0.getOpcode() == N1.getOpcode()) {
-    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
-    if (Tmp.getNode()) return Tmp;
-  }
+  if (N0.getOpcode() == N1.getOpcode())
+    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
+      return Tmp;
  
    // Simplify the expression using non-local knowledge.
    if (!VT.isVector() &&
@@ -4080,10 +4120,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
  /// Handle transforms common to the three shifts, when the shift amount is a
  /// constant.
  SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
-  // We can't and shouldn't fold opaque constants.
-  if (Amt->isOpaque())
-    return SDValue();
-
    SDNode *LHS = N->getOperand(0).getNode();
    if (!LHS->hasOneUse()) return SDValue();
  
@@ -4110,8 +4146,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
    }
  
    // We require the RHS of the binop to be a constant and not opaque as well.
-  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
-  if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();
+  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
+  if (!BinOpCst) return SDValue();
  
    // FIXME: disable this unless the input to the binop is a shift by a constant.
    // If it is not a shift, it pessimizes some common cases like:
@@ -4164,15 +4200,17 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
      SDValue N01 = N->getOperand(0).getOperand(1);
  
      if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
-      EVT TruncVT = N->getValueType(0);
-      SDValue N00 = N->getOperand(0).getOperand(0);
-      APInt TruncC = N01C->getAPIntValue();
-      TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
-      SDLoc DL(N);
+      if (!N01C->isOpaque()) {
+        EVT TruncVT = N->getValueType(0);
+        SDValue N00 = N->getOperand(0).getOperand(0);
+        APInt TruncC = N01C->getAPIntValue();
+        TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+        SDLoc DL(N);
  
-      return DAG.getNode(ISD::AND, DL, TruncVT,
-                         DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
-                         DAG.getConstant(TruncC, DL, TruncVT));
+        return DAG.getNode(ISD::AND, DL, TruncVT,
+                           DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
+                           DAG.getConstant(TruncC, DL, TruncVT));
+      }
      }
    }
  
@@ -4226,14 +4264,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
    }
  
    // fold (shl c1, c2) -> c1<<c2
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  if (N0C && N1C)
+  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+  if (N0C && N1C && !N1C->isOpaque())
      return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
    // fold (shl 0, x) -> 0
    if (isNullConstant(N0))
      return N0;
    // fold (shl x, c >= size(x)) -> undef
-  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
      return DAG.getUNDEF(VT);
    // fold (shl x, 0) -> x
    if (N1C && N1C->isNullValue())
@@ -4320,6 +4358,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
      }
    }
  
+  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
+  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
+  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
+      cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
+    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+      uint64_t C1 = N0C1->getZExtValue();
+      uint64_t C2 = N1C->getZExtValue();
+      SDLoc DL(N);
+      if (C1 <= C2)
+        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
+                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
+      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
+                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
+    }
+  }
+
    // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
    //                               (and (srl x, (sub c1, c2), MASK)
    // Only fold this if the inner shift has no other uses -- if it does, folding
@@ -4372,11 +4426,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
      return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
    }
  
-  if (N1C) {
-    SDValue NewSHL = visitShiftByConstant(N, N1C);
-    if (NewSHL.getNode())
+  if (N1C && !N1C->isOpaque())
+    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
        return NewSHL;
-  }
  
    return SDValue();
  }
@@ -4397,8 +4449,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
    }
  
    // fold (sra c1, c2) -> (sra c1, c2)
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  if (N0C && N1C)
+  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+  if (N0C && N1C && !N1C->isOpaque())
      return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
    // fold (sra 0, x) -> 0
    if (isNullConstant(N0))
@@ -4521,11 +4573,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
  
-  if (N1C) {
-    SDValue NewSRA = visitShiftByConstant(N, N1C);
-    if (NewSRA.getNode())
+  if (N1C && !N1C->isOpaque())
+    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
        return NewSRA;
-  }
  
    return SDValue();
  }
@@ -4546,8 +4596,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    }
  
    // fold (srl c1, c2) -> c1 >>u c2
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  if (N0C && N1C)
+  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+  if (N0C && N1C && !N1C->isOpaque())
      return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
    // fold (srl 0, x) -> 0
    if (isNullConstant(N0))
@@ -4692,7 +4742,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
      return SDValue(N, 0);
  
-  if (N1C) {
+  if (N1C && !N1C->isOpaque()) {
      SDValue NewSRL = visitShiftByConstant(N, N1C);
      if (NewSRL.getNode())
        return NewSRL;
@@ -4735,12 +4785,25 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    return SDValue();
  }
  
+/// Combine a BSWAP node: fold a constant operand, or cancel a nested BSWAP.
+SDValue DAGCombiner::visitBSWAP(SDNode *N) {
+  SDValue Src = N->getOperand(0);
+
+  // fold (bswap c1) -> c2
+  if (isConstantIntBuildVectorOrConstantInt(Src))
+    return DAG.getNode(ISD::BSWAP, SDLoc(N), N->getValueType(0), Src);
+
+  // fold (bswap (bswap x)) -> x
+  const bool IsNestedSwap = Src.getOpcode() == ISD::BSWAP;
+  return IsNestedSwap ? Src->getOperand(0) : SDValue();
+}
+
  SDValue DAGCombiner::visitCTLZ(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
  
    // fold (ctlz c1) -> c2
-  if (isa<ConstantSDNode>(N0))
+  if (isConstantIntBuildVectorOrConstantInt(N0))
      return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
    return SDValue();
  }
@@ -4750,7 +4813,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
    EVT VT = N->getValueType(0);
  
    // fold (ctlz_zero_undef c1) -> c2
-  if (isa<ConstantSDNode>(N0))
+  if (isConstantIntBuildVectorOrConstantInt(N0))
      return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
    return SDValue();
  }
@@ -4760,7 +4823,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
    EVT VT = N->getValueType(0);
  
    // fold (cttz c1) -> c2
-  if (isa<ConstantSDNode>(N0))
+  if (isConstantIntBuildVectorOrConstantInt(N0))
      return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
    return SDValue();
  }
@@ -4770,7 +4833,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
    EVT VT = N->getValueType(0);
  
    // fold (cttz_zero_undef c1) -> c2
-  if (isa<ConstantSDNode>(N0))
+  if (isConstantIntBuildVectorOrConstantInt(N0))
      return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
    return SDValue();
  }
@@ -4780,7 +4843,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
    EVT VT = N->getValueType(0);
  
    // fold (ctpop c1) -> c2
-  if (isa<ConstantSDNode>(N0))
+  if (isConstantIntBuildVectorOrConstantInt(N0))
      return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
    return SDValue();
  }
@@ -5112,7 +5175,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
    std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
  
    MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MSC->getPointerInfo(), 
+    getMachineMemOperand(MSC->getPointerInfo(),
                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
                            Alignment, MSC->getAAInfo(), MSC->getRanges());
  
@@ -5251,7 +5314,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
  
    MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MGT->getPointerInfo(), 
+    getMachineMemOperand(MGT->getPointerInfo(),
                            MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
                            Alignment, MGT->getAAInfo(), MGT->getRanges());
  
@@ -5505,12 +5568,12 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
                         SDLoc(N));
  }
  
-// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext
-// dag node into a ConstantSDNode or a build_vector of constants.
-// This function is called by the DAGCombiner when visiting sext/zext/aext
-// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
-// Vector extends are not folded if operations are legal; this is to
-// avoid introducing illegal build_vector dag nodes.
+/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or 
+/// a build_vector of constants.
+/// This function is called by the DAGCombiner when visiting sext/zext/aext
+/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+/// Vector extends are not folded if operations are legal; this is to
+/// avoid introducing illegal build_vector dag nodes.
  static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                           SelectionDAG &DAG, bool LegalTypes,
                                           bool LegalOperations) {
@@ -5519,7 +5582,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
    EVT VT = N->getValueType(0);
  
    assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
-         Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
+         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
+         && "Expected EXTEND dag node in input!");
  
    // fold (sext c1) -> c1
    // fold (zext c1) -> c1
@@ -5539,9 +5603,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
    // We can fold this node into a build_vector.
    unsigned VTBits = SVT.getSizeInBits();
    unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
-  unsigned ShAmt = VTBits - EVTBits;
    SmallVector<SDValue, 8> Elts;
-  unsigned NumElts = N0->getNumOperands();
+  unsigned NumElts = VT.getVectorNumElements();
    SDLoc DL(N);
  
    for (unsigned i=0; i != NumElts; ++i) {
@@ -5552,14 +5615,13 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
      }
  
      SDLoc DL(Op);
-    ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
-    const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
-    if (Opcode == ISD::SIGN_EXTEND)
-      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
-                                     DL, SVT));
+    // Get the constant value and if needed trunc it to the size of the type.
+    // Nodes like build_vector might have constants wider than the scalar type.
+    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
+    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
+      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
      else
-      Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
-                                     DL, SVT));
+      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
    }
  
    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
@@ -5763,8 +5825,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
    if (N0.getOpcode() == ISD::TRUNCATE) {
      // fold (sext (truncate (load x))) -> (sext (smaller load x))
      // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
-    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
-    if (NarrowLoad.getNode()) {
+    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
        SDNode* oye = N0.getNode()->getOperand(0).getNode();
        if (NarrowLoad.getNode() != N0.getNode()) {
          CombineTo(N0.getNode(), NarrowLoad);
@@ -6046,8 +6107,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
    if (N0.getOpcode() == ISD::TRUNCATE) {
-    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
-    if (NarrowLoad.getNode()) {
+    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
        SDNode* oye = N0.getNode()->getOperand(0).getNode();
        if (NarrowLoad.getNode() != N0.getNode()) {
          CombineTo(N0.getNode(), NarrowLoad);
@@ -6064,8 +6124,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  
      // fold (zext (truncate (load x))) -> (zext (smaller load x))
      // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
-    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
-    if (NarrowLoad.getNode()) {
+    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
        SDNode* oye = N0.getNode()->getOperand(0).getNode();
        if (NarrowLoad.getNode() != N0.getNode()) {
          CombineTo(N0.getNode(), NarrowLoad);
@@ -6465,15 +6524,14 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
      // Only look at single-use SRLs.
      if (!V.getNode()->hasOneUse())
        break;
-    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+    if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
        // See if we can recursively simplify the LHS.
        unsigned Amt = RHSC->getZExtValue();
  
        // Watch out for shift count overflow though.
        if (Amt >= Mask.getBitWidth()) break;
        APInt NewMask = Mask << Amt;
-      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
-      if (SimplifyLHS.getNode())
+      if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
          return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
                             SimplifyLHS, V.getOperand(1));
      }
@@ -6602,7 +6660,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  
    // For big endian targets, we need to adjust the offset to the pointer to
    // load the correct bytes.
-  if (TLI.isBigEndian()) {
+  if (DAG.getDataLayout().isBigEndian()) {
      unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
      unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
      ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
@@ -6697,8 +6755,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  
    // fold (sext_in_reg (load x)) -> (smaller sextload x)
    // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
-  SDValue NarrowLoad = ReduceLoadWidth(N);
-  if (NarrowLoad.getNode())
+  if (SDValue NarrowLoad = ReduceLoadWidth(N))
      return NarrowLoad;
  
    // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
@@ -6783,10 +6840,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
    return SDValue();
  }
  
+/// Combine SIGN_EXTEND_VECTOR_INREG: an undef input gives an undef result;
+/// otherwise defer to tryToFoldExtendOfConstant.
+SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
+  // fold (sext_vector_inreg undef) -> undef
+  if (N->getOperand(0).getOpcode() == ISD::UNDEF)
+    return DAG.getUNDEF(N->getValueType(0));
+
+  // Wrap the node produced by the helper, or return an empty SDValue when it
+  // produced none.
+  SDNode *Folded =
+      tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations);
+  return Folded ? SDValue(Folded, 0) : SDValue();
+}
+
  SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
-  bool isLE = TLI.isLittleEndian();
+  bool isLE = DAG.getDataLayout().isLittleEndian();
  
    // noop truncate
    if (N0.getValueType() == N->getValueType(0))
@@ -6839,7 +6910,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
      SDValue EltNo = N0->getOperand(1);
      if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
        int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
-      EVT IndexTy = TLI.getVectorIdxTy();
+      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
        int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
  
        SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
@@ -6911,9 +6982,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
    // fold (truncate (load x)) -> (smaller load x)
    // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
    if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
-    SDValue Reduced = ReduceLoadWidth(N);
-    if (Reduced.getNode())
+    if (SDValue Reduced = ReduceLoadWidth(N))
        return Reduced;
+
      // Handle the case where the load remains an extending load even
      // after truncation.
      if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
@@ -7006,8 +7077,8 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
        !LD2->isVolatile() &&
        DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
      unsigned Align = LD1->getAlignment();
-    unsigned NewAlign = TLI.getDataLayout()->
-      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
+        VT.getTypeForEVT(*DAG.getContext()));
  
      if (NewAlign <= Align &&
          (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
@@ -7063,13 +7134,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
        // Do not change the width of a volatile load.
        !cast<LoadSDNode>(N0)->isVolatile() &&
        // Do not remove the cast if the types differ in endian layout.
-      TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
-      TLI.hasBigEndianPartOrdering(VT) &&
+      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
+          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
        TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    unsigned Align = TLI.getDataLayout()->
-      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+    unsigned Align = DAG.getDataLayout().getABITypeAlignment(
+        VT.getTypeForEVT(*DAG.getContext()));
      unsigned OrigAlign = LN0->getAlignment();
  
      if (Align <= OrigAlign) {
@@ -7152,11 +7223,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
    }
  
    // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
-  if (N0.getOpcode() == ISD::BUILD_PAIR) {
-    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
-    if (CombineLD.getNode())
+  if (N0.getOpcode() == ISD::BUILD_PAIR)
+    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
        return CombineLD;
-  }
  
    // Remove double bitcasts from shuffles - this is often a legacy of
    // XformToShuffleWithZero being used to combine bitmaskings (of
@@ -7169,10 +7238,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
      ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
  
      // If operands are a bitcast, peek through if it casts the original VT.
-    // If operands are a UNDEF or constant, just bitcast back to original VT.
+    // If operands are a constant, just bitcast back to original VT.
      auto PeekThroughBitcast = [&](SDValue Op) {
        if (Op.getOpcode() == ISD::BITCAST &&
-          Op.getOperand(0)->getValueType(0) == VT)
+          Op.getOperand(0).getValueType() == VT)
          return SDValue(Op.getOperand(0));
        if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
            ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
@@ -7237,8 +7306,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
                                       DstEltVT, BV->getOperand(0)));
  
      SmallVector<SDValue, 8> Ops;
-    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
-      SDValue Op = BV->getOperand(i);
+    for (SDValue Op : BV->op_values()) {
        // If the vector element type is not legal, the BUILD_VECTOR operands
        // are promoted and implicitly truncated.  Make that explicit here.
        if (Op.getValueType() != SrcEltVT)
@@ -7282,7 +7350,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0, e = BV->getNumOperands(); i != e;
           i += NumInputsPerOutput) {
-      bool isLE = TLI.isLittleEndian();
+      bool isLE = DAG.getDataLayout().isLittleEndian();
        APInt NewBits = APInt(DstBitSize, 0);
        bool EltIsUndef = true;
        for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
@@ -7313,13 +7381,13 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
                              NumOutputsPerInput*BV->getNumOperands());
    SmallVector<SDValue, 8> Ops;
  
-  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
-    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+  for (const SDValue &Op : BV->op_values()) {
+    if (Op.getOpcode() == ISD::UNDEF) {
        Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
        continue;
      }
  
-    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+    APInt OpVal = cast<ConstantSDNode>(Op)->
                    getAPIntValue().zextOrTrunc(SrcBitSize);
  
      for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
@@ -7329,7 +7397,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
      }
  
      // For big endian targets, swap the order of the pieces of each element.
-    if (TLI.isBigEndian())
+    if (DAG.getDataLayout().isBigEndian())
        std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
    }
  
@@ -7820,7 +7888,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
      bool AllowNewConst = (Level < AfterLegalizeDAG);
  
      // fold (fadd A, 0) -> A
-    if (N1CFP && N1CFP->getValueAPF().isZero())
+    if (N1CFP && N1CFP->isZero())
        return N0;
  
      // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
@@ -7951,11 +8019,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
    // If 'unsafe math' is enabled, fold lots of things.
    if (Options.UnsafeFPMath) {
      // (fsub A, 0) -> A
-    if (N1CFP && N1CFP->getValueAPF().isZero())
+    if (N1CFP && N1CFP->isZero())
        return N0;
  
      // (fsub 0, B) -> -B
-    if (N0CFP && N0CFP->getValueAPF().isZero()) {
+    if (N0CFP && N0CFP->isZero()) {
        if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
          return GetNegatedExpression(N1, DAG, LegalOperations);
        if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
@@ -8021,7 +8089,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
  
    if (Options.UnsafeFPMath) {
      // fold (fmul A, 0) -> 0
-    if (N1CFP && N1CFP->getValueAPF().isZero())
+    if (N1CFP && N1CFP->isZero())
        return N1;
  
      // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
@@ -8034,7 +8102,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
        auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
        auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
        auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
-      
+
        // Check 1: Make sure that the first operand of the inner multiply is NOT
        // a constant. Otherwise, we may induce infinite looping.
        if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
@@ -8052,7 +8120,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
      // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
      // during an early run of DAGCombiner can prevent folding with fmuls
      // inserted during lowering.
-    if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
+    if (N0.getOpcode() == ISD::FADD &&
+        (N0.getOperand(0) == N0.getOperand(1)) &&
+        N0.hasOneUse()) {
        const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
@@ -8166,6 +8236,60 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
    return SDValue();
  }
  
+// Combine multiple FDIVs with the same divisor into multiple FMULs by the
+// reciprocal, e.g. (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip).
+// This is not always beneficial: targets differ in the relative cost of FDIV
+// and FMUL, so two FDIVs may be cheaper than one FDIV plus two FMULs, and the
+// critical path grows from "one FDIV" to "one FDIV + one FMUL". The decision
+// is left to TLI.combineRepeatedFPDivisors. Returns SDValue(N, 0) if the users
+// were rewritten, or an empty SDValue if nothing was changed.
+SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+  if (!DAG.getTarget().Options.UnsafeFPMath)
+    return SDValue();
+
+  // Skip if current node is a reciprocal.
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+  if (N0CFP && N0CFP->isExactlyValue(1.0))
+    return SDValue();
+
+  SDValue N1 = N->getOperand(1);
+  SmallVector<SDNode *, 4> Users;
+
+  // Find all FDIV users of the same divisor. (x / x) uses N1 in both operands
+  // and is visited twice; keep each user once to avoid a double CombineTo.
+  for (auto *U : N1->uses()) {
+    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1 &&
+        std::find(Users.begin(), Users.end(), U) == Users.end())
+      Users.push_back(U);
+  }
+
+  if (!TLI.combineRepeatedFPDivisors(Users.size()))
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+  // FIXME: This optimization requires some level of fast-math, so the
+  // created reciprocal node should at least have the 'allowReciprocal'
+  // fast-math-flag set.
+  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
+
+  // Dividend / Divisor -> Dividend * Reciprocal
+  for (auto *U : Users) {
+    SDValue Dividend = U->getOperand(0);
+    if (Dividend != FPOne) {
+      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+                                    Reciprocal);
+      CombineTo(U, NewNode);
+    } else if (U != Reciprocal.getNode()) {
+      // In the absence of fast-math-flags, this user node is always the
+      // same node as Reciprocal, but with FMF they may be different nodes.
+      CombineTo(U, Reciprocal);
+    }
+  }
+  return SDValue(N, 0);  // N was replaced.
+}
+
  SDValue DAGCombiner::visitFDIV(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -8266,41 +8390,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
      }
    }
  
-  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
-  // reciprocal.
-  // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
-  // Notice that this is not always beneficial. One reason is different target
-  // may have different costs for FDIV and FMUL, so sometimes the cost of two
-  // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
-  // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
-  if (Options.UnsafeFPMath) {
-    // Skip if current node is a reciprocal.
-    if (N0CFP && N0CFP->isExactlyValue(1.0))
-      return SDValue();
-
-    SmallVector<SDNode *, 4> Users;
-    // Find all FDIV users of the same divisor.
-    for (auto U : N1->uses()) {
-      if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
-        Users.push_back(U);
-    }
-
-    if (TLI.combineRepeatedFPDivisors(Users.size())) {
-      SDLoc DL(N);
-      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); // floating point 1.0
-      SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
-
-      // Dividend / Divisor -> Dividend * Reciprocal
-      for (auto U : Users) {
-        if (U->getOperand(0) != FPOne) {
-          SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT,
-                                        U->getOperand(0), Reciprocal);
-          DAG.ReplaceAllUsesWith(U, NewNode.getNode());
-        }
-      }
-      return SDValue();
-    }
-  }
+  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
+    return CombineRepeatedDivisors;
  
    return SDValue();
  }
@@ -8320,30 +8411,29 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
  }
  
  SDValue DAGCombiner::visitFSQRT(SDNode *N) {
-  if (DAG.getTarget().Options.UnsafeFPMath &&
-      !TLI.isFsqrtCheap()) {
-    // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
-    if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
-      EVT VT = RV.getValueType();
-      SDLoc DL(N);
-      RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
-      AddToWorklist(RV.getNode());
+  if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
+    return SDValue();
  
-      // Unfortunately, RV is now NaN if the input was exactly 0.
-      // Select out this case and force the answer to 0.
-      SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
-      SDValue ZeroCmp =
-        DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT),
-                     N->getOperand(0), Zero, ISD::SETEQ);
-      AddToWorklist(ZeroCmp.getNode());
-      AddToWorklist(RV.getNode());
+  // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
+  SDValue RV = BuildRsqrtEstimate(N->getOperand(0));
+  if (!RV)
+    return SDValue();
+  
+  EVT VT = RV.getValueType();
+  SDLoc DL(N);
+  RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
+  AddToWorklist(RV.getNode());
  
-      RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
-                       DL, VT, ZeroCmp, Zero, RV);
-      return RV;
-    }
-  }
-  return SDValue();
+  // Unfortunately, RV is now NaN if the input was exactly 0.
+  // Select out this case and force the answer to 0.
+  SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
+  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
+  AddToWorklist(ZeroCmp.getNode());
+  AddToWorklist(RV.getNode());
+
+  return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+                     ZeroCmp, Zero, RV);
  }
  
  SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
@@ -8737,7 +8827,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
    }
  
    // (fneg (fmul c, x)) -> (fmul -c, x)
-  if (N0.getOpcode() == ISD::FMUL) {
+  if (N0.getOpcode() == ISD::FMUL &&
+      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
      ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
      if (CFP1) {
        APFloat CVal = CFP1->getValueAPF();
@@ -9022,14 +9113,18 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
                                      SelectionDAG &DAG,
                                      const TargetLowering &TLI) {
    EVT VT;
+  unsigned AS;
+
    if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
      if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
        return false;
      VT = LD->getMemoryVT();
+    AS = LD->getAddressSpace();
    } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
      if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
        return false;
      VT = ST->getMemoryVT();
+    AS = ST->getAddressSpace();
    } else
      return false;
  
@@ -9053,7 +9148,8 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
    } else
      return false;
  
-  return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
+                                   VT.getTypeForEVT(*DAG.getContext()), AS);
  }
  
  /// Try turning a load/store into a pre-indexed load/store when the base
@@ -9778,8 +9874,7 @@ struct LoadedSlice {
    /// \pre DAG != nullptr.
    uint64_t getOffsetFromBase() const {
      assert(DAG && "Missing context.");
-    bool IsBigEndian =
-        DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
+    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
      assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
      uint64_t Offset = Shift / 8;
      unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
@@ -9862,7 +9957,7 @@ struct LoadedSlice {
  
      // Check if it will be merged with the load.
      // 1. Check the alignment constraint.
-    unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
+    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
          ResVT.getTypeForEVT(*DAG->getContext()));
  
      if (RequiredAlignment > getAlignment())
@@ -10143,8 +10238,8 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
      return Result; // Fail.
    else {
      bool isOk = false;
-    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
-      if (Chain->getOperand(i).getNode() == LD) {
+    for (const SDValue &ChainOp : Chain->op_values())
+      if (ChainOp.getNode() == LD) {
          isOk = true;
          break;
        }
@@ -10230,7 +10325,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
    unsigned StOffset;
    unsigned NewAlign = St->getAlignment();
  
-  if (DAG.getTargetLoweringInfo().isLittleEndian())
+  if (DAG.getDataLayout().isLittleEndian())
      StOffset = ByteShift;
    else
      StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
@@ -10343,12 +10438,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
        uint64_t PtrOff = ShAmt / 8;
        // For big endian targets, we need to adjust the offset to the pointer to
        // load the correct bytes.
-      if (TLI.isBigEndian())
+      if (DAG.getDataLayout().isBigEndian())
          PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
  
        unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
        Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
-      if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
+      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
          return SDValue();
  
        SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
@@ -10412,7 +10507,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
      unsigned LDAlign = LD->getAlignment();
      unsigned STAlign = ST->getAlignment();
      Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
-    unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy);
+    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
      if (LDAlign < ABIAlign || STAlign < ABIAlign)
        return SDValue();
  
@@ -10527,6 +10622,18 @@ struct BaseIndexOffset {
  };
  } // namespace
  
+// Pack one stored value per element of Ty, in order, into a BUILD_VECTOR.
+SDValue DAGCombiner::getMergedConstantVectorStore(
+    SelectionDAG &DAG, SDLoc SL, ArrayRef<MemOpLink> Stores, EVT Ty) const {
+  const unsigned NumElts = Ty.getVectorNumElements();
+  SmallVector<SDValue, 8> Elts;
+  for (unsigned Idx = 0; Idx != NumElts; ++Idx) {
+    StoreSDNode *St = cast<StoreSDNode>(Stores[Idx].MemNode);
+    Elts.push_back(St->getValue());
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, Elts);
+}
+
  bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
                    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
                    unsigned NumElem, bool IsConstantSrc, bool UseVector) {
@@ -10557,12 +10664,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
      assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
      if (IsConstantSrc) {
-      // A vector store with a constant source implies that the constant is
-      // zero; we only handle merging stores of constant zeros because the zero
-      // can be materialized without a load.
-      // It may be beneficial to loosen this restriction to allow non-zero
-      // store merging.
-      StoredVal = DAG.getConstant(0, DL, Ty);
+      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
      } else {
        SmallVector<SDValue, 8> Ops;
        for (unsigned i = 0; i < NumElem ; ++i) {
@@ -10582,28 +10684,28 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
      // elements, so this path implies a store of constants.
      assert(IsConstantSrc && "Merged vector elements should use vector store");
  
-    unsigned StoreBW = NumElem * ElementSizeBytes * 8;
-    APInt StoreInt(StoreBW, 0);
+    unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+    APInt StoreInt(SizeInBits, 0);
  
      // Construct a single integer constant which is made of the smaller
      // constant inputs.
-    bool IsLE = TLI.isLittleEndian();
+    bool IsLE = DAG.getDataLayout().isLittleEndian();
      for (unsigned i = 0; i < NumElem ; ++i) {
        unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
        StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
        SDValue Val = St->getValue();
-      StoreInt <<= ElementSizeBytes*8;
+      StoreInt <<= ElementSizeBytes * 8;
        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
-        StoreInt |= C->getAPIntValue().zext(StoreBW);
+        StoreInt |= C->getAPIntValue().zext(SizeInBits);
        } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
-        StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+        StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
        } else {
          llvm_unreachable("Invalid constant element type");
        }
      }
  
      // Create the new Load and Store operations.
-    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
      StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
    }
  
@@ -10645,66 +10747,29 @@ static bool allowableAlignment(const SelectionDAG &DAG,
      return true;
  
    Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
-  unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
+  unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty);
    return (Align >= ABIAlignment);
  }
  
-bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
-  if (OptLevel == CodeGenOpt::None)
-    return false;
-
-  EVT MemVT = St->getMemoryVT();
-  int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
-  bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
-      Attribute::NoImplicitFloat);
-
-  // This function cannot currently deal with non-byte-sized memory sizes.
-  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
-    return false;
-
-  // Don't merge vectors into wider inputs.
-  if (MemVT.isVector() || !MemVT.isSimple())
-    return false;
-
-  // Perform an early exit check. Do not bother looking at stored values that
-  // are not constants, loads, or extracted vector elements.
-  SDValue StoredVal = St->getValue();
-  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
-  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
-                       isa<ConstantFPSDNode>(StoredVal);
-  bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
-
-  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
-    return false;
-
-  // Only look at ends of store sequences.
-  SDValue Chain = SDValue(St, 0);
-  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
-    return false;
-
+void DAGCombiner::getStoreMergeAndAliasCandidates(
+    StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
+    SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
    // This holds the base pointer, index, and the offset in bytes from the base
    // pointer.
    BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
  
    // We must have a base and an offset.
    if (!BasePtr.Base.getNode())
-    return false;
+    return;
  
    // Do not handle stores to undef base pointers.
    if (BasePtr.Base.getOpcode() == ISD::UNDEF)
-    return false;
-
-  // Save the LoadSDNodes that we find in the chain.
-  // We need to make sure that these nodes do not interfere with
-  // any of the store nodes.
-  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
-
-  // Save the StoreSDNodes that we find in the chain.
-  SmallVector<MemOpLink, 8> StoreNodes;
+    return;
  
    // Walk up the chain and look for nodes with offsets from the same
    // base pointer. Stop when reaching an instruction with a different kind
    // or instruction which has a different base pointer.
+  EVT MemVT = St->getMemoryVT();
    unsigned Seq = 0;
    StoreSDNode *Index = St;
    while (Index) {
@@ -10761,7 +10826,51 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
        }
      }
    }
+}
+
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+  if (OptLevel == CodeGenOpt::None)
+    return false;
+
+  EVT MemVT = St->getMemoryVT();
+  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+  bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+      Attribute::NoImplicitFloat);
+
+  // This function cannot currently deal with non-byte-sized memory sizes.
+  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
+    return false;
+
+  // Don't merge vectors into wider inputs.
+  if (MemVT.isVector() || !MemVT.isSimple())
+    return false;
  
+  // Perform an early exit check. Do not bother looking at stored values that
+  // are not constants, loads, or extracted vector elements.
+  SDValue StoredVal = St->getValue();
+  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
+  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
+                       isa<ConstantFPSDNode>(StoredVal);
+  bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+
+  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
+    return false;
+
+  // Only look at ends of store sequences.
+  SDValue Chain = SDValue(St, 0);
+  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
+    return false;
+
+  // Save the LoadSDNodes that we find in the chain.
+  // We need to make sure that these nodes do not interfere with
+  // any of the store nodes.
+  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
+  
+  // Save the StoreSDNodes that we find in the chain.
+  SmallVector<MemOpLink, 8> StoreNodes;
+
+  getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
+  
    // Check if there is anything to merge.
    if (StoreNodes.size() < 2)
      return false;
@@ -10827,8 +10936,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
        }
  
        // Find a legal type for the constant store.
-      unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
-      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+      unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
+      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
        if (TLI.isTypeLegal(StoreTy) &&
            allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
                               FirstStoreAlign)) {
@@ -10853,10 +10962,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
        }
      }
  
-    // We only use vectors if the constant is known to be zero and the
-    // function is not marked with the noimplicitfloat attribute.
-    if (NonZero || NoVectors)
+
+    // We only use vectors if the function is not marked with the
+    // noimplicitfloat attribute and either the constant is known to be zero
+    // or the target reports that storing a vector constant is cheap.
+    if (NoVectors) {
+      LastLegalVectorType = 0;
+    } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT,
+                                                            LastLegalVectorType,
+                                                            FirstStoreAS)) {
        LastLegalVectorType = 0;
+    }
  
      // Check if we found a legal integer type to store.
      if (LastLegalType == 0 && LastLegalVectorType == 0)
@@ -10983,8 +11099,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
      }
  
      // Find a legal type for the integer store.
-    unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
-    StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+    unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
+    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
      if (TLI.isTypeLegal(StoreTy) &&
          allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
          allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
@@ -11038,8 +11154,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
    if (UseVectorTy) {
      JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
    } else {
-    unsigned StoreBW = NumElem * ElementSizeBytes * 8;
-    JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+    unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+    JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    }
  
    SDLoc LoadDL(LoadNodes[0].MemNode);
@@ -11093,8 +11209,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
        ST->isUnindexed()) {
      unsigned OrigAlign = ST->getAlignment();
      EVT SVT = Value.getOperand(0).getValueType();
-    unsigned Align = TLI.getDataLayout()->
-      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
+    unsigned Align = DAG.getDataLayout().getABITypeAlignment(
+        SVT.getTypeForEVT(*DAG.getContext()));
      if (Align <= OrigAlign &&
          ((!LegalOperations && !ST->isVolatile()) ||
           TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
@@ -11153,7 +11269,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
            uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
            SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
            SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
-          if (TLI.isBigEndian()) std::swap(Lo, Hi);
+          if (DAG.getDataLayout().isBigEndian())
+            std::swap(Lo, Hi);
  
            unsigned Alignment = ST->getAlignment();
            bool isVolatile = ST->isVolatile();
@@ -11402,7 +11519,7 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    EVT ResultVT = EVE->getValueType(0);
    EVT VecEltVT = InVecVT.getVectorElementType();
    unsigned Align = OriginalLoad->getAlignment();
-  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VecEltVT.getTypeForEVT(*DAG.getContext()));
  
    if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
@@ -11536,7 +11653,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
      // scalar_to_vector here as well.
  
      if (!LegalOperations) {
-      EVT IndexTy = TLI.getVectorIdxTy();
+      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
        return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                           DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
      }
@@ -11713,7 +11830,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
    if (!ValidTypes)
      return SDValue();
  
-  bool isLE = TLI.isLittleEndian();
+  bool isLE = DAG.getDataLayout().isLittleEndian();
    unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
    assert(ElemRatio > 1 && "Invalid element size ratio");
    SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
@@ -11862,9 +11979,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
      if (Op.getOpcode() == ISD::UNDEF) continue;
  
      // See if we can combine this build_vector into a blend with a zero vector.
-    if (!VecIn2.getNode() && (isNullConstant(Op) ||
-        (Op.getOpcode() == ISD::ConstantFP &&
-        cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
+    if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
        UsesZeroVector = true;
        continue;
      }
@@ -11969,10 +12084,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  
          // Try to replace VecIn1 with two extract_subvectors
          // No need to update the masks, they should still be correct.
-        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
-          DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy()));
-        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
-          DAG.getConstant(0, dl, TLI.getVectorIdxTy()));
+        VecIn2 = DAG.getNode(
+            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+            DAG.getConstant(VT.getVectorNumElements(), dl,
+                            TLI.getVectorIdxTy(DAG.getDataLayout())));
+        VecIn1 = DAG.getNode(
+            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
+            DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
        } else
          return SDValue();
      }
@@ -12039,7 +12157,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
    }
  
    // If any of the operands is a floating point scalar bitcast to a vector,
-  // use floating point types throughout, and bitcast everything.  
+  // use floating point types throughout, and bitcast everything.
    // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
    if (AnyFP) {
      SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
@@ -12870,7 +12988,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
    SDValue RHS = N->getOperand(1);
    SDLoc dl(N);
  
-  // Make sure we're not running after operation legalization where it 
+  // Make sure we're not running after operation legalization where it
    // may have custom lowered the vector shuffles.
    if (LegalOperations)
      return SDValue();
@@ -12942,7 +13060,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
        if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
            N->getOpcode() == ISD::FDIV) {
          if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
-             cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+             cast<ConstantFPSDNode>(RHSOp.getNode())->isZero()))
            break;
        }
  
@@ -13206,7 +13324,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
    // Check to see if we can simplify the select into an fabs node
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
      // Allow either -0.0 or 0.0
-    if (CFP->getValueAPF().isZero()) {
+    if (CFP->isZero()) {
        // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
        if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
            N0 == N2 && N3.getOpcode() == ISD::FNEG &&
@@ -13244,12 +13362,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
            const_cast<ConstantFP*>(TV->getConstantFPValue())
          };
          Type *FPTy = Elts[0]->getType();
-        const DataLayout &TD = *TLI.getDataLayout();
+        const DataLayout &TD = DAG.getDataLayout();
  
          // Create a ConstantArray of the two constants.
          Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
-        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
-                                            TD.getPrefTypeAlignment(FPTy));
+        SDValue CPIdx =
+            DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
+                                TD.getPrefTypeAlignment(FPTy));
          unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
  
          // Get the offsets to the 0 and 1 element of the array so that we can
@@ -13722,6 +13841,15 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
    // If they are both volatile then they cannot be reordered.
    if (Op0->isVolatile() && Op1->isVolatile()) return true;
  
+  // If one operation reads from invariant memory, and the other may store, they
+  // cannot alias. These should really be checking the equivalent of mayWrite,
+  // but it only matters for memory nodes other than load/store.
+  if (Op0->isInvariant() && Op1->writeMem())
+    return false;
+
+  if (Op1->isInvariant() && Op0->writeMem())
+    return false;
+
    // Gather base node and offset information.
    SDValue Base1, Base2;
    int64_t Offset1, Offset2;
@@ -13790,14 +13918,12 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
          Op0->getSrcValueOffset() - MinOffset;
      int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
          Op1->getSrcValueOffset() - MinOffset;
-    AliasAnalysis::AliasResult AAResult =
-        AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(),
-                                         Overlap1,
-                                         UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
-                 AliasAnalysis::Location(Op1->getMemOperand()->getValue(),
-                                         Overlap2,
-                                         UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
-    if (AAResult == AliasAnalysis::NoAlias)
+    AliasResult AAResult =
+        AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
+                                UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
+                 MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
+                                UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
+    if (AAResult == NoAlias)
        return false;
    }
  
@@ -13823,8 +13949,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
    // aliases list.  If not, then continue up the chain looking for the next
    // candidate.
    while (!Chains.empty()) {
-    SDValue Chain = Chains.back();
-    Chains.pop_back();
+    SDValue Chain = Chains.pop_back_val();
  
      // For TokenFactor nodes, look at each operand and only continue up the
      // chain until we find two aliases.  If we've seen two aliases, assume we'll
@@ -13931,7 +14056,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
           UIE = M->use_end(); UI != UIE; ++UI)
        if (UI.getUse().getValueType() == MVT::Other &&
            Visited.insert(*UI).second) {
-        if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
+        if (isa<MemSDNode>(*UI)) {
            // We've not visited this use, and we care about it (it could have an
            // ordering dependency with the original node).
            Aliases.clear();