CodeGen: extend f16 conversions to permit types > float.

[oota-llvm.git] / lib / CodeGen / SelectionDAG / DAGCombiner.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 2e2bfee2bc284a9766a0b42a9fe3e94ffdb529df..58b4e906bfab59345a5ec5c6eeca833a5772f8ff 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16,7 +16,6 @@
  //
  //===----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "dagcombine"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/Statistic.h"
@@ -40,6 +39,8 @@
  #include <algorithm>
  using namespace llvm;
  
+#define DEBUG_TYPE "dagcombine"
+
  STATISTIC(NodesCombined   , "Number of dag nodes combined");
  STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
@@ -168,6 +169,16 @@ namespace {
      bool CombineToPostIndexedLoadStore(SDNode *N);
      bool SliceUpLoad(SDNode *N);
  
+    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+    ///   load.
+    ///
+    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
+    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
+    /// \param EltNo index of the vector element to load.
+    /// \param OriginalLoad load that EVE came from to be replaced.
+    /// \returns EVE on success SDValue() on failure.
+    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
      void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
      SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
      SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -317,26 +328,7 @@ namespace {
  
      /// isAlias - Return true if there is any possibility that the two addresses
      /// overlap.
-    bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
-                 const Value *SrcValue1, int SrcValueOffset1,
-                 unsigned SrcValueAlign1,
-                 const MDNode *TBAAInfo1,
-                 SDValue Ptr2, int64_t Size2, bool IsVolatile2,
-                 const Value *SrcValue2, int SrcValueOffset2,
-                 unsigned SrcValueAlign2,
-                 const MDNode *TBAAInfo2) const;
-
-    /// isAlias - Return true if there is any possibility that the two addresses
-    /// overlap.
-    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
-
-    /// FindAliasInfo - Extracts the relevant alias information from the memory
-    /// node.  Returns true if the operand was a load.
-    bool FindAliasInfo(SDNode *N,
-                       SDValue &Ptr, int64_t &Size, bool &IsVolatile,
-                       const Value *&SrcValue, int &SrcValueOffset,
-                       unsigned &SrcValueAlignment,
-                       const MDNode *&TBAAInfo) const;
+    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
  
      /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
      /// looking for a better chain (aliasing node.)
@@ -662,8 +654,17 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
      return CN;
  
-  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N))
-    return BV->getConstantSplatValue();
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+    BitVector UndefElements;
+    ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
+
+    // BuildVectors can truncate their operands. Ignore that case here.
+    // FIXME: We blindly ignore splats which include undef which is overly
+    // pessimistic.
+    if (CN && UndefElements.none() &&
+        CN->getValueType(0) == N.getValueType().getScalarType())
+      return CN;
+  }
  
    return nullptr;
  }
@@ -1328,9 +1329,16 @@ SDValue DAGCombiner::combine(SDNode *N) {
  
      // Constant operands are canonicalized to RHS.
      if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
-      SDValue Ops[] = { N1, N0 };
-      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
-                                            Ops, 2);
+      SDValue Ops[] = {N1, N0};
+      SDNode *CSENode;
+      if (const BinaryWithFlagsSDNode *BinNode =
+              dyn_cast<BinaryWithFlagsSDNode>(N)) {
+        CSENode = DAG.getNodeIfExists(
+            N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
+            BinNode->hasNoSignedWrap(), BinNode->isExact());
+      } else {
+        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
+      }
        if (CSENode)
          return SDValue(CSENode, 0);
      }
@@ -1420,8 +1428,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
        Result = DAG.getEntryNode();
      } else {
        // New and improved token factor.
-      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N),
-                           MVT::Other, &Ops[0], Ops.size());
+      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
  
      // Don't add users to work list.
@@ -1573,10 +1580,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
    if (VT.isInteger() && !VT.isVector()) {
      APInt LHSZero, LHSOne;
      APInt RHSZero, RHSOne;
-    DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+    DAG.computeKnownBits(N0, LHSZero, LHSOne);
  
      if (LHSZero.getBoolValue()) {
-      DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+      DAG.computeKnownBits(N1, RHSZero, RHSOne);
  
        // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
        // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
@@ -1668,10 +1675,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
    // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
-  DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+  DAG.computeKnownBits(N0, LHSZero, LHSOne);
  
    if (LHSZero.getBoolValue()) {
-    DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+    DAG.computeKnownBits(N1, RHSZero, RHSOne);
  
      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
@@ -1976,8 +1983,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue DAGCombiner::visitSDIV(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
    EVT VT = N->getValueType(0);
  
    // fold vector ops
@@ -2003,10 +2010,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
        return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
                           N0, N1);
    }
+
    // fold (sdiv X, pow2) -> simple ops after legalize
-  if (N1C && !N1C->isNullValue() &&
-      (N1C->getAPIntValue().isPowerOf2() ||
-       (-N1C->getAPIntValue()).isPowerOf2())) {
+  if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
+                                     (-N1C->getAPIntValue()).isPowerOf2())) {
      // If dividing by powers of two is cheap, then don't perform the following
      // fold.
      if (TLI.isPow2DivCheap())
@@ -2015,15 +2022,17 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
      unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
  
      // Splat the sign bit into the register
-    SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
-                              DAG.getConstant(VT.getSizeInBits()-1,
-                                       getShiftAmountTy(N0.getValueType())));
+    SDValue SGN =
+        DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
+                    DAG.getConstant(VT.getScalarSizeInBits() - 1,
+                                    getShiftAmountTy(N0.getValueType())));
      AddToWorkList(SGN.getNode());
  
      // Add (N0 < 0) ? abs2 - 1 : 0;
-    SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
-                              DAG.getConstant(VT.getSizeInBits() - lg2,
-                                       getShiftAmountTy(SGN.getValueType())));
+    SDValue SRL =
+        DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
+                    DAG.getConstant(VT.getScalarSizeInBits() - lg2,
+                                    getShiftAmountTy(SGN.getValueType())));
      SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
      AddToWorkList(SRL.getNode());
      AddToWorkList(ADD.getNode());    // Divide by pow2
@@ -2036,13 +2045,12 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
        return SRA;
  
      AddToWorkList(SRA.getNode());
-    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
-                       DAG.getConstant(0, VT), SRA);
+    return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
    }
  
    // if integer divide is expensive and we satisfy the requirements, emit an
    // alternate sequence.
-  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+  if (N1C && !TLI.isIntDivCheap()) {
      SDValue Op = BuildSDIV(N);
      if (Op.getNode()) return Op;
    }
@@ -2060,8 +2068,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue DAGCombiner::visitUDIV(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
    EVT VT = N->getValueType(0);
  
    // fold vector ops
@@ -2094,7 +2102,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
      }
    }
    // fold (udiv x, c) -> alternate
-  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+  if (N1C && !TLI.isIntDivCheap()) {
      SDValue Op = BuildUDIV(N);
      if (Op.getNode()) return Op;
    }
@@ -2112,8 +2120,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue DAGCombiner::visitSREM(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
    EVT VT = N->getValueType(0);
  
    // fold (srem c1, c2) -> c1%c2
@@ -2154,8 +2162,8 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue DAGCombiner::visitUREM(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
    EVT VT = N->getValueType(0);
  
    // fold (urem c1, c2) -> c1%c2
@@ -2290,7 +2298,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
      SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
-                              N->op_begin(), N->getNumOperands());
+                              ArrayRef<SDUse>(N->op_begin(), N->op_end()));
      return CombineTo(N, Res, Res);
    }
  
@@ -2300,7 +2308,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
        (!LegalOperations ||
         TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
      SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
-                              N->op_begin(), N->getNumOperands());
+                              ArrayRef<SDUse>(N->op_begin(), N->op_end()));
      return CombineTo(N, Res, Res);
    }
  
@@ -2311,7 +2319,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
    // If the two computed results can be simplified separately, separate them.
    if (LoExists) {
      SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
-                             N->op_begin(), N->getNumOperands());
+                             ArrayRef<SDUse>(N->op_begin(), N->op_end()));
      AddToWorkList(Lo.getNode());
      SDValue LoOpt = combine(Lo.getNode());
      if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
@@ -2322,7 +2330,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
  
    if (HiExists) {
      SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
-                             N->op_begin(), N->getNumOperands());
+                             ArrayRef<SDUse>(N->op_begin(), N->op_end()));
      AddToWorkList(Hi.getNode());
      SDValue HiOpt = combine(Hi.getNode());
      if (HiOpt.getNode() && HiOpt != Hi &&
@@ -2524,7 +2532,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  
      assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
             "Inputs to shuffles are not the same type");
- 
+
      // Check that both shuffles use the same mask. The masks are known to be of
      // the same length because the result vector type is the same.
      // Check also that shuffles have only one use to avoid introducing extra
@@ -3247,6 +3255,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
      // Do this only if the resulting shuffle is legal.
      if (isa<ShuffleVectorSDNode>(N0) &&
          isa<ShuffleVectorSDNode>(N1) &&
+        // Avoid folding a node with illegal type.
+        TLI.isTypeLegal(VT) &&
          N0->getOperand(1) == N1->getOperand(1) &&
          ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
        bool CanFold = true;
@@ -3262,11 +3272,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
        // two ways to fold this node into a shuffle.
        SmallVector<int,4> Mask1;
        SmallVector<int,4> Mask2;
-      
+
        for (unsigned i = 0; i != NumElts && CanFold; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);
-   
+
          // Both shuffle indexes are undef. Propagate Undef.
          if (M0 < 0 && M1 < 0) {
            Mask1.push_back(M0);
@@ -3280,7 +3290,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
            CanFold = false;
            break;
          }
-        
+
          Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
          Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
        }
@@ -3574,27 +3584,6 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                         HasPos ? Pos : Neg).getNode();
    }
  
-  // fold (or (shl (*ext x), (*ext y)),
-  //          (srl (*ext x), (*ext (sub 32, y)))) ->
-  //   (*ext (rotl x, y)) or (*ext (rotr x, (sub 32, y)))
-  //
-  // fold (or (shl (*ext x), (*ext (sub 32, y))),
-  //          (srl (*ext x), (*ext y))) ->
-  //   (*ext (rotr x, y)) or (*ext (rotl x, (sub 32, y)))
-  if (Shifted.getOpcode() == ISD::ZERO_EXTEND ||
-      Shifted.getOpcode() == ISD::ANY_EXTEND) {
-    SDValue InnerShifted = Shifted.getOperand(0);
-    EVT InnerVT = InnerShifted.getValueType();
-    bool HasPosInner = TLI.isOperationLegalOrCustom(PosOpcode, InnerVT);
-    if (HasPosInner || TLI.isOperationLegalOrCustom(NegOpcode, InnerVT)) {
-      if (matchRotateSub(InnerPos, InnerNeg, InnerVT.getSizeInBits())) {
-        SDValue V = DAG.getNode(HasPosInner ? PosOpcode : NegOpcode, DL,
-                                InnerVT, InnerShifted, HasPosInner ? Pos : Neg);
-        return DAG.getNode(Shifted.getOpcode(), DL, VT, V).getNode();
-      }
-    }
-  }
-
    return nullptr;
  }
  
@@ -3901,6 +3890,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
        return SDValue();
    }
  
+  if (!TLI.isDesirableToCommuteWithShift(LHS))
+    return SDValue();
+
    // Fold the constants, shifting the binop RHS by the shift amount.
    SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                                 N->getValueType(0),
@@ -3968,14 +3960,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
      // If setcc produces all-one true value then:
      // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
      if (N1CV && N1CV->isConstant()) {
-      if (N0.getOpcode() == ISD::AND &&
-          TLI.getBooleanContents(true) ==
-          TargetLowering::ZeroOrNegativeOneBooleanContent) {
+      if (N0.getOpcode() == ISD::AND) {
          SDValue N00 = N0->getOperand(0);
          SDValue N01 = N0->getOperand(1);
          BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
  
-        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
+        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
+            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
+                TargetLowering::ZeroOrNegativeOneBooleanContent) {
            SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
            if (C.getNode())
              return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
@@ -4374,7 +4366,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    if (N1C && N0.getOpcode() == ISD::CTLZ &&
        N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
      APInt KnownZero, KnownOne;
-    DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
+    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
  
      // If any of the input bits are KnownOne, then the input couldn't be all
      // zeros, thus the result of the srl will always be zero.
@@ -4534,11 +4526,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
    if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
    // fold (select C, 0, 1) -> (xor C, 1)
+  // We can't do this reliably if integer based booleans have different contents
+  // to floating point based booleans. This is because we can't tell whether we
+  // have an integer-based boolean or a floating-point-based boolean unless we
+  // can find the SETCC that produced it and inspect its operands. This is
+  // fairly easy if C is the SETCC node, but it can potentially be
+  // undiscoverable (or not reasonably discoverable). For example, it could be
+  // in another basic block or it could require searching a complicated
+  // expression.
    if (VT.isInteger() &&
-      (VT0 == MVT::i1 ||
-       (VT0.isInteger() &&
-        TLI.getBooleanContents(false) ==
-        TargetLowering::ZeroOrOneBooleanContent)) &&
+      (VT0 == MVT::i1 || (VT0.isInteger() &&
+                          TLI.getBooleanContents(false, false) ==
+                              TLI.getBooleanContents(false, true) &&
+                          TLI.getBooleanContents(false, false) ==
+                              TargetLowering::ZeroOrOneBooleanContent)) &&
        N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
      SDValue XORNode;
      if (VT == VT0)
@@ -4581,12 +4582,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
  
    // fold selects based on a setcc into other things, such as min/max/abs
    if (N0.getOpcode() == ISD::SETCC) {
-    // FIXME:
-    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
-    // having to say they don't support SELECT_CC on every type the DAG knows
-    // about, since there is no way to mark an opcode illegal at all value types
-    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
-        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+    if ((!LegalOperations &&
+         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+       TLI.isOperationLegal(ISD::SELECT_CC, VT))
        return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           N1, N2, N0.getOperand(2));
@@ -4613,6 +4611,56 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
    return std::make_pair(Lo, Hi);
  }
  
+// This function assumes all the vselect's arguments are CONCAT_VECTOR
+// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
+  SDLoc dl(N);
+  SDValue Cond = N->getOperand(0);
+  SDValue LHS = N->getOperand(1);
+  SDValue RHS = N->getOperand(2);
+  MVT VT = N->getSimpleValueType(0);
+  int NumElems = VT.getVectorNumElements();
+  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
+         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
+         Cond.getOpcode() == ISD::BUILD_VECTOR);
+
+  // We're sure we have an even number of elements due to the
+  // concat_vectors we have as arguments to vselect.
+  // Skip BV elements until we find one that's not an UNDEF
+  // After we find an UNDEF element, keep looping until we get to half the
+  // length of the BV and see if all the non-undef nodes are the same.
+  ConstantSDNode *BottomHalf = nullptr;
+  for (int i = 0; i < NumElems / 2; ++i) {
+    if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+      continue;
+
+    if (BottomHalf == nullptr)
+      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+    else if (Cond->getOperand(i).getNode() != BottomHalf)
+      return SDValue();
+  }
+
+  // Do the same for the second half of the BuildVector
+  ConstantSDNode *TopHalf = nullptr;
+  for (int i = NumElems / 2; i < NumElems; ++i) {
+    if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+      continue;
+
+    if (TopHalf == nullptr)
+      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+    else if (Cond->getOperand(i).getNode() != TopHalf)
+      return SDValue();
+  }
+
+  assert(TopHalf && BottomHalf &&
+         "One half of the selector was all UNDEFs and the other was all the "
+         "same value. This should have been addressed before this function.");
+  return DAG.getNode(
+      ISD::CONCAT_VECTORS, dl, VT,
+      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
+      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+}
+
  SDValue DAGCombiner::visitVSELECT(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -4685,6 +4733,17 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N2;
  
+  // The ConvertSelectToConcatVector function is assuming both the above
+  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
+  // and addressed.
+  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+      N2.getOpcode() == ISD::CONCAT_VECTORS &&
+      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+    SDValue CV = ConvertSelectToConcatVector(N, DAG);
+    if (CV.getNode())
+      return CV;
+  }
+
    return SDValue();
  }
  
@@ -4737,7 +4796,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // tryToFoldExtendOfConstant - Try to fold a sext/zext/aext
  // dag node into a ConstantSDNode or a build_vector of constants.
  // This function is called by the DAGCombiner when visiting sext/zext/aext
-// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 
+// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
  // Vector extends are not folded if operations are legal; this is to
  // avoid introducing illegal build_vector dag nodes.
  static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
@@ -4764,7 +4823,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
        (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
        ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
      return nullptr;
-  
+
    // We can fold this node into a build_vector.
    unsigned VTBits = SVT.getSizeInBits();
    unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
@@ -4790,7 +4849,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                       SVT));
    }
  
-  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], NumElts).getNode();
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
  }
  
  // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
@@ -4874,8 +4933,7 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
      }
  
      Ops.push_back(SetCC->getOperand(2));
-    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
-                                 &Ops[0], Ops.size()));
+    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
    }
  }
  
@@ -5030,12 +5088,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
    }
  
    if (N0.getOpcode() == ISD::SETCC) {
+    EVT N0VT = N0.getOperand(0).getValueType();
      // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
      // Only do this before legalize for now.
      if (VT.isVector() && !LegalOperations &&
-        TLI.getBooleanContents(true) ==
-          TargetLowering::ZeroOrNegativeOneBooleanContent) {
-      EVT N0VT = N0.getOperand(0).getValueType();
+        TLI.getBooleanContents(N0VT) ==
+            TargetLowering::ZeroOrNegativeOneBooleanContent) {
        // On some architectures (such as SSE/NEON/etc) the SETCC result type is
        // of the same size as the compared operands. Only optimize sext(setcc())
        // if this is the case.
@@ -5101,13 +5159,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  // isTruncateOf - If N is a truncate of some other value, return true, record
  // the value being truncated in Op and which of Op's bits are zero in KnownZero.
  // This function computes KnownZero to avoid a duplicated call to
-// ComputeMaskedBits in the caller.
+// computeKnownBits in the caller.
  static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                           APInt &KnownZero) {
    APInt KnownOne;
    if (N->getOpcode() == ISD::TRUNCATE) {
      Op = N->getOperand(0);
-    DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+    DAG.computeKnownBits(Op, KnownZero, KnownOne);
      return true;
    }
  
@@ -5128,7 +5186,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
    else
      return false;
  
-  DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+  DAG.computeKnownBits(Op, KnownZero, KnownOne);
  
    if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
      return false;
@@ -5347,7 +5405,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                                           N0.getOperand(1),
                                   cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                             DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
-                                       &OneOps[0], OneOps.size()));
+                                       OneOps));
  
        // If the desired elements are smaller or larger than the source
        // elements we can use a matching integer vector type and then
@@ -5364,8 +5422,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getNode(ISD::AND, SDLoc(N), VT,
                           DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
-                         DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
-                                     &OneOps[0], OneOps.size()));
+                         DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
      }
  
      // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -5473,8 +5530,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
    // scalars.
    if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
        ISD::isUNINDEXEDLoad(N0.getNode()) &&
-      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
-       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+      TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
@@ -5518,7 +5574,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
    }
  
    if (N0.getOpcode() == ISD::SETCC) {
-    // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+    // For vectors:
+    // aext(setcc) -> vsetcc
+    // aext(setcc) -> truncate(vsetcc)
+    // aext(setcc) -> aext(vsetcc)
      // Only do this before legalize for now.
      if (VT.isVector() && !LegalOperations) {
        EVT N0VT = N0.getOperand(0).getValueType();
@@ -5533,19 +5592,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
                               cast<CondCodeSDNode>(N0.getOperand(2))->get());
        // If the desired elements are smaller or larger than the source
        // elements we can use a matching integer vector type and then
-      // truncate/sign extend
+      // truncate/any extend
        else {
-        EVT MatchingElementType =
-          EVT::getIntegerVT(*DAG.getContext(),
-                            N0VT.getScalarType().getSizeInBits());
-        EVT MatchingVectorType =
-          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
-                           N0VT.getVectorNumElements());
+        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
          SDValue VsetCC =
            DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                          N0.getOperand(1),
                          cast<CondCodeSDNode>(N0.getOperand(2))->get());
-        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
+        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
        }
      }
  
@@ -5895,7 +5949,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
                                       Op.getValueType()));
      }
  
-    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts);
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
    }
  
    return SDValue();
@@ -5969,6 +6023,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
      }
    }
  
+  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
+  if (N0.getOpcode() == ISD::SELECT) {
+    EVT SrcVT = N0.getValueType();
+    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
+        TLI.isTruncateFree(SrcVT, VT)) {
+      SDLoc SL(N0);
+      SDValue Cond = N0.getOperand(0);
+      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
+      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
+    }
+  }
+
    // Fold a series of buildvector, bitcast, and truncate if possible.
    // For example fold
    //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -5996,8 +6063,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
        for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
          Opnds.push_back(BuildVect.getOperand(i));
  
-      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
-                         Opnds.size());
+      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
      }
    }
  
@@ -6072,8 +6138,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
          AddToWorkList(NV.getNode());
          Opnds.push_back(NV);
        }
-      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
-                         &Opnds[0], Opnds.size());
+      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
      }
    }
  
@@ -6172,6 +6237,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
    if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
        // Do not change the width of a volatile load.
        !cast<LoadSDNode>(N0)->isVolatile() &&
+      // Do not remove the cast if the types differ in endian layout.
+      TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
+      TLI.hasBigEndianPartOrdering(VT) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
        TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -6311,8 +6379,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
                                  DstEltVT, Op));
        AddToWorkList(Ops.back().getNode());
      }
-    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
-                       &Ops[0], Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
    }
  
    // Otherwise, we're growing or shrinking the elements.  To avoid having to
@@ -6368,8 +6435,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
      }
  
      EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
-    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
-                       &Ops[0], Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
    }
  
    // Finally, this must be the case where we are shrinking elements: each input
@@ -6405,8 +6471,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
        std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
    }
  
-  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
-                     &Ops[0], Ops.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }
  
  SDValue DAGCombiner::visitFADD(SDNode *N) {
@@ -6990,11 +7055,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
    }
  
    // The next optimizations are desirable only if SELECT_CC can be lowered.
-  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
-  // having to say they don't support SELECT_CC on every type the DAG knows
-  // about, since there is no way to mark an opcode illegal at all value types
-  // (See also visitSELECT)
-  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
      // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
      if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
          !VT.isVector() &&
@@ -7004,7 +7065,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
          { N0.getOperand(0), N0.getOperand(1),
            DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
            N0.getOperand(2) };
-      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
+      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
      }
  
      // fold (sint_to_fp (zext (setcc x, y, cc))) ->
@@ -7017,7 +7078,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
          { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
            DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
            N0.getOperand(0).getOperand(2) };
-      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
+      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
      }
    }
  
@@ -7047,11 +7108,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
    }
  
    // The next optimizations are desirable only if SELECT_CC can be lowered.
-  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
-  // having to say they don't support SELECT_CC on every type the DAG knows
-  // about, since there is no way to mark an opcode illegal at all value types
-  // (See also visitSELECT)
-  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
      // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
  
      if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
@@ -7061,7 +7118,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
          { N0.getOperand(0), N0.getOperand(1),
            DAG.getConstantFP(1.0, VT),  DAG.getConstantFP(0.0, VT),
            N0.getOperand(2) };
-      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
+      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
      }
    }
  
@@ -7170,8 +7227,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  
    // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
    if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
-      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
-       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
@@ -7221,11 +7277,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
    // (fneg (fmul c, x)) -> (fmul -c, x)
    if (N0.getOpcode() == ISD::FMUL) {
      ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
-    if (CFP1)
-      return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                         N0.getOperand(0),
-                         DAG.getNode(ISD::FNEG, SDLoc(N), VT,
-                                     N0.getOperand(1)));
+    if (CFP1) {
+      APFloat CVal = CFP1->getValueAPF();
+      CVal.changeSign();
+      if (Level >= AfterLegalizeDAG &&
+          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
+           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
+        return DAG.getNode(
+            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+    }
    }
  
    return SDValue();
@@ -8553,7 +8614,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
    }
  
    SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
-                              &ArgChains[0], ArgChains.size());
+                              ArgChains);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
    return true;
  }
@@ -9678,6 +9739,27 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
  
+  // Canonicalize insert_vector_elt dag nodes.
+  // Example:
+  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
+  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
+  //
+  // Do this only if the child insert_vector node has one use; also
+  // do this only if indices are both constants and Idx1 < Idx0.
+  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
+      && isa<ConstantSDNode>(InVec.getOperand(2))) {
+    unsigned OtherElt =
+      cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
+    if (Elt < OtherElt) {
+      // Swap nodes.
+      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+                                  InVec.getOperand(0), InVal, EltNo);
+      AddToWorkList(NewOp.getNode());
+      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
+                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
+    }
+  }
+
    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
    // vector elements.
@@ -9707,8 +9789,87 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
    }
  
    // Return the new vector
-  return DAG.getNode(ISD::BUILD_VECTOR, dl,
-                     VT, &Ops[0], Ops.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+}
+
+SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+  EVT ResultVT = EVE->getValueType(0);
+  EVT VecEltVT = InVecVT.getVectorElementType();
+  unsigned Align = OriginalLoad->getAlignment();
+  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+      VecEltVT.getTypeForEVT(*DAG.getContext()));
+
+  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+    return SDValue();
+
+  Align = NewAlign;
+
+  SDValue NewPtr = OriginalLoad->getBasePtr();
+  SDValue Offset;
+  EVT PtrType = NewPtr.getValueType();
+  MachinePointerInfo MPI;
+  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+    int Elt = ConstEltNo->getZExtValue();
+    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+    if (TLI.isBigEndian())
+      PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
+    Offset = DAG.getConstant(PtrOff, PtrType);
+    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+  } else {
+    Offset = DAG.getNode(
+        ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
+        DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
+    if (TLI.isBigEndian())
+      Offset = DAG.getNode(
+          ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
+          DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
+    MPI = OriginalLoad->getPointerInfo();
+  }
+  NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+
+  // The replacement we need to do here is a little tricky: we need to
+  // replace an extractelement of a load with a load.
+  // Use ReplaceAllUsesOfValuesWith to do the replacement.
+  // Note that this replacement assumes that the extractvalue is the only
+  // use of the load; that's okay because we don't want to perform this
+  // transformation in other cases anyway.
+  SDValue Load;
+  SDValue Chain;
+  if (ResultVT.bitsGT(VecEltVT)) {
+    // If the result type of vextract is wider than the load, then issue an
+    // extending load instead.
+    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
+                                   ? ISD::ZEXTLOAD
+                                   : ISD::EXTLOAD;
+    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
+                          NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
+                          OriginalLoad->isNonTemporal(), Align,
+                          OriginalLoad->getTBAAInfo());
+    Chain = Load.getValue(1);
+  } else {
+    Load = DAG.getLoad(
+        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
+        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
+        OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
+    Chain = Load.getValue(1);
+    if (ResultVT.bitsLT(VecEltVT))
+      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+    else
+      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
+  }
+  WorkListRemover DeadNodes(*this);
+  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
+  SDValue To[] = { Load, Chain };
+  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+  // Since we're explicitly calling ReplaceAllUses, add the new node to the
+  // worklist explicitly as well.
+  AddToWorkList(Load.getNode());
+  AddUsersToWorkList(Load.getNode()); // Add users too
+  // Make sure to revisit this node to clean it up; it will usually be dead.
+  AddToWorkList(EVE);
+  ++OpsNarrowed;
+  return SDValue(EVE, 0);
  }
  
  SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
@@ -9779,6 +9940,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
      }
    }
  
+  bool BCNumEltsChanged = false;
+  EVT ExtVT = VT.getVectorElementType();
+  EVT LVT = ExtVT;
+
+  // If the result of load has to be truncated, then it's not necessarily
+  // profitable.
+  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+    return SDValue();
+
+  if (InVec.getOpcode() == ISD::BITCAST) {
+    // Don't duplicate a load with other uses.
+    if (!InVec.hasOneUse())
+      return SDValue();
+
+    EVT BCVT = InVec.getOperand(0).getValueType();
+    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+      return SDValue();
+    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+      BCNumEltsChanged = true;
+    InVec = InVec.getOperand(0);
+    ExtVT = BCVT.getVectorElementType();
+  }
+
+  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
+  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
+      ISD::isNormalLoad(InVec.getNode())) {
+    SDValue Index = N->getOperand(1);
+    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
+      return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
+                                                           OrigLoad);
+  }
+
    // Perform only after legalization to ensure build_vector / vector_shuffle
    // optimizations have already been done.
    if (!LegalOperations) return SDValue();
@@ -9789,30 +9982,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  
    if (ConstEltNo) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
-    bool NewLoad = false;
-    bool BCNumEltsChanged = false;
-    EVT ExtVT = VT.getVectorElementType();
-    EVT LVT = ExtVT;
-
-    // If the result of load has to be truncated, then it's not necessarily
-    // profitable.
-    if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
-      return SDValue();
-
-    if (InVec.getOpcode() == ISD::BITCAST) {
-      // Don't duplicate a load with other uses.
-      if (!InVec.hasOneUse())
-        return SDValue();
-
-      EVT BCVT = InVec.getOperand(0).getValueType();
-      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
-        return SDValue();
-      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
-        BCNumEltsChanged = true;
-      InVec = InVec.getOperand(0);
-      ExtVT = BCVT.getVectorElementType();
-      NewLoad = true;
-    }
  
      LoadSDNode *LN0 = nullptr;
      const ShuffleVectorSDNode *SVN = nullptr;
@@ -9855,6 +10024,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
        if (ISD::isNormalLoad(InVec.getNode())) {
          LN0 = cast<LoadSDNode>(InVec);
          Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+        EltNo = DAG.getConstant(Elt, EltNo.getValueType());
        }
      }
  
@@ -9867,72 +10037,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
      if (Elt == -1)
        return DAG.getUNDEF(LVT);
  
-    unsigned Align = LN0->getAlignment();
-    if (NewLoad) {
-      // Check the resultant load doesn't need a higher alignment than the
-      // original load.
-      unsigned NewAlign =
-        TLI.getDataLayout()
-            ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
-
-      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
-        return SDValue();
-
-      Align = NewAlign;
-    }
-
-    SDValue NewPtr = LN0->getBasePtr();
-    unsigned PtrOff = 0;
-
-    if (Elt) {
-      PtrOff = LVT.getSizeInBits() * Elt / 8;
-      EVT PtrType = NewPtr.getValueType();
-      if (TLI.isBigEndian())
-        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
-      NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
-                           DAG.getConstant(PtrOff, PtrType));
-    }
-
-    // The replacement we need to do here is a little tricky: we need to
-    // replace an extractelement of a load with a load.
-    // Use ReplaceAllUsesOfValuesWith to do the replacement.
-    // Note that this replacement assumes that the extractvalue is the only
-    // use of the load; that's okay because we don't want to perform this
-    // transformation in other cases anyway.
-    SDValue Load;
-    SDValue Chain;
-    if (NVT.bitsGT(LVT)) {
-      // If the result type of vextract is wider than the load, then issue an
-      // extending load instead.
-      ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
-        ? ISD::ZEXTLOAD : ISD::EXTLOAD;
-      Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
-                            NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
-                            LVT, LN0->isVolatile(), LN0->isNonTemporal(),
-                            Align, LN0->getTBAAInfo());
-      Chain = Load.getValue(1);
-    } else {
-      Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
-                         LN0->getPointerInfo().getWithOffset(PtrOff),
-                         LN0->isVolatile(), LN0->isNonTemporal(),
-                         LN0->isInvariant(), Align, LN0->getTBAAInfo());
-      Chain = Load.getValue(1);
-      if (NVT.bitsLT(LVT))
-        Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
-      else
-        Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
-    }
-    WorkListRemover DeadNodes(*this);
-    SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
-    SDValue To[] = { Load, Chain };
-    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
-    // Since we're explcitly calling ReplaceAllUses, add the new node to the
-    // worklist explicitly as well.
-    AddToWorkList(Load.getNode());
-    AddUsersToWorkList(Load.getNode()); // Add users too
-    // Make sure to revisit this node to clean it up; it will usually be dead.
-    AddToWorkList(N);
-    return SDValue(N, 0);
+    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
    }
  
    return SDValue();
@@ -10040,7 +10145,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
    if (!isTypeLegal(VecVT)) return SDValue();
  
    // Make the new BUILD_VECTOR.
-  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
  
    // The new BUILD_VECTOR node has the potential to be further optimized.
    AddToWorkList(BV.getNode());
@@ -10108,8 +10213,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
      else
        Opnds.push_back(In.getOperand(0));
    }
-  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
-                           &Opnds[0], Opnds.size());
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
    AddToWorkList(BV.getNode());
  
    return DAG.getNode(Opcode, dl, VT, BV);
@@ -10170,7 +10274,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
      }
    }
  
-    // If everything is good, we can make a shuffle operation.
+  // If everything is good, we can make a shuffle operation.
    if (VecIn1.getNode()) {
      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != NumInScalars; ++i) {
@@ -10294,13 +10398,26 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
      SmallVector<SDValue, 8> Opnds;
      unsigned BuildVecNumElts =  N0.getNumOperands();
  
-    for (unsigned i = 0; i != BuildVecNumElts; ++i)
-      Opnds.push_back(N0.getOperand(i));
-    for (unsigned i = 0; i != BuildVecNumElts; ++i)
-      Opnds.push_back(N1.getOperand(i));
+    EVT SclTy0 = N0.getOperand(0)->getValueType(0);
+    EVT SclTy1 = N1.getOperand(0)->getValueType(0);
+    if (SclTy0.isFloatingPoint()) {
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(N0.getOperand(i));
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(N1.getOperand(i));
+    } else {
+      // If BUILD_VECTOR are from built from integer, they may have different
+      // operand types. Get the smaller type and truncate all operands to it.
+      EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+                        N0.getOperand(i)));
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+                        N1.getOperand(i)));
+    }
  
-    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
-                       Opnds.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
    }
  
    // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
@@ -10457,8 +10574,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
      }
    }
  
-  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(),
-                     Ops.size());
+  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }
  
  SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
@@ -10574,22 +10690,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
    }
  
    // If this shuffle node is simply a swizzle of another shuffle node,
-  // and it reverses the swizzle of the previous shuffle then we can
-  // optimize shuffle(shuffle(x, undef), undef) -> x.
+  // then try to simplify it.
    if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
        N1.getOpcode() == ISD::UNDEF) {
  
      ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
  
-    // Shuffle nodes can only reverse shuffles with a single non-undef value.
-    if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
-      return SDValue();
-
      // The incoming shuffle must be of the same type as the result of the
      // current shuffle.
      assert(OtherSV->getOperand(0).getValueType() == VT &&
             "Shuffle types don't match");
  
+    SmallVector<int, 4> Mask;
+    // Compute the combined shuffle mask.
      for (unsigned i = 0; i != NumElts; ++i) {
        int Idx = SVN->getMaskElt(i);
        assert(Idx < (int)NumElts && "Index references undef operand");
@@ -10597,13 +10710,125 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
        // shuffle. Adopt the incoming index.
        if (Idx >= 0)
          Idx = OtherSV->getMaskElt(Idx);
+      Mask.push_back(Idx);
+    }
+    
+    bool CommuteOperands = false;
+    if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
+      // To be valid, the combine shuffle mask should only reference elements
+      // from one of the two vectors in input to the inner shufflevector.
+      bool IsValidMask = true;
+      for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+        // See if the combined mask only reference undefs or elements coming
+        // from the first shufflevector operand.
+        IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
+
+      if (!IsValidMask) {
+        IsValidMask = true;
+        for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+          // Check that all the elements come from the second shuffle operand.
+          IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
+        CommuteOperands = IsValidMask;
+      }
  
-      // The combined shuffle must map each index to itself.
-      if (Idx >= 0 && (unsigned)Idx != i)
+      // Early exit if the combined shuffle mask is not valid.
+      if (!IsValidMask)
          return SDValue();
      }
  
-    return OtherSV->getOperand(0);
+    // See if this pair of shuffles can be safely folded according to either
+    // of the following rules:
+    //   shuffle(shuffle(x, y), undef) -> x
+    //   shuffle(shuffle(x, undef), undef) -> x
+    //   shuffle(shuffle(x, y), undef) -> y
+    bool IsIdentityMask = true;
+    unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
+    for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
+      // Skip Undefs.
+      if (Mask[i] < 0)
+        continue;
+
+      // The combined shuffle must map each index to itself.
+      IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
+    }
+    
+    if (IsIdentityMask) {
+      if (CommuteOperands)
+        // optimize shuffle(shuffle(x, y), undef) -> y.
+        return OtherSV->getOperand(1);
+      
+      // optimize shuffle(shuffle(x, undef), undef) -> x
+      // optimize shuffle(shuffle(x, y), undef) -> x
+      return OtherSV->getOperand(0);
+    }
+
+    // It may still be beneficial to combine the two shuffles if the
+    // resulting shuffle is legal.
+    if (TLI.isTypeLegal(VT) && TLI.isShuffleMaskLegal(Mask, VT)) {
+      if (!CommuteOperands)
+        // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
+        // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
+        return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
+                                    &Mask[0]);
+      
+      //   shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
+      return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
+                                  &Mask[0]);
+    }
+  }
+
+  // Try to fold according to rules:
+  //   shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2)
+  //   shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2)
+  //   shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
+  //   shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+  // Don't try to fold shuffles with illegal type.
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+      N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) {
+    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+    // The incoming shuffle must be of the same type as the result of the
+    // current shuffle.
+    assert(OtherSV->getOperand(0).getValueType() == VT &&
+           "Shuffle types don't match");
+
+    SDValue SV0 = OtherSV->getOperand(0);
+    SDValue SV1 = OtherSV->getOperand(1);
+    bool HasSameOp0 = N1 == SV0;
+    bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
+    if (!HasSameOp0 && !IsSV1Undef && N1 != SV1)
+      // Early exit.
+      return SDValue();
+
+    SmallVector<int, 4> Mask;
+    // Compute the combined shuffle mask for a shuffle with SV0 as the first
+    // operand, and SV1 as the second operand.
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      if (Idx < 0) {
+        // Propagate Undef.
+        Mask.push_back(Idx);
+        continue;
+      }
+
+      if (Idx < (int)NumElts) {
+        Idx = OtherSV->getMaskElt(Idx);
+        if (IsSV1Undef && Idx >= (int) NumElts)
+          Idx = -1;  // Propagate Undef.
+      } else
+        Idx = HasSameOp0 ? Idx - NumElts : Idx;
+
+      Mask.push_back(Idx);
+    }
+
+    // Avoid introducing shuffles with illegal mask.
+    if (TLI.isShuffleMaskLegal(Mask, VT)) {
+      if (IsSV1Undef)
+        //   shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
+        //   shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+        return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]);
+      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
+    }
    }
  
    return SDValue();
@@ -10673,8 +10898,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
        EVT EltVT = RVT.getVectorElementType();
        SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
                                       DAG.getConstant(0, EltVT));
-      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
-                                 RVT, &ZeroOps[0], ZeroOps.size());
+      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
        LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
        SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
        return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
@@ -10743,8 +10967,28 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
      }
  
      if (Ops.size() == LHS.getNumOperands())
-      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
-                         LHS.getValueType(), &Ops[0], Ops.size());
+      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
+  }
+
+  // Type legalization might introduce new shuffles in the DAG.
+  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
+  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
+  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
+      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
+      LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
+      RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
+    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
+    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
+
+    if (SVN0->getMask().equals(SVN1->getMask())) {
+      EVT VT = N->getValueType(0);
+      SDValue UndefVector = LHS.getOperand(1);
+      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+                                     LHS.getOperand(0), RHS.getOperand(0));
+      AddUsersToWorkList(N);
+      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
+                                  &SVN0->getMask()[0]);
+    }
    }
  
    return SDValue();
@@ -10779,8 +11023,7 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
    if (Ops.size() != N0.getNumOperands())
      return SDValue();
  
-  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
-                     N0.getValueType(), &Ops[0], Ops.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
  }
  
  SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
@@ -10982,7 +11225,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
      if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
        if (TLI.isTypeLegal(N2.getValueType()) &&
            (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
-           TargetLowering::Legal) &&
+               TargetLowering::Legal &&
+           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
+           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
            // If both constants have multiple uses, then we won't need to do an
            // extra load, they are likely around in registers for other users.
            (TV->hasOneUse() || FV->hasOneUse())) {
@@ -11097,8 +11342,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
  
    // fold select C, 16, 0 -> shl C, 4
    if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
-    TLI.getBooleanContents(N0.getValueType().isVector()) ==
-      TargetLowering::ZeroOrOneBooleanContent) {
+      TLI.getBooleanContents(N0.getValueType()) ==
+          TargetLowering::ZeroOrOneBooleanContent) {
  
      // If the caller doesn't want us to simplify this into a zext of a compare,
      // don't do it.
@@ -11230,26 +11475,42 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
  /// multiplying by a magic number.  See:
  /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
  SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+  if (!C)
+    return SDValue();
+
+  // Avoid division by zero.
+  if (!C->getAPIntValue())
+    return SDValue();
+
    std::vector<SDNode*> Built;
-  SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
+  SDValue S =
+      TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
  
-  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
-       ii != ee; ++ii)
-    AddToWorkList(*ii);
+  for (SDNode *N : Built)
+    AddToWorkList(N);
    return S;
  }
  
-/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
  /// return a DAG expression to select that will generate the same value by
  /// multiplying by a magic number.  See:
  /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
  SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+  if (!C)
+    return SDValue();
+
+  // Avoid division by zero.
+  if (!C->getAPIntValue())
+    return SDValue();
+
    std::vector<SDNode*> Built;
-  SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
+  SDValue S =
+      TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
  
-  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
-       ii != ee; ++ii)
-    AddToWorkList(*ii);
+  for (SDNode *N : Built)
+    AddToWorkList(N);
    return S;
  }
  
@@ -11293,31 +11554,27 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
  
  /// isAlias - Return true if there is any possibility that the two addresses
  /// overlap.
-bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
-                          const Value *SrcValue1, int SrcValueOffset1,
-                          unsigned SrcValueAlign1,
-                          const MDNode *TBAAInfo1,
-                          SDValue Ptr2, int64_t Size2, bool IsVolatile2,
-                          const Value *SrcValue2, int SrcValueOffset2,
-                          unsigned SrcValueAlign2,
-                          const MDNode *TBAAInfo2) const {
+bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
    // If they are the same then they must be aliases.
-  if (Ptr1 == Ptr2) return true;
+  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
  
    // If they are both volatile then they cannot be reordered.
-  if (IsVolatile1 && IsVolatile2) return true;
+  if (Op0->isVolatile() && Op1->isVolatile()) return true;
  
    // Gather base node and offset information.
    SDValue Base1, Base2;
    int64_t Offset1, Offset2;
    const GlobalValue *GV1, *GV2;
    const void *CV1, *CV2;
-  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
-  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+  bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
+                                      Base1, Offset1, GV1, CV1);
+  bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
+                                      Base2, Offset2, GV2, CV2);
  
    // If they have a same base address then check to see if they overlap.
    if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
-    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+    return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
+             (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
  
    // It is possible for different frame indices to alias each other, mostly
    // when tail call optimization reuses return address slots for arguments.
@@ -11327,7 +11584,8 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
      MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
      Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
      Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
-    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+    return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
+             (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
    }
  
    // Otherwise, if we know what the bases are, and they aren't identical, then
@@ -11339,15 +11597,18 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
    // compared to the size and offset of the access, we may be able to prove they
    // do not alias.  This check is conservative for now to catch cases created by
    // splitting vector types.
-  if ((SrcValueAlign1 == SrcValueAlign2) &&
-      (SrcValueOffset1 != SrcValueOffset2) &&
-      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
-    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
-    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+  if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
+      (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
+      (Op0->getMemoryVT().getSizeInBits() >> 3 ==
+       Op1->getMemoryVT().getSizeInBits() >> 3) &&
+      (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) {
+    int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
+    int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
  
      // There is no overlap between these relatively aligned accesses of similar
      // size, return no alias.
-    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+    if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
+        (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
        return false;
    }
  
@@ -11358,16 +11619,22 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
        CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
      UseAA = false;
  #endif
-  if (UseAA && SrcValue1 && SrcValue2) {
+  if (UseAA &&
+      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
      // Use alias analysis information.
-    int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
-    int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
-    int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+    int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
+                                 Op1->getSrcValueOffset());
+    int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
+        Op0->getSrcValueOffset() - MinOffset;
+    int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
+        Op1->getSrcValueOffset() - MinOffset;
      AliasAnalysis::AliasResult AAResult =
-      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1,
-                                       UseTBAA ? TBAAInfo1 : nullptr),
-               AliasAnalysis::Location(SrcValue2, Overlap2,
-                                       UseTBAA ? TBAAInfo2 : nullptr));
+        AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(),
+                                         Overlap1,
+                                         UseTBAA ? Op0->getTBAAInfo() : nullptr),
+                 AliasAnalysis::Location(Op1->getMemOperand()->getValue(),
+                                         Overlap2,
+                                         UseTBAA ? Op1->getTBAAInfo() : nullptr));
      if (AAResult == AliasAnalysis::NoAlias)
        return false;
    }
@@ -11376,44 +11643,6 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
    return true;
  }
  
-bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
-  SDValue Ptr0, Ptr1;
-  int64_t Size0, Size1;
-  bool IsVolatile0, IsVolatile1;
-  const Value *SrcValue0, *SrcValue1;
-  int SrcValueOffset0, SrcValueOffset1;
-  unsigned SrcValueAlign0, SrcValueAlign1;
-  const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
-  FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
-                SrcValueAlign0, SrcTBAAInfo0);
-  FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
-                SrcValueAlign1, SrcTBAAInfo1);
-  return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
-                 SrcValueAlign0, SrcTBAAInfo0,
-                 Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
-                 SrcValueAlign1, SrcTBAAInfo1);
-}
-
-/// FindAliasInfo - Extracts the relevant alias information from the memory
-/// node.  Returns true if the operand was a nonvolatile load.
-bool DAGCombiner::FindAliasInfo(SDNode *N,
-                                SDValue &Ptr, int64_t &Size, bool &IsVolatile,
-                                const Value *&SrcValue,
-                                int &SrcValueOffset,
-                                unsigned &SrcValueAlign,
-                                const MDNode *&TBAAInfo) const {
-  LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
-
-  Ptr = LS->getBasePtr();
-  Size = LS->getMemoryVT().getSizeInBits() >> 3;
-  IsVolatile = LS->isVolatile();
-  SrcValue = LS->getSrcValue();
-  SrcValueOffset = LS->getSrcValueOffset();
-  SrcValueAlign = LS->getOriginalAlignment();
-  TBAAInfo = LS->getTBAAInfo();
-  return isa<LoadSDNode>(LS) && !IsVolatile;
-}
-
  /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
  /// looking for aliasing nodes and adding them to the Aliases vector.
  void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
@@ -11422,15 +11651,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
    SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
  
    // Get alias information for node.
-  SDValue Ptr;
-  int64_t Size;
-  bool IsVolatile;
-  const Value *SrcValue;
-  int SrcValueOffset;
-  unsigned SrcValueAlign;
-  const MDNode *SrcTBAAInfo;
-  bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue,
-                              SrcValueOffset, SrcValueAlign, SrcTBAAInfo);
+  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
  
    // Starting off.
    Chains.push_back(OriginalChain);
@@ -11469,24 +11690,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
      case ISD::LOAD:
      case ISD::STORE: {
        // Get alias information for Chain.
-      SDValue OpPtr;
-      int64_t OpSize;
-      bool OpIsVolatile;
-      const Value *OpSrcValue;
-      int OpSrcValueOffset;
-      unsigned OpSrcValueAlign;
-      const MDNode *OpSrcTBAAInfo;
-      bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
-                                    OpIsVolatile, OpSrcValue, OpSrcValueOffset,
-                                    OpSrcValueAlign,
-                                    OpSrcTBAAInfo);
+      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
+          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
  
        // If chain is alias then stop here.
        if (!(IsLoad && IsOpLoad) &&
-          isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset,
-                  SrcValueAlign, SrcTBAAInfo,
-                  OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset,
-                  OpSrcValueAlign, OpSrcTBAAInfo)) {
+          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
          Aliases.push_back(Chain);
        } else {
          // Look further up the chain.
@@ -11592,8 +11801,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
      return Aliases[0];
  
    // Construct a custom tailored token factor.
-  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
-                     &Aliases[0], Aliases.size());
+  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
  }
  
  // SelectionDAG::Combine - This is the entry point for the file.