Have SelectionDAG's subtarget TargetSelectionDAGInfo be set

[oota-llvm.git] / lib / CodeGen / SelectionDAG / DAGCombiner.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index b95a6c1198dbc6e507215afeb0707ef8391d288a..074068e97484cc91a8cda750b999cf1375a4f5df 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16,9 +16,10 @@
  //
  //===----------------------------------------------------------------------===//
  
-#define DEBUG_TYPE "dagcombine"
  #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallBitVector.h"
  #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -40,6 +41,8 @@
  #include <algorithm>
  using namespace llvm;
  
+#define DEBUG_TYPE "dagcombine"
+
  STATISTIC(NodesCombined   , "Number of dag nodes combined");
  STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
@@ -56,14 +59,8 @@ namespace {
      CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));
  
-// FIXME: Enable the use of TBAA. There are two known issues preventing this:
-//   1. Stack coloring does not update TBAA when merging allocas
-//   2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations.
-//      Because BasicAA does not handle inttoptr, we'll often miss basic type
-//      punning idioms that we need to catch so we don't miscompile real-world
-//      code.
    static cl::opt<bool>
-    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(false),
+    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
                 cl::desc("Enable DAG combiner's use of TBAA"));
  
  #ifndef NDEBUG
@@ -81,6 +78,10 @@ namespace {
                               "slicing"),
                      cl::init(false));
  
+  static cl::opt<bool>
+    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
+                      cl::desc("DAG combiner may split indexing from loads"));
+
  //------------------------------ DAGCombiner ---------------------------------//
  
    class DAGCombiner {
@@ -92,57 +93,70 @@ namespace {
      bool LegalTypes;
      bool ForCodeSize;
  
-    // Worklist of all of the nodes that need to be simplified.
-    //
-    // This has the semantics that when adding to the worklist,
-    // the item added must be next to be processed. It should
-    // also only appear once. The naive approach to this takes
-    // linear time.
-    //
-    // To reduce the insert/remove time to logarithmic, we use
-    // a set and a vector to maintain our worklist.
-    //
-    // The set contains the items on the worklist, but does not
-    // maintain the order they should be visited.
-    //
-    // The vector maintains the order nodes should be visited, but may
-    // contain duplicate or removed nodes. When choosing a node to
-    // visit, we pop off the order stack until we find an item that is
-    // also in the contents set. All operations are O(log N).
-    SmallPtrSet<SDNode*, 64> WorkListContents;
-    SmallVector<SDNode*, 64> WorkListOrder;
+    /// \brief Worklist of all of the nodes that need to be simplified.
+    ///
+    /// This must behave as a stack -- new nodes to process are pushed onto the
+    /// back and when processing we pop off of the back.
+    ///
+    /// The worklist will not contain duplicates but may contain null entries
+    /// due to nodes being deleted from the underlying DAG.
+    SmallVector<SDNode *, 64> Worklist;
+
+    /// \brief Mapping from an SDNode to its position on the worklist.
+    ///
+    /// This is used to find and remove nodes from the worklist (by nulling
+    /// them) when they are deleted from the underlying DAG. It relies on
+    /// stable indices of nodes within the worklist.
+    DenseMap<SDNode *, unsigned> WorklistMap;
+
+    /// \brief Set of nodes which have been combined (at least once).
+    ///
+    /// This is used to allow us to reliably add any operands of a DAG node
+    /// which have not yet been combined to the worklist.
+    SmallPtrSet<SDNode *, 64> CombinedNodes;
  
      // AA - Used for DAG load/store alias analysis.
      AliasAnalysis &AA;
  
-    /// AddUsersToWorkList - When an instruction is simplified, add all users of
-    /// the instruction to the work lists because they might get more simplified
-    /// now.
-    ///
-    void AddUsersToWorkList(SDNode *N) {
-      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
-           UI != UE; ++UI)
-        AddToWorkList(*UI);
+    /// When an instruction is simplified, add all users of the instruction to
+    /// the worklist because they might be simplified further now.
+    void AddUsersToWorklist(SDNode *N) {
+      for (SDNode *Node : N->uses())
+        AddToWorklist(Node);
      }
  
-    /// visit - call the node-specific routine that knows how to fold each
-    /// particular type of node.
+    /// Call the node-specific routine that folds each particular type of node.
      SDValue visit(SDNode *N);
  
    public:
-    /// AddToWorkList - Add to the work list making sure its instance is at the
-    /// back (next to be processed.)
-    void AddToWorkList(SDNode *N) {
-      WorkListContents.insert(N);
-      WorkListOrder.push_back(N);
+    /// Push N onto the back of the worklist (next to be processed) unless it
+    /// is already queued, in which case its existing position is kept.
+    void AddToWorklist(SDNode *N) {
+      // Skip handle nodes as they can't usefully be combined and confuse the
+      // zero-use deletion strategy.
+      if (N->getOpcode() == ISD::HANDLENODE)
+        return;
+
+      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
+        Worklist.push_back(N);
+    }
  
-    /// removeFromWorkList - remove all instances of N from the worklist.
-    ///
-    void removeFromWorkList(SDNode *N) {
-      WorkListContents.erase(N);
+    /// Drop N from CombinedNodes and null out its worklist entry, if any.
+    void removeFromWorklist(SDNode *N) {
+      CombinedNodes.erase(N);
+
+      auto It = WorklistMap.find(N);
+      if (It == WorklistMap.end())
+        return; // Not in the worklist.
+
+      // Null out the entry rather than erasing it to avoid a linear operation.
+      Worklist[It->second] = nullptr;
+      WorklistMap.erase(It);
+    }
  
+    void deleteAndRecombine(SDNode *N);
+    bool recursivelyDeleteUnusedNodes(SDNode *N);
+
      SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                        bool AddTo = true);
  
@@ -160,9 +174,9 @@ namespace {
  
    private:
  
-    /// SimplifyDemandedBits - Check the specified integer node value to see if
-    /// it can be simplified or if things it uses can be simplified by bit
-    /// propagation.  If so, return true.
+    /// Check the specified integer node value to see if it can be simplified or
+    /// if things it uses can be simplified by bit propagation.
+    /// If so, return true.
      bool SimplifyDemandedBits(SDValue Op) {
        unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
        APInt Demanded = APInt::getAllOnesValue(BitWidth);
@@ -173,8 +187,19 @@ namespace {
  
      bool CombineToPreIndexedLoadStore(SDNode *N);
      bool CombineToPostIndexedLoadStore(SDNode *N);
+    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
      bool SliceUpLoad(SDNode *N);
  
+    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+    ///   load.
+    ///
+    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
+    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
+    /// \param EltNo index of the vector element to load.
+    /// \param OriginalLoad load that EVE came from to be replaced.
+    /// \returns EVE on success, SDValue() on failure.
+    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
      void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
      SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
      SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -188,7 +213,7 @@ namespace {
                           SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                           ISD::NodeType ExtType);
  
-    /// combine - call the node-specific routine that knows how to fold each
+    /// Call the node-specific routine that knows how to fold each
      /// particular type of node. If that doesn't do anything, try the
      /// target-specific DAG combines.
      SDValue combine(SDNode *N);
@@ -229,6 +254,7 @@ namespace {
      SDValue visitSHL(SDNode *N);
      SDValue visitSRA(SDNode *N);
      SDValue visitSRL(SDNode *N);
+    SDValue visitRotate(SDNode *N);
      SDValue visitCTLZ(SDNode *N);
      SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
      SDValue visitCTTZ(SDNode *N);
@@ -251,6 +277,7 @@ namespace {
      SDValue visitFMA(SDNode *N);
      SDValue visitFDIV(SDNode *N);
      SDValue visitFREM(SDNode *N);
+    SDValue visitFSQRT(SDNode *N);
      SDValue visitFCOPYSIGN(SDNode *N);
      SDValue visitSINT_TO_FP(SDNode *N);
      SDValue visitUINT_TO_FP(SDNode *N);
@@ -274,11 +301,12 @@ namespace {
      SDValue visitCONCAT_VECTORS(SDNode *N);
      SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
      SDValue visitVECTOR_SHUFFLE(SDNode *N);
+    SDValue visitINSERT_SUBVECTOR(SDNode *N);
  
      SDValue XformToShuffleWithZero(SDNode *N);
      SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
  
-    SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
  
      bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
      SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
@@ -288,12 +316,20 @@ namespace {
                               bool NotExtCompare = false);
      SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                            SDLoc DL, bool foldBooleans = true);
+
+    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+                           SDValue &CC) const;
+    bool isOneUseSetCC(SDValue N) const;
+
      SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                           unsigned HiOp);
      SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
      SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
      SDValue BuildSDIV(SDNode *N);
+    SDValue BuildSDIVPow2(SDNode *N);
      SDValue BuildUDIV(SDNode *N);
+    SDValue BuildReciprocalEstimate(SDValue Op);
+    SDValue BuildRsqrtEstimate(SDValue Op);
      SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                 bool DemandHighBits = true);
      SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -310,36 +346,16 @@ namespace {
  
      SDValue GetDemandedBits(SDValue V, const APInt &Mask);
  
-    /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+    /// Walk up chain skipping non-aliasing memory nodes,
      /// looking for aliasing nodes and adding them to the Aliases vector.
      void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                            SmallVectorImpl<SDValue> &Aliases);
  
-    /// isAlias - Return true if there is any possibility that the two addresses
-    /// overlap.
-    bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
-                 const Value *SrcValue1, int SrcValueOffset1,
-                 unsigned SrcValueAlign1,
-                 const MDNode *TBAAInfo1,
-                 SDValue Ptr2, int64_t Size2, bool IsVolatile2,
-                 const Value *SrcValue2, int SrcValueOffset2,
-                 unsigned SrcValueAlign2,
-                 const MDNode *TBAAInfo2) const;
-
-    /// isAlias - Return true if there is any possibility that the two addresses
-    /// overlap.
-    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
-
-    /// FindAliasInfo - Extracts the relevant alias information from the memory
-    /// node.  Returns true if the operand was a load.
-    bool FindAliasInfo(SDNode *N,
-                       SDValue &Ptr, int64_t &Size, bool &IsVolatile,
-                       const Value *&SrcValue, int &SrcValueOffset,
-                       unsigned &SrcValueAlignment,
-                       const MDNode *&TBAAInfo) const;
-
-    /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
-    /// looking for a better chain (aliasing node.)
+    /// Return true if there is any possibility that the two addresses overlap.
+    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
+
+    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
+    /// chain (aliasing node.)
      SDValue FindBetterChain(SDNode *N, SDValue Chain);
  
      /// Merge consecutive store operations into a wide store.
@@ -347,6 +363,14 @@ namespace {
      /// \return True if some memory operations were changed.
      bool MergeConsecutiveStores(StoreSDNode *N);
  
+    /// \brief Try to transform a truncation where C is a constant:
+    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
+    ///
+    /// \p N needs to be a truncation and its first operand an AND. Other
+    /// requirements are checked by the function (e.g. that trunc is
+    /// single-use); if any is not met, an empty SDValue is returned.
+    SDValue distributeTruncateThroughAnd(SDNode *N);
+
    public:
      DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
          : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
@@ -359,13 +383,13 @@ namespace {
            FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
      }
  
-    /// Run - runs the dag combiner on all nodes in the work list
+    /// Runs the DAG combiner on all nodes in the worklist.
      void Run(CombineLevel AtLevel);
  
      SelectionDAG &getDAG() const { return DAG; }
  
-    /// getShiftAmountTy - Returns a type large enough to hold any valid
-    /// shift amount - before type legalization these can be huge.
+    /// Returns a type large enough to hold any valid shift amount - before type
+    /// legalization these can be huge.
      EVT getShiftAmountTy(EVT LHSTy) {
        assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
        if (LHSTy.isVector())
@@ -374,15 +398,14 @@ namespace {
                          : TLI.getPointerTy();
      }
  
-    /// isTypeLegal - This method returns true if we are running before type
-    /// legalization or if the specified VT is legal.
+    /// This method returns true if we are running before type legalization or
+    /// if the specified VT is legal.
      bool isTypeLegal(const EVT &VT) {
        if (!LegalTypes) return true;
        return TLI.isTypeLegal(VT);
      }
  
-    /// getSetCCResultType - Convenience wrapper around
-    /// TargetLowering::getSetCCResultType
+    /// Convenience wrapper around TargetLowering::getSetCCResultType
      EVT getSetCCResultType(EVT VT) const {
        return TLI.getSetCCResultType(*DAG.getContext(), VT);
      }
@@ -391,16 +414,16 @@ namespace {
  
  
  namespace {
-/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// This class is a DAGUpdateListener that removes any deleted
  /// nodes from the worklist.
-class WorkListRemover : public SelectionDAG::DAGUpdateListener {
+class WorklistRemover : public SelectionDAG::DAGUpdateListener {
    DAGCombiner &DC;
  public:
-  explicit WorkListRemover(DAGCombiner &dc)
+  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
  
-  virtual void NodeDeleted(SDNode *N, SDNode *E) {
-    DC.removeFromWorkList(N);
+  void NodeDeleted(SDNode *N, SDNode *E) override {
+    DC.removeFromWorklist(N);
    }
  };
  }
@@ -410,11 +433,11 @@ public:
  //===----------------------------------------------------------------------===//
  
  void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
-  ((DAGCombiner*)DC)->AddToWorkList(N);
+  ((DAGCombiner*)DC)->AddToWorklist(N);
  }
  
  void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
-  ((DAGCombiner*)DC)->removeFromWorkList(N);
+  ((DAGCombiner*)DC)->removeFromWorklist(N);
  }
  
  SDValue TargetLowering::DAGCombinerInfo::
@@ -442,9 +465,24 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Helper Functions
  //===----------------------------------------------------------------------===//
  
-/// isNegatibleForFree - Return 1 if we can compute the negated form of the
-/// specified expression for the same cost as the expression itself, or 2 if we
-/// can compute the negated form more cheaply than the expression itself.
+void DAGCombiner::deleteAndRecombine(SDNode *N) {
+  removeFromWorklist(N);
+
+  // If the operands of this node are only used by the node, they will now be
+  // dead. Make sure to re-visit them and recursively delete dead nodes.
+  for (const SDValue &Op : N->ops())
+    // For an operand generating multiple values, one of the values may
+    // become dead allowing further simplification (e.g. split index
+    // arithmetic from an indexed load).
+    if (Op->hasOneUse() || Op->getNumValues() > 1)
+      AddToWorklist(Op.getNode());
+
+  DAG.DeleteNode(N);
+}
+
+/// Return 1 if we can compute the negated form of the specified expression for
+/// the same cost as the expression itself, or 2 if we can compute the negated
+/// form more cheaply than the expression itself.
  static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                                 const TargetLowering &TLI,
                                 const TargetOptions *Options,
@@ -507,10 +545,10 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
    }
  }
  
-/// GetNegatedExpression - If isNegatibleForFree returns true, this function
-/// returns the newly negated expression.
+/// If isNegatibleForFree returns true, return the newly negated expression.
  static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                      bool LegalOperations, unsigned Depth = 0) {
+  const TargetOptions &Options = DAG.getTarget().Options;
    // fneg is removable even if it has multiple uses.
    if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
  
@@ -527,12 +565,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
    }
    case ISD::FADD:
      // FIXME: determine better conditions for this xform.
-    assert(DAG.getTarget().Options.UnsafeFPMath);
+    assert(Options.UnsafeFPMath);
  
      // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
      if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                           DAG.getTargetLoweringInfo(),
-                           &DAG.getTarget().Options, Depth+1))
+                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
        return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                           GetNegatedExpression(Op.getOperand(0), DAG,
                                                LegalOperations, Depth+1),
@@ -544,7 +581,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                         Op.getOperand(0));
    case ISD::FSUB:
      // We can't turn -(A-B) into B-A when we honor signed zeros.
-    assert(DAG.getTarget().Options.UnsafeFPMath);
+    assert(Options.UnsafeFPMath);
  
      // fold (fneg (fsub 0, B)) -> B
      if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
@@ -557,12 +594,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
  
    case ISD::FMUL:
    case ISD::FDIV:
-    assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
+    assert(!Options.HonorSignDependentRoundingFPMath());
  
      // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
      if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                           DAG.getTargetLoweringInfo(),
-                           &DAG.getTarget().Options, Depth+1))
+                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
        return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                           GetNegatedExpression(Op.getOperand(0), DAG,
                                                LegalOperations, Depth+1),
@@ -587,51 +623,103 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
    }
  }
  
-
-// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
-// that selects between the values 1 and 0, making it equivalent to a setcc.
-// Also, set the incoming LHS, RHS, and CC references to the appropriate
-// nodes based on the type of node we are checking.  This simplifies life a
-// bit for the callers.
-static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
-                              SDValue &CC) {
+// Return true if this node is a setcc, or is a select_cc
+// that selects between the target values used for true and false, making it
+// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
+// the appropriate nodes based on the type of node we are checking. This
+// simplifies life a bit for the callers.
+bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+                                    SDValue &CC) const {
    if (N.getOpcode() == ISD::SETCC) {
      LHS = N.getOperand(0);
      RHS = N.getOperand(1);
      CC  = N.getOperand(2);
      return true;
    }
-  if (N.getOpcode() == ISD::SELECT_CC &&
-      N.getOperand(2).getOpcode() == ISD::Constant &&
-      N.getOperand(3).getOpcode() == ISD::Constant &&
-      cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
-      cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
-    LHS = N.getOperand(0);
-    RHS = N.getOperand(1);
-    CC  = N.getOperand(4);
-    return true;
-  }
-  return false;
+
+  if (N.getOpcode() != ISD::SELECT_CC ||
+      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
+      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
+    return false;
+
+  LHS = N.getOperand(0);
+  RHS = N.getOperand(1);
+  CC  = N.getOperand(4);
+  return true;
  }
  
-// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
-// one use.  If this is true, it allows the users to invert the operation for
-// free when it is profitable to do so.
-static bool isOneUseSetCC(SDValue N) {
+/// Return true if this is a SetCC-equivalent operation with only one use.
+/// If this is true, it allows the users to invert the operation for free when
+/// it is profitable to do so.
+bool DAGCombiner::isOneUseSetCC(SDValue N) const {
    SDValue N0, N1, N2;
    if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
      return true;
    return false;
  }
  
-// \brief Returns the SDNode if it is a constant BuildVector or constant int.
+/// Returns true if N is a BUILD_VECTOR whose elements are all the same constant
+/// or undef and whose splat fits in one element; SplatValue receives the splat.
+static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
+  BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
+  if (!C)
+    return false;
+
+  APInt SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                             HasAnyUndefs) &&
+          EltVT.getSizeInBits() >= SplatBitSize);
+}
+
+// \brief Returns the SDNode if it is a constant BuildVector or constant.
  static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
    if (isa<ConstantSDNode>(N))
      return N.getNode();
    BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
-  if(BV && BV->isConstant())
+  if (BV && BV->isConstant())
      return BV;
-  return NULL;
+  return nullptr;
+}
+
+/// \brief Returns the ConstantSDNode if N is a constant int or a BuildVector
+/// splat of one (no undefs, no operand truncation); nullptr otherwise.
+static ConstantSDNode *isConstOrConstSplat(SDValue N) {
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+    return CN;
+
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+    BitVector UndefElements;
+    ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
+
+    // BuildVectors can truncate their operands. Ignore that case here.
+    // FIXME: We blindly ignore splats which include undef which is overly
+    // pessimistic.
+    if (CN && UndefElements.none() &&
+        CN->getValueType(0) == N.getValueType().getScalarType())
+      return CN;
+  }
+
+  return nullptr;
+}
+
+/// \brief Returns the ConstantFPSDNode if N is a constant float or a
+/// BuildVector splat of one with no undef elements; nullptr otherwise.
+static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
+  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+    return CN;
+
+  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+    BitVector UndefElements;
+    ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+
+    if (CN && UndefElements.none())
+      return CN;
+  }
+
+  return nullptr;
  }
  
  SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
@@ -652,7 +740,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
          SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
          if (!OpNode.getNode())
            return SDValue();
-        AddToWorkList(OpNode.getNode());
+        AddToWorklist(OpNode.getNode());
          return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
        }
      }
@@ -673,7 +761,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
          SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
          if (!OpNode.getNode())
            return SDValue();
-        AddToWorkList(OpNode.getNode());
+        AddToWorklist(OpNode.getNode());
          return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
        }
      }
@@ -695,14 +783,14 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
            assert((!To[i].getNode() ||
                    N->getValueType(i) == To[i].getValueType()) &&
                   "Cannot combine value to value of different type!"));
-  WorkListRemover DeadNodes(*this);
+  WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesWith(N, To);
    if (AddTo) {
      // Push the new nodes and any users onto the worklist
      for (unsigned i = 0, e = NumTo; i != e; ++i) {
        if (To[i].getNode()) {
-        AddToWorkList(To[i].getNode());
-        AddUsersToWorkList(To[i].getNode());
+        AddToWorklist(To[i].getNode());
+        AddUsersToWorklist(To[i].getNode());
        }
      }
    }
@@ -710,14 +798,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.
-  if (N->use_empty()) {
-    // Nodes can be reintroduced into the worklist.  Make sure we do not
-    // process a node that has been replaced.
-    removeFromWorkList(N);
-
-    // Finally, since the node is now dead, remove it from the graph.
-    DAG.DeleteNode(N);
-  }
+  if (N->use_empty())
+    deleteAndRecombine(N);
    return SDValue(N, 0);
  }
  
@@ -725,32 +807,22 @@ void DAGCombiner::
  CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
    // Replace all uses.  If any nodes become isomorphic to other nodes and
    // are deleted, make sure to remove them from our worklist.
-  WorkListRemover DeadNodes(*this);
+  WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
  
    // Push the new node and any (possibly new) users onto the worklist.
-  AddToWorkList(TLO.New.getNode());
-  AddUsersToWorkList(TLO.New.getNode());
+  AddToWorklist(TLO.New.getNode());
+  AddUsersToWorklist(TLO.New.getNode());
  
    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.
-  if (TLO.Old.getNode()->use_empty()) {
-    removeFromWorkList(TLO.Old.getNode());
-
-    // If the operands of this node are only used by the node, they will now
-    // be dead.  Make sure to visit them first to delete dead nodes early.
-    for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
-      if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
-        AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
-
-    DAG.DeleteNode(TLO.Old.getNode());
-  }
+  if (TLO.Old.getNode()->use_empty())
+    deleteAndRecombine(TLO.Old.getNode());
  }
  
-/// SimplifyDemandedBits - Check the specified integer node value to see if
-/// it can be simplified or if things it uses can be simplified by bit
-/// propagation.  If so, return true.
+/// Check the specified integer node value to see if it can be simplified or if
+/// things it uses can be simplified by bit propagation. If so, return true.
  bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
    TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
    APInt KnownZero, KnownOne;
@@ -758,7 +830,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
      return false;
  
    // Revisit the node.
-  AddToWorkList(Op.getNode());
+  AddToWorklist(Op.getNode());
  
    // Replace the old value with the new one.
    ++NodesCombined;
@@ -782,12 +854,11 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
          dbgs() << "\nWith: ";
          Trunc.getNode()->dump(&DAG);
          dbgs() << '\n');
-  WorkListRemover DeadNodes(*this);
+  WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
-  removeFromWorkList(Load);
-  DAG.DeleteNode(Load);
-  AddToWorkList(Trunc.getNode());
+  deleteAndRecombine(Load);
+  AddToWorklist(Trunc.getNode());
  }
  
  SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
@@ -835,9 +906,9 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
    SDLoc dl(Op);
    bool Replace = false;
    SDValue NewOp = PromoteOperand(Op, PVT, Replace);
-  if (NewOp.getNode() == 0)
+  if (!NewOp.getNode())
      return SDValue();
-  AddToWorkList(NewOp.getNode());
+  AddToWorklist(NewOp.getNode());
  
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
@@ -850,18 +921,18 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
    SDLoc dl(Op);
    bool Replace = false;
    SDValue NewOp = PromoteOperand(Op, PVT, Replace);
-  if (NewOp.getNode() == 0)
+  if (!NewOp.getNode())
      return SDValue();
-  AddToWorkList(NewOp.getNode());
+  AddToWorklist(NewOp.getNode());
  
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
    return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
  }
  
-/// PromoteIntBinOp - Promote the specified integer binary operation if the
-/// target indicates it is beneficial. e.g. On x86, it's usually better to
-/// promote i16 operations to i32 since i16 instructions are longer.
+/// Promote the specified integer binary operation if the target indicates it is
+/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
  SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
    if (!LegalOperations)
      return SDValue();
@@ -885,7 +956,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
      bool Replace0 = false;
      SDValue N0 = Op.getOperand(0);
      SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
-    if (NN0.getNode() == 0)
+    if (!NN0.getNode())
        return SDValue();
  
      bool Replace1 = false;
@@ -895,13 +966,13 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
        NN1 = NN0;
      else {
        NN1 = PromoteOperand(N1, PVT, Replace1);
-      if (NN1.getNode() == 0)
+      if (!NN1.getNode())
          return SDValue();
      }
  
-    AddToWorkList(NN0.getNode());
+    AddToWorklist(NN0.getNode());
      if (NN1.getNode())
-      AddToWorkList(NN1.getNode());
+      AddToWorklist(NN1.getNode());
  
      if (Replace0)
        ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
@@ -917,9 +988,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
    return SDValue();
  }
  
-/// PromoteIntShiftOp - Promote the specified integer shift operation if the
-/// target indicates it is beneficial. e.g. On x86, it's usually better to
-/// promote i16 operations to i32 since i16 instructions are longer.
+/// Promote the specified integer shift operation if the target indicates it is
+/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
+/// i32 since i16 instructions are longer.
  SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
    if (!LegalOperations)
      return SDValue();
@@ -948,10 +1019,10 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
        N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
      else
        N0 = PromoteOperand(N0, PVT, Replace);
-    if (N0.getNode() == 0)
+    if (!N0.getNode())
        return SDValue();
  
-    AddToWorkList(N0.getNode());
+    AddToWorklist(N0.getNode());
      if (Replace)
        ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
  
@@ -1031,17 +1102,45 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
            dbgs() << "\nTo: ";
            Result.getNode()->dump(&DAG);
            dbgs() << '\n');
-    WorkListRemover DeadNodes(*this);
+    WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
-    removeFromWorkList(N);
-    DAG.DeleteNode(N);
-    AddToWorkList(Result.getNode());
+    deleteAndRecombine(N);
+    AddToWorklist(Result.getNode());
      return true;
    }
    return false;
  }
  
+/// \brief Recursively delete a node which has no uses, along with any operands
+/// for which it was the only use.
+///
+/// Deleted nodes are also removed from the worklist; operands that lose a user
+/// but stay alive are re-added to it, since having fewer uses may enable other
+/// combines. Returns false (doing nothing) if N still has uses, true otherwise.
+bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
+  if (!N->use_empty())
+    return false;  // N is still live; leave the DAG untouched.
+
+  SmallSetVector<SDNode *, 16> Nodes;  // Pending nodes to examine.
+  Nodes.insert(N);
+  do {
+    N = Nodes.pop_back_val();
+    if (!N)  // Skip null entries.
+      continue;
+
+    if (N->use_empty()) {  // Dead: queue its operands, then delete it.
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        Nodes.insert(N->getOperand(i).getNode());
+
+      removeFromWorklist(N);
+      DAG.DeleteNode(N);
+    } else {
+      AddToWorklist(N);  // Still used, but lost a user: recombine later.
+    }
+  } while (!Nodes.empty());
+  return true;
+}
  
  //===----------------------------------------------------------------------===//
  //  Main DAG Combiner implementation
@@ -1056,44 +1155,62 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
    // Add all the dag nodes to the worklist.
    for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
         E = DAG.allnodes_end(); I != E; ++I)
-    AddToWorkList(I);
+    AddToWorklist(I);
  
    // Create a dummy node (which is not added to allnodes), that adds a reference
    // to the root node, preventing it from being deleted, and tracking any
    // changes of the root.
    HandleSDNode Dummy(DAG.getRoot());
  
-  // The root of the dag may dangle to deleted nodes until the dag combiner is
-  // done.  Set it to null to avoid confusion.
-  DAG.setRoot(SDValue());
-
    // while the worklist isn't empty, find a node and
    // try and combine it.
-  while (!WorkListContents.empty()) {
+  while (!WorklistMap.empty()) {
      SDNode *N;
-    // The WorkListOrder holds the SDNodes in order, but it may contain
-    // duplicates.
-    // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
-    // worklist *should* contain, and check the node we want to visit is should
-    // actually be visited.
+    // The Worklist holds the SDNodes in order, but it may contain null entries.
      do {
-      N = WorkListOrder.pop_back_val();
-    } while (!WorkListContents.erase(N));
+      N = Worklist.pop_back_val();
+    } while (!N);
+
+    bool GoodWorklistEntry = WorklistMap.erase(N);
+    (void)GoodWorklistEntry;
+    assert(GoodWorklistEntry &&
+           "Found a worklist entry without a corresponding map entry!");
  
      // If N has no uses, it is dead.  Make sure to revisit all N's operands once
      // N is deleted from the DAG, since they too may now be dead or may have a
      // reduced number of uses, allowing other xforms.
-    if (N->use_empty() && N != &Dummy) {
-      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-        AddToWorkList(N->getOperand(i).getNode());
-
-      DAG.DeleteNode(N);
+    if (recursivelyDeleteUnusedNodes(N))
        continue;
+
+    WorklistRemover DeadNodes(*this);
+
+    // If this combine is running after legalizing the DAG, re-legalize any
+    // nodes pulled off the worklist.
+    if (Level == AfterLegalizeDAG) {
+      SmallSetVector<SDNode *, 16> UpdatedNodes;
+      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
+
+      for (SDNode *LN : UpdatedNodes) {
+        AddToWorklist(LN);
+        AddUsersToWorklist(LN);
+      }
+      if (!NIsValid)
+        continue;
      }
  
+    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
+
+    // Add any operands of the new node which have not yet been combined to the
+    // worklist as well. Because the worklist uniques things already, this
+    // won't repeatedly process the same operand.
+    CombinedNodes.insert(N);
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      if (!CombinedNodes.count(N->getOperand(i).getNode()))
+        AddToWorklist(N->getOperand(i).getNode());
+
      SDValue RV = combine(N);
  
-    if (RV.getNode() == 0)
+    if (!RV.getNode())
        continue;
  
      ++NodesCombined;
@@ -1109,15 +1226,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
             RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
             "Node was deleted but visit returned new node!");
  
-    DEBUG(dbgs() << "\nReplacing.3 ";
-          N->dump(&DAG);
-          dbgs() << "\nWith: ";
-          RV.getNode()->dump(&DAG);
-          dbgs() << '\n');
+    DEBUG(dbgs() << " ... into: ";
+          RV.getNode()->dump(&DAG));
  
      // Transfer debug value.
      DAG.TransferDbgValues(SDValue(N, 0), RV);
-    WorkListRemover DeadNodes(*this);
      if (N->getNumValues() == RV.getNode()->getNumValues())
        DAG.ReplaceAllUsesWith(N, RV.getNode());
      else {
@@ -1128,26 +1241,14 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
      }
  
      // Push the new node and any users onto the worklist
-    AddToWorkList(RV.getNode());
-    AddUsersToWorkList(RV.getNode());
-
-    // Add any uses of the old node to the worklist in case this node is the
-    // last one that uses them.  They may become dead after this node is
-    // deleted.
-    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-      AddToWorkList(N->getOperand(i).getNode());
+    AddToWorklist(RV.getNode());
+    AddUsersToWorklist(RV.getNode());
  
      // Finally, if the node is now dead, remove it from the graph.  The node
      // may not be dead if the replacement process recursively simplified to
-    // something else needing this node.
-    if (N->use_empty()) {
-      // Nodes can be reintroduced into the worklist.  Make sure we do not
-      // process a node that has been replaced.
-      removeFromWorkList(N);
-
-      // Finally, since the node is now dead, remove it from the graph.
-      DAG.DeleteNode(N);
-    }
+    // something else needing this node. This will also take care of adding any
+    // operands which have lost a user to the worklist.
+    recursivelyDeleteUnusedNodes(N);
    }
  
    // If the root changed (e.g. it was a dead load, update the root).
@@ -1185,6 +1286,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
    case ISD::SHL:                return visitSHL(N);
    case ISD::SRA:                return visitSRA(N);
    case ISD::SRL:                return visitSRL(N);
+  case ISD::ROTR:
+  case ISD::ROTL:               return visitRotate(N);
    case ISD::CTLZ:               return visitCTLZ(N);
    case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
    case ISD::CTTZ:               return visitCTTZ(N);
@@ -1207,6 +1310,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
    case ISD::FMA:                return visitFMA(N);
    case ISD::FDIV:               return visitFDIV(N);
    case ISD::FREM:               return visitFREM(N);
+  case ISD::FSQRT:              return visitFSQRT(N);
    case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
    case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
    case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
@@ -1230,6 +1334,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
    case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
    case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
    case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
+  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
    }
    return SDValue();
  }
@@ -1238,7 +1343,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
    SDValue RV = visit(N);
  
    // If nothing happened, try a target-specific DAG combine.
-  if (RV.getNode() == 0) {
+  if (!RV.getNode()) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Node was deleted but visit returned NULL!");
  
@@ -1254,7 +1359,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
    }
  
    // If nothing happened still, try promoting the operation.
-  if (RV.getNode() == 0) {
+  if (!RV.getNode()) {
      switch (N->getOpcode()) {
      default: break;
      case ISD::ADD:
@@ -1284,17 +1389,23 @@ SDValue DAGCombiner::combine(SDNode *N) {
  
    // If N is a commutative binary node, try commuting it to enable more
    // sdisel CSE.
-  if (RV.getNode() == 0 &&
-      SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
        N->getNumValues() == 1) {
      SDValue N0 = N->getOperand(0);
      SDValue N1 = N->getOperand(1);
  
      // Constant operands are canonicalized to RHS.
      if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
-      SDValue Ops[] = { N1, N0 };
-      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
-                                            Ops, 2);
+      SDValue Ops[] = {N1, N0};
+      SDNode *CSENode;
+      if (const BinaryWithFlagsSDNode *BinNode =
+              dyn_cast<BinaryWithFlagsSDNode>(N)) {
+        CSENode = DAG.getNodeIfExists(
+            N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
+            BinNode->hasNoSignedWrap(), BinNode->isExact());
+      } else {
+        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
+      }
        if (CSENode)
          return SDValue(CSENode, 0);
      }
@@ -1303,8 +1414,8 @@ SDValue DAGCombiner::combine(SDNode *N) {
    return RV;
  }
  
-/// getInputChainForNode - Given a node, return its input chain if it has one,
-/// otherwise return a null sd operand.
+/// Given a node, return its input chain if it has one, otherwise return a null
+/// sd operand.
  static SDValue getInputChainForNode(SDNode *N) {
    if (unsigned NumOps = N->getNumOperands()) {
      if (N->getOperand(0).getValueType() == MVT::Other)
@@ -1358,7 +1469,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
            // Queue up for processing.
            TFs.push_back(Op.getNode());
            // Clean up in case the token factor is removed.
-          AddToWorkList(Op.getNode());
+          AddToWorklist(Op.getNode());
            Changed = true;
            break;
          }
@@ -1384,8 +1495,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
        Result = DAG.getEntryNode();
      } else {
        // New and improved token factor.
-      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N),
-                           MVT::Other, &Ops[0], Ops.size());
+      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
  
      // Don't add users to work list.
@@ -1397,44 +1507,21 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  
  /// MERGE_VALUES can always be eliminated.
  SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
-  WorkListRemover DeadNodes(*this);
+  WorklistRemover DeadNodes(*this);
    // Replacing results may cause a different MERGE_VALUES to suddenly
    // be CSE'd with N, and carry its uses with it. Iterate until no
    // uses remain, to ensure that the node can be safely deleted.
    // First add the users of this node to the work list so that they
    // can be tried again once they have new operands.
-  AddUsersToWorkList(N);
+  AddUsersToWorklist(N);
    do {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
    } while (!N->use_empty());
-  removeFromWorkList(N);
-  DAG.DeleteNode(N);
+  deleteAndRecombine(N);
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  
-static
-SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1,
-                              SelectionDAG &DAG) {
-  EVT VT = N0.getValueType();
-  SDValue N00 = N0.getOperand(0);
-  SDValue N01 = N0.getOperand(1);
-  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
-
-  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
-      isa<ConstantSDNode>(N00.getOperand(1))) {
-    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
-    N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT,
-                     DAG.getNode(ISD::SHL, SDLoc(N00), VT,
-                                 N00.getOperand(0), N01),
-                     DAG.getNode(ISD::SHL, SDLoc(N01), VT,
-                                 N00.getOperand(1), N01));
-    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
-  }
-
-  return SDValue();
-}
-
  SDValue DAGCombiner::visitADD(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -1484,7 +1571,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
                           N0.getOperand(1));
    // reassociate add
    SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
-  if (RADD.getNode() != 0)
+  if (RADD.getNode())
      return RADD;
    // fold ((0-A) + B) -> B-A
    if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
@@ -1537,28 +1624,20 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
    if (VT.isInteger() && !VT.isVector()) {
      APInt LHSZero, LHSOne;
      APInt RHSZero, RHSOne;
-    DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+    DAG.computeKnownBits(N0, LHSZero, LHSOne);
  
      if (LHSZero.getBoolValue()) {
-      DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+      DAG.computeKnownBits(N1, RHSZero, RHSOne);
  
        // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
        // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
-      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
-        return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
+        if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
+          return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+      }
      }
    }
  
-  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
-  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
-    SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG);
-    if (Result.getNode()) return Result;
-  }
-  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
-    SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG);
-    if (Result.getNode()) return Result;
-  }
-
    // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
    if (N1.getOpcode() == ISD::SHL &&
        N1.getOperand(0).getOpcode() == ISD::SUB)
@@ -1630,10 +1709,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
    // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
-  DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+  DAG.computeKnownBits(N0, LHSZero, LHSOne);
  
    if (LHSZero.getBoolValue()) {
-    DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+    DAG.computeKnownBits(N1, RHSZero, RHSOne);
  
      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
@@ -1682,7 +1761,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
-  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
+  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
      dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
    EVT VT = N0.getValueType();
  
@@ -1815,22 +1894,6 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) {
    return SDValue();
  }
  
-/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
-/// elements are all the same constant or undefined.
-static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
-  BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
-  if (!C)
-    return false;
-
-  APInt SplatUndef;
-  unsigned SplatBitSize;
-  bool HasAnyUndefs;
-  EVT EltVT = N->getValueType(0).getVectorElementType();
-  return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
-                             HasAnyUndefs) &&
-          EltVT.getSizeInBits() >= SplatBitSize);
-}
-
  SDValue DAGCombiner::visitMUL(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -1851,10 +1914,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
      N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
      N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
    } else {
-    N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0;
+    N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
      ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
                              : APInt();
-    N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0;
+    N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
      ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
                              : APInt();
    }
@@ -1904,7 +1967,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
                       isa<ConstantSDNode>(N0.getOperand(1)))) {
      SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
                               N1, N0.getOperand(1));
-    AddToWorkList(C3.getNode());
+    AddToWorklist(C3.getNode());
      return DAG.getNode(ISD::MUL, SDLoc(N), VT,
                         N0.getOperand(0), C3);
    }
@@ -1912,7 +1975,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
    // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
    // use.
    {
-    SDValue Sh(0,0), Y(0,0);
+    SDValue Sh(nullptr,0), Y(nullptr,0);
      // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
      if (N0.getOpcode() == ISD::SHL &&
          (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
@@ -1945,7 +2008,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
  
    // reassociate mul
    SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
-  if (RMUL.getNode() != 0)
+  if (RMUL.getNode())
      return RMUL;
  
    return SDValue();
@@ -1954,8 +2017,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue DAGCombiner::visitSDIV(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
    EVT VT = N->getValueType(0);
  
    // fold vector ops
@@ -1981,30 +2044,37 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
        return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
                           N0, N1);
    }
+
    // fold (sdiv X, pow2) -> simple ops after legalize
-  if (N1C && !N1C->isNullValue() &&
-      (N1C->getAPIntValue().isPowerOf2() ||
-       (-N1C->getAPIntValue()).isPowerOf2())) {
+  if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
+                                     (-N1C->getAPIntValue()).isPowerOf2())) {
      // If dividing by powers of two is cheap, then don't perform the following
      // fold.
-    if (TLI.isPow2DivCheap())
+    if (TLI.isPow2SDivCheap())
        return SDValue();
  
+    // Target-specific implementation of sdiv x, pow2.
+    SDValue Res = BuildSDIVPow2(N);
+    if (Res.getNode())
+      return Res;
+
      unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
  
      // Splat the sign bit into the register
-    SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
-                              DAG.getConstant(VT.getSizeInBits()-1,
-                                       getShiftAmountTy(N0.getValueType())));
-    AddToWorkList(SGN.getNode());
+    SDValue SGN =
+        DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
+                    DAG.getConstant(VT.getScalarSizeInBits() - 1,
+                                    getShiftAmountTy(N0.getValueType())));
+    AddToWorklist(SGN.getNode());
  
      // Add (N0 < 0) ? abs2 - 1 : 0;
-    SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
-                              DAG.getConstant(VT.getSizeInBits() - lg2,
-                                       getShiftAmountTy(SGN.getValueType())));
+    SDValue SRL =
+        DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
+                    DAG.getConstant(VT.getScalarSizeInBits() - lg2,
+                                    getShiftAmountTy(SGN.getValueType())));
      SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
-    AddToWorkList(SRL.getNode());
-    AddToWorkList(ADD.getNode());    // Divide by pow2
+    AddToWorklist(SRL.getNode());
+    AddToWorklist(ADD.getNode());    // Divide by pow2
      SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD,
                    DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
  
@@ -2013,14 +2083,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
      if (N1C->getAPIntValue().isNonNegative())
        return SRA;
  
-    AddToWorkList(SRA.getNode());
-    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
-                       DAG.getConstant(0, VT), SRA);
+    AddToWorklist(SRA.getNode());
+    return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
    }
  
    // if integer divide is expensive and we satisfy the requirements, emit an
    // alternate sequence.
-  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+  if (N1C && !TLI.isIntDivCheap()) {
      SDValue Op = BuildSDIV(N);
      if (Op.getNode()) return Op;
    }
@@ -2038,8 +2107,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue DAGCombiner::visitUDIV(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
    EVT VT = N->getValueType(0);
  
    // fold vector ops
@@ -2066,13 +2135,13 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
                                    DAG.getConstant(SHC->getAPIntValue()
                                                                    .logBase2(),
                                                    ADDVT));
-        AddToWorkList(Add.getNode());
+        AddToWorklist(Add.getNode());
          return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
        }
      }
    }
    // fold (udiv x, c) -> alternate
-  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+  if (N1C && !TLI.isIntDivCheap()) {
      SDValue Op = BuildUDIV(N);
      if (Op.getNode()) return Op;
    }
@@ -2090,8 +2159,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue DAGCombiner::visitSREM(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
    EVT VT = N->getValueType(0);
  
    // fold (srem c1, c2) -> c1%c2
@@ -2108,13 +2177,13 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
    // X%C to the equivalent of X-X/C*C.
    if (N1C && !N1C->isNullValue()) {
      SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
-    AddToWorkList(Div.getNode());
+    AddToWorklist(Div.getNode());
      SDValue OptimizedDiv = combine(Div.getNode());
      if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
        SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                  OptimizedDiv, N1);
        SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
-      AddToWorkList(Mul.getNode());
+      AddToWorklist(Mul.getNode());
        return Sub;
      }
    }
@@ -2132,8 +2201,8 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue DAGCombiner::visitUREM(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
    EVT VT = N->getValueType(0);
  
    // fold (urem c1, c2) -> c1%c2
@@ -2151,7 +2220,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
            DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
                   DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
                                   VT));
-        AddToWorkList(Add.getNode());
+        AddToWorklist(Add.getNode());
          return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
        }
      }
@@ -2161,13 +2230,13 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
    // X%C to the equivalent of X-X/C*C.
    if (N1C && !N1C->isNullValue()) {
      SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
-    AddToWorkList(Div.getNode());
+    AddToWorklist(Div.getNode());
      SDValue OptimizedDiv = combine(Div.getNode());
      if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
        SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                  OptimizedDiv, N1);
        SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
-      AddToWorkList(Mul.getNode());
+      AddToWorklist(Mul.getNode());
        return Sub;
      }
    }
@@ -2256,10 +2325,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
    return SDValue();
  }
  
-/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
-/// compute two values. LoOp and HiOp give the opcodes for the two computations
-/// that are being performed. Return true if a simplification was made.
-///
+/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
+/// give the opcodes for the two computations that are being performed. Return
+/// true if a simplification was made.
  SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                  unsigned HiOp) {
    // If the high half is not needed, just compute the low half.
@@ -2267,8 +2335,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
    if (!HiExists &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
-    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
-                              N->op_begin(), N->getNumOperands());
+    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
      return CombineTo(N, Res, Res);
    }
  
@@ -2277,8 +2344,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
    if (!LoExists &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
-    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
-                              N->op_begin(), N->getNumOperands());
+    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
      return CombineTo(N, Res, Res);
    }
  
@@ -2288,9 +2354,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
  
    // If the two computed results can be simplified separately, separate them.
    if (LoExists) {
-    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
-                             N->op_begin(), N->getNumOperands());
-    AddToWorkList(Lo.getNode());
+    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
+    AddToWorklist(Lo.getNode());
      SDValue LoOpt = combine(Lo.getNode());
      if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
          (!LegalOperations ||
@@ -2299,9 +2364,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
    }
  
    if (HiExists) {
-    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
-                             N->op_begin(), N->getNumOperands());
-    AddToWorkList(Hi.getNode());
+    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
+    AddToWorklist(Hi.getNode());
      SDValue HiOpt = combine(Hi.getNode());
      if (HiOpt.getNode() && HiOpt != Hi &&
          (!LegalOperations ||
@@ -2406,8 +2470,8 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
    return SDValue();
  }
  
-/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
-/// two operands of the same opcode, try to simplify it.
+/// If this is a binary operator with two operands of the same opcode, try to
+/// simplify it.
  SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    EVT VT = N0.getValueType();
@@ -2440,7 +2504,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
      SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                   N0.getOperand(0).getValueType(),
                                   N0.getOperand(0), N1.getOperand(0));
-    AddToWorkList(ORNode.getNode());
+    AddToWorklist(ORNode.getNode());
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
    }
  
@@ -2454,7 +2518,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
      SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                   N0.getOperand(0).getValueType(),
                                   N0.getOperand(0), N1.getOperand(0));
-    AddToWorkList(ORNode.getNode());
+    AddToWorklist(ORNode.getNode());
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                         ORNode, N0.getOperand(1));
    }
@@ -2479,7 +2543,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
      if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
        SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
        SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
-      AddToWorkList(Op.getNode());
+      AddToWorklist(Op.getNode());
        return BC;
      }
    }
@@ -2491,35 +2555,66 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
    // The type-legalizer generates this pattern when loading illegal
    // vector types from memory. In many cases this allows additional shuffle
    // optimizations.
-  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
-      N0.getOperand(1).getOpcode() == ISD::UNDEF &&
-      N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+  // There are other cases where moving the shuffle after the xor/and/or
+  // is profitable even if shuffles don't perform a swizzle.
+  // If both shuffles use the same mask, and both shuffles have the same first
+  // or second operand, then it might still be profitable to move the shuffle
+  // after the xor/and/or operation.
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
      ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
      ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
  
-    assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
+    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
             "Inputs to shuffles are not the same type");
  
-    unsigned NumElts = VT.getVectorNumElements();
-
      // Check that both shuffles use the same mask. The masks are known to be of
      // the same length because the result vector type is the same.
-    bool SameMask = true;
-    for (unsigned i = 0; i != NumElts; ++i) {
-      int Idx0 = SVN0->getMaskElt(i);
-      int Idx1 = SVN1->getMaskElt(i);
-      if (Idx0 != Idx1) {
-        SameMask = false;
-        break;
+    // Check also that shuffles have only one use to avoid introducing extra
+    // instructions.
+    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
+        SVN0->getMask().equals(SVN1->getMask())) {
+      SDValue ShOp = N0->getOperand(1);
+
+      // Don't try to fold this node if it requires introducing a
+      // build vector of all zeros that might be illegal at this stage.
+      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
+        if (!LegalTypes)
+          ShOp = DAG.getConstant(0, VT);
+        else
+          ShOp = SDValue();
+      }
+
+      // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
+      // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
+      // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
+      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
+        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+                                      N0->getOperand(0), N1->getOperand(0));
+        AddToWorklist(NewNode.getNode());
+        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
+                                    &SVN0->getMask()[0]);
        }
-    }
  
-    if (SameMask) {
-      SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
-                               N0.getOperand(0), N1.getOperand(0));
-      AddToWorkList(Op.getNode());
-      return DAG.getVectorShuffle(VT, SDLoc(N), Op,
-                                  DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+      // Don't try to fold this node if it requires introducing a
+      // build vector of all zeros that might be illegal at this stage.
+      ShOp = N0->getOperand(0);
+      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
+        if (!LegalTypes)
+          ShOp = DAG.getConstant(0, VT);
+        else
+          ShOp = SDValue();
+      }
+
+      // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
+      // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
+      // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
+      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
+        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+                                      N0->getOperand(1), N1->getOperand(1));
+        AddToWorklist(NewNode.getNode());
+        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
+                                    &SVN0->getMask()[0]);
+      }
      }
    }
  
@@ -2542,9 +2637,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
  
      // fold (and x, 0) -> 0, vector edition
      if (ISD::isBuildVectorAllZeros(N0.getNode()))
-      return N0;
+      // Do not return N0 itself, because N0 may contain undef operands.
+      return DAG.getConstant(
+          APInt::getNullValue(
+              N0.getValueType().getScalarType().getSizeInBits()),
+          N0.getValueType());
      if (ISD::isBuildVectorAllZeros(N1.getNode()))
-      return N1;
+      // Do not return N1 itself, because N1 may contain undef operands.
+      return DAG.getConstant(
+          APInt::getNullValue(
+              N1.getValueType().getScalarType().getSizeInBits()),
+          N1.getValueType());
  
      // fold (and x, -1) -> x, vector edition
      if (ISD::isBuildVectorAllOnes(N0.getNode()))
@@ -2571,7 +2674,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
      return DAG.getConstant(0, VT);
    // reassociate and
    SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
-  if (RAND.getNode() != 0)
+  if (RAND.getNode())
      return RAND;
    // fold (and (or x, C), D) -> D if (C & D) == D
    if (N1C && N0.getOpcode() == ISD::OR)
@@ -2707,21 +2810,21 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
        if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
          SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                       LR.getValueType(), LL, RL);
-        AddToWorkList(ORNode.getNode());
+        AddToWorklist(ORNode.getNode());
          return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
        }
        // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
        if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
          SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
                                        LR.getValueType(), LL, RL);
-        AddToWorkList(ANDNode.getNode());
+        AddToWorklist(ANDNode.getNode());
          return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
        }
        // fold (and (setgt X,  -1), (setgt Y,  -1)) -> (setgt (or X, Y), -1)
        if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
          SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                       LR.getValueType(), LL, RL);
-        AddToWorkList(ORNode.getNode());
+        AddToWorklist(ORNode.getNode());
          return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
        }
      }
@@ -2734,7 +2837,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
                                   cast<ConstantSDNode>(RR)->isNullValue()))) {
        SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(),
                                      LL, DAG.getConstant(1, LL.getValueType()));
-      AddToWorkList(ADDNode.getNode());
+      AddToWorklist(ADDNode.getNode());
        return DAG.getSetCC(SDLoc(N), VT, ADDNode,
                            DAG.getConstant(2, LL.getValueType()), ISD::SETUGE);
      }
@@ -2782,7 +2885,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         MemVT, LN0->getMemOperand());
-      AddToWorkList(N);
+      AddToWorklist(N);
        CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
@@ -2802,7 +2905,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         MemVT, LN0->getMemOperand());
-      AddToWorkList(N);
+      AddToWorklist(N);
        CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
@@ -2833,7 +2936,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
              DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                             LN0->getChain(), LN0->getBasePtr(), ExtVT,
                             LN0->getMemOperand());
-          AddToWorkList(N);
+          AddToWorklist(N);
            CombineTo(LN0, NewLoad, NewLoad.getValue(1));
            return SDValue(N, 0);   // Return N so it doesn't get rechecked!
          }
@@ -2860,7 +2963,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
              Alignment = MinAlign(Alignment, PtrOff);
            }
  
-          AddToWorkList(NewPtr.getNode());
+          AddToWorklist(NewPtr.getNode());
  
            EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
            SDValue Load =
@@ -2868,8 +2971,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
                             LN0->getChain(), NewPtr,
                             LN0->getPointerInfo(),
                             ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
-                           Alignment, LN0->getTBAAInfo());
-          AddToWorkList(N);
+                           LN0->isInvariant(), Alignment, LN0->getAAInfo());
+          AddToWorklist(N);
            CombineTo(LN0, Load, Load.getValue(1));
            return SDValue(N, 0);   // Return N so it doesn't get rechecked!
          }
@@ -2915,8 +3018,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    return SDValue();
  }
  
-/// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16
-///
+/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
  SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                          bool DemandHighBits) {
    if (!LegalOperations)
@@ -3021,9 +3123,12 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
    return Res;
  }
  
-/// isBSwapHWordElement - Return true if the specified node is an element
-/// that makes up a 32-bit packed halfword byteswap. i.e.
-/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// Return true if the specified node is an element that makes up a 32-bit
+/// packed halfword byteswap.
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
  static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
    if (!N.getNode()->hasOneUse())
      return false;
@@ -3091,8 +3196,11 @@ static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
    return true;
  }
  
-/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
-/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// Match a 32-bit packed halfword bswap. That is
+/// ((x & 0x000000ff) << 8) |
+/// ((x & 0x0000ff00) >> 8) |
+/// ((x & 0x00ff0000) << 8) |
+/// ((x & 0xff000000) >> 8)
  /// => (rotl (bswap x), 16)
  SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
    if (!LegalOperations)
@@ -3104,7 +3212,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
    if (!TLI.isOperationLegal(ISD::BSWAP, VT))
      return SDValue();
  
-  SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+  SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr);
    // Look for either
    // (or (or (and), (and)), (or (and), (and)))
    // (or (or (or (and), (and)), (and)), (and))
@@ -3185,9 +3293,73 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
  
      // fold (or x, -1) -> -1, vector edition
      if (ISD::isBuildVectorAllOnes(N0.getNode()))
-      return N0;
+      // Do not return N0 itself, because N0 may contain undef operands.
+      return DAG.getConstant(
+          APInt::getAllOnesValue(
+              N0.getValueType().getScalarType().getSizeInBits()),
+          N0.getValueType());
      if (ISD::isBuildVectorAllOnes(N1.getNode()))
-      return N1;
+      // Do not return N1 itself, because N1 may contain undef operands.
+      return DAG.getConstant(
+          APInt::getAllOnesValue(
+              N1.getValueType().getScalarType().getSizeInBits()),
+          N1.getValueType());
+
+    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
+    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
+    // Do this only if the resulting shuffle is legal.
+    if (isa<ShuffleVectorSDNode>(N0) &&
+        isa<ShuffleVectorSDNode>(N1) &&
+        // Avoid folding a node with illegal type.
+        TLI.isTypeLegal(VT) &&
+        N0->getOperand(1) == N1->getOperand(1) &&
+        ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
+      bool CanFold = true;
+      unsigned NumElts = VT.getVectorNumElements();
+      const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
+      const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
+      // We construct two shuffle masks:
+      // - Mask1 is a shuffle mask for a shuffle with N0's first operand (A) as
+      // the first operand and N1's first operand (B) as the second operand.
+      // - Mask2 is a shuffle mask for a shuffle with N1's first operand (B) as
+      // the first operand and N0's first operand (A) as the second operand.
+      // We do this because OR is commutable and therefore there might be
+      // two ways to fold this node into a shuffle.
+      SmallVector<int,4> Mask1;
+      SmallVector<int,4> Mask2;
+
+      for (unsigned i = 0; i != NumElts && CanFold; ++i) {
+        int M0 = SV0->getMaskElt(i);
+        int M1 = SV1->getMaskElt(i);
+
+        // Both shuffle indexes are undef. Propagate Undef.
+        if (M0 < 0 && M1 < 0) {
+          Mask1.push_back(M0);
+          Mask2.push_back(M0);
+          continue;
+        }
+
+        if (M0 < 0 || M1 < 0 ||
+            (M0 < (int)NumElts && M1 < (int)NumElts) ||
+            (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
+          CanFold = false;
+          break;
+        }
+
+        Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
+        Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
+      }
+
+      if (CanFold) {
+        // Fold this sequence only if the resulting shuffle is 'legal'.
+        if (TLI.isShuffleMaskLegal(Mask1, VT))
+          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
+                                      N1->getOperand(0), &Mask1[0]);
+        if (TLI.isShuffleMaskLegal(Mask2, VT))
+          return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
+                                      N0->getOperand(0), &Mask2[0]);
+      }
+    }
    }
  
    // fold (or x, undef) -> -1
@@ -3214,15 +3386,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
  
    // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
    SDValue BSwap = MatchBSwapHWord(N, N0, N1);
-  if (BSwap.getNode() != 0)
+  if (BSwap.getNode())
      return BSwap;
    BSwap = MatchBSwapHWordLow(N, N0, N1);
-  if (BSwap.getNode() != 0)
+  if (BSwap.getNode())
      return BSwap;
  
    // reassociate or
    SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
-  if (ROR.getNode() != 0)
+  if (ROR.getNode())
      return ROR;
    // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
    // iff (c1 & c2) == 0.
@@ -3251,7 +3423,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
            (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
          SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
                                       LR.getValueType(), LL, RL);
-        AddToWorkList(ORNode.getNode());
+        AddToWorklist(ORNode.getNode());
          return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
        }
        // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
@@ -3260,7 +3432,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
            (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
          SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
                                        LR.getValueType(), LL, RL);
-        AddToWorkList(ANDNode.getNode());
+        AddToWorklist(ANDNode.getNode());
          return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
        }
      }
@@ -3323,7 +3495,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
    return SDValue();
  }
  
-/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+/// Match "(X shl/srl V1) & V2" where V2 may not be present.
  static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
    if (Op.getOpcode() == ISD::AND) {
      if (isa<ConstantSDNode>(Op.getOperand(1))) {
@@ -3348,9 +3520,9 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  //
  //     (or (shift1 X, Neg), (shift2 X, Pos))
  //
-// reduces to a rotate in direction shift2 by Pos and a rotate in direction
-// shift1 by Neg.  The range [0, OpSize) means that we only need to consider
-// shift amounts with defined behavior.
+// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
+// in direction shift1 by Neg.  The range [0, OpSize) means that we only need
+// to consider shift amounts with defined behavior.
  static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
    // If OpSize is a power of 2 then:
    //
@@ -3371,7 +3543,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
    //
    // for all Neg and Pos.  Note that the (or ...) then invokes undefined
    // behavior if Pos == 0 (and consequently Neg == OpSize).
-  // 
+  //
    // We could actually use [A] whenever OpSize is a power of 2, but the
    // only extra cases that it would match are those uninteresting ones
    // where Neg and Pos are never in range at the same time.  E.g. for
@@ -3383,13 +3555,13 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
    // always invokes undefined behavior for 32-bit X.
    //
    // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
-  unsigned LoBits = 0;
+  unsigned MaskLoBits = 0;
    if (Neg.getOpcode() == ISD::AND &&
        isPowerOf2_64(OpSize) &&
        Neg.getOperand(1).getOpcode() == ISD::Constant &&
        cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
      Neg = Neg.getOperand(0);
-    LoBits = Log2_64(OpSize);
+    MaskLoBits = Log2_64(OpSize);
    }
  
    // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
@@ -3400,6 +3572,14 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
      return 0;
    SDValue NegOp1 = Neg.getOperand(1);
  
+  // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
+  // Pos'.  The truncation is redundant for the purpose of the equality.
+  if (MaskLoBits &&
+      Pos.getOpcode() == ISD::AND &&
+      Pos.getOperand(1).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
+    Pos = Pos.getOperand(0);
+
    // The condition we need is now:
    //
    //     (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
@@ -3430,8 +3610,9 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
      return false;
  
    // Now we just need to check that OpSize & Mask == Width & Mask.
-  if (LoBits)
-    return Width.getLoBits(LoBits) == 0;
+  if (MaskLoBits)
+    // OpSize & Mask is 0 since Mask is OpSize - 1.
+    return Width.getLoBits(MaskLoBits) == 0;
    return Width == OpSize;
  }
  
@@ -3458,28 +3639,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                         HasPos ? Pos : Neg).getNode();
    }
  
-  // fold (or (shl (*ext x), (*ext y)),
-  //          (srl (*ext x), (*ext (sub 32, y)))) ->
-  //   (*ext (rotl x, y)) or (*ext (rotr x, (sub 32, y)))
-  //
-  // fold (or (shl (*ext x), (*ext (sub 32, y))),
-  //          (srl (*ext x), (*ext y))) ->
-  //   (*ext (rotr x, y)) or (*ext (rotl x, (sub 32, y)))
-  if (Shifted.getOpcode() == ISD::ZERO_EXTEND ||
-      Shifted.getOpcode() == ISD::ANY_EXTEND) {
-    SDValue InnerShifted = Shifted.getOperand(0);
-    EVT InnerVT = InnerShifted.getValueType();
-    bool HasPosInner = TLI.isOperationLegalOrCustom(PosOpcode, InnerVT);
-    if (HasPosInner || TLI.isOperationLegalOrCustom(NegOpcode, InnerVT)) {
-      if (matchRotateSub(InnerPos, InnerNeg, InnerVT.getSizeInBits())) {
-        SDValue V = DAG.getNode(HasPosInner ? PosOpcode : NegOpcode, DL,
-                                InnerVT, InnerShifted, HasPosInner ? Pos : Neg);
-        return DAG.getNode(Shifted.getOpcode(), DL, VT, V).getNode();
-      }
-    }
-  }
-
-  return 0;
+  return nullptr;
  }
  
  // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
@@ -3488,29 +3648,29 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
  SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
    // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
    EVT VT = LHS.getValueType();
-  if (!TLI.isTypeLegal(VT)) return 0;
+  if (!TLI.isTypeLegal(VT)) return nullptr;
  
    // The target must have at least one rotate flavor.
    bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
    bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
-  if (!HasROTL && !HasROTR) return 0;
+  if (!HasROTL && !HasROTR) return nullptr;
  
    // Match "(X shl/srl V1) & V2" where V2 may not be present.
    SDValue LHSShift;   // The shift.
    SDValue LHSMask;    // AND value if any.
    if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
-    return 0; // Not part of a rotate.
+    return nullptr; // Not part of a rotate.
  
    SDValue RHSShift;   // The shift.
    SDValue RHSMask;    // AND value if any.
    if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
-    return 0; // Not part of a rotate.
+    return nullptr; // Not part of a rotate.
  
    if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
-    return 0;   // Not shifting the same value.
+    return nullptr;   // Not shifting the same value.
  
    if (LHSShift.getOpcode() == RHSShift.getOpcode())
-    return 0;   // Shifts must disagree.
+    return nullptr;   // Shifts must disagree.
  
    // Canonicalize shl to left side in a shl/srl pair.
    if (RHSShift.getOpcode() == ISD::SHL) {
@@ -3532,7 +3692,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
      uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
      uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
      if ((LShVal + RShVal) != OpSizeInBits)
-      return 0;
+      return nullptr;
  
      SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                                LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
@@ -3559,7 +3719,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
    // If there is a mask here, and we have a variable shift, we can't be sure
    // that we're masking out the right stuff.
    if (LHSMask.getNode() || RHSMask.getNode())
-    return 0;
+    return nullptr;
  
    // If the shift amount is sign/zext/any-extended just peel it off.
    SDValue LExtOp0 = LHSShiftAmt;
@@ -3586,7 +3746,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
    if (TryR)
      return TryR;
  
-  return 0;
+  return nullptr;
  }
  
  SDValue DAGCombiner::visitXOR(SDNode *N) {
@@ -3628,7 +3788,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
      return N0;
    // reassociate xor
    SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
-  if (RXOR.getNode() != 0)
+  if (RXOR.getNode())
      return RXOR;
  
    // fold !(x cc y) -> (x !cc y)
@@ -3658,7 +3818,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
      SDValue V = N0.getOperand(0);
      V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
                      DAG.getConstant(1, V.getValueType()));
-    AddToWorkList(V.getNode());
+    AddToWorklist(V.getNode());
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
    }
  
@@ -3670,7 +3830,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
        unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
        LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
        RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
-      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
        return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
      }
    }
@@ -3682,7 +3842,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
        unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
        LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
        RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
-      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
        return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
      }
    }
@@ -3691,7 +3851,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
        N0->getOperand(1) == N1) {
      SDValue X = N0->getOperand(0);
      SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
-    AddToWorkList(NotX.getNode());
+    AddToWorklist(NotX.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
    }
    // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
@@ -3725,9 +3885,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
    return SDValue();
  }
  
-/// visitShiftByConstant - Handle transforms common to the three shifts, when
-/// the shift amount is a constant.
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+/// Handle transforms common to the three shifts, when the shift amount is a
+/// constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+  // We can't and shouldn't fold opaque constants.
+  if (Amt->isOpaque())
+    return SDValue();
+
    SDNode *LHS = N->getOperand(0).getNode();
    if (!LHS->hasOneUse()) return SDValue();
  
@@ -3753,9 +3917,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
      break;
    }
  
-  // We require the RHS of the binop to be a constant as well.
+  // We require the RHS of the binop to be a constant and not opaque as well.
    ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
-  if (!BinOpCst) return SDValue();
+  if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();
  
    // FIXME: disable this unless the input to the binop is a shift by a constant.
    // If it is not a shift, it pessimizes some common cases like:
@@ -3781,10 +3945,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
        return SDValue();
    }
  
+  if (!TLI.isDesirableToCommuteWithShift(LHS))
+    return SDValue();
+
    // Fold the constants, shifting the binop RHS by the shift amount.
    SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                                 N->getValueType(0),
                                 LHS->getOperand(1), N->getOperand(1));
+  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
  
    // Create the new shift.
    SDValue NewShift = DAG.getNode(N->getOpcode(),
@@ -3795,18 +3963,74 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
    return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
  }
  
+SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
+  assert(N->getOpcode() == ISD::TRUNCATE);
+  assert(N->getOperand(0).getOpcode() == ISD::AND);
+
+  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
+  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
+    SDValue N01 = N->getOperand(0).getOperand(1);
+
+    if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
+      EVT TruncVT = N->getValueType(0);
+      SDValue N00 = N->getOperand(0).getOperand(0);
+      APInt TruncC = N01C->getAPIntValue();
+      TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+
+      return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
+                         DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
+                         DAG.getConstant(TruncC, TruncVT));
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitRotate(SDNode *N) {
+  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
+  if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
+      N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
+    SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
+    if (NewOp1.getNode())
+      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+                         N->getOperand(0), NewOp1);
+  }
+  return SDValue();
+}
+
  SDValue DAGCombiner::visitSHL(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    EVT VT = N0.getValueType();
-  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  
    // fold vector ops
    if (VT.isVector()) {
      SDValue FoldedVOp = SimplifyVBinOp(N);
      if (FoldedVOp.getNode()) return FoldedVOp;
+
+    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
+    // If setcc produces an all-ones true value, then:
+    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
+    if (N1CV && N1CV->isConstant()) {
+      if (N0.getOpcode() == ISD::AND) {
+        SDValue N00 = N0->getOperand(0);
+        SDValue N01 = N0->getOperand(1);
+        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
+
+        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
+            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
+                TargetLowering::ZeroOrNegativeOneBooleanContent) {
+          SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
+          if (C.getNode())
+            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
+        }
+      } else {
+        N1C = isConstOrConstSplat(N1);
+      }
+    }
    }
  
    // fold (shl c1, c2) -> c1<<c2
@@ -3830,35 +4054,25 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
      return DAG.getConstant(0, VT);
    // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
    if (N1.getOpcode() == ISD::TRUNCATE &&
-      N1.getOperand(0).getOpcode() == ISD::AND &&
-      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
-    SDValue N101 = N1.getOperand(0).getOperand(1);
-    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
-      EVT TruncVT = N1.getValueType();
-      SDValue N100 = N1.getOperand(0).getOperand(0);
-      APInt TruncC = N101C->getAPIntValue();
-      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
-      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
-                         DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
-                                     DAG.getNode(ISD::TRUNCATE,
-                                                 SDLoc(N),
-                                                 TruncVT, N100),
-                                     DAG.getConstant(TruncC, TruncVT)));
-    }
+      N1.getOperand(0).getOpcode() == ISD::AND) {
+    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+    if (NewOp1.getNode())
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
    }
  
    if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
      return SDValue(N, 0);
  
    // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
-  if (N1C && N0.getOpcode() == ISD::SHL &&
-      N0.getOperand(1).getOpcode() == ISD::Constant) {
-    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
-    uint64_t c2 = N1C->getZExtValue();
-    if (c1 + c2 >= OpSizeInBits)
-      return DAG.getConstant(0, VT);
-    return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
-                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  if (N1C && N0.getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+      uint64_t c1 = N0C1->getZExtValue();
+      uint64_t c2 = N1C->getZExtValue();
+      if (c1 + c2 >= OpSizeInBits)
+        return DAG.getConstant(0, VT);
+      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
+                         DAG.getConstant(c1 + c2, N1.getValueType()));
+    }
    }
  
    // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
@@ -3869,20 +4083,21 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
    if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
                N0.getOpcode() == ISD::ANY_EXTEND ||
                N0.getOpcode() == ISD::SIGN_EXTEND) &&
-      N0.getOperand(0).getOpcode() == ISD::SHL &&
-      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
-    uint64_t c1 =
-      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
-    uint64_t c2 = N1C->getZExtValue();
-    EVT InnerShiftVT = N0.getOperand(0).getValueType();
-    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
-    if (c2 >= OpSizeInBits - InnerShiftSize) {
-      if (c1 + c2 >= OpSizeInBits)
-        return DAG.getConstant(0, VT);
-      return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
-                         DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
-                                     N0.getOperand(0)->getOperand(0)),
-                         DAG.getConstant(c1 + c2, N1.getValueType()));
+      N0.getOperand(0).getOpcode() == ISD::SHL) {
+    SDValue N0Op0 = N0.getOperand(0);
+    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+      uint64_t c1 = N0Op0C1->getZExtValue();
+      uint64_t c2 = N1C->getZExtValue();
+      EVT InnerShiftVT = N0Op0.getValueType();
+      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+      if (c2 >= OpSizeInBits - InnerShiftSize) {
+        if (c1 + c2 >= OpSizeInBits)
+          return DAG.getConstant(0, VT);
+        return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
+                           DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+                                       N0Op0->getOperand(0)),
+                           DAG.getConstant(c1 + c2, N1.getValueType()));
+      }
      }
    }
  
@@ -3890,19 +4105,20 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
    // Only fold this if the inner zext has no other uses to avoid increasing
    // the total number of instructions.
    if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
-      N0.getOperand(0).getOpcode() == ISD::SRL &&
-      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
-    uint64_t c1 =
-      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
-    if (c1 < VT.getSizeInBits()) {
-      uint64_t c2 = N1C->getZExtValue();
-      if (c1 == c2) {
-        SDValue NewOp0 = N0.getOperand(0);
-        EVT CountVT = NewOp0.getOperand(1).getValueType();
-        SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
-                                     NewOp0, DAG.getConstant(c2, CountVT));
-        AddToWorkList(NewSHL.getNode());
-        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+      N0.getOperand(0).getOpcode() == ISD::SRL) {
+    SDValue N0Op0 = N0.getOperand(0);
+    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+      uint64_t c1 = N0Op0C1->getZExtValue();
+      if (c1 < VT.getScalarSizeInBits()) {
+        uint64_t c2 = N1C->getZExtValue();
+        if (c1 == c2) {
+          SDValue NewOp0 = N0.getOperand(0);
+          EVT CountVT = NewOp0.getOperand(1).getValueType();
+          SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
+                                       NewOp0, DAG.getConstant(c2, CountVT));
+          AddToWorklist(NewSHL.getNode());
+          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+        }
        }
      }
    }
@@ -3911,40 +4127,51 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
    //                               (and (srl x, (sub c1, c2), MASK)
    // Only fold this if the inner shift has no other uses -- if it does, folding
    // this will increase the total number of instructions.
-  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
-      N0.getOperand(1).getOpcode() == ISD::Constant) {
-    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
-    if (c1 < VT.getSizeInBits()) {
-      uint64_t c2 = N1C->getZExtValue();
-      APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
-                                         VT.getSizeInBits() - c1);
-      SDValue Shift;
-      if (c2 > c1) {
-        Mask = Mask.shl(c2-c1);
-        Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
-                            DAG.getConstant(c2-c1, N1.getValueType()));
-      } else {
-        Mask = Mask.lshr(c1-c2);
-        Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
-                            DAG.getConstant(c1-c2, N1.getValueType()));
+  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+      uint64_t c1 = N0C1->getZExtValue();
+      if (c1 < OpSizeInBits) {
+        uint64_t c2 = N1C->getZExtValue();
+        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
+        SDValue Shift;
+        if (c2 > c1) {
+          Mask = Mask.shl(c2 - c1);
+          Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
+                              DAG.getConstant(c2 - c1, N1.getValueType()));
+        } else {
+          Mask = Mask.lshr(c1 - c2);
+          Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
+                              DAG.getConstant(c1 - c2, N1.getValueType()));
+        }
+        return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
+                           DAG.getConstant(Mask, VT));
        }
-      return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
-                         DAG.getConstant(Mask, VT));
      }
    }
    // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
    if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+    unsigned BitSize = VT.getScalarSizeInBits();
      SDValue HiBitsMask =
-      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
-                                            VT.getSizeInBits() -
-                                              N1C->getZExtValue()),
-                      VT);
+      DAG.getConstant(APInt::getHighBitsSet(BitSize,
+                                            BitSize - N1C->getZExtValue()), VT);
      return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                         HiBitsMask);
    }
  
+  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+  // Variant of version done on multiply, except mul by a power of 2 is turned
+  // into a shift.
+  APInt Val;
+  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+      (isa<ConstantSDNode>(N0.getOperand(1)) ||
+       isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
+    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
+    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
+  }
+
    if (N1C) {
-    SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+    SDValue NewSHL = visitShiftByConstant(N, N1C);
      if (NewSHL.getNode())
        return NewSHL;
    }
@@ -3964,6 +4191,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
    if (VT.isVector()) {
      SDValue FoldedVOp = SimplifyVBinOp(N);
      if (FoldedVOp.getNode()) return FoldedVOp;
+
+    N1C = isConstOrConstSplat(N1);
    }
  
    // fold (sra c1, c2) -> (sra c1, c2)
@@ -3997,11 +4226,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
  
    // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
    if (N1C && N0.getOpcode() == ISD::SRA) {
-    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+    if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
        unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
-      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+      if (Sum >= OpSizeInBits)
+        Sum = OpSizeInBits - 1;
        return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
-                         DAG.getConstant(Sum, N1C->getValueType(0)));
+                         DAG.getConstant(Sum, N1.getValueType()));
      }
    }
  
@@ -4010,14 +4240,17 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
    // result_size - n != m.
    // If truncate is free for the target sext(shl) is likely to result in better
    // code.
-  if (N0.getOpcode() == ISD::SHL) {
+  if (N0.getOpcode() == ISD::SHL && N1C) {
      // Get the two constants of the shifts, CN0 = m, CN = n.
-    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-    if (N01C && N1C) {
+    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
+    if (N01C) {
+      LLVMContext &Ctx = *DAG.getContext();
        // Determine what the truncate's result bitsize and type would be.
-      EVT TruncVT =
-        EVT::getIntegerVT(*DAG.getContext(),
-                          OpSizeInBits - N1C->getZExtValue());
+      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
+
+      if (VT.isVector())
+        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
        // Determine the residual right-shift amount.
        signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
  
@@ -4044,44 +4277,33 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
  
    // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
    if (N1.getOpcode() == ISD::TRUNCATE &&
-      N1.getOperand(0).getOpcode() == ISD::AND &&
-      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
-    SDValue N101 = N1.getOperand(0).getOperand(1);
-    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
-      EVT TruncVT = N1.getValueType();
-      SDValue N100 = N1.getOperand(0).getOperand(0);
-      APInt TruncC = N101C->getAPIntValue();
-      TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
-      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
-                         DAG.getNode(ISD::AND, SDLoc(N),
-                                     TruncVT,
-                                     DAG.getNode(ISD::TRUNCATE,
-                                                 SDLoc(N),
-                                                 TruncVT, N100),
-                                     DAG.getConstant(TruncC, TruncVT)));
-    }
-  }
-
-  // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+      N1.getOperand(0).getOpcode() == ISD::AND) {
+    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+    if (NewOp1.getNode())
+      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
+  }
+
+  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
    //      if c1 is equal to the number of bits the trunc removes
    if (N0.getOpcode() == ISD::TRUNCATE &&
        (N0.getOperand(0).getOpcode() == ISD::SRL ||
         N0.getOperand(0).getOpcode() == ISD::SRA) &&
        N0.getOperand(0).hasOneUse() &&
        N0.getOperand(0).getOperand(1).hasOneUse() &&
-      N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
-    EVT LargeVT = N0.getOperand(0).getValueType();
-    ConstantSDNode *LargeShiftAmt =
-      cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
-
-    if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
-        LargeShiftAmt->getZExtValue()) {
-      SDValue Amt =
-        DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
-              getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
-      SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
-                                N0.getOperand(0).getOperand(0), Amt);
-      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+      N1C) {
+    SDValue N0Op0 = N0.getOperand(0);
+    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
+      unsigned LargeShiftVal = LargeShift->getZExtValue();
+      EVT LargeVT = N0Op0.getValueType();
+
+      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+        SDValue Amt =
+          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
+                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
+        SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
+                                  N0Op0.getOperand(0), Amt);
+        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+      }
      }
    }
  
@@ -4095,7 +4317,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
  
    if (N1C) {
-    SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+    SDValue NewSRA = visitShiftByConstant(N, N1C);
      if (NewSRA.getNode())
        return NewSRA;
    }
@@ -4115,6 +4337,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    if (VT.isVector()) {
      SDValue FoldedVOp = SimplifyVBinOp(N);
      if (FoldedVOp.getNode()) return FoldedVOp;
+
+    N1C = isConstOrConstSplat(N1);
    }
  
    // fold (srl c1, c2) -> c1 >>u c2
@@ -4135,14 +4359,15 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
      return DAG.getConstant(0, VT);
  
    // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
-  if (N1C && N0.getOpcode() == ISD::SRL &&
-      N0.getOperand(1).getOpcode() == ISD::Constant) {
-    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
-    uint64_t c2 = N1C->getZExtValue();
-    if (c1 + c2 >= OpSizeInBits)
-      return DAG.getConstant(0, VT);
-    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
-                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  if (N1C && N0.getOpcode() == ISD::SRL) {
+    if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
+      uint64_t c1 = N01C->getZExtValue();
+      uint64_t c2 = N1C->getZExtValue();
+      if (c1 + c2 >= OpSizeInBits)
+        return DAG.getConstant(0, VT);
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
+                         DAG.getConstant(c1 + c2, N1.getValueType()));
+    }
    }
  
    // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
@@ -4167,18 +4392,21 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    }
  
    // fold (srl (shl x, c), c) -> (and x, cst2)
-  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
-      N0.getValueSizeInBits() <= 64) {
-    uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
-    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
-                       DAG.getConstant(~0ULL >> ShAmt, VT));
+  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
+    unsigned BitSize = N0.getScalarValueSizeInBits();
+    if (BitSize <= 64) {
+      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
+      return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
+                         DAG.getConstant(~0ULL >> ShAmt, VT));
+    }
    }
  
    // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
    if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
      // Shifting in all undef bits?
      EVT SmallVT = N0.getOperand(0).getValueType();
-    if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+    unsigned BitSize = SmallVT.getScalarSizeInBits();
+    if (N1C->getZExtValue() >= BitSize)
        return DAG.getUNDEF(VT);
  
      if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
@@ -4186,8 +4414,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
        SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
                                         N0.getOperand(0),
                            DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
-      AddToWorkList(SmallShift.getNode());
-      APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
+      AddToWorklist(SmallShift.getNode());
+      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
        return DAG.getNode(ISD::AND, SDLoc(N), VT,
                           DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
                           DAG.getConstant(Mask, VT));
@@ -4196,16 +4424,16 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
  
    // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
    // bit, which is unmodified by sra.
-  if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
      if (N0.getOpcode() == ISD::SRA)
        return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
    }
  
    // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
    if (N1C && N0.getOpcode() == ISD::CTLZ &&
-      N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
      APInt KnownZero, KnownOne;
-    DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
+    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
  
      // If any of the input bits are KnownOne, then the input couldn't be all
      // zeros, thus the result of the srl will always be zero.
@@ -4228,7 +4456,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
        if (ShAmt) {
          Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
                    DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
-        AddToWorkList(Op.getNode());
+        AddToWorklist(Op.getNode());
        }
  
        return DAG.getNode(ISD::XOR, SDLoc(N), VT,
@@ -4238,22 +4466,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
  
    // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
    if (N1.getOpcode() == ISD::TRUNCATE &&
-      N1.getOperand(0).getOpcode() == ISD::AND &&
-      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
-    SDValue N101 = N1.getOperand(0).getOperand(1);
-    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
-      EVT TruncVT = N1.getValueType();
-      SDValue N100 = N1.getOperand(0).getOperand(0);
-      APInt TruncC = N101C->getAPIntValue();
-      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
-      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
-                         DAG.getNode(ISD::AND, SDLoc(N),
-                                     TruncVT,
-                                     DAG.getNode(ISD::TRUNCATE,
-                                                 SDLoc(N),
-                                                 TruncVT, N100),
-                                     DAG.getConstant(TruncC, TruncVT)));
-    }
+      N1.getOperand(0).getOpcode() == ISD::AND) {
+    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+    if (NewOp1.getNode())
+      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
    }
  
    // fold operands of srl based on knowledge that the low bits are not
@@ -4262,7 +4478,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
      return SDValue(N, 0);
  
    if (N1C) {
-    SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+    SDValue NewSRL = visitShiftByConstant(N, N1C);
      if (NewSRL.getNode())
        return NewSRL;
    }
@@ -4292,12 +4508,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    if (N->hasOneUse()) {
      SDNode *Use = *N->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
-      AddToWorkList(Use);
+      AddToWorklist(Use);
      else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
        // Also look past the truncate.
        Use = *Use->use_begin();
        if (Use->getOpcode() == ISD::BRCOND)
-        AddToWorkList(Use);
+        AddToWorklist(Use);
      }
    }
  
@@ -4377,11 +4593,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
    if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
    // fold (select C, 0, 1) -> (xor C, 1)
+  // We can't do this reliably if integer based booleans have different contents
+  // to floating point based booleans. This is because we can't tell whether we
+  // have an integer-based boolean or a floating-point-based boolean unless we
+  // can find the SETCC that produced it and inspect its operands. This is
+  // fairly easy if C is the SETCC node, but it can potentially be
+  // undiscoverable (or not reasonably discoverable). For example, it could be
+  // in another basic block or it could require searching a complicated
+  // expression.
    if (VT.isInteger() &&
-      (VT0 == MVT::i1 ||
-       (VT0.isInteger() &&
-        TLI.getBooleanContents(false) ==
-        TargetLowering::ZeroOrOneBooleanContent)) &&
+      (VT0 == MVT::i1 || (VT0.isInteger() &&
+                          TLI.getBooleanContents(false, false) ==
+                              TLI.getBooleanContents(false, true) &&
+                          TLI.getBooleanContents(false, false) ==
+                              TargetLowering::ZeroOrOneBooleanContent)) &&
        N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
      SDValue XORNode;
      if (VT == VT0)
@@ -4389,7 +4614,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
                           N0, DAG.getConstant(1, VT0));
      XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
                            N0, DAG.getConstant(1, VT0));
-    AddToWorkList(XORNode.getNode());
+    AddToWorklist(XORNode.getNode());
      if (VT.bitsGT(VT0))
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
@@ -4397,13 +4622,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
    // fold (select C, 0, X) -> (and (not C), X)
    if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
      SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
-    AddToWorkList(NOTNode.getNode());
+    AddToWorklist(NOTNode.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
    }
    // fold (select C, X, 1) -> (or (not C), X)
    if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
      SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
-    AddToWorkList(NOTNode.getNode());
+    AddToWorklist(NOTNode.getNode());
      return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
    }
    // fold (select C, X, 0) -> (and C, X)
@@ -4424,12 +4649,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
  
    // fold selects based on a setcc into other things, such as min/max/abs
    if (N0.getOpcode() == ISD::SETCC) {
-    // FIXME:
-    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
-    // having to say they don't support SELECT_CC on every type the DAG knows
-    // about, since there is no way to mark an opcode illegal at all value types
-    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
-        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+    if ((!LegalOperations &&
+         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+       TLI.isOperationLegal(ISD::SELECT_CC, VT))
        return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           N1, N2, N0.getOperand(2));
@@ -4443,12 +4665,12 @@ static
  std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
    SDLoc DL(N);
    EVT LoVT, HiVT;
-  llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
  
    // Split the inputs.
    SDValue Lo, Hi, LL, LH, RL, RH;
-  llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
-  llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
  
    Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
    Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
@@ -4456,6 +4678,61 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
    return std::make_pair(Lo, Hi);
  }
  
+// Fold (vselect Cond, LHS, RHS) to one CONCAT_VECTORS when each half of Cond
+// is a single repeated constant: a zero half takes RHS's operand, otherwise
+// LHS's. Assumes LHS/RHS are CONCAT_VECTORS and Cond is a BUILD_VECTOR of
+// ConstantSDNodes (or undefs). Returns an empty SDValue if no fold applies.
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
+  SDLoc dl(N);
+  SDValue Cond = N->getOperand(0);
+  SDValue LHS = N->getOperand(1);
+  SDValue RHS = N->getOperand(2);
+  EVT VT = N->getValueType(0);
+  int NumElems = VT.getVectorNumElements();
+  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
+         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
+         Cond.getOpcode() == ISD::BUILD_VECTOR);
+
+  // Only binary CONCAT_VECTORS are handled; bail out on any other arity.
+  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
+    return SDValue();
+
+  // The concat_vectors arguments imply an even number of elements. Within
+  // this half of the BV, skip UNDEFs and require all other elements to be
+  // the very same constant node (pointer compare); otherwise give up.
+  ConstantSDNode *BottomHalf = nullptr;
+  for (int i = 0; i < NumElems / 2; ++i) {
+    if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+      continue;
+
+    if (BottomHalf == nullptr)
+      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+    else if (Cond->getOperand(i).getNode() != BottomHalf)
+      return SDValue();
+  }
+
+  // Do the same for the second half of the BuildVector
+  ConstantSDNode *TopHalf = nullptr;
+  for (int i = NumElems / 2; i < NumElems; ++i) {
+    if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+      continue;
+
+    if (TopHalf == nullptr)
+      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+    else if (Cond->getOperand(i).getNode() != TopHalf)
+      return SDValue();
+  }
+
+  // NOTE(review): under NDEBUG a null half would be dereferenced below.
+  assert(TopHalf && BottomHalf &&
+         "One half of the selector was all UNDEFs and the other was all the "
+         "same value. This should have been addressed before this function.");
+  return DAG.getNode(
+      ISD::CONCAT_VECTORS, dl, VT,
+      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
+      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+}
+
  SDValue DAGCombiner::visitVSELECT(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -4487,8 +4764,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
            ISD::SRA, DL, VT, LHS,
            DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
        SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
-      AddToWorkList(Shift.getNode());
-      AddToWorkList(Add.getNode());
+      AddToWorklist(Shift.getNode());
+      AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
      }
    }
@@ -4506,17 +4783,17 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
        return SDValue();
  
      SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
-    llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
-    llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
-    llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
+    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
+    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
+    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
  
      Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
      Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
  
      // Add the new VSELECT nodes to the work list in case they need to be split
      // again.
-    AddToWorkList(Lo.getNode());
-    AddToWorkList(Hi.getNode());
+    AddToWorklist(Lo.getNode());
+    AddToWorklist(Hi.getNode());
  
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
    }
@@ -4528,6 +4805,17 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N2;
  
+  // The ConvertSelectToConcatVector function is assuming both the above
+  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
+  // and addressed.
+  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+      N2.getOpcode() == ISD::CONCAT_VECTORS &&
+      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+    SDValue CV = ConvertSelectToConcatVector(N, DAG);
+    if (CV.getNode())
+      return CV;
+  }
+
    return SDValue();
  }
  
@@ -4547,7 +4835,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
    SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                                N0, N1, CC, SDLoc(N), false);
    if (SCC.getNode()) {
-    AddToWorkList(SCC.getNode());
+    AddToWorklist(SCC.getNode());
  
      if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
        if (!SCCC->isNullValue())
@@ -4577,6 +4865,65 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
                         SDLoc(N));
  }
  
+// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext dag node whose
+// operand is a ConstantSDNode or a build_vector of constants (or undefs).
+// Returns the folded node, or nullptr when no fold applies. Called by the
+// DAGCombiner when visiting sext/zext/aext dag nodes (see for example
+// DAGCombiner::visitSIGN_EXTEND). Vector extends are not folded if operations
+// are legal; this is to avoid introducing illegal build_vector dag nodes.
+static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+                                         SelectionDAG &DAG, bool LegalTypes,
+                                         bool LegalOperations) {
+  unsigned Opcode = N->getOpcode();
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
+         Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
+
+  // fold (sext c1) -> c1
+  // fold (zext c1) -> c1
+  // fold (aext c1) -> c1
+  if (isa<ConstantSDNode>(N0))
+    return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
+
+  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
+  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
+  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
+  EVT SVT = VT.getScalarType();
+  if (!(VT.isVector() &&
+      (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
+      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
+    return nullptr;
+
+  // We can fold this node into a build_vector.
+  unsigned VTBits = SVT.getSizeInBits();
+  unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
+  SmallVector<SDValue, 8> Elts;
+  unsigned NumElts = N0->getNumOperands();
+  SDLoc DL(N);
+
+  for (unsigned i=0; i != NumElts; ++i) {
+    SDValue Op = N0->getOperand(i);
+    if (Op->getOpcode() == ISD::UNDEF) {
+      Elts.push_back(DAG.getUNDEF(SVT));
+      continue;
+    }
+
+    // Stay in APInt: going through uint64_t (getZExtValue) asserts, or drops
+    // bits in NDEBUG builds, once a value needs more than 64 bits. Operands
+    // of a build_vector may be wider than its element type, so narrow to the
+    // source element width first and then extend to the result width.
+    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
+    if (Opcode == ISD::SIGN_EXTEND)
+      Elts.push_back(DAG.getConstant(C.sext(VTBits), SVT));
+    else
+      Elts.push_back(DAG.getConstant(C.zext(VTBits), SVT));
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
+}
+
  // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
  // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
  // transformation. Returns true if extension are possible and the above
@@ -4658,8 +5005,7 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
      }
  
      Ops.push_back(SetCC->getOperand(2));
-    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
-                                 &Ops[0], Ops.size()));
+    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
    }
  }
  
@@ -4667,9 +5013,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
  
-  // fold (sext c1) -> c1
-  if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0);
+  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+                                              LegalOperations))
+    return SDValue(Res, 0);
  
    // fold (sext (sext x)) -> (sext x)
    // fold (sext (aext x)) -> (sext x)
@@ -4686,7 +5032,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
        if (NarrowLoad.getNode() != N0.getNode()) {
          CombineTo(N0.getNode(), NarrowLoad);
          // CombineTo deleted the truncate, if needed, but not what's under it.
-        AddToWorkList(oye);
+        AddToWorklist(oye);
        }
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
@@ -4733,6 +5079,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
    // on vectors in one instruction.  We only perform this transformation on
    // scalars.
    if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+      ISD::isUNINDEXEDLoad(N0.getNode()) &&
        ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
      bool DoXform = true;
@@ -4785,7 +5132,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
        TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
        (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
-    if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
        bool DoXform = true;
        SmallVector<SDNode*, 4> SetCCs;
        if (!N0.hasOneUse())
@@ -4813,12 +5160,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
    }
  
    if (N0.getOpcode() == ISD::SETCC) {
+    EVT N0VT = N0.getOperand(0).getValueType();
      // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
      // Only do this before legalize for now.
      if (VT.isVector() && !LegalOperations &&
-        TLI.getBooleanContents(true) ==
-          TargetLowering::ZeroOrNegativeOneBooleanContent) {
-      EVT N0VT = N0.getOperand(0).getValueType();
+        TLI.getBooleanContents(N0VT) ==
+            TargetLowering::ZeroOrNegativeOneBooleanContent) {
        // On some architectures (such as SSE/NEON/etc) the SETCC result type is
        // of the same size as the compared operands. Only optimize sext(setcc())
        // if this is the case.
@@ -4846,7 +5193,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
        }
      }
  
-    // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
      unsigned ElementWidth = VT.getScalarType().getSizeInBits();
      SDValue NegOne =
        DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
@@ -4855,15 +5202,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
                         NegOne, DAG.getConstant(0, VT),
                         cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
      if (SCC.getNode()) return SCC;
-    if (!VT.isVector() &&
-        (!LegalOperations ||
-         TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) {
-      return DAG.getSelect(SDLoc(N), VT,
-                           DAG.getSetCC(SDLoc(N),
-                           getSetCCResultType(VT),
-                           N0.getOperand(0), N0.getOperand(1),
-                           cast<CondCodeSDNode>(N0.getOperand(2))->get()),
-                           NegOne, DAG.getConstant(0, VT));
+
+    if (!VT.isVector()) {
+      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
+      if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
+        SDLoc DL(N);
+        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+        SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
+                                     N0.getOperand(0), N0.getOperand(1), CC);
+        return DAG.getSelect(DL, VT, SetCC,
+                             NegOne, DAG.getConstant(0, VT));
+      }
      }
    }
  
@@ -4878,13 +5227,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  // isTruncateOf - If N is a truncate of some other value, return true, record
  // the value being truncated in Op and which of Op's bits are zero in KnownZero.
  // This function computes KnownZero to avoid a duplicated call to
-// ComputeMaskedBits in the caller.
+// computeKnownBits in the caller.
  static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                           APInt &KnownZero) {
    APInt KnownOne;
    if (N->getOpcode() == ISD::TRUNCATE) {
      Op = N->getOperand(0);
-    DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+    DAG.computeKnownBits(Op, KnownZero, KnownOne);
      return true;
    }
  
@@ -4905,7 +5254,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
    else
      return false;
  
-  DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+  DAG.computeKnownBits(Op, KnownZero, KnownOne);
  
    if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
      return false;
@@ -4917,9 +5266,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
  
-  // fold (zext c1) -> c1
-  if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
+  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+                                              LegalOperations))
+    return SDValue(Res, 0);
+
    // fold (zext (zext x)) -> (zext x)
    // fold (zext (aext x)) -> (zext x)
    if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
@@ -4959,7 +5309,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
        if (NarrowLoad.getNode() != N0.getNode()) {
          CombineTo(N0.getNode(), NarrowLoad);
          // CombineTo deleted the truncate, if needed, but not what's under it.
-        AddToWorkList(oye);
+        AddToWorklist(oye);
        }
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
@@ -4977,7 +5327,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
        if (NarrowLoad.getNode() != N0.getNode()) {
          CombineTo(N0.getNode(), NarrowLoad);
          // CombineTo deleted the truncate, if needed, but not what's under it.
-        AddToWorkList(oye);
+        AddToWorklist(oye);
        }
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
@@ -4985,10 +5335,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
      SDValue Op = N0.getOperand(0);
      if (Op.getValueType().bitsLT(VT)) {
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
-      AddToWorkList(Op.getNode());
+      AddToWorklist(Op.getNode());
      } else if (Op.getValueType().bitsGT(VT)) {
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
-      AddToWorkList(Op.getNode());
+      AddToWorklist(Op.getNode());
      }
      return DAG.getZeroExtendInReg(Op, SDLoc(N),
                                    N0.getValueType().getScalarType());
@@ -5019,6 +5369,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
    // on vectors in one instruction.  We only perform this transformation on
    // scalars.
    if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+      ISD::isUNINDEXEDLoad(N0.getNode()) &&
        ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
      bool DoXform = true;
@@ -5051,7 +5402,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
        (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
-    if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
        bool DoXform = true;
        SmallVector<SDNode*, 4> SetCCs;
        if (!N0.hasOneUse())
@@ -5122,7 +5473,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                                           N0.getOperand(1),
                                   cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                             DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
-                                       &OneOps[0], OneOps.size()));
+                                       OneOps));
  
        // If the desired elements are smaller or larger than the source
        // elements we can use a matching integer vector type and then
@@ -5139,8 +5490,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getNode(ISD::AND, SDLoc(N), VT,
                           DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
-                         DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
-                                     &OneOps[0], OneOps.size()));
+                         DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
      }
  
      // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -5186,9 +5536,10 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
  
-  // fold (aext c1) -> c1
-  if (isa<ConstantSDNode>(N0))
-    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0);
+  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+                                              LegalOperations))
+    return SDValue(Res, 0);
+
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
@@ -5206,7 +5557,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
        if (NarrowLoad.getNode() != N0.getNode()) {
          CombineTo(N0.getNode(), NarrowLoad);
          // CombineTo deleted the truncate, if needed, but not what's under it.
-        AddToWorkList(oye);
+        AddToWorklist(oye);
        }
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
@@ -5246,8 +5597,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
    // on vectors in one instruction.  We only perform this transformation on
    // scalars.
    if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
-      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
-       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+      ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
@@ -5275,20 +5626,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
        !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
        N0.hasOneUse()) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    ISD::LoadExtType ExtType = LN0->getExtensionType();
      EVT MemVT = LN0->getMemoryVT();
-    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N),
-                                     VT, LN0->getChain(), LN0->getBasePtr(),
-                                     MemVT, LN0->getMemOperand());
-    CombineTo(N, ExtLoad);
-    CombineTo(N0.getNode(),
-              DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
-                          N0.getValueType(), ExtLoad),
-              ExtLoad.getValue(1));
-    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) {
+      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
+                                       VT, LN0->getChain(), LN0->getBasePtr(),
+                                       MemVT, LN0->getMemOperand());
+      CombineTo(N, ExtLoad);
+      CombineTo(N0.getNode(),
+                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+                            N0.getValueType(), ExtLoad),
+                ExtLoad.getValue(1));
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
    }
  
    if (N0.getOpcode() == ISD::SETCC) {
-    // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+    // For vectors:
+    // aext(setcc) -> vsetcc
+    // aext(setcc) -> truncate(vsetcc)
+    // aext(setcc) -> aext(vsetcc)
      // Only do this before legalize for now.
      if (VT.isVector() && !LegalOperations) {
        EVT N0VT = N0.getOperand(0).getValueType();
@@ -5303,19 +5660,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
                               cast<CondCodeSDNode>(N0.getOperand(2))->get());
        // If the desired elements are smaller or larger than the source
        // elements we can use a matching integer vector type and then
-      // truncate/sign extend
+      // truncate/any extend
        else {
-        EVT MatchingElementType =
-          EVT::getIntegerVT(*DAG.getContext(),
-                            N0VT.getScalarType().getSizeInBits());
-        EVT MatchingVectorType =
-          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
-                           N0VT.getVectorNumElements());
+        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
          SDValue VsetCC =
            DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                          N0.getOperand(1),
                          cast<CondCodeSDNode>(N0.getOperand(2))->get());
-        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
+        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
        }
      }
  
@@ -5331,15 +5683,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
    return SDValue();
  }
  
-/// GetDemandedBits - See if the specified operand can be simplified with the
-/// knowledge that only the bits specified by Mask are used.  If so, return the
-/// simpler operand, otherwise return a null SDValue.
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by Mask are used.  If so, return the simpler operand,
+/// otherwise return a null SDValue.
  SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
    switch (V.getOpcode()) {
    default: break;
    case ISD::Constant: {
      const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
-    assert(CV != 0 && "Const value should be ConstSDNode.");
+    assert(CV && "Const value should be ConstSDNode.");
      const APInt &CVal = CV->getAPIntValue();
      APInt NewVal = CVal & Mask;
      if (NewVal != CVal)
@@ -5374,11 +5726,11 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
    return SDValue();
  }
  
-/// ReduceLoadWidth - If the result of a wider load is shifted to right of N
-/// bits and then truncated to a narrower type and where N is a multiple
-/// of number of bits of the narrower type, transform it to a narrower load
-/// from address + N / num of bits of new type. If the result is to be
-/// extended, also fold the extension to form a extending load.
+/// If the result of a wider load is shifted right by N bits and then truncated
+/// to a narrower type, where N is a multiple of the number of bits of the
+/// narrower type, transform it to a narrower load from address + N / num of
+/// bits of new type. If the result is to be extended, also fold the extension
+/// to form an extending load.
  SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
    unsigned Opc = N->getOpcode();
  
@@ -5503,22 +5855,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
    SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
                                 PtrType, LN0->getBasePtr(),
                                 DAG.getConstant(PtrOff, PtrType));
-  AddToWorkList(NewPtr.getNode());
+  AddToWorklist(NewPtr.getNode());
  
    SDValue Load;
    if (ExtType == ISD::NON_EXTLOAD)
      Load =  DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          LN0->isVolatile(), LN0->isNonTemporal(),
-                        LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
+                        LN0->isInvariant(), NewAlign, LN0->getAAInfo());
    else
      Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
                            LN0->getPointerInfo().getWithOffset(PtrOff),
                            ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
-                          NewAlign, LN0->getTBAAInfo());
+                          LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  
    // Replace the old load's chain with the new load's chain.
-  WorkListRemover DeadNodes(*this);
+  WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
  
    // Shift the result left, if we've swallowed a left shift.
@@ -5617,7 +5969,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
-    AddToWorkList(ExtLoad.getNode());
+    AddToWorklist(ExtLoad.getNode());
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
    // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
@@ -5640,7 +5992,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
    if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
      SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                         N0.getOperand(1), false);
-    if (BSwap.getNode() != 0)
+    if (BSwap.getNode())
        return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                           BSwap, N1);
    }
@@ -5665,7 +6017,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
                                       Op.getValueType()));
      }
  
-    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts);
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
    }
  
    return SDValue();
@@ -5712,7 +6064,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
    // creates this pattern) and before operation legalization after which
    // we need to be more careful about the vector instructions that we generate.
    if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-      LegalTypes && !LegalOperations && N0->hasOneUse()) {
+      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
  
      EVT VecTy = N0.getOperand(0).getValueType();
      EVT ExTy = N0.getValueType();
@@ -5739,6 +6091,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
      }
    }
  
+  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
+  if (N0.getOpcode() == ISD::SELECT) {
+    EVT SrcVT = N0.getValueType();
+    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
+        TLI.isTruncateFree(SrcVT, VT)) {
+      SDLoc SL(N0);
+      SDValue Cond = N0.getOperand(0);
+      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
+      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
+    }
+  }
+
    // Fold a series of buildvector, bitcast, and truncate if possible.
    // For example fold
    //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -5766,8 +6131,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
        for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
          Opnds.push_back(BuildVect.getOperand(i));
  
-      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
-                         Opnds.size());
+      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
      }
    }
  
@@ -5839,11 +6203,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
            continue;
          }
          SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
-        AddToWorkList(NV.getNode());
+        AddToWorklist(NV.getNode());
          Opnds.push_back(NV);
        }
-      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
-                         &Opnds[0], Opnds.size());
+      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
      }
    }
  
@@ -5862,7 +6225,7 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
    return Elt.getOperand(Elt.getResNo()).getNode();
  }
  
-/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// build_pair (load, load) -> load
  /// if load locations are consecutive.
  SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
    assert(N->getOpcode() == ISD::BUILD_PAIR);
@@ -5870,8 +6233,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
    if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
-      LD1->getPointerInfo().getAddrSpace() !=
-         LD2->getPointerInfo().getAddrSpace())
+      LD1->getAddressSpace() != LD2->getAddressSpace())
      return SDValue();
    EVT LD1VT = LD1->getValueType(0);
  
@@ -5929,7 +6291,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
        // Ideally this won't happen very often, because instcombine
        // and the earlier dagcombine runs (where illegal nodes are
        // permitted) should have folded most of them already.
-      DAG.DeleteNode(Res.getNode());
+      deleteAndRecombine(Res.getNode());
      }
    }
  
@@ -5943,6 +6305,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
    if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
        // Do not change the width of a volatile load.
        !cast<LoadSDNode>(N0)->isVolatile() &&
+      // Do not remove the cast if the types differ in endian layout.
+      TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
+      TLI.hasBigEndianPartOrdering(VT) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
        TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -5955,12 +6320,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
                                   LN0->getBasePtr(), LN0->getPointerInfo(),
                                   LN0->isVolatile(), LN0->isNonTemporal(),
                                   LN0->isInvariant(), OrigAlign,
-                                 LN0->getTBAAInfo());
-      AddToWorkList(N);
-      CombineTo(N0.getNode(),
-                DAG.getNode(ISD::BITCAST, SDLoc(N0),
-                            N0.getValueType(), Load),
-                Load.getValue(1));
+                                 LN0->getAAInfo());
+      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
        return Load;
      }
    }
@@ -5974,7 +6335,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
        !VT.isVector() && !N0.getValueType().isVector()) {
      SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
                                    N0.getOperand(0));
-    AddToWorkList(NewConv.getNode());
+    AddToWorklist(NewConv.getNode());
  
      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
      if (N0.getOpcode() == ISD::FNEG)
@@ -5997,34 +6358,34 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
      if (isTypeLegal(IntXVT)) {
        SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
                                IntXVT, N0.getOperand(1));
-      AddToWorkList(X.getNode());
+      AddToWorklist(X.getNode());
  
        // If X has a different width than the result/lhs, sext it or truncate it.
        unsigned VTWidth = VT.getSizeInBits();
        if (OrigXWidth < VTWidth) {
          X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
-        AddToWorkList(X.getNode());
+        AddToWorklist(X.getNode());
        } else if (OrigXWidth > VTWidth) {
          // To get the sign bit in the right place, we have to shift it right
          // before truncating.
          X = DAG.getNode(ISD::SRL, SDLoc(X),
                          X.getValueType(), X,
                          DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
-        AddToWorkList(X.getNode());
+        AddToWorklist(X.getNode());
          X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
-        AddToWorkList(X.getNode());
+        AddToWorklist(X.getNode());
        }
  
        APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
        X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                        X, DAG.getConstant(SignBit, VT));
-      AddToWorkList(X.getNode());
+      AddToWorklist(X.getNode());
  
        SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
                                  VT, N0.getOperand(0));
        Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                          Cst, DAG.getConstant(~SignBit, VT));
-      AddToWorkList(Cst.getNode());
+      AddToWorklist(Cst.getNode());
  
        return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
      }
@@ -6045,9 +6406,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
    return CombineConsecutiveLoads(N, VT);
  }
  
-/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
-/// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the
-/// destination element value type.
+/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
+/// operands. DstEltVT indicates the destination element value type.
  SDValue DAGCombiner::
  ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
    EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
@@ -6080,10 +6440,9 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
          Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
        Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                  DstEltVT, Op));
-      AddToWorkList(Ops.back().getNode());
+      AddToWorklist(Ops.back().getNode());
      }
-    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
-                       &Ops[0], Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
    }
  
    // Otherwise, we're growing or shrinking the elements.  To avoid having to
@@ -6139,8 +6498,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
      }
  
      EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
-    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
-                       &Ops[0], Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
    }
  
    // Finally, this must be the case where we are shrinking elements: each input
@@ -6176,8 +6534,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
        std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
    }
  
-  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
-                     &Ops[0], Ops.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }
  
  SDValue DAGCombiner::visitFADD(SDNode *N) {
@@ -6186,7 +6543,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
    EVT VT = N->getValueType(0);
-
+  const TargetOptions &Options = DAG.getTarget().Options;
+  
    // fold vector ops
    if (VT.isVector()) {
      SDValue FoldedVOp = SimplifyVBinOp(N);
@@ -6196,193 +6554,146 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
    // fold (fadd c1, c2) -> c1 + c2
    if (N0CFP && N1CFP)
      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
+
    // canonicalize constant to RHS
    if (N0CFP && !N1CFP)
      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
-  // fold (fadd A, 0) -> A
-  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
-      N1CFP->getValueAPF().isZero())
-    return N0;
+
    // fold (fadd A, (fneg B)) -> (fsub A, B)
    if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-    isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
      return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
                         GetNegatedExpression(N1, DAG, LegalOperations));
+  
    // fold (fadd (fneg A), B) -> (fsub B, A)
    if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-    isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
+      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
      return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
                         GetNegatedExpression(N0, DAG, LegalOperations));
  
-  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
-  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
-      N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
-      isa<ConstantFPSDNode>(N0.getOperand(1)))
-    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
-                       DAG.getNode(ISD::FADD, SDLoc(N), VT,
-                                   N0.getOperand(1), N1));
-
-  // No FP constant should be created after legalization as Instruction
-  // Selection pass has hard time in dealing with FP constant.
-  //
-  // We don't need test this condition for transformation like following, as
-  // the DAG being transformed implies it is legal to take FP constant as
-  // operand.
-  //
-  //  (fadd (fmul c, x), x) -> (fmul c+1, x)
-  //
-  bool AllowNewFpConst = (Level < AfterLegalizeDAG);
-
-  // If allow, fold (fadd (fneg x), x) -> 0.0
-  if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
-      N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
-    return DAG.getConstantFP(0.0, VT);
-
-    // If allow, fold (fadd x, (fneg x)) -> 0.0
-  if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
-      N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
-    return DAG.getConstantFP(0.0, VT);
-
-  // In unsafe math mode, we can fold chains of FADD's of the same value
-  // into multiplications.  This transform is not safe in general because
-  // we are reducing the number of rounding steps.
-  if (DAG.getTarget().Options.UnsafeFPMath &&
-      TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
-      !N0CFP && !N1CFP) {
-    if (N0.getOpcode() == ISD::FMUL) {
-      ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
-      ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
-
-      // (fadd (fmul c, x), x) -> (fmul x, c+1)
-      if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
-        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
-                                     SDValue(CFP00, 0),
-                                     DAG.getConstantFP(1.0, VT));
-        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                           N1, NewCFP);
-      }
-
-      // (fadd (fmul x, c), x) -> (fmul x, c+1)
-      if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
-        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
-                                     SDValue(CFP01, 0),
-                                     DAG.getConstantFP(1.0, VT));
-        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                           N1, NewCFP);
-      }
-
-      // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2)
-      if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
-          N1.getOperand(0) == N1.getOperand(1) &&
-          N0.getOperand(1) == N1.getOperand(0)) {
-        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
-                                     SDValue(CFP00, 0),
-                                     DAG.getConstantFP(2.0, VT));
-        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                           N0.getOperand(1), NewCFP);
-      }
+  // If 'unsafe math' is enabled, fold lots of things.
+  if (Options.UnsafeFPMath) {
+    // No FP constant should be created after legalization, as the Instruction
+    // Selection pass has a hard time dealing with FP constants.
+    bool AllowNewConst = (Level < AfterLegalizeDAG);
+    
+    // fold (fadd A, 0) -> A
+    if (N1CFP && N1CFP->getValueAPF().isZero())
+      return N0;
  
-      // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
-      if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
-          N1.getOperand(0) == N1.getOperand(1) &&
-          N0.getOperand(0) == N1.getOperand(0)) {
-        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
-                                     SDValue(CFP01, 0),
-                                     DAG.getConstantFP(2.0, VT));
-        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                           N0.getOperand(0), NewCFP);
+    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+        isa<ConstantFPSDNode>(N0.getOperand(1)))
+      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
+                         DAG.getNode(ISD::FADD, SDLoc(N), VT,
+                                     N0.getOperand(1), N1));
+    
+    // If allowed, fold (fadd (fneg x), x) -> 0.0
+    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
+      return DAG.getConstantFP(0.0, VT);
+    
+    // If allowed, fold (fadd x, (fneg x)) -> 0.0
+    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
+      return DAG.getConstantFP(0.0, VT);
+    
+    // We can fold chains of FADD's of the same value into multiplications.
+    // This transform is not safe in general because we are reducing the number
+    // of rounding steps.
+    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
+      if (N0.getOpcode() == ISD::FMUL) {
+        ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+        ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+        
+        // (fadd (fmul x, c), x) -> (fmul x, c+1)
+        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
+          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+                                       SDValue(CFP01, 0),
+                                       DAG.getConstantFP(1.0, VT));
+          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
+        }
+        
+        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
+        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
+            N1.getOperand(0) == N1.getOperand(1) &&
+            N0.getOperand(0) == N1.getOperand(0)) {
+          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+                                       SDValue(CFP01, 0),
+                                       DAG.getConstantFP(2.0, VT));
+          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+                             N0.getOperand(0), NewCFP);
+        }
        }
-    }
-
-    if (N1.getOpcode() == ISD::FMUL) {
-      ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
-      ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+      
+      if (N1.getOpcode() == ISD::FMUL) {
+        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+        ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
+        
+        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
+        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
+          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+                                       SDValue(CFP11, 0),
+                                       DAG.getConstantFP(1.0, VT));
+          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP);
+        }
  
-      // (fadd x, (fmul c, x)) -> (fmul x, c+1)
-      if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
-        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
-                                     SDValue(CFP10, 0),
-                                     DAG.getConstantFP(1.0, VT));
-        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                           N0, NewCFP);
+        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
+        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
+            N0.getOperand(0) == N0.getOperand(1) &&
+            N1.getOperand(0) == N0.getOperand(0)) {
+          SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
+                                       SDValue(CFP11, 0),
+                                       DAG.getConstantFP(2.0, VT));
+          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP);
+        }
        }
  
-      // (fadd x, (fmul x, c)) -> (fmul x, c+1)
-      if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
-        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
-                                     SDValue(CFP11, 0),
-                                     DAG.getConstantFP(1.0, VT));
-        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                           N0, NewCFP);
+      if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
+        ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
+        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
+        if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
+            (N0.getOperand(0) == N1))
+          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+                             N1, DAG.getConstantFP(3.0, VT));
        }
-
-
-      // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2)
-      if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD &&
-          N0.getOperand(0) == N0.getOperand(1) &&
-          N1.getOperand(1) == N0.getOperand(0)) {
-        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
-                                     SDValue(CFP10, 0),
-                                     DAG.getConstantFP(2.0, VT));
-        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                           N1.getOperand(1), NewCFP);
+      
+      if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
+        ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
+        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
+        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
+            N1.getOperand(0) == N0)
+          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
+                             N0, DAG.getConstantFP(3.0, VT));
        }
-
-      // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
-      if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
+      
+      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
+      if (AllowNewConst &&
+          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
-          N1.getOperand(0) == N0.getOperand(0)) {
-        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
-                                     SDValue(CFP11, 0),
-                                     DAG.getConstantFP(2.0, VT));
-        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                           N1.getOperand(0), NewCFP);
-      }
-    }
-
-    if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
-      ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
-      // (fadd (fadd x, x), x) -> (fmul x, 3.0)
-      if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
-          (N0.getOperand(0) == N1))
-        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                           N1, DAG.getConstantFP(3.0, VT));
-    }
-
-    if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
-      ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
-      // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
-      if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
-          N1.getOperand(0) == N0)
+          N1.getOperand(0) == N1.getOperand(1) &&
+          N0.getOperand(0) == N1.getOperand(0))
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                           N0, DAG.getConstantFP(3.0, VT));
+                           N0.getOperand(0), DAG.getConstantFP(4.0, VT));
      }
-
-    // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
-    if (AllowNewFpConst &&
-        N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
-        N0.getOperand(0) == N0.getOperand(1) &&
-        N1.getOperand(0) == N1.getOperand(1) &&
-        N0.getOperand(0) == N1.getOperand(0))
-      return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                         N0.getOperand(0),
-                         DAG.getConstantFP(4.0, VT));
-  }
-
+  } // enable-unsafe-fp-math
+  
    // FADD -> FMA combines:
-  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
-       DAG.getTarget().Options.UnsafeFPMath) &&
-      DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+      DAG.getTarget()
+          .getSubtargetImpl()
+          ->getTargetLowering()
+          ->isFMAFasterThanFMulAndFAdd(VT) &&
        (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
  
      // fold (fadd (fmul x, y), z) -> (fma x, y, z)
-    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
+    if (N0.getOpcode() == ISD::FMUL &&
+        (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1), N1);
  
      // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
      // Note: Commutes FADD operands.
-    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
+    if (N1.getOpcode() == ISD::FMUL &&
+        (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
        return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                           N1.getOperand(0), N1.getOperand(1), N0);
    }
@@ -6393,10 +6704,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue DAGCombiner::visitFSUB(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
    EVT VT = N->getValueType(0);
    SDLoc dl(N);
+  const TargetOptions &Options = DAG.getTarget().Options;
  
    // fold vector ops
    if (VT.isVector()) {
@@ -6407,60 +6719,62 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
    // fold (fsub c1, c2) -> c1-c2
    if (N0CFP && N1CFP)
      return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
-  // fold (fsub A, 0) -> A
-  if (DAG.getTarget().Options.UnsafeFPMath &&
-      N1CFP && N1CFP->getValueAPF().isZero())
-    return N0;
-  // fold (fsub 0, B) -> -B
-  if (DAG.getTarget().Options.UnsafeFPMath &&
-      N0CFP && N0CFP->getValueAPF().isZero()) {
-    if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
-      return GetNegatedExpression(N1, DAG, LegalOperations);
-    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
-      return DAG.getNode(ISD::FNEG, dl, VT, N1);
-  }
+
    // fold (fsub A, (fneg B)) -> (fadd A, B)
-  if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
+  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
      return DAG.getNode(ISD::FADD, dl, VT, N0,
                         GetNegatedExpression(N1, DAG, LegalOperations));
  
-  // If 'unsafe math' is enabled, fold
-  //    (fsub x, x) -> 0.0 &
-  //    (fsub x, (fadd x, y)) -> (fneg y) &
-  //    (fsub x, (fadd y, x)) -> (fneg y)
-  if (DAG.getTarget().Options.UnsafeFPMath) {
+  // If 'unsafe math' is enabled, fold lots of things.
+  if (Options.UnsafeFPMath) {
+    // (fsub A, 0) -> A
+    if (N1CFP && N1CFP->getValueAPF().isZero())
+      return N0;
+
+    // (fsub 0, B) -> -B
+    if (N0CFP && N0CFP->getValueAPF().isZero()) {
+      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+        return GetNegatedExpression(N1, DAG, LegalOperations);
+      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+        return DAG.getNode(ISD::FNEG, dl, VT, N1);
+    }
+
+    // (fsub x, x) -> 0.0
      if (N0 == N1)
        return DAG.getConstantFP(0.0f, VT);
  
+    // (fsub x, (fadd x, y)) -> (fneg y)
+    // (fsub x, (fadd y, x)) -> (fneg y)
      if (N1.getOpcode() == ISD::FADD) {
        SDValue N10 = N1->getOperand(0);
        SDValue N11 = N1->getOperand(1);
  
-      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
-                                          &DAG.getTarget().Options))
+      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
          return GetNegatedExpression(N11, DAG, LegalOperations);
  
-      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
-                                          &DAG.getTarget().Options))
+      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
          return GetNegatedExpression(N10, DAG, LegalOperations);
      }
    }
  
    // FSUB -> FMA combines:
-  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
-       DAG.getTarget().Options.UnsafeFPMath) &&
-      DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+      DAG.getTarget().getSubtargetImpl()
+          ->getTargetLowering()
+          ->isFMAFasterThanFMulAndFAdd(VT) &&
        (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
  
      // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
-    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
+    if (N0.getOpcode() == ISD::FMUL &&
+        (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
        return DAG.getNode(ISD::FMA, dl, VT,
                           N0.getOperand(0), N0.getOperand(1),
                           DAG.getNode(ISD::FNEG, dl, VT, N1));
  
      // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
      // Note: Commutes FSUB operands.
-    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
+    if (N1.getOpcode() == ISD::FMUL &&
+        (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT)))
        return DAG.getNode(ISD::FMA, dl, VT,
                           DAG.getNode(ISD::FNEG, dl, VT,
                           N1.getOperand(0)),
@@ -6469,7 +6783,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
      // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
      if (N0.getOpcode() == ISD::FNEG &&
          N0.getOperand(0).getOpcode() == ISD::FMUL &&
-        N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
+        ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) ||
+            TLI.enableAggressiveFMAFusion(VT))) {
        SDValue N00 = N0.getOperand(0).getOperand(0);
        SDValue N01 = N0.getOperand(0).getOperand(1);
        return DAG.getNode(ISD::FMA, dl, VT,
@@ -6484,47 +6799,82 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue DAGCombiner::visitFMUL(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
    EVT VT = N->getValueType(0);
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const TargetOptions &Options = DAG.getTarget().Options;
  
    // fold vector ops
    if (VT.isVector()) {
+    // This just handles C1 * C2 for vectors. Other vector folds are below.
      SDValue FoldedVOp = SimplifyVBinOp(N);
-    if (FoldedVOp.getNode()) return FoldedVOp;
+    if (FoldedVOp.getNode())
+      return FoldedVOp;
+    // Canonicalize vector constant to RHS.
+    if (N0.getOpcode() == ISD::BUILD_VECTOR &&
+        N1.getOpcode() != ISD::BUILD_VECTOR)
+      if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0))
+        if (BV0->isConstant())
+          return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
    }
  
    // fold (fmul c1, c2) -> c1*c2
    if (N0CFP && N1CFP)
      return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
+
    // canonicalize constant to RHS
    if (N0CFP && !N1CFP)
      return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
-  // fold (fmul A, 0) -> 0
-  if (DAG.getTarget().Options.UnsafeFPMath &&
-      N1CFP && N1CFP->getValueAPF().isZero())
-    return N1;
-  // fold (fmul A, 0) -> 0, vector edition.
-  if (DAG.getTarget().Options.UnsafeFPMath &&
-      ISD::isBuildVectorAllZeros(N1.getNode()))
-    return N1;
+
    // fold (fmul A, 1.0) -> A
    if (N1CFP && N1CFP->isExactlyValue(1.0))
      return N0;
+
+  if (Options.UnsafeFPMath) {
+    // fold (fmul A, 0) -> 0
+    if (N1CFP && N1CFP->getValueAPF().isZero())
+      return N1;
+
+    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+    if (N0.getOpcode() == ISD::FMUL) {
+      // Fold scalars or any vector constants (not just splats).
+      // This fold is done in general by InstCombine, but extra fmul insts
+      // may have been generated during lowering.
+      SDValue N01 = N0.getOperand(1);
+      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
+      if ((N1CFP && isConstOrConstSplatFP(N01)) ||
+          (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
+        SDLoc SL(N);
+        SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1);
+        return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts);
+      }
+    }
+
+    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
+    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
+    // during an early run of DAGCombiner it can prevent folding with fmuls
+    // inserted during lowering.
+    if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
+      SDLoc SL(N);
+      const SDValue Two = DAG.getConstantFP(2.0, VT);
+      SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1);
+      return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts);
+    }
+  }
+
    // fold (fmul X, 2.0) -> (fadd X, X)
    if (N1CFP && N1CFP->isExactlyValue(+2.0))
      return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
+
    // fold (fmul X, -1.0) -> (fneg X)
    if (N1CFP && N1CFP->isExactlyValue(-1.0))
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
  
    // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
-  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
-                                       &DAG.getTarget().Options)) {
-    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
-                                         &DAG.getTarget().Options)) {
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
        // Both can be negated for free, check to see if at least one is cheaper
        // negated.
        if (LHSNeg == 2 || RHSNeg == 2)
@@ -6534,14 +6884,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
      }
    }
  
-  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
-  if (DAG.getTarget().Options.UnsafeFPMath &&
-      N1CFP && N0.getOpcode() == ISD::FMUL &&
-      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
-    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
-                       DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                                   N0.getOperand(1), N1));
-
    return SDValue();
  }
  
@@ -6553,8 +6895,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
    EVT VT = N->getValueType(0);
    SDLoc dl(N);
+  const TargetOptions &Options = DAG.getTarget().Options;
  
-  if (DAG.getTarget().Options.UnsafeFPMath) {
+  // Constant fold FMA.
+  if (isa<ConstantFPSDNode>(N0) &&
+      isa<ConstantFPSDNode>(N1) &&
+      isa<ConstantFPSDNode>(N2)) {
+    return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
+  }
+
+  if (Options.UnsafeFPMath) {
      if (N0CFP && N0CFP->isZero())
        return N2;
      if (N1CFP && N1CFP->isZero())
@@ -6570,7 +6920,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
      return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
  
    // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
-  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+  if (Options.UnsafeFPMath && N1CFP &&
        N2.getOpcode() == ISD::FMUL &&
        N0 == N2.getOperand(0) &&
        N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
@@ -6580,7 +6930,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
  
  
    // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
-  if (DAG.getTarget().Options.UnsafeFPMath &&
+  if (Options.UnsafeFPMath &&
        N0.getOpcode() == ISD::FMUL && N1CFP &&
        N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
      return DAG.getNode(ISD::FMA, dl, VT,
@@ -6598,19 +6948,19 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
      if (N1CFP->isExactlyValue(-1.0) &&
          (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
        SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
-      AddToWorkList(RHSNeg.getNode());
+      AddToWorklist(RHSNeg.getNode());
        return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
      }
    }
  
    // (fma x, c, x) -> (fmul x, (c+1))
-  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2)
+  if (Options.UnsafeFPMath && N1CFP && N0 == N2)
      return DAG.getNode(ISD::FMUL, dl, VT, N0,
                         DAG.getNode(ISD::FADD, dl, VT,
                                     N1, DAG.getConstantFP(1.0, VT)));
  
    // (fma x, c, (fneg x)) -> (fmul x, (c-1))
-  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+  if (Options.UnsafeFPMath && N1CFP &&
        N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
      return DAG.getNode(ISD::FMUL, dl, VT, N0,
                         DAG.getNode(ISD::FADD, dl, VT,
@@ -6626,7 +6976,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
    EVT VT = N->getValueType(0);
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc DL(N);
+  const TargetOptions &Options = DAG.getTarget().Options;
  
    // fold vector ops
    if (VT.isVector()) {
@@ -6638,30 +6989,83 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
    if (N0CFP && N1CFP)
      return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
  
-  // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
-  if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) {
-    // Compute the reciprocal 1.0 / c2.
-    APFloat N1APF = N1CFP->getValueAPF();
-    APFloat Recip(N1APF.getSemantics(), 1); // 1.0
-    APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
-    // Only do the transform if the reciprocal is a legal fp immediate that
-    // isn't too nasty (eg NaN, denormal, ...).
-    if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
-        (!LegalOperations ||
-         // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
-         // backend)... we should handle this gracefully after Legalize.
-         // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
-         TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
-         TLI.isFPImmLegal(Recip, VT)))
-      return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
-                         DAG.getConstantFP(Recip, VT));
+  if (Options.UnsafeFPMath) {
+    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+    if (N1CFP) {
+      // Compute the reciprocal 1.0 / c2.
+      APFloat N1APF = N1CFP->getValueAPF();
+      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+      // Only do the transform if the reciprocal is a legal fp immediate that
+      // isn't too nasty (eg NaN, denormal, ...).
+      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+          (!LegalOperations ||
+           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+           // backend)... we should handle this gracefully after Legalize.
+           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+           TLI.isFPImmLegal(Recip, VT)))
+        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
+                           DAG.getConstantFP(Recip, VT));
+    }
+    
+    // If this FDIV is part of a reciprocal square root, it may be folded
+    // into a target-specific square root estimate instruction.
+    if (N1.getOpcode() == ISD::FSQRT) {
+      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
+        AddToWorklist(RV.getNode());
+        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+      }
+    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
+               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+        AddToWorklist(RV.getNode());
+        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
+        AddToWorklist(RV.getNode());
+        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+      }
+    } else if (N1.getOpcode() == ISD::FP_ROUND &&
+               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+      if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
+        AddToWorklist(RV.getNode());
+        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
+        AddToWorklist(RV.getNode());
+        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+      }
+    } else if (N1.getOpcode() == ISD::FMUL) {
+      // Look through an FMUL. Even though this won't remove the FDIV directly,
+      // it's still worthwhile to get rid of the FSQRT if possible.
+      SDValue SqrtOp;
+      SDValue OtherOp;
+      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
+        SqrtOp = N1.getOperand(0);
+        OtherOp = N1.getOperand(1);
+      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
+        SqrtOp = N1.getOperand(1);
+        OtherOp = N1.getOperand(0);
+      }
+      if (SqrtOp.getNode()) {
+        // We found a FSQRT, so try to make this fold:
+        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
+        if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
+          AddToWorklist(RV.getNode());
+          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
+          AddToWorklist(RV.getNode());
+          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+        }
+      }
+    }
+    
+    // Fold into a reciprocal estimate and multiply instead of a real divide.
+    if (SDValue RV = BuildReciprocalEstimate(N1)) {
+      AddToWorklist(RV.getNode());
+      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
+    }
    }
  
    // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
-  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
-                                       &DAG.getTarget().Options)) {
-    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
-                                         &DAG.getTarget().Options)) {
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
        // Both can be negated for free, check to see if at least one is cheaper
        // negated.
        if (LHSNeg == 2 || RHSNeg == 2)
@@ -6688,6 +7092,33 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
    return SDValue();
  }
  
+SDValue DAGCombiner::visitFSQRT(SDNode *N) {
+  if (DAG.getTarget().Options.UnsafeFPMath) {
+    // Compute this as 1/(1/sqrt(X)): the reciprocal of the reciprocal sqrt.
+    if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
+      AddToWorklist(RV.getNode());
+      RV = BuildReciprocalEstimate(RV);
+      if (RV.getNode()) {
+        // Unfortunately, RV is now NaN if the input was exactly 0.
+        // Select out this case and force the answer to 0.
+        EVT VT = RV.getValueType();
+      
+        SDValue Zero = DAG.getConstantFP(0.0, VT);
+        SDValue ZeroCmp =
+          DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
+                       N->getOperand(0), Zero, ISD::SETEQ);
+        AddToWorklist(ZeroCmp.getNode());
+        AddToWorklist(RV.getNode());
+
+        RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
+                         SDLoc(N), VT, ZeroCmp, Zero, RV);
+        return RV;
+      }
+    }
+  }
+  return SDValue();
+}
+
  SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -6761,11 +7192,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
    }
  
    // The next optimizations are desirable only if SELECT_CC can be lowered.
-  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
-  // having to say they don't support SELECT_CC on every type the DAG knows
-  // about, since there is no way to mark an opcode illegal at all value types
-  // (See also visitSELECT)
-  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
      // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
      if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
          !VT.isVector() &&
@@ -6775,7 +7202,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
          { N0.getOperand(0), N0.getOperand(1),
            DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
            N0.getOperand(2) };
-      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
+      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
      }
  
      // fold (sint_to_fp (zext (setcc x, y, cc))) ->
@@ -6788,7 +7215,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
          { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
            DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
            N0.getOperand(0).getOperand(2) };
-      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
+      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
      }
    }
  
@@ -6818,11 +7245,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
    }
  
    // The next optimizations are desirable only if SELECT_CC can be lowered.
-  // Check against MVT::Other for SELECT_CC, which is a workaround for targets
-  // having to say they don't support SELECT_CC on every type the DAG knows
-  // about, since there is no way to mark an opcode illegal at all value types
-  // (See also visitSELECT)
-  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
      // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
  
      if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
@@ -6832,7 +7255,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
          { N0.getOperand(0), N0.getOperand(1),
            DAG.getConstantFP(1.0, VT),  DAG.getConstantFP(0.0, VT),
            N0.getOperand(2) };
-      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
+      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
      }
    }
  
@@ -6890,7 +7313,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
    if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
      SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                                N0.getOperand(0), N1);
-    AddToWorkList(Tmp.getNode());
+    AddToWorklist(Tmp.getNode());
      return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                         Tmp, N0.getOperand(1));
    }
@@ -6941,8 +7364,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  
    // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
    if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
-      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
-       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
@@ -6959,88 +7381,105 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
    return SDValue();
  }
  
-SDValue DAGCombiner::visitFNEG(SDNode *N) {
+SDValue DAGCombiner::visitFCEIL(SDNode *N) {
    SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    EVT VT = N->getValueType(0);
  
-  if (VT.isVector()) {
-    SDValue FoldedVOp = SimplifyVUnaryOp(N);
-    if (FoldedVOp.getNode()) return FoldedVOp;
-  }
+  // fold (fceil c1) -> fceil(c1)
+  if (N0CFP)
+    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
  
-  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
-                         &DAG.getTarget().Options))
-    return GetNegatedExpression(N0, DAG, LegalOperations);
+  return SDValue();
+}
  
-  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
-  // constant pool values.
-  if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
-      !VT.isVector() &&
-      N0.getNode()->hasOneUse() &&
-      N0.getOperand(0).getValueType().isInteger()) {
-    SDValue Int = N0.getOperand(0);
-    EVT IntVT = Int.getValueType();
-    if (IntVT.isInteger() && !IntVT.isVector()) {
-      Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
-              DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
-      AddToWorkList(Int.getNode());
-      return DAG.getNode(ISD::BITCAST, SDLoc(N),
-                         VT, Int);
-    }
-  }
+SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
  
-  // (fneg (fmul c, x)) -> (fmul -c, x)
-  if (N0.getOpcode() == ISD::FMUL) {
-    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
-    if (CFP1)
-      return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
-                         N0.getOperand(0),
-                         DAG.getNode(ISD::FNEG, SDLoc(N), VT,
-                                     N0.getOperand(1)));
-  }
+  // fold (ftrunc c1) -> ftrunc(c1)
+  if (N0CFP)
+    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
  
    return SDValue();
  }
  
-SDValue DAGCombiner::visitFCEIL(SDNode *N) {
+SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    EVT VT = N->getValueType(0);
  
-  // fold (fceil c1) -> fceil(c1)
+  // fold (ffloor c1) -> ffloor(c1)
    if (N0CFP)
-    return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
+    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
  
    return SDValue();
  }
  
-SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
+// FIXME: FNEG and FABS have a lot in common; refactor.
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    EVT VT = N->getValueType(0);
  
-  // fold (ftrunc c1) -> ftrunc(c1)
-  if (N0CFP)
-    return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVUnaryOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // Constant fold FNEG.
+  if (isa<ConstantFPSDNode>(N0))
+    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0));
  
-  return SDValue();
-}
+  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
+                         &DAG.getTarget().Options))
+    return GetNegatedExpression(N0, DAG, LegalOperations);
  
-SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
-  SDValue N0 = N->getOperand(0);
-  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  EVT VT = N->getValueType(0);
+  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
+  // constant pool values.
+  if (!TLI.isFNegFree(VT) &&
+      N0.getOpcode() == ISD::BITCAST &&
+      N0.getNode()->hasOneUse()) {
+    SDValue Int = N0.getOperand(0);
+    EVT IntVT = Int.getValueType();
+    if (IntVT.isInteger() && !IntVT.isVector()) {
+      APInt SignMask;
+      if (N0.getValueType().isVector()) {
+        // For a vector, get a mask such as 0x80... per scalar element
+        // and splat it.
+        SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+      } else {
+        // For a scalar, just generate 0x80...
+        SignMask = APInt::getSignBit(IntVT.getSizeInBits());
+      }
+      Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int,
+                        DAG.getConstant(SignMask, IntVT));
+      AddToWorklist(Int.getNode());
+      return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
+    }
+  }
  
-  // fold (ffloor c1) -> ffloor(c1)
-  if (N0CFP)
-    return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
+  // (fneg (fmul c, x)) -> (fmul -c, x)
+  if (N0.getOpcode() == ISD::FMUL) {
+    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
+    if (CFP1) {
+      APFloat CVal = CFP1->getValueAPF();
+      CVal.changeSign();
+      if (Level >= AfterLegalizeDAG &&
+          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
+           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
+        return DAG.getNode(
+            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+    }
+  }
  
    return SDValue();
  }
  
  SDValue DAGCombiner::visitFABS(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    EVT VT = N->getValueType(0);
  
    if (VT.isVector()) {
@@ -7049,30 +7488,40 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
    }
  
    // fold (fabs c1) -> fabs(c1)
-  if (N0CFP)
+  if (isa<ConstantFPSDNode>(N0))
      return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
+  
    // fold (fabs (fabs x)) -> (fabs x)
    if (N0.getOpcode() == ISD::FABS)
      return N->getOperand(0);
+
    // fold (fabs (fneg x)) -> (fabs x)
    // fold (fabs (fcopysign x, y)) -> (fabs x)
    if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
      return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
  
-  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
    // constant pool values.
    if (!TLI.isFAbsFree(VT) &&
-      N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
-      N0.getOperand(0).getValueType().isInteger() &&
-      !N0.getOperand(0).getValueType().isVector()) {
+      N0.getOpcode() == ISD::BITCAST &&
+      N0.getNode()->hasOneUse()) {
      SDValue Int = N0.getOperand(0);
      EVT IntVT = Int.getValueType();
      if (IntVT.isInteger() && !IntVT.isVector()) {
+      APInt SignMask;
+      if (N0.getValueType().isVector()) {
+        // For a vector, get a mask such as 0x7f... per scalar element
+        // and splat it.
+        SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
+      } else {
+        // For a scalar, just generate 0x7f...
+        SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
+      }
        Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int,
-             DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
-      AddToWorkList(Int.getNode());
-      return DAG.getNode(ISD::BITCAST, SDLoc(N),
-                         N->getValueType(0), Int);
+                        DAG.getConstant(SignMask, IntVT));
+      AddToWorklist(Int.getNode());
+      return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
      }
    }
  
@@ -7104,7 +7553,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
        ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
         (N1.getOperand(0).hasOneUse() &&
          N1.getOperand(0).getOpcode() == ISD::SRL))) {
-    SDNode *Trunc = 0;
+    SDNode *Trunc = nullptr;
      if (N1.getOpcode() == ISD::TRUNCATE) {
        // Look past the truncate.
        Trunc = N1.getNode();
@@ -7152,15 +7601,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
            // will convert it back to (X & C1) >> C2.
            CombineTo(N, NewBRCond, false);
            // Truncate is dead.
-          if (Trunc) {
-            removeFromWorkList(Trunc);
-            DAG.DeleteNode(Trunc);
-          }
+          if (Trunc)
+            deleteAndRecombine(Trunc);
            // Replace the uses of SRL with SETCC
-          WorkListRemover DeadNodes(*this);
+          WorklistRemover DeadNodes(*this);
            DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
-          removeFromWorkList(N1.getNode());
-          DAG.DeleteNode(N1.getNode());
+          deleteAndRecombine(N1.getNode());
            return SDValue(N, 0);   // Return N so it doesn't get rechecked!
          }
        }
@@ -7187,10 +7633,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                  dbgs() << "\nWith: ";
                  Tmp.getNode()->dump(&DAG);
                  dbgs() << '\n');
-          WorkListRemover DeadNodes(*this);
+          WorklistRemover DeadNodes(*this);
            DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
-          removeFromWorkList(TheXor);
-          DAG.DeleteNode(TheXor);
+          deleteAndRecombine(TheXor);
            return DAG.getNode(ISD::BRCOND, SDLoc(N),
                               MVT::Other, Chain, Tmp, N2);
          }
@@ -7218,10 +7663,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                                     Op0, Op1,
                                     Equal ? ISD::SETEQ : ISD::SETNE);
        // Replace the uses of XOR with SETCC
-      WorkListRemover DeadNodes(*this);
+      WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
-      removeFromWorkList(N1.getNode());
-      DAG.DeleteNode(N1.getNode());
+      deleteAndRecombine(N1.getNode());
        return DAG.getNode(ISD::BRCOND, SDLoc(N),
                           MVT::Other, Chain, SetCC, N2);
      }
@@ -7246,7 +7690,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
    SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
                                 CondLHS, CondRHS, CC->get(), SDLoc(N),
                                 false);
-  if (Simp.getNode()) AddToWorkList(Simp.getNode());
+  if (Simp.getNode()) AddToWorklist(Simp.getNode());
  
    // fold to a simpler setcc
    if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
@@ -7258,9 +7702,8 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
    return SDValue();
  }
  
-/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
-/// uses N as its base pointer and that N may be folded in the load / store
-/// addressing mode.
+/// Return true if 'Use' is a load or a store that uses N as its base pointer
+/// and that N may be folded in the load / store addressing mode.
  static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
                                      SelectionDAG &DAG,
                                      const TargetLowering &TLI) {
@@ -7299,12 +7742,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
    return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
  }
  
-/// CombineToPreIndexedLoadStore - Try turning a load / store into a
-/// pre-indexed load / store when the base pointer is an add or subtract
-/// and it has other uses besides the load / store. After the
-/// transformation, the new indexed load / store has effectively folded
-/// the add / subtract in and all of its other uses are redirected to the
-/// new load / store.
+/// Try turning a load/store into a pre-indexed load/store when the base
+/// pointer is an add or subtract and it has other uses besides the load/store.
+/// After the transformation, the new indexed load/store has effectively folded
+/// the add/subtract in and all of its other uses are redirected to the
+/// new load/store.
  bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
    if (Level < AfterLegalizeDAG)
      return false;
@@ -7385,9 +7827,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
    // a copy of the original base pointer.
    SmallVector<SDNode *, 16> OtherUses;
    if (isa<ConstantSDNode>(Offset))
-    for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
-         E = BasePtr.getNode()->use_end(); I != E; ++I) {
-      SDNode *Use = *I;
+    for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;
  
@@ -7429,9 +7869,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 16> Worklist;
  
-  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
-         E = Ptr.getNode()->use_end(); I != E; ++I) {
-    SDNode *Use = *I;
+  for (SDNode *Use : Ptr.getNode()->uses()) {
      if (Use == N)
        continue;
      if (N->hasPredecessorHelper(Use, Visited, Worklist))
@@ -7460,7 +7898,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
          dbgs() << "\nWith: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
-  WorkListRemover DeadNodes(*this);
+  WorklistRemover DeadNodes(*this);
    if (isLoad) {
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
@@ -7469,7 +7907,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
    }
  
    // Finally, since the node is now dead, remove it from the graph.
-  DAG.DeleteNode(N);
+  deleteAndRecombine(N);
  
    if (Swapped)
      std::swap(BasePtr, Offset);
@@ -7519,23 +7957,20 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
                                   SDLoc(OtherUses[i]),
                                   OtherUses[i]->getValueType(0), NewOp1, NewOp2);
      DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
-    removeFromWorkList(OtherUses[i]);
-    DAG.DeleteNode(OtherUses[i]);
+    deleteAndRecombine(OtherUses[i]);
    }
  
    // Replace the uses of Ptr with uses of the updated base value.
    DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
-  removeFromWorkList(Ptr.getNode());
-  DAG.DeleteNode(Ptr.getNode());
+  deleteAndRecombine(Ptr.getNode());
  
    return true;
  }
  
-/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
-/// add / sub of the base pointer node into a post-indexed load / store.
-/// The transformation folded the add / subtract into the new indexed
-/// load / store effectively and all of its uses are redirected to the
-/// new load / store.
+/// Try to combine a load/store with an add/sub of the base pointer node into a
+/// post-indexed load/store. The transformation folded the add/subtract into the
+/// new indexed load/store effectively and all of its uses are redirected to the
+/// new load/store.
  bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
    if (Level < AfterLegalizeDAG)
      return false;
@@ -7567,9 +8002,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
    if (Ptr.getNode()->hasOneUse())
      return false;
  
-  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
-         E = Ptr.getNode()->use_end(); I != E; ++I) {
-    SDNode *Op = *I;
+  for (SDNode *Op : Ptr.getNode()->uses()) {
      if (Op == N ||
          (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
        continue;
@@ -7595,9 +8028,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  
        // Check for #1.
        bool TryNext = false;
-      for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
-             EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
-        SDNode *Use = *II;
+      for (SDNode *Use : BasePtr.getNode()->uses()) {
          if (Use == Ptr.getNode())
            continue;
  
@@ -7605,9 +8036,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
          // transformation.
          if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
            bool RealUse = false;
-          for (SDNode::use_iterator III = Use->use_begin(),
-                 EEE = Use->use_end(); III != EEE; ++III) {
-            SDNode *UseUse = *III;
+          for (SDNode *UseUse : Use->uses()) {
              if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
                RealUse = true;
            }
@@ -7636,7 +8065,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
                dbgs() << "\nWith: ";
                Result.getNode()->dump(&DAG);
                dbgs() << '\n');
-        WorkListRemover DeadNodes(*this);
+        WorklistRemover DeadNodes(*this);
          if (isLoad) {
            DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
            DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
@@ -7645,13 +8074,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
          }
  
          // Finally, since the node is now dead, remove it from the graph.
-        DAG.DeleteNode(N);
+        deleteAndRecombine(N);
  
          // Replace the uses of Use with uses of the updated base value.
          DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                        Result.getValue(isLoad ? 1 : 0));
-        removeFromWorkList(Op);
-        DAG.DeleteNode(Op);
+        deleteAndRecombine(Op);
          return true;
        }
      }
@@ -7660,6 +8088,30 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
    return false;
  }
  
+/// \brief Return the base-pointer arithmetic from an indexed \p LD.
+SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
+  ISD::MemIndexedMode AM = LD->getAddressingMode();
+  assert(AM != ISD::UNINDEXED);
+  SDValue BP = LD->getOperand(1);
+  SDValue Inc = LD->getOperand(2);
+
+  // Some backends use TargetConstants for load offsets, but don't expect
+  // TargetConstants in general ADD nodes. We can convert these constants into
+  // regular Constants (if the constant is not opaque).
+  assert((Inc.getOpcode() != ISD::TargetConstant ||
+          !cast<ConstantSDNode>(Inc)->isOpaque()) &&
+         "Cannot split out indexing using opaque target constants");
+  if (Inc.getOpcode() == ISD::TargetConstant) {
+    ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
+    Inc = DAG.getConstant(*ConstInc->getConstantIntValue(),
+                          ConstInc->getValueType(0));
+  }
+
+  unsigned Opc =
+      (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
+  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
+}
+
  SDValue DAGCombiner::visitLOAD(SDNode *N) {
    LoadSDNode *LD  = cast<LoadSDNode>(N);
    SDValue Chain = LD->getChain();
@@ -7683,33 +8135,46 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
                dbgs() << "\nWith chain: ";
                Chain.getNode()->dump(&DAG);
                dbgs() << "\n");
-        WorkListRemover DeadNodes(*this);
+        WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
  
-        if (N->use_empty()) {
-          removeFromWorkList(N);
-          DAG.DeleteNode(N);
-        }
+        if (N->use_empty())
+          deleteAndRecombine(N);
  
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      } else {
        // Indexed loads.
        assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
-      if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
+
+      // If this load has an opaque TargetConstant offset, then we cannot split
+      // the indexing into an add/sub directly (that TargetConstant may not be
+      // valid for a different type of node, and we cannot convert an opaque
+      // target constant into a regular constant).
+      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
+                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
+
+      if (!N->hasAnyUseOfValue(0) &&
+          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
          SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+        SDValue Index;
+        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
+          Index = SplitIndexingFromLoad(LD);
+          // Try to fold the base pointer arithmetic into subsequent loads and
+          // stores.
+          AddUsersToWorklist(N);
+        } else
+          Index = DAG.getUNDEF(N->getValueType(1));
          DEBUG(dbgs() << "\nReplacing.7 ";
                N->dump(&DAG);
                dbgs() << "\nWith: ";
                Undef.getNode()->dump(&DAG);
                dbgs() << " and 2 other values\n");
-        WorkListRemover DeadNodes(*this);
+        WorklistRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
-        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
-                                      DAG.getUNDEF(N->getValueType(1)));
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
-        removeFromWorkList(N);
-        DAG.DeleteNode(N);
+        deleteAndRecombine(N);
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
@@ -7737,8 +8202,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
                                LD->getValueType(0),
                                Chain, Ptr, LD->getPointerInfo(),
                                LD->getMemoryVT(),
-                              LD->isVolatile(), LD->isNonTemporal(), Align,
-                              LD->getTBAAInfo());
+                              LD->isVolatile(), LD->isNonTemporal(),
+                              LD->isInvariant(), Align, LD->getAAInfo());
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
        }
      }
@@ -7775,7 +8240,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
                                    MVT::Other, Chain, ReplLoad.getValue(1));
  
        // Make sure the new and old chains are cleaned up.
-      AddToWorkList(Token.getNode());
+      AddToWorklist(Token.getNode());
  
        // Replace uses with load result and token factor. Don't add users
        // to work list.
@@ -7900,8 +8365,8 @@ struct LoadedSlice {
    // This is used to get some contextual information about legal types, etc.
    SelectionDAG *DAG;
  
-  LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL,
-              unsigned Shift = 0, SelectionDAG *DAG = NULL)
+  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
+              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
        : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
  
    LoadedSlice(const LoadedSlice &LS)
@@ -7997,7 +8462,7 @@ struct LoadedSlice {
  
    /// \brief Get the offset in bytes of this slice in the original chunk of
    /// bits.
-  /// \pre DAG != NULL.
+  /// \pre DAG != nullptr.
    uint64_t getOffsetFromBase() const {
      assert(DAG && "Missing context.");
      bool IsBigEndian =
@@ -8075,7 +8540,8 @@ struct LoadedSlice {
  
      // At this point, we know that we perform a cross-register-bank copy.
      // Check if it is expensive.
-    const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo();
+    const TargetRegisterInfo *TRI =
+        TLI.getTargetMachine().getSubtargetImpl()->getRegisterInfo();
      // Assume bitcasts are cheap, unless both register classes do not
      // explicitly share a common sub class.
      if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
@@ -8102,14 +8568,6 @@ struct LoadedSlice {
  };
  }
  
-/// \brief Sorts LoadedSlice according to their offset.
-struct LoadedSliceSorter {
-  bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) {
-    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
-    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
-  }
-};
-
  /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
  /// \p UsedBits looks like 0..0 1..1 0..0.
  static bool areUsedBitsDense(const APInt &UsedBits) {
@@ -8153,12 +8611,16 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
  
    // Sort the slices so that elements that are likely to be next to each
    // other in memory are next to each other in the list.
-  std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
+  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
+            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+  });
    const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
    // First (resp. Second) is the first (resp. second) potential candidate
    // to be placed in a paired load.
-  const LoadedSlice *First = NULL;
-  const LoadedSlice *Second = NULL;
+  const LoadedSlice *First = nullptr;
+  const LoadedSlice *Second = nullptr;
    for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                  // Set the beginning of the pair.
                                                             First = Second) {
@@ -8180,7 +8642,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
      unsigned RequiredAlignment = 0;
      if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
        // move to the next pair, this type is hopeless.
-      Second = NULL;
+      Second = nullptr;
        continue;
      }
      // Check if we meet the alignment requirement.
@@ -8194,7 +8656,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
      assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
      --GlobalLSCost.Loads;
      // Move to the next pair.
-    Second = NULL;
+    Second = nullptr;
    }
  }
  
@@ -8338,14 +8800,14 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
    }
  
    SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
-                              &ArgChains[0], ArgChains.size());
+                              ArgChains);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
    return true;
  }
  
-/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
-/// load is having specific bytes cleared out.  If so, return the byte size
-/// being masked out and the shift amount.
+/// Check to see if V is (and load (ptr), imm), where the load is having
+/// specific bytes cleared out.  If so, return the byte size being masked out
+/// and the shift amount.
  static std::pair<unsigned, unsigned>
  CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
    std::pair<unsigned, unsigned> Result(0, 0);
@@ -8418,9 +8880,9 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  }
  
  
-/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
-/// provides a value as specified by MaskInfo.  If so, replace the specified
-/// store with a narrower store of truncated IVal.
+/// Check to see if IVal is something that provides a value as specified by
+/// MaskInfo. If so, replace the specified store with a narrower store of
+/// truncated IVal.
  static SDNode *
  ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                  SDValue IVal, StoreSDNode *St,
@@ -8433,14 +8895,14 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
    // that uses this.  If not, this is not a replacement.
    APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                    ByteShift*8, (ByteShift+NumBytes)*8);
-  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
  
    // Check that it is legal on the target to do this.  It is legal if the new
    // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
    // legalization.
    MVT VT = MVT::getIntegerVT(NumBytes*8);
    if (!DC->isTypeLegal(VT))
-    return 0;
+    return nullptr;
  
    // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
    // shifted by ByteShift and truncated down to NumBytes.
@@ -8475,10 +8937,10 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
  }
  
  
-/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
-/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
-/// of the loaded bits, try narrowing the load and store if it would end up
-/// being a win for performance or code size.
+/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
+/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
+/// narrowing the load and store if it would end up being a win for performance
+/// or code size.
  SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
    StoreSDNode *ST  = cast<StoreSDNode>(N);
    if (ST->isVolatile())
@@ -8578,7 +9040,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
                                    LD->getPointerInfo().getWithOffset(PtrOff),
                                    LD->isVolatile(), LD->isNonTemporal(),
                                    LD->isInvariant(), NewAlign,
-                                  LD->getTBAAInfo());
+                                  LD->getAAInfo());
        SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                     DAG.getConstant(NewImm, NewVT));
        SDValue NewST = DAG.getStore(Chain, SDLoc(N),
@@ -8586,10 +9048,10 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
                                     ST->getPointerInfo().getWithOffset(PtrOff),
                                     false, false, NewAlign);
  
-      AddToWorkList(NewPtr.getNode());
-      AddToWorkList(NewLD.getNode());
-      AddToWorkList(NewVal.getNode());
-      WorkListRemover DeadNodes(*this);
+      AddToWorklist(NewPtr.getNode());
+      AddToWorklist(NewLD.getNode());
+      AddToWorklist(NewVal.getNode());
+      WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
        ++OpsNarrowed;
        return NewST;
@@ -8599,10 +9061,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
    return SDValue();
  }
  
-/// TransformFPLoadStorePair - For a given floating point load / store pair,
-/// if the load value isn't used by any other operations, then consider
-/// transforming the pair to integer load / store operations if the target
-/// deems the transformation profitable.
+/// For a given floating point load / store pair, if the load value isn't used
+/// by any other operations, then consider transforming the pair to integer
+/// load / store operations if the target deems the transformation profitable.
  SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
    StoreSDNode *ST  = cast<StoreSDNode>(N);
    SDValue Chain = ST->getChain();
@@ -8644,9 +9105,9 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
                                   ST->getPointerInfo(),
                                   false, false, STAlign);
  
-    AddToWorkList(NewLD.getNode());
-    AddToWorkList(NewST.getNode());
-    WorkListRemover DeadNodes(*this);
+    AddToWorklist(NewLD.getNode());
+    AddToWorklist(NewST.getNode());
+    WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
      ++LdStFP2Int;
      return NewST;
@@ -8757,14 +9218,6 @@ struct MemOpLink {
    unsigned SequenceNum;
  };
  
-/// Sorts store nodes in a link according to their offset from a shared
-// base ptr.
-struct ConsecutiveMemoryChainSorter {
-  bool operator()(MemOpLink LHS, MemOpLink RHS) {
-    return LHS.OffsetFromBase < RHS.OffsetFromBase;
-  }
-};
-
  bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
    EVT MemVT = St->getMemoryVT();
    int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
@@ -8784,7 +9237,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
      return false;
  
    // Only look at ends of store sequences.
-  SDValue Chain = SDValue(St, 1);
+  SDValue Chain = SDValue(St, 0);
    if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
      return false;
  
@@ -8815,7 +9268,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
    StoreSDNode *Index = St;
    while (Index) {
      // If the chain has more than one use, then we can't reorder the mem ops.
-    if (Index != St && !SDValue(Index, 1)->hasOneUse())
+    if (Index != St && !SDValue(Index, 0)->hasOneUse())
        break;
  
      // Find the base pointer and offset for this memory node.
@@ -8862,7 +9315,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
          break;
        } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
          if (Ldn->isVolatile()) {
-          Index = NULL;
+          Index = nullptr;
            break;
          }
  
@@ -8871,7 +9324,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
          NextInChain = Ldn->getChain().getNode();
          continue;
        } else {
-        Index = NULL;
+        Index = nullptr;
          break;
        }
      }
@@ -8883,7 +9336,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
  
    // Sort the memory operands according to their distance from the base pointer.
    std::sort(StoreNodes.begin(), StoreNodes.end(),
-            ConsecutiveMemoryChainSorter());
+            [](MemOpLink LHS, MemOpLink RHS) {
+    return LHS.OffsetFromBase < RHS.OffsetFromBase ||
+           (LHS.OffsetFromBase == RHS.OffsetFromBase &&
+            LHS.SequenceNum > RHS.SequenceNum);
+  });
  
    // Scan the memory operations on the chain and find the first non-consecutive
    // store memory address.
@@ -9042,8 +9499,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
        // Since we know that St is redundant, just iterate.
        while (!St->use_empty())
          DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
-      removeFromWorkList(St);
-      DAG.DeleteNode(St);
+      deleteAndRecombine(St);
      }
  
      return true;
@@ -9102,6 +9558,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
    if (LoadNodes.size() < 2)
      return false;
  
+  // If we have load/store pair instructions and we only have two values,
+  // don't bother.
+  unsigned RequiredAlignment;
+  if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+      St->getAlignment() >= RequiredAlignment)
+    return false;
+
    // Scan the memory operations on the chain and find the first non-consecutive
    // load memory address. These variables hold the index in the store node
    // array.
@@ -9217,8 +9680,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
        continue;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
-    removeFromWorkList(St);
-    DAG.DeleteNode(St);
+    deleteAndRecombine(St);
    }
  
    return true;
@@ -9244,7 +9706,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
                            Ptr, ST->getPointerInfo(), ST->isVolatile(),
                            ST->isNonTemporal(), OrigAlign,
-                          ST->getTBAAInfo());
+                          ST->getAAInfo());
    }
  
    // Turn 'store undef, Ptr' -> nothing.
@@ -9298,19 +9760,19 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
            unsigned Alignment = ST->getAlignment();
            bool isVolatile = ST->isVolatile();
            bool isNonTemporal = ST->isNonTemporal();
-          const MDNode *TBAAInfo = ST->getTBAAInfo();
+          AAMDNodes AAInfo = ST->getAAInfo();
  
            SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
                                       Ptr, ST->getPointerInfo(),
                                       isVolatile, isNonTemporal,
-                                     ST->getAlignment(), TBAAInfo);
+                                     ST->getAlignment(), AAInfo);
            Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
                              DAG.getConstant(4, Ptr.getValueType()));
            Alignment = MinAlign(Alignment, 4U);
            SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
                                       Ptr, ST->getPointerInfo().getWithOffset(4),
                                       isVolatile, isNonTemporal,
-                                     Alignment, TBAAInfo);
+                                     Alignment, AAInfo);
            return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
                               St0, St1);
          }
@@ -9327,7 +9789,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
          return DAG.getTruncStore(Chain, SDLoc(N), Value,
                                   Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                   ST->isVolatile(), ST->isNonTemporal(), Align,
-                                 ST->getTBAAInfo());
+                                 ST->getAAInfo());
      }
    }
  
@@ -9366,7 +9828,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
                                    MVT::Other, Chain, ReplStore);
  
        // Make sure the new and old chains are cleaned up.
-      AddToWorkList(Token.getNode());
+      AddToWorklist(Token.getNode());
  
        // Don't add users to work list.
        return CombineTo(N, Token, false);
@@ -9388,7 +9850,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
                        APInt::getLowBitsSet(
                          Value.getValueType().getScalarType().getSizeInBits(),
                          ST->getMemoryVT().getScalarType().getSizeInBits()));
-    AddToWorkList(Value.getNode());
+    AddToWorklist(Value.getNode());
      if (Shorter.getNode())
        return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                                 Ptr, ST->getMemoryVT(), ST->getMemOperand());
@@ -9415,6 +9877,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
      }
    }
  
+  // If this is a store followed by a store with the same value to the same
+  // location, then the store is dead/noop.
+  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
+    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
+        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
+        ST1->isUnindexed() && !ST1->isVolatile()) {
+      // The store is dead, remove it.
+      return Chain;
+    }
+  }
+
    // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
    // truncating store.  We can do this even if this is already a truncstore.
    if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
@@ -9467,6 +9940,27 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
  
+  // Canonicalize insert_vector_elt dag nodes.
+  // Example:
+  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
+  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
+  //
+  // Do this only if the child insert_vector node has one use; also
+  // do this only if indices are both constants and Idx1 < Idx0.
+  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
+      && isa<ConstantSDNode>(InVec.getOperand(2))) {
+    unsigned OtherElt =
+      cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
+    if (Elt < OtherElt) {
+      // Swap nodes.
+      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+                                  InVec.getOperand(0), InVal, EltNo);
+      AddToWorklist(NewOp.getNode());
+      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
+                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
+    }
+  }
+
    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
    // vector elements.
@@ -9496,8 +9990,87 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
    }
  
    // Return the new vector
-  return DAG.getNode(ISD::BUILD_VECTOR, dl,
-                     VT, &Ops[0], Ops.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+}
+
+/// Replace EVE, an extract_vector_elt of the vector load OriginalLoad, with a
+/// scalar load of just the element selected by EltNo. InVecVT is the vector
+/// type being indexed. Returns an empty SDValue if the narrowed load would
+/// need more alignment than the original or is not legal for the target.
+SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+  EVT ResultVT = EVE->getValueType(0);
+  EVT VecEltVT = InVecVT.getVectorElementType();
+  unsigned Align = OriginalLoad->getAlignment();
+  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+      VecEltVT.getTypeForEVT(*DAG.getContext()));
+
+  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+    return SDValue();
+
+  Align = NewAlign;
+
+  // Vector elements are laid out in memory in index order on both little- and
+  // big-endian targets, so the byte offset is always EltNo * element size.
+  SDValue NewPtr = OriginalLoad->getBasePtr();
+  SDValue Offset;
+  EVT PtrType = NewPtr.getValueType();
+  MachinePointerInfo MPI;
+  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+    int Elt = ConstEltNo->getZExtValue();
+    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+    Offset = DAG.getConstant(PtrOff, PtrType);
+    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+  } else {
+    // The index may not have pointer type; convert it before the address math.
+    Offset = DAG.getZExtOrTrunc(EltNo, SDLoc(EVE), PtrType);
+    Offset = DAG.getNode(ISD::MUL, SDLoc(EVE), PtrType, Offset,
+                         DAG.getConstant(VecEltVT.getStoreSize(), PtrType));
+    MPI = OriginalLoad->getPointerInfo();
+  }
+  NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+
+  // The replacement we need to do here is a little tricky: we need to
+  // replace an extractelement of a load with a load.
+  // Use ReplaceAllUsesOfValuesWith to do the replacement.
+  // Note that this replacement assumes that the extractelement is the only
+  // use of the load; that's okay because we don't want to perform this
+  // transformation in other cases anyway.
+  SDValue Load, Chain;
+  if (ResultVT.bitsGT(VecEltVT)) {
+    // If the result type of vextract is wider than the load, then issue an
+    // extending load instead.
+    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
+                                   ? ISD::ZEXTLOAD
+                                   : ISD::EXTLOAD;
+    Load = DAG.getExtLoad(
+        ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
+        VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
+        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
+    Chain = Load.getValue(1);
+  } else {
+    Load = DAG.getLoad(
+        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
+        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
+        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
+    Chain = Load.getValue(1);
+    if (ResultVT.bitsLT(VecEltVT))
+      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+    else
+      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
+  }
+  WorklistRemover DeadNodes(*this);
+  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
+  SDValue To[] = { Load, Chain };
+  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+  // Since we're explicitly calling ReplaceAllUses, add the new node to the
+  // worklist explicitly as well.
+  AddToWorklist(Load.getNode());
+  AddUsersToWorklist(Load.getNode()); // Add users too
+  // Make sure to revisit this node to clean it up; it will usually be dead.
+  AddToWorklist(EVE);
+  ++OpsNarrowed;
+  return SDValue(EVE, 0);
  }
  
  SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
@@ -9525,9 +10098,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
    // We only perform this optimization before the op legalization phase because
    // we may introduce new vector instructions which are not backed by TD
    // patterns. For example on AVX, extracting elements from a wide vector
-  // without using extract_subvector.
+  // without using extract_subvector. However, if we can find an underlying
+  // scalar value, then we can always use that.
    if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
-      && ConstEltNo && !LegalOperations) {
+      && ConstEltNo) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      int NumElem = VT.getVectorNumElements();
      ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
@@ -9539,16 +10113,65 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
        return DAG.getUNDEF(NVT);
  
      // Select the right vector half to extract from.
+    SDValue SVInVec;
      if (OrigElt < NumElem) {
-      InVec = InVec->getOperand(0);
+      SVInVec = InVec->getOperand(0);
      } else {
-      InVec = InVec->getOperand(1);
+      SVInVec = InVec->getOperand(1);
        OrigElt -= NumElem;
      }
  
-    EVT IndexTy = TLI.getVectorIdxTy();
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
-                       InVec, DAG.getConstant(OrigElt, IndexTy));
+    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
+      SDValue InOp = SVInVec.getOperand(OrigElt);
+      if (InOp.getValueType() != NVT) {
+        assert(InOp.getValueType().isInteger() && NVT.isInteger());
+        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
+      }
+
+      return InOp;
+    }
+
+    // FIXME: We should handle recursing on other vector shuffles and
+    // scalar_to_vector here as well.
+
+    if (!LegalOperations) {
+      EVT IndexTy = TLI.getVectorIdxTy();
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
+                         SVInVec, DAG.getConstant(OrigElt, IndexTy));
+    }
+  }
+
+  bool BCNumEltsChanged = false;
+  EVT ExtVT = VT.getVectorElementType();
+  EVT LVT = ExtVT;
+
+  // If the result of load has to be truncated, then it's not necessarily
+  // profitable.
+  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+    return SDValue();
+
+  if (InVec.getOpcode() == ISD::BITCAST) {
+    // Don't duplicate a load with other uses.
+    if (!InVec.hasOneUse())
+      return SDValue();
+
+    EVT BCVT = InVec.getOperand(0).getValueType();
+    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+      return SDValue();
+    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+      BCNumEltsChanged = true;
+    InVec = InVec.getOperand(0);
+    ExtVT = BCVT.getVectorElementType();
+  }
+
+  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
+  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
+      ISD::isNormalLoad(InVec.getNode()) &&
+      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
+    SDValue Index = N->getOperand(1);
+    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
+      return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
+                                                           OrigLoad);
    }
  
    // Perform only after legalization to ensure build_vector / vector_shuffle
@@ -9561,33 +10184,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  
    if (ConstEltNo) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
-    bool NewLoad = false;
-    bool BCNumEltsChanged = false;
-    EVT ExtVT = VT.getVectorElementType();
-    EVT LVT = ExtVT;
-
-    // If the result of load has to be truncated, then it's not necessarily
-    // profitable.
-    if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
-      return SDValue();
-
-    if (InVec.getOpcode() == ISD::BITCAST) {
-      // Don't duplicate a load with other uses.
-      if (!InVec.hasOneUse())
-        return SDValue();
-
-      EVT BCVT = InVec.getOperand(0).getValueType();
-      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
-        return SDValue();
-      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
-        BCNumEltsChanged = true;
-      InVec = InVec.getOperand(0);
-      ExtVT = BCVT.getVectorElementType();
-      NewLoad = true;
-    }
  
-    LoadSDNode *LN0 = NULL;
-    const ShuffleVectorSDNode *SVN = NULL;
+    LoadSDNode *LN0 = nullptr;
+    const ShuffleVectorSDNode *SVN = nullptr;
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
      } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
@@ -9627,6 +10226,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
        if (ISD::isNormalLoad(InVec.getNode())) {
          LN0 = cast<LoadSDNode>(InVec);
          Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+        EltNo = DAG.getConstant(Elt, EltNo.getValueType());
        }
      }
  
@@ -9639,72 +10239,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
      if (Elt == -1)
        return DAG.getUNDEF(LVT);
  
-    unsigned Align = LN0->getAlignment();
-    if (NewLoad) {
-      // Check the resultant load doesn't need a higher alignment than the
-      // original load.
-      unsigned NewAlign =
-        TLI.getDataLayout()
-            ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
-
-      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
-        return SDValue();
-
-      Align = NewAlign;
-    }
-
-    SDValue NewPtr = LN0->getBasePtr();
-    unsigned PtrOff = 0;
-
-    if (Elt) {
-      PtrOff = LVT.getSizeInBits() * Elt / 8;
-      EVT PtrType = NewPtr.getValueType();
-      if (TLI.isBigEndian())
-        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
-      NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
-                           DAG.getConstant(PtrOff, PtrType));
-    }
-
-    // The replacement we need to do here is a little tricky: we need to
-    // replace an extractelement of a load with a load.
-    // Use ReplaceAllUsesOfValuesWith to do the replacement.
-    // Note that this replacement assumes that the extractvalue is the only
-    // use of the load; that's okay because we don't want to perform this
-    // transformation in other cases anyway.
-    SDValue Load;
-    SDValue Chain;
-    if (NVT.bitsGT(LVT)) {
-      // If the result type of vextract is wider than the load, then issue an
-      // extending load instead.
-      ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
-        ? ISD::ZEXTLOAD : ISD::EXTLOAD;
-      Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
-                            NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
-                            LVT, LN0->isVolatile(), LN0->isNonTemporal(),
-                            Align, LN0->getTBAAInfo());
-      Chain = Load.getValue(1);
-    } else {
-      Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
-                         LN0->getPointerInfo().getWithOffset(PtrOff),
-                         LN0->isVolatile(), LN0->isNonTemporal(),
-                         LN0->isInvariant(), Align, LN0->getTBAAInfo());
-      Chain = Load.getValue(1);
-      if (NVT.bitsLT(LVT))
-        Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
-      else
-        Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
-    }
-    WorkListRemover DeadNodes(*this);
-    SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
-    SDValue To[] = { Load, Chain };
-    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
-    // Since we're explcitly calling ReplaceAllUses, add the new node to the
-    // worklist explicitly as well.
-    AddToWorkList(Load.getNode());
-    AddUsersToWorkList(Load.getNode()); // Add users too
-    // Make sure to revisit this node to clean it up; it will usually be dead.
-    AddToWorkList(N);
-    return SDValue(N, 0);
+    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
    }
  
    return SDValue();
@@ -9812,10 +10347,10 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
    if (!isTypeLegal(VecVT)) return SDValue();
  
    // Make the new BUILD_VECTOR.
-  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
  
    // The new BUILD_VECTOR node has the potential to be further optimized.
-  AddToWorkList(BV.getNode());
+  AddToWorklist(BV.getNode());
    // Bitcast to the desired type.
    return DAG.getNode(ISD::BITCAST, dl, VT, BV);
  }
@@ -9880,9 +10415,8 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
      else
        Opnds.push_back(In.getOperand(0));
    }
-  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
-                           &Opnds[0], Opnds.size());
-  AddToWorkList(BV.getNode());
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
+  AddToWorklist(BV.getNode());
  
    return DAG.getNode(Opcode, dl, VT, BV);
  }
@@ -9908,9 +10442,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
    // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
    // at most two distinct vectors, turn this into a shuffle node.
  
+  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
+  if (!isTypeLegal(VT))
+    return SDValue();
+
    // May only combine to shuffle after legalize if shuffle is legal.
-  if (LegalOperations &&
-      !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
      return SDValue();
  
    SDValue VecIn1, VecIn2;
@@ -9922,7 +10459,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
      // constant index, bail out.
      if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
          !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
-      VecIn1 = VecIn2 = SDValue(0, 0);
+      VecIn1 = VecIn2 = SDValue(nullptr, 0);
        break;
      }
  
@@ -9931,18 +10468,18 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
      if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
        continue;
  
-    if (VecIn1.getNode() == 0) {
+    if (!VecIn1.getNode()) {
        VecIn1 = ExtractedFromVec;
-    } else if (VecIn2.getNode() == 0) {
+    } else if (!VecIn2.getNode()) {
        VecIn2 = ExtractedFromVec;
      } else {
        // Too many inputs.
-      VecIn1 = VecIn2 = SDValue(0, 0);
+      VecIn1 = VecIn2 = SDValue(nullptr, 0);
        break;
      }
    }
  
-    // If everything is good, we can make a shuffle operation.
+  // If everything is good, we can make a shuffle operation.
    if (VecIn1.getNode()) {
      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != NumInScalars; ++i) {
@@ -9972,7 +10509,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
      // Attempt to transform a single input vector to the correct type.
      if ((VT != VecIn1.getValueType())) {
        // We don't support shuffeling between TWO values of different types.
-      if (VecIn2.getNode() != 0)
+      if (VecIn2.getNode())
          return SDValue();
  
        // We only support widening of vectors which are half the size of the
@@ -10000,10 +10537,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
          VecIn1.getValueType() != VT)
            return SDValue();
  
-    // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
-    if (!isTypeLegal(VT))
-      return SDValue();
-
      // Return the new VECTOR_SHUFFLE node.
      SDValue Ops[2];
      Ops[0] = VecIn1;
@@ -10055,6 +10588,39 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
      }
    }
  
+  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
+  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
+  if (N->getNumOperands() == 2 &&
+      N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+      N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
+    EVT VT = N->getValueType(0);
+    SDValue N0 = N->getOperand(0);
+    SDValue N1 = N->getOperand(1);
+    SmallVector<SDValue, 8> Opnds;
+    unsigned BuildVecNumElts =  N0.getNumOperands();
+
+    EVT SclTy0 = N0.getOperand(0)->getValueType(0);
+    EVT SclTy1 = N1.getOperand(0)->getValueType(0);
+    if (SclTy0.isFloatingPoint()) {
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(N0.getOperand(i));
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(N1.getOperand(i));
+    } else {
+      // If the BUILD_VECTORs are built from integers, they may have different
+      // operand types. Get the smaller type and truncate all operands to it.
+      EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+                        N0.getOperand(i)));
+      for (unsigned i = 0; i != BuildVecNumElts; ++i)
+        Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+                        N1.getOperand(i)));
+    }
+
+    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
+  }
+
    // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
    // nodes often generate nop CONCAT_VECTOR nodes.
    // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
@@ -10161,6 +10727,92 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
    return SDValue();
  }
  
+/// Replace the pieces of a shuffle operand that are never read with undef.
+///
+/// Looks through CONCAT_VECTORS and INSERT_SUBVECTOR nodes (recursively) and
+/// substitutes undef for every concat operand or inserted subvector none of
+/// whose elements is marked in \p UsedElements. A value with any other opcode
+/// is returned untouched.
+///
+/// \param UsedElements One bit per element of \p V, set for the elements that
+///   are read. Modified in place: the INSERT_SUBVECTOR case clears the bits
+///   covered by the inserted subvector before recursing into the base vector.
+/// \param V   The vector value to simplify.
+/// \param DAG Used to create the undef and replacement nodes.
+/// \returns \p V itself when nothing was simplified (callers compare against
+///   their input to detect this), otherwise a rebuilt node of the same type.
+static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
+                                                 SDValue V, SelectionDAG &DAG) {
+  SDLoc DL(V);
+  EVT VT = V.getValueType();
+
+  switch (V.getOpcode()) {
+  default:
+    return V;
+
+  case ISD::CONCAT_VECTORS: {
+    // Every operand is sized from operand 0's type, so operand i is taken to
+    // cover elements [i * OpSize, (i + 1) * OpSize) of V.
+    EVT OpVT = V->getOperand(0).getValueType();
+    int OpSize = OpVT.getVectorNumElements();
+    SmallBitVector OpUsedElements(OpSize, false);
+    bool FoundSimplification = false;
+    SmallVector<SDValue, 4> NewOps;
+    NewOps.reserve(V->getNumOperands());
+    for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
+      SDValue Op = V->getOperand(i);
+      // Project the used-element bits of V onto this operand.
+      bool OpUsed = false;
+      for (int j = 0; j < OpSize; ++j)
+        if (UsedElements[i * OpSize + j]) {
+          OpUsedElements[j] = true;
+          OpUsed = true;
+        }
+      NewOps.push_back(
+          OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
+                 : DAG.getUNDEF(OpVT));
+      // Only a replaced operand counts as a simplification. Testing for
+      // equality here would throw the result away when every operand changed
+      // and rebuild the concat when none did.
+      FoundSimplification |= Op != NewOps.back();
+      // The scratch bit vector is shared by all operands; clear it before
+      // looking at the next one.
+      OpUsedElements.reset();
+    }
+    if (FoundSimplification)
+      V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
+    return V;
+  }
+
+  case ISD::INSERT_SUBVECTOR: {
+    SDValue BaseV = V->getOperand(0);
+    SDValue SubV = V->getOperand(1);
+    // Only a constant insertion index tells us which elements are covered.
+    auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
+    if (!IdxN)
+      return V;
+
+    int SubSize = SubV.getValueType().getVectorNumElements();
+    int Idx = IdxN->getZExtValue();
+    bool SubVectorUsed = false;
+    SmallBitVector SubUsedElements(SubSize, false);
+    // Move the used bits that fall inside the inserted range over to the
+    // subvector: those elements come from SubV, so the base vector's elements
+    // at the same positions are not read.
+    for (int i = 0; i < SubSize; ++i)
+      if (UsedElements[i + Idx]) {
+        SubVectorUsed = true;
+        SubUsedElements[i] = true;
+        UsedElements[i + Idx] = false;
+      }
+
+    // Now recurse on both the base and sub vectors.
+    SDValue SimplifiedSubV =
+        SubVectorUsed
+            ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
+            : DAG.getUNDEF(SubV.getValueType());
+    SDValue SimplifiedBaseV =
+        simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
+    if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
+      V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+                      SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
+    return V;
+  }
+  }
+}
+
+/// Simplify the two inputs of a shuffle using the shuffle's own mask.
+///
+/// Records which elements of \p N0 and \p N1 the mask of \p SVN selects, then
+/// lets simplifyShuffleOperandRecursively rewrite each input. Returns a new
+/// shuffle over the rewritten inputs with the same mask, or an empty SDValue
+/// when neither input changed.
+static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
+                                       SDValue N1, SelectionDAG &DAG) {
+  EVT VT = SVN->getValueType(0);
+  int NumElts = VT.getVectorNumElements();
+
+  // One bit per element of each input, set when the mask selects it.
+  SmallBitVector LHSUsed(NumElts, false);
+  SmallBitVector RHSUsed(NumElts, false);
+  for (int MaskElt : SVN->getMask()) {
+    // Negative mask entries select no input element.
+    if (MaskElt < 0)
+      continue;
+    // Indices below NumElts address N0, the rest address N1.
+    if (MaskElt < NumElts)
+      LHSUsed[MaskElt] = true;
+    else
+      RHSUsed[MaskElt - NumElts] = true;
+  }
+
+  SDValue NewN0 = simplifyShuffleOperandRecursively(LHSUsed, N0, DAG);
+  SDValue NewN1 = simplifyShuffleOperandRecursively(RHSUsed, N1, DAG);
+  bool Changed = NewN0 != N0 || NewN1 != N1;
+  if (!Changed)
+    return SDValue();
+
+  return DAG.getVectorShuffle(VT, SDLoc(SVN), NewN0, NewN1, SVN->getMask());
+}
+
  // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
  static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
    EVT VT = N->getValueType(0);
@@ -10209,8 +10861,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
      }
    }
  
-  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(),
-                     Ops.size());
+  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
  }
  
  SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
@@ -10314,6 +10965,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
      }
    }
  
+  // There are various patterns used to build up a vector from smaller vectors,
+  // subvectors, or elements. Scan chains of these and replace unused insertions
+  // or components with undef.
+  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
+    return S;
+
    if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
        Level < AfterLegalizeVectorOps &&
        (N1.getOpcode() == ISD::UNDEF ||
@@ -10326,22 +10983,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
    }
  
    // If this shuffle node is simply a swizzle of another shuffle node,
-  // and it reverses the swizzle of the previous shuffle then we can
-  // optimize shuffle(shuffle(x, undef), undef) -> x.
+  // then try to simplify it.
    if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
        N1.getOpcode() == ISD::UNDEF) {
  
      ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
  
-    // Shuffle nodes can only reverse shuffles with a single non-undef value.
-    if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
-      return SDValue();
-
      // The incoming shuffle must be of the same type as the result of the
      // current shuffle.
      assert(OtherSV->getOperand(0).getValueType() == VT &&
             "Shuffle types don't match");
  
+    SmallVector<int, 4> Mask;
+    // Compute the combined shuffle mask.
      for (unsigned i = 0; i != NumElts; ++i) {
        int Idx = SVN->getMaskElt(i);
        assert(Idx < (int)NumElts && "Index references undef operand");
@@ -10349,20 +11003,211 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
        // shuffle. Adopt the incoming index.
        if (Idx >= 0)
          Idx = OtherSV->getMaskElt(Idx);
+      Mask.push_back(Idx);
+    }
  
-      // The combined shuffle must map each index to itself.
-      if (Idx >= 0 && (unsigned)Idx != i)
+    // Check if all indices in Mask are Undef. If so, propagate Undef.
+    bool isUndefMask = true;
+    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+      isUndefMask &= Mask[i] < 0;
+
+    if (isUndefMask)
+      return DAG.getUNDEF(VT);
+    
+    bool CommuteOperands = false;
+    if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
+      // To be valid, the combine shuffle mask should only reference elements
+      // from one of the two vectors in input to the inner shufflevector.
+      bool IsValidMask = true;
+      for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+        // See if the combined mask only reference undefs or elements coming
+        // from the first shufflevector operand.
+        IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
+
+      if (!IsValidMask) {
+        IsValidMask = true;
+        for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+          // Check that all the elements come from the second shuffle operand.
+          IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
+        CommuteOperands = IsValidMask;
+      }
+
+      // Early exit if the combined shuffle mask is not valid.
+      if (!IsValidMask)
          return SDValue();
      }
  
-    return OtherSV->getOperand(0);
+    // See if this pair of shuffles can be safely folded according to either
+    // of the following rules:
+    //   shuffle(shuffle(x, y), undef) -> x
+    //   shuffle(shuffle(x, undef), undef) -> x
+    //   shuffle(shuffle(x, y), undef) -> y
+    bool IsIdentityMask = true;
+    unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
+    for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
+      // Skip Undefs.
+      if (Mask[i] < 0)
+        continue;
+
+      // The combined shuffle must map each index to itself.
+      IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
+    }
+    
+    if (IsIdentityMask) {
+      if (CommuteOperands)
+        // optimize shuffle(shuffle(x, y), undef) -> y.
+        return OtherSV->getOperand(1);
+      
+      // optimize shuffle(shuffle(x, undef), undef) -> x
+      // optimize shuffle(shuffle(x, y), undef) -> x
+      return OtherSV->getOperand(0);
+    }
+
+    // It may still be beneficial to combine the two shuffles if the
+    // resulting shuffle is legal.
+    if (TLI.isTypeLegal(VT)) {
+      if (!CommuteOperands) {
+        if (TLI.isShuffleMaskLegal(Mask, VT))
+          // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
+          // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
+          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
+                                      &Mask[0]);
+      } else {
+        // Compute the commuted shuffle mask.
+        for (unsigned i = 0; i != NumElts; ++i) {
+          int idx = Mask[i];
+          if (idx < 0)
+            continue;
+          else if (idx < (int)NumElts)
+            Mask[i] = idx + NumElts;
+          else
+            Mask[i] = idx - NumElts;
+        }
+
+        if (TLI.isShuffleMaskLegal(Mask, VT))
+          //   shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3)
+          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(1), N1,
+                                      &Mask[0]);
+      }
+    }
+  }
+
+  // Canonicalize shuffles according to rules:
+  //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
+  //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
+  //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF &&
+      N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+      TLI.isTypeLegal(VT)) {
+    // The incoming shuffle must be of the same type as the result of the
+    // current shuffle.
+    assert(N1->getOperand(0).getValueType() == VT &&
+           "Shuffle types don't match");
+
+    SDValue SV0 = N1->getOperand(0);
+    SDValue SV1 = N1->getOperand(1);
+    bool HasSameOp0 = N0 == SV0;
+    bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
+    if (HasSameOp0 || IsSV1Undef || N0 == SV1)
+      // Commute the operands of this shuffle so that next rule
+      // will trigger.
+      return DAG.getCommutedVectorShuffle(*SVN);
+  }
+
+  // Try to fold according to rules:
+  //   shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2)
+  //   shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2)
+  //   shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
+  //   shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+  // Don't try to fold shuffles with illegal type.
+  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+      N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) {
+    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+    // The incoming shuffle must be of the same type as the result of the
+    // current shuffle.
+    assert(OtherSV->getOperand(0).getValueType() == VT &&
+           "Shuffle types don't match");
+
+    SDValue SV0 = OtherSV->getOperand(0);
+    SDValue SV1 = OtherSV->getOperand(1);
+    bool HasSameOp0 = N1 == SV0;
+    bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
+    if (!HasSameOp0 && !IsSV1Undef && N1 != SV1)
+      // Early exit.
+      return SDValue();
+
+    SmallVector<int, 4> Mask;
+    // Compute the combined shuffle mask for a shuffle with SV0 as the first
+    // operand, and SV1 as the second operand.
+    for (unsigned i = 0; i != NumElts; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      if (Idx < 0) {
+        // Propagate Undef.
+        Mask.push_back(Idx);
+        continue;
+      }
+
+      if (Idx < (int)NumElts) {
+        Idx = OtherSV->getMaskElt(Idx);
+        if (IsSV1Undef && Idx >= (int) NumElts)
+          Idx = -1;  // Propagate Undef.
+      } else
+        Idx = HasSameOp0 ? Idx - NumElts : Idx;
+
+      Mask.push_back(Idx);
+    }
+
+    // Check if all indices in Mask are Undef. If so, propagate Undef.
+    bool isUndefMask = true;
+    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+      isUndefMask &= Mask[i] < 0;
+
+    if (isUndefMask)
+      return DAG.getUNDEF(VT);
+
+    // Avoid introducing shuffles with illegal mask.
+    if (TLI.isShuffleMaskLegal(Mask, VT)) {
+      if (IsSV1Undef)
+        //   shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
+        //   shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+        return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]);
+      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
+    }
+  }
+
+  return SDValue();
+}
+
+/// Combine an INSERT_SUBVECTOR node.
+///
+/// The only fold performed here: when the vector being inserted into is a
+/// two-operand CONCAT_VECTORS and the insertion, at a constant index, replaces
+/// exactly one of its halves, build the resulting CONCAT_VECTORS directly.
+/// Returns an empty SDValue when the fold does not apply.
+SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+
+  // If the input vector is a concatenation, and the insert replaces
+  // one of the halves, we can optimize into a single concat_vectors.
+  // The inserted value must have the type of the concat operands: a narrower
+  // subvector overwrites only part of a half, and substituting it for the
+  // whole half would build a concat whose operands do not add up to VT.
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+      N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant &&
+      N0.getOperand(0).getValueType() == N1.getValueType()) {
+    APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
+    EVT VT = N->getValueType(0);
+
+    // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+    // (concat_vectors Z, Y)
+    if (InsIdx == 0)
+      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+                         N1, N0.getOperand(1));
+
+    // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+    // (concat_vectors X, Z)
+    if (InsIdx == VT.getVectorNumElements()/2)
+      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+                         N0.getOperand(0), N1);
    }
  
    return SDValue();
  }
  
-/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
-/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// Returns a vector_shuffle if it is able to transform an AND into a
+/// vector_shuffle with the destination vector and a zero vector.
  /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
  ///      vector_shuffle V, Zero, <0, 4, 2, 4>
  SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
@@ -10398,8 +11243,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
        EVT EltVT = RVT.getVectorElementType();
        SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
                                       DAG.getConstant(0, EltVT));
-      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
-                                 RVT, &ZeroOps[0], ZeroOps.size());
+      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
        LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
        SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
        return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
@@ -10409,7 +11253,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
    return SDValue();
  }
  
-/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+/// Visit a binary vector operation, like ADD.
  SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
    assert(N->getValueType(0).isVector() &&
           "SimplifyVBinOp only works on vectors!");
@@ -10464,18 +11308,38 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
            FoldOp.getOpcode() != ISD::ConstantFP)
          break;
        Ops.push_back(FoldOp);
-      AddToWorkList(FoldOp.getNode());
+      AddToWorklist(FoldOp.getNode());
      }
  
      if (Ops.size() == LHS.getNumOperands())
-      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
-                         LHS.getValueType(), &Ops[0], Ops.size());
+      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
+  }
+
+  // Type legalization might introduce new shuffles in the DAG.
+  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
+  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
+  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
+      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
+      LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
+      RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
+    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
+    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
+
+    if (SVN0->getMask().equals(SVN1->getMask())) {
+      EVT VT = N->getValueType(0);
+      SDValue UndefVector = LHS.getOperand(1);
+      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+                                     LHS.getOperand(0), RHS.getOperand(0));
+      AddUsersToWorklist(N);
+      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
+                                  &SVN0->getMask()[0]);
+    }
    }
  
    return SDValue();
  }
  
-/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG.
+/// Visit a unary vector operation, like FABS/FNEG.
  SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
    assert(N->getValueType(0).isVector() &&
           "SimplifyVUnaryOp only works on vectors!");
@@ -10498,14 +11362,13 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
          FoldOp.getOpcode() != ISD::ConstantFP)
        break;
      Ops.push_back(FoldOp);
-    AddToWorkList(FoldOp.getNode());
+    AddToWorklist(FoldOp.getNode());
    }
  
    if (Ops.size() != N0.getNumOperands())
      return SDValue();
  
-  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
-                     N0.getValueType(), &Ops[0], Ops.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
  }
  
  SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
@@ -10526,9 +11389,9 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
                                    N0.getValueType(),
                                    SCC.getOperand(0), SCC.getOperand(1),
                                    SCC.getOperand(4));
-      AddToWorkList(SETCC.getNode());
-      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(),
-                           SCC.getOperand(2), SCC.getOperand(3), SETCC);
+      AddToWorklist(SETCC.getNode());
+      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+                           SCC.getOperand(2), SCC.getOperand(3));
      }
  
      return SCC;
@@ -10536,12 +11399,11 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
    return SDValue();
  }
  
-/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
-/// are the two values being selected between, see if we can simplify the
-/// select.  Callers of this should assume that TheSelect is deleted if this
-/// returns true.  As such, they should return the appropriate thing (e.g. the
-/// node) back to the top-level of the DAG combiner loop to avoid it being
-/// looked at.
+/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
+/// being selected between, see if we can simplify the select.  Callers of this
+/// should assume that TheSelect is deleted if this returns true.  As such, they
+/// should return the appropriate thing (e.g. the node) back to the top-level of
+/// the DAG combiner loop to avoid it being looked at.
  bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                      SDValue RHS) {
  
@@ -10620,22 +11482,27 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
      }
  
      SDValue Load;
+    // The merged load may read from either input's address, so it can only
+    // claim what both inputs guarantee: it is invariant only if both loads
+    // are, and it gets the minimum (most restrictive) alignment of the two.
+    bool isInvariant = LLD->isInvariant() && RLD->isInvariant();
+    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
      if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
        Load = DAG.getLoad(TheSelect->getValueType(0),
                           SDLoc(TheSelect),
-                         // FIXME: Discards pointer and TBAA info.
+                         // FIXME: Discards pointer and AA info.
                           LLD->getChain(), Addr, MachinePointerInfo(),
                           LLD->isVolatile(), LLD->isNonTemporal(),
-                         LLD->isInvariant(), LLD->getAlignment());
+                         isInvariant, Alignment);
      } else {
        Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
                              RLD->getExtensionType() : LLD->getExtensionType(),
                              SDLoc(TheSelect),
                              TheSelect->getValueType(0),
-                            // FIXME: Discards pointer and TBAA info.
+                            // FIXME: Discards pointer and AA info.
                              LLD->getChain(), Addr, MachinePointerInfo(),
                              LLD->getMemoryVT(), LLD->isVolatile(),
-                            LLD->isNonTemporal(), LLD->getAlignment());
+                            LLD->isNonTemporal(), isInvariant, Alignment);
      }
  
      // Users of the select now use the result of the load.
@@ -10651,7 +11518,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
    return false;
  }
  
-/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
  /// where 'cond' is the comparison specified by CC.
  SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
                                        SDValue N2, SDValue N3,
@@ -10667,7 +11534,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
    // Determine if the condition we're dealing with is constant
    SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                                N0, N1, CC, DL, false);
-  if (SCC.getNode()) AddToWorkList(SCC.getNode());
+  if (SCC.getNode()) AddToWorklist(SCC.getNode());
    ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
  
    // fold select_cc true, x, y -> x
@@ -10707,7 +11574,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
      if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
        if (TLI.isTypeLegal(N2.getValueType()) &&
            (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
-           TargetLowering::Legal) &&
+               TargetLowering::Legal &&
+           !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
+           !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
            // If both constants have multiple uses, then we won't need to do an
            // extra load, they are likely around in registers for other users.
            (TV->hasOneUse() || FV->hasOneUse())) {
@@ -10733,13 +11602,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
          SDValue Cond = DAG.getSetCC(DL,
                                      getSetCCResultType(N0.getValueType()),
                                      N0, N1, CC);
-        AddToWorkList(Cond.getNode());
+        AddToWorklist(Cond.getNode());
          SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
                                            Cond, One, Zero);
-        AddToWorkList(CstOffset.getNode());
+        AddToWorklist(CstOffset.getNode());
          CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                              CstOffset);
-        AddToWorkList(CPIdx.getNode());
+        AddToWorklist(CPIdx.getNode());
          return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                             MachinePointerInfo::getConstantPool(), false,
                             false, false, Alignment);
@@ -10764,11 +11633,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
                                         getShiftAmountTy(N0.getValueType()));
          SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
                                      XType, N0, ShCt);
-        AddToWorkList(Shift.getNode());
+        AddToWorklist(Shift.getNode());
  
          if (XType.bitsGT(AType)) {
            Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
-          AddToWorkList(Shift.getNode());
+          AddToWorklist(Shift.getNode());
          }
  
          return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
@@ -10778,11 +11647,11 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
                                    XType, N0,
                                    DAG.getConstant(XType.getSizeInBits()-1,
                                           getShiftAmountTy(N0.getValueType())));
-      AddToWorkList(Shift.getNode());
+      AddToWorklist(Shift.getNode());
  
        if (XType.bitsGT(AType)) {
          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
-        AddToWorkList(Shift.getNode());
+        AddToWorklist(Shift.getNode());
        }
  
        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
@@ -10822,8 +11691,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
  
    // fold select C, 16, 0 -> shl C, 4
    if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
-    TLI.getBooleanContents(N0.getValueType().isVector()) ==
-      TargetLowering::ZeroOrOneBooleanContent) {
+      TLI.getBooleanContents(N0.getValueType()) ==
+          TargetLowering::ZeroOrOneBooleanContent) {
  
      // If the caller doesn't want us to simplify this into a zext of a compare,
      // don't do it.
@@ -10852,8 +11721,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
                             N2.getValueType(), SCC);
        }
  
-      AddToWorkList(SCC.getNode());
-      AddToWorkList(Temp.getNode());
+      AddToWorklist(SCC.getNode());
+      AddToWorklist(Temp.getNode());
  
        if (N2C->getAPIntValue() == 1)
          return Temp;
@@ -10914,7 +11783,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
    // select_cc setlt    X,  1, -X,  X ->
    // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
    if (N1C) {
-    ConstantSDNode *SubC = NULL;
+    ConstantSDNode *SubC = nullptr;
      if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
           (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
          N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
@@ -10932,8 +11801,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
                                           getShiftAmountTy(N0.getValueType())));
        SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
                                  XType, N0, Shift);
-      AddToWorkList(Shift.getNode());
-      AddToWorkList(Add.getNode());
+      AddToWorklist(Shift.getNode());
+      AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
      }
    }
@@ -10941,7 +11810,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
    return SDValue();
  }
  
-/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+/// This is a stub for TargetLowering::SimplifySetCC.
  SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
                                     SDValue N1, ISD::CondCode Cond,
                                     SDLoc DL, bool foldBooleans) {
@@ -10950,41 +11819,168 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
    return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
  }
  
-/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
-/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number.  See:
-/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+/// Given an ISD::SDIV node expressing a divide by constant, return
+/// a DAG expression to select that will generate the same value by multiplying
+/// by a magic number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
  SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+  if (!C)
+    return SDValue();
+
+  // Avoid division by zero.
+  if (!C->getAPIntValue())
+    return SDValue();
+
    std::vector<SDNode*> Built;
-  SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
+  SDValue S =
+      TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
+
+  for (SDNode *N : Built)
+    AddToWorklist(N);
+  return S;
+}
+
+/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
+/// DAG expression that will generate the same value by right shifting.
+SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
+  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+  if (!C)
+    return SDValue();
  
-  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
-       ii != ee; ++ii)
-    AddToWorkList(*ii);
+  // Avoid division by zero.
+  if (!C->getAPIntValue())
+    return SDValue();
+
+  std::vector<SDNode *> Built;
+  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
+
+  for (SDNode *N : Built)
+    AddToWorklist(N);
    return S;
  }
  
-/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
-/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number.  See:
-/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
+/// expression that will generate the same value by multiplying by a magic
+/// number.
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
  SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+  if (!C)
+    return SDValue();
+
+  // Avoid division by zero.
+  if (!C->getAPIntValue())
+    return SDValue();
+
    std::vector<SDNode*> Built;
-  SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
+  SDValue S =
+      TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
  
-  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
-       ii != ee; ++ii)
-    AddToWorkList(*ii);
+  for (SDNode *N : Built)
+    AddToWorklist(N);
    return S;
  }
  
-/// FindBaseOffset - Return true if base is a frame index, which is known not
-// to alias with anything but itself.  Provides base object and offset as
-// results.
+/// Build an estimate of the reciprocal of \p Op.
+///
+/// Starts from the value returned by TLI.getRecipEstimate and refines it with
+/// as many Newton steps as that call reports back through Iterations. Returns
+/// an empty SDValue once Level has reached AfterLegalizeDAG or when
+/// getRecipEstimate returns no value.
+SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
+  if (Level >= AfterLegalizeDAG)
+    return SDValue();
+
+  // Expose the DAG combiner to the target combiner implementations.
+  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+
+  unsigned Iterations = 0;
+  SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations);
+  if (!Est.getNode())
+    return SDValue();
+
+  // No refinement requested: hand back the raw estimate.
+  if (Iterations == 0)
+    return Est;
+
+  // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+  // For the reciprocal, we need to find the zero of the function:
+  //   F(X) = A X - 1 [which has a zero at X = 1/A]
+  //     =>
+  //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+  //     does not require additional intermediate precision]
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  SDValue FPOne = DAG.getConstantFP(1.0, VT);
+
+  AddToWorklist(Est.getNode());
+
+  // Each step computes Est = Est + Est * (1 - Op * Est).
+  for (unsigned Step = 0; Step != Iterations; ++Step) {
+    SDValue Product = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
+    AddToWorklist(Product.getNode());
+
+    SDValue Residual = DAG.getNode(ISD::FSUB, DL, VT, FPOne, Product);
+    AddToWorklist(Residual.getNode());
+
+    SDValue Correction = DAG.getNode(ISD::FMUL, DL, VT, Est, Residual);
+    AddToWorklist(Correction.getNode());
+
+    Est = DAG.getNode(ISD::FADD, DL, VT, Est, Correction);
+    AddToWorklist(Est.getNode());
+  }
+
+  return Est;
+}
+
+/// Build an estimate of the reciprocal square root of \p Op.
+///
+/// Starts from the value returned by TLI.getRsqrtEstimate and refines it with
+/// as many Newton steps as that call reports back through Iterations. Returns
+/// an empty SDValue once Level has reached AfterLegalizeDAG or when
+/// getRsqrtEstimate returns no value.
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+  if (Level >= AfterLegalizeDAG)
+    return SDValue();
+
+  // Expose the DAG combiner to the target combiner implementations.
+  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+  unsigned Iterations = 0;
+  SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations);
+  if (!Est.getNode())
+    return SDValue();
+
+  // No refinement requested: hand back the raw estimate.
+  if (Iterations == 0)
+    return Est;
+
+  // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+  // For the reciprocal sqrt, we need to find the zero of the function:
+  //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+  //     =>
+  //   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+  // As a result, we precompute A/2 prior to the iteration loop.
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  SDValue FPThreeHalves = DAG.getConstantFP(1.5, VT);
+
+  AddToWorklist(Est.getNode());
+
+  // 0.5 * Op is formed as (1.5 * Op - Op), so the whole sequence needs only
+  // the single FP constant 1.5.
+  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, FPThreeHalves, Op);
+  AddToWorklist(HalfArg.getNode());
+
+  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Op);
+  AddToWorklist(HalfArg.getNode());
+
+  // Each step computes Est = Est * (1.5 - HalfArg * Est * Est).
+  for (unsigned Step = 0; Step != Iterations; ++Step) {
+    SDValue EstSquared = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+    AddToWorklist(EstSquared.getNode());
+
+    SDValue Scaled = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, EstSquared);
+    AddToWorklist(Scaled.getNode());
+
+    SDValue Factor = DAG.getNode(ISD::FSUB, DL, VT, FPThreeHalves, Scaled);
+    AddToWorklist(Factor.getNode());
+
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Factor);
+    AddToWorklist(Est.getNode());
+  }
+
+  return Est;
+}
+
+/// Return true if base is a frame index, which is known not to alias with
+/// anything but itself.  Provides base object and offset as results.
  static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
                             const GlobalValue *&GV, const void *&CV) {
    // Assume it is a primitive operation.
-  Base = Ptr; Offset = 0; GV = 0; CV = 0;
+  Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
  
    // If it's an adding a simple constant then integrate the offset.
    if (Base.getOpcode() == ISD::ADD) {
@@ -11016,33 +12012,28 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
    return isa<FrameIndexSDNode>(Base);
  }
  
-/// isAlias - Return true if there is any possibility that the two addresses
-/// overlap.
-bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
-                          const Value *SrcValue1, int SrcValueOffset1,
-                          unsigned SrcValueAlign1,
-                          const MDNode *TBAAInfo1,
-                          SDValue Ptr2, int64_t Size2, bool IsVolatile2,
-                          const Value *SrcValue2, int SrcValueOffset2,
-                          unsigned SrcValueAlign2,
-                          const MDNode *TBAAInfo2) const {
+/// Return true if there is any possibility that the two addresses overlap.
+bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
    // If they are the same then they must be aliases.
-  if (Ptr1 == Ptr2) return true;
+  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
  
    // If they are both volatile then they cannot be reordered.
-  if (IsVolatile1 && IsVolatile2) return true;
+  if (Op0->isVolatile() && Op1->isVolatile()) return true;
  
    // Gather base node and offset information.
    SDValue Base1, Base2;
    int64_t Offset1, Offset2;
    const GlobalValue *GV1, *GV2;
    const void *CV1, *CV2;
-  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
-  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+  bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
+                                      Base1, Offset1, GV1, CV1);
+  bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
+                                      Base2, Offset2, GV2, CV2);
  
    // If they have a same base address then check to see if they overlap.
    if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
-    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+    return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
+             (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
  
    // It is possible for different frame indices to alias each other, mostly
    // when tail call optimization reuses return address slots for arguments.
@@ -11052,7 +12043,8 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
      MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
      Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
      Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
-    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+    return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
+             (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
    }
  
    // Otherwise, if we know what the bases are, and they aren't identical, then
@@ -11064,15 +12056,18 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
    // compared to the size and offset of the access, we may be able to prove they
    // do not alias.  This check is conservative for now to catch cases created by
    // splitting vector types.
-  if ((SrcValueAlign1 == SrcValueAlign2) &&
-      (SrcValueOffset1 != SrcValueOffset2) &&
-      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
-    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
-    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+  if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
+      (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
+      (Op0->getMemoryVT().getSizeInBits() >> 3 ==
+       Op1->getMemoryVT().getSizeInBits() >> 3) &&
+      (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits() >> 3)) {
+    int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
+    int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
  
      // There is no overlap between these relatively aligned accesses of similar
      // size, return no alias.
-    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+    if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
+        (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
        return false;
    }
  
@@ -11083,16 +12078,22 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
        CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
      UseAA = false;
  #endif
-  if (UseAA && SrcValue1 && SrcValue2) {
+  if (UseAA &&
+      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
      // Use alias analysis information.
-    int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
-    int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
-    int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+    int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
+                                 Op1->getSrcValueOffset());
+    int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
+        Op0->getSrcValueOffset() - MinOffset;
+    int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
+        Op1->getSrcValueOffset() - MinOffset;
      AliasAnalysis::AliasResult AAResult =
-      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1,
-                                       UseTBAA ? TBAAInfo1 : 0),
-               AliasAnalysis::Location(SrcValue2, Overlap2,
-                                       UseTBAA ? TBAAInfo2 : 0));
+        AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(),
+                                         Overlap1,
+                                         UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
+                 AliasAnalysis::Location(Op1->getMemOperand()->getValue(),
+                                         Overlap2,
+                                         UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
      if (AAResult == AliasAnalysis::NoAlias)
        return false;
    }
@@ -11101,45 +12102,7 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
    return true;
  }
  
-bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
-  SDValue Ptr0, Ptr1;
-  int64_t Size0, Size1;
-  bool IsVolatile0, IsVolatile1;
-  const Value *SrcValue0, *SrcValue1;
-  int SrcValueOffset0, SrcValueOffset1;
-  unsigned SrcValueAlign0, SrcValueAlign1;
-  const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
-  FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
-                SrcValueAlign0, SrcTBAAInfo0);
-  FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
-                SrcValueAlign1, SrcTBAAInfo1);
-  return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
-                 SrcValueAlign0, SrcTBAAInfo0,
-                 Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
-                 SrcValueAlign1, SrcTBAAInfo1);
-}
-
-/// FindAliasInfo - Extracts the relevant alias information from the memory
-/// node.  Returns true if the operand was a nonvolatile load.
-bool DAGCombiner::FindAliasInfo(SDNode *N,
-                                SDValue &Ptr, int64_t &Size, bool &IsVolatile,
-                                const Value *&SrcValue,
-                                int &SrcValueOffset,
-                                unsigned &SrcValueAlign,
-                                const MDNode *&TBAAInfo) const {
-  LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
-
-  Ptr = LS->getBasePtr();
-  Size = LS->getMemoryVT().getSizeInBits() >> 3;
-  IsVolatile = LS->isVolatile();
-  SrcValue = LS->getSrcValue();
-  SrcValueOffset = LS->getSrcValueOffset();
-  SrcValueAlign = LS->getOriginalAlignment();
-  TBAAInfo = LS->getTBAAInfo();
-  return isa<LoadSDNode>(LS) && !IsVolatile;
-}
-
-/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// Walk up chain skipping non-aliasing memory nodes,
  /// looking for aliasing nodes and adding them to the Aliases vector.
  void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                     SmallVectorImpl<SDValue> &Aliases) {
@@ -11147,15 +12110,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
    SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
  
    // Get alias information for node.
-  SDValue Ptr;
-  int64_t Size;
-  bool IsVolatile;
-  const Value *SrcValue;
-  int SrcValueOffset;
-  unsigned SrcValueAlign;
-  const MDNode *SrcTBAAInfo;
-  bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue,
-                              SrcValueOffset, SrcValueAlign, SrcTBAAInfo);
+  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
  
    // Starting off.
    Chains.push_back(OriginalChain);
@@ -11194,24 +12149,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
      case ISD::LOAD:
      case ISD::STORE: {
        // Get alias information for Chain.
-      SDValue OpPtr;
-      int64_t OpSize;
-      bool OpIsVolatile;
-      const Value *OpSrcValue;
-      int OpSrcValueOffset;
-      unsigned OpSrcValueAlign;
-      const MDNode *OpSrcTBAAInfo;
-      bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
-                                    OpIsVolatile, OpSrcValue, OpSrcValueOffset,
-                                    OpSrcValueAlign,
-                                    OpSrcTBAAInfo);
+      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
+          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
  
        // If chain is alias then stop here.
        if (!(IsLoad && IsOpLoad) &&
-          isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset,
-                  SrcValueAlign, SrcTBAAInfo,
-                  OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset,
-                  OpSrcValueAlign, OpSrcTBAAInfo)) {
+          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
          Aliases.push_back(Chain);
        } else {
          // Look further up the chain.
@@ -11270,10 +12213,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
    // like register copies will interfere with trivial cases).
  
    SmallVector<const SDNode *, 16> Worklist;
-  for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(),
-       IE = Visited.end(); I != IE; ++I)
-    if (*I != OriginalChain.getNode())
-      Worklist.push_back(*I);
+  for (const SDNode *N : Visited)
+    if (N != OriginalChain.getNode())
+      Worklist.push_back(N);
  
    while (!Worklist.empty()) {
      const SDNode *M = Worklist.pop_back_val();
@@ -11300,8 +12242,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
    }
  }
  
-/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
-/// for a better chain (aliasing node.)
+/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
+/// (aliasing node.)
  SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
    SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
  
@@ -11317,15 +12259,12 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
      return Aliases[0];
  
    // Construct a custom tailored token factor.
-  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
-                     &Aliases[0], Aliases.size());
+  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
  }
  
-// SelectionDAG::Combine - This is the entry point for the file.
-//
+/// This is the entry point for the file.
  void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
                             CodeGenOpt::Level OptLevel) {
-  /// run - This is the main entry point to this class.
-  ///
+  /// This is the main entry point to this class.
    DAGCombiner(*this, AA, OptLevel).Run(Level);
  }