Masked Load / Store Intrinsics - the CodeGen part.

[oota-llvm.git] / lib / CodeGen / SelectionDAG / DAGCombiner.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 6fe4ade4aa8534a1e74815199b1a243c7fbf56cc..7347111728e1062121efb5c707ef56938d57841b 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -303,6 +303,8 @@ namespace {
      SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
      SDValue visitVECTOR_SHUFFLE(SDNode *N);
      SDValue visitINSERT_SUBVECTOR(SDNode *N);
+    SDValue visitMLOAD(SDNode *N);
+    SDValue visitMSTORE(SDNode *N);
  
      SDValue XformToShuffleWithZero(SDNode *N);
      SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -331,6 +333,8 @@ namespace {
      SDValue BuildUDIV(SDNode *N);
      SDValue BuildReciprocalEstimate(SDValue Op);
      SDValue BuildRsqrtEstimate(SDValue Op);
+    SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
+    SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
      SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                 bool DemandHighBits = true);
      SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -643,6 +647,10 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
        !TLI.isConstFalseVal(N.getOperand(3).getNode()))
      return false;
  
+  if (TLI.getBooleanContents(N.getValueType()) ==
+      TargetLowering::UndefinedBooleanContent)
+    return false;
+
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(4);
@@ -1153,6 +1161,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
    LegalOperations = Level >= AfterLegalizeVectorOps;
    LegalTypes = Level >= AfterLegalizeTypes;
  
+  // Early exit if this basic block is in an optnone function.
+  AttributeSet FnAttrs =
+    DAG.getMachineFunction().getFunction()->getAttributes();
+  if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+                           Attribute::OptimizeNone))
+    return;
+
    // Add all the dag nodes to the worklist.
    for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
         E = DAG.allnodes_end(); I != E; ++I)
@@ -1338,6 +1353,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
    case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
    case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
    case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
+  case ISD::MLOAD:              return visitMLOAD(N);
+  case ISD::MSTORE:             return visitMSTORE(N);
    }
    return SDValue();
  }
@@ -1480,7 +1497,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  
        default:
          // Only add if it isn't already in the list.
-        if (SeenOps.insert(Op.getNode()))
+        if (SeenOps.insert(Op.getNode()).second)
            Ops.push_back(Op);
          else
            Changed = true;
@@ -3817,7 +3834,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
      return RXOR;
  
    // fold !(x cc y) -> (x !cc y)
-  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
      bool isInt = LHS.getValueType().isInteger();
      ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                                 isInt);
@@ -4676,7 +4693,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
    if (N0.getOpcode() == ISD::SETCC) {
      if ((!LegalOperations &&
           TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
-       TLI.isOperationLegal(ISD::SELECT_CC, VT))
+        TLI.isOperationLegal(ISD::SELECT_CC, VT))
        return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           N1, N2, N0.getOperand(2));
@@ -4758,6 +4775,162 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
        TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
  }
  
+SDValue DAGCombiner::visitMSTORE(SDNode *N) {
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
+  MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
+  SDValue Mask = MST->getMask();
+  SDValue Data  = MST->getData();
+  SDLoc DL(N);
+
+  // If the MSTORE data type requires splitting and the mask is provided by a
+  // SETCC, then split both nodes and its operands before legalization. This
+  // prevents the type legalizer from unrolling SETCC into scalar comparisons
+  // and enables future optimizations (e.g. min/max pattern matching on X86).
+  if (Mask.getOpcode() == ISD::SETCC) {
+
+    // Check if any splitting is required.
+    if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+        TargetLowering::TypeSplitVector)
+      return SDValue();
+
+    SDValue MaskLo, MaskHi, Lo, Hi;
+    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+    EVT LoVT, HiVT;
+    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
+
+    SDValue Chain = MST->getChain();
+    SDValue Ptr   = MST->getBasePtr();
+
+    EVT MemoryVT = MST->getMemoryVT();
+    unsigned Alignment = MST->getOriginalAlignment();
+
+    // if Alignment is equal to the vector size,
+    // take the half of it for the second part
+    unsigned SecondHalfAlignment =
+      (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
+         Alignment/2 : Alignment;
+
+    EVT LoMemVT, HiMemVT;
+    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+    SDValue DataLo, DataHi;
+    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+    MachineMemOperand *MMO = DAG.getMachineFunction().
+      getMachineMemOperand(MST->getPointerInfo(), 
+                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
+                           Alignment, MST->getAAInfo(), MST->getRanges());
+
+    Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO);
+
+    unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+                      DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+    MMO = DAG.getMachineFunction().
+      getMachineMemOperand(MST->getPointerInfo(), 
+                           MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
+                           SecondHalfAlignment, MST->getAAInfo(),
+                           MST->getRanges());
+
+    Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO);
+
+    AddToWorklist(Lo.getNode());
+    AddToWorklist(Hi.getNode());
+
+    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMLOAD(SDNode *N) {
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
+  MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
+  SDValue Mask = MLD->getMask();
+  SDLoc DL(N);
+
+  // If the MLOAD result requires splitting and the mask is provided by a
+  // SETCC, then split both nodes and its operands before legalization. This
+  // prevents the type legalizer from unrolling SETCC into scalar comparisons
+  // and enables future optimizations (e.g. min/max pattern matching on X86).
+
+  if (Mask.getOpcode() == ISD::SETCC) {
+    EVT VT = N->getValueType(0);
+
+    // Check if any splitting is required.
+    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+        TargetLowering::TypeSplitVector)
+      return SDValue();
+
+    SDValue MaskLo, MaskHi, Lo, Hi;
+    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+    SDValue Src0 = MLD->getSrc0();
+    SDValue Src0Lo, Src0Hi;
+    std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+
+    EVT LoVT, HiVT;
+    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+    SDValue Chain = MLD->getChain();
+    SDValue Ptr   = MLD->getBasePtr();
+    EVT MemoryVT = MLD->getMemoryVT();
+    unsigned Alignment = MLD->getOriginalAlignment();
+
+    // if Alignment is equal to the vector size,
+    // take the half of it for the second part
+    unsigned SecondHalfAlignment =
+      (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
+         Alignment/2 : Alignment;
+
+    EVT LoMemVT, HiMemVT;
+    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+    MachineMemOperand *MMO = DAG.getMachineFunction().
+    getMachineMemOperand(MLD->getPointerInfo(), 
+                         MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
+                         Alignment, MLD->getAAInfo(), MLD->getRanges());
+
+    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO);
+
+    unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+                      DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+    MMO = DAG.getMachineFunction().
+    getMachineMemOperand(MLD->getPointerInfo(), 
+                         MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
+                         SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+
+    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO);
+
+    AddToWorklist(Lo.getNode());
+    AddToWorklist(Hi.getNode());
+
+    // Build a factor node to remember that this load is independent of the
+    // other one.
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+                        Hi.getValue(1));
+
+    // Legalized the chain result - switch anything that used the old chain to
+    // use the new one.
+    DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
+
+    SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+
+    SDValue RetOps[] = { LoadRes, Chain };
+    return DAG.getMergeValues(RetOps, DL);
+  }
+  return SDValue();
+}
+
  SDValue DAGCombiner::visitVSELECT(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -6569,7 +6742,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
    EVT VT = N->getValueType(0);
    const TargetOptions &Options = DAG.getTarget().Options;
-  
+
    // fold vector ops
    if (VT.isVector()) {
      SDValue FoldedVOp = SimplifyVBinOp(N);
@@ -6589,7 +6762,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
        isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
      return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
                         GetNegatedExpression(N1, DAG, LegalOperations));
-  
+
    // fold (fadd (fneg A), B) -> (fsub B, A)
    if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
        isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
@@ -6601,7 +6774,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
      // No FP constant should be created after legalization as Instruction
      // Selection pass has a hard time dealing with FP constants.
      bool AllowNewConst = (Level < AfterLegalizeDAG);
-    
+
      // fold (fadd A, 0) -> A
      if (N1CFP && N1CFP->getValueAPF().isZero())
        return N0;
@@ -6612,15 +6785,15 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
        return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
                           DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                       N0.getOperand(1), N1));
-    
+
      // If allowed, fold (fadd (fneg x), x) -> 0.0
      if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
        return DAG.getConstantFP(0.0, VT);
-    
+
      // If allowed, fold (fadd x, (fneg x)) -> 0.0
      if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
        return DAG.getConstantFP(0.0, VT);
-    
+
      // We can fold chains of FADD's of the same value into multiplications.
      // This transform is not safe in general because we are reducing the number
      // of rounding steps.
@@ -6628,7 +6801,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
        if (N0.getOpcode() == ISD::FMUL) {
          ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
          ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
-        
+
          // (fadd (fmul x, c), x) -> (fmul x, c+1)
          if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
            SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
@@ -6636,7 +6809,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                                         DAG.getConstantFP(1.0, VT));
            return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP);
          }
-        
+
          // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
          if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
              N1.getOperand(0) == N1.getOperand(1) &&
@@ -6648,11 +6821,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                               N0.getOperand(0), NewCFP);
          }
        }
-      
+
        if (N1.getOpcode() == ISD::FMUL) {
          ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
          ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));
-        
+
          // (fadd x, (fmul x, c)) -> (fmul x, c+1)
          if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
            SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
@@ -6680,7 +6853,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
            return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                               N1, DAG.getConstantFP(3.0, VT));
        }
-      
+
        if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
          ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
          // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
@@ -6689,7 +6862,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
            return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                               N0, DAG.getConstantFP(3.0, VT));
        }
-      
+
        // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
        if (AllowNewConst &&
            N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
@@ -6700,7 +6873,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
                             N0.getOperand(0), DAG.getConstantFP(4.0, VT));
      }
    } // enable-unsafe-fp-math
-  
+
    // FADD -> FMA combines:
    if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
        TLI.isFMAFasterThanFMulAndFAdd(VT) &&
@@ -7028,18 +7201,16 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
          return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0,
                             DAG.getConstantFP(Recip, VT));
      }
-    
+
      // If this FDIV is part of a reciprocal square root, it may be folded
      // into a target-specific square root estimate instruction.
      if (N1.getOpcode() == ISD::FSQRT) {
        if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
-        AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
        }
      } else if (N1.getOpcode() == ISD::FP_EXTEND &&
                 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
-        AddToWorklist(RV.getNode());
          RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -7047,7 +7218,6 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
      } else if (N1.getOpcode() == ISD::FP_ROUND &&
                 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
-        AddToWorklist(RV.getNode());
          RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -7068,14 +7238,13 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
          // We found a FSQRT, so try to make this fold:
          // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
          if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
-          AddToWorklist(RV.getNode());
            RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
            AddToWorklist(RV.getNode());
            return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
          }
        }
      }
-    
+
      // Fold into a reciprocal estimate and multiply instead of a real divide.
      if (SDValue RV = BuildReciprocalEstimate(N1)) {
        AddToWorklist(RV.getNode());
@@ -7095,6 +7264,44 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
      }
    }
  
+  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+  // reciprocal.
+  // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
+  // Notice that this is not always beneficial. One reason is different target
+  // may have different costs for FDIV and FMUL, so sometimes the cost of two
+  // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
+  // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
+  if (Options.UnsafeFPMath) {
+    // Skip if current node is a reciprocal.
+    if (N0CFP && N0CFP->isExactlyValue(1.0))
+      return SDValue();
+
+    SmallVector<SDNode *, 4> Users;
+    // Find all FDIV users of the same divisor.
+    for (SDNode::use_iterator UI = N1.getNode()->use_begin(),
+                              UE = N1.getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = UI.getUse().getUser();
+      if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1)
+        Users.push_back(User);
+    }
+
+    if (TLI.combineRepeatedFPDivisors(Users.size())) {
+      SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0
+      SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1);
+
+      // Dividend / Divisor -> Dividend * Reciprocal
+      for (auto I = Users.begin(), E = Users.end(); I != E; ++I) {
+        if ((*I)->getOperand(0) != FPOne) {
+          SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT,
+                                        (*I)->getOperand(0), Reciprocal);
+          DAG.ReplaceAllUsesWith(*I, NewNode.getNode());
+        }
+      }
+      return SDValue();
+    }
+  }
+
    return SDValue();
  }
  
@@ -7116,7 +7323,6 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
    if (DAG.getTarget().Options.UnsafeFPMath) {
      // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
      if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
-      AddToWorklist(RV.getNode());
        EVT VT = RV.getValueType();
        RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
        AddToWorklist(RV.getNode());
@@ -7551,7 +7757,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
    // fold (fabs c1) -> fabs(c1)
    if (isa<ConstantFPSDNode>(N0))
      return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
-  
+
    // fold (fabs (fabs x)) -> (fabs x)
    if (N0.getOpcode() == ISD::FABS)
      return N->getOperand(0);
@@ -10511,26 +10717,37 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
      return SDValue();
  
    SDValue VecIn1, VecIn2;
+  bool UsesZeroVector = false;
    for (unsigned i = 0; i != NumInScalars; ++i) {
+    SDValue Op = N->getOperand(i);
      // Ignore undef inputs.
-    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+    if (Op.getOpcode() == ISD::UNDEF) continue;
+
+    // See if we can combine this build_vector into a blend with a zero vector.
+    if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant &&
+        cast<ConstantSDNode>(Op.getNode())->isNullValue()) ||
+        (Op.getOpcode() == ISD::ConstantFP &&
+        cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) {
+      UsesZeroVector = true;
+      continue;
+    }
  
      // If this input is something other than a EXTRACT_VECTOR_ELT with a
      // constant index, bail out.
-    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(Op.getOperand(1))) {
        VecIn1 = VecIn2 = SDValue(nullptr, 0);
        break;
      }
  
      // We allow up to two distinct input vectors.
-    SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+    SDValue ExtractedFromVec = Op.getOperand(0);
      if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
        continue;
  
      if (!VecIn1.getNode()) {
        VecIn1 = ExtractedFromVec;
-    } else if (!VecIn2.getNode()) {
+    } else if (!VecIn2.getNode() && !UsesZeroVector) {
        VecIn2 = ExtractedFromVec;
      } else {
        // Too many inputs.
@@ -10543,16 +10760,26 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
    if (VecIn1.getNode()) {
      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != NumInScalars; ++i) {
-      if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+      unsigned Opcode = N->getOperand(i).getOpcode();
+      if (Opcode == ISD::UNDEF) {
          Mask.push_back(-1);
          continue;
        }
  
+      // Operands can also be zero.
+      if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
+        assert(UsesZeroVector &&
+               (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
+               "Unexpected node found!");
+        Mask.push_back(NumInScalars+i);
+        continue;
+      }
+
        // If extracting from the first vector, just use the index directly.
        SDValue Extract = N->getOperand(i);
        SDValue ExtVal = Extract.getOperand(1);
+      unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
        if (Extract.getOperand(0) == VecIn1) {
-        unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
          if (ExtIndex > VT.getVectorNumElements())
            return SDValue();
  
@@ -10561,10 +10788,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
        }
  
        // Otherwise, use InIdx + VecSize
-      unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
-      Mask.push_back(Idx+NumInScalars);
+      Mask.push_back(NumInScalars+ExtIndex);
      }
  
+    // Avoid introducing illegal shuffles with zero.
+    if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
+      return SDValue();
+
      // We can't generate a shuffle node with mismatched input and output types.
      // Attempt to transform a single input vector to the correct type.
      if ((VT != VecIn1.getValueType())) {
@@ -10588,8 +10818,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
                             VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
      }
  
-    // If VecIn2 is unused then change it to undef.
-    VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+    if (UsesZeroVector)
+      VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) :
+                                DAG.getConstantFP(0.0, VT);
+    else
+      // If VecIn2 is unused then change it to undef.
+      VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
  
      // Check that we were able to transform all incoming values to the same
      // type.
@@ -11042,121 +11276,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
        return V;
    }
  
-  // If this shuffle node is simply a swizzle of another shuffle node,
-  // then try to simplify it.
-  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
-      N1.getOpcode() == ISD::UNDEF) {
-
-    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
-
-    // The incoming shuffle must be of the same type as the result of the
-    // current shuffle.
-    assert(OtherSV->getOperand(0).getValueType() == VT &&
-           "Shuffle types don't match");
-
-    SmallVector<int, 4> Mask;
-    // Compute the combined shuffle mask.
-    for (unsigned i = 0; i != NumElts; ++i) {
-      int Idx = SVN->getMaskElt(i);
-      assert(Idx < (int)NumElts && "Index references undef operand");
-      // Next, this index comes from the first value, which is the incoming
-      // shuffle. Adopt the incoming index.
-      if (Idx >= 0)
-        Idx = OtherSV->getMaskElt(Idx);
-      Mask.push_back(Idx);
-    }
-
-    // Check if all indices in Mask are Undef. In case, propagate Undef.
-    bool isUndefMask = true;
-    for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
-      isUndefMask &= Mask[i] < 0;
-
-    if (isUndefMask)
-      return DAG.getUNDEF(VT);
-    
-    bool CommuteOperands = false;
-    if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
-      // To be valid, the combine shuffle mask should only reference elements
-      // from one of the two vectors in input to the inner shufflevector.
-      bool IsValidMask = true;
-      for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
-        // See if the combined mask only reference undefs or elements coming
-        // from the first shufflevector operand.
-        IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
-
-      if (!IsValidMask) {
-        IsValidMask = true;
-        for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
-          // Check that all the elements come from the second shuffle operand.
-          IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
-        CommuteOperands = IsValidMask;
-      }
-
-      // Early exit if the combined shuffle mask is not valid.
-      if (!IsValidMask)
-        return SDValue();
-    }
-
-    // See if this pair of shuffles can be safely folded according to either
-    // of the following rules:
-    //   shuffle(shuffle(x, y), undef) -> x
-    //   shuffle(shuffle(x, undef), undef) -> x
-    //   shuffle(shuffle(x, y), undef) -> y
-    bool IsIdentityMask = true;
-    unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
-    for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
-      // Skip Undefs.
-      if (Mask[i] < 0)
-        continue;
-
-      // The combined shuffle must map each index to itself.
-      IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
-    }
-    
-    if (IsIdentityMask) {
-      if (CommuteOperands)
-        // optimize shuffle(shuffle(x, y), undef) -> y.
-        return OtherSV->getOperand(1);
-      
-      // optimize shuffle(shuffle(x, undef), undef) -> x
-      // optimize shuffle(shuffle(x, y), undef) -> x
-      return OtherSV->getOperand(0);
-    }
-
-    // It may still be beneficial to combine the two shuffles if the
-    // resulting shuffle is legal.
-    if (TLI.isTypeLegal(VT)) {
-      if (!CommuteOperands) {
-        if (TLI.isShuffleMaskLegal(Mask, VT))
-          // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
-          // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
-          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
-                                      &Mask[0]);
-      } else {
-        // Compute the commuted shuffle mask.
-        for (unsigned i = 0; i != NumElts; ++i) {
-          int idx = Mask[i];
-          if (idx < 0)
-            continue;
-          else if (idx < (int)NumElts)
-            Mask[i] = idx + NumElts;
-          else
-            Mask[i] = idx - NumElts;
-        }
-
-        if (TLI.isShuffleMaskLegal(Mask, VT))
-          //   shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3)
-          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(1), N1,
-                                      &Mask[0]);
-      }
-    }
-  }
-
    // Canonicalize shuffles according to rules:
    //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
    //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
    //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
-  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF &&
+  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
        N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
        TLI.isTypeLegal(VT)) {
      // The incoming shuffle must be of the same type as the result of the
@@ -11175,13 +11299,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
    }
  
    // Try to fold according to rules:
-  //   shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2)
-  //   shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2)
-  //   shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
-  //   shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
+  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+  //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    // Don't try to fold shuffles with illegal type.
    if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
-      N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) {
+      TLI.isTypeLegal(VT)) {
      ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
  
      // The incoming shuffle must be of the same type as the result of the
@@ -11189,14 +11312,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
      assert(OtherSV->getOperand(0).getValueType() == VT &&
             "Shuffle types don't match");
  
-    SDValue SV0 = OtherSV->getOperand(0);
-    SDValue SV1 = OtherSV->getOperand(1);
-    bool HasSameOp0 = N1 == SV0;
-    bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
-    if (!HasSameOp0 && !IsSV1Undef && N1 != SV1)
-      // Early exit.
-      return SDValue();
-
+    SDValue SV0, SV1;
      SmallVector<int, 4> Mask;
      // Compute the combined shuffle mask for a shuffle with SV0 as the first
      // operand, and SV1 as the second operand.
@@ -11208,14 +11324,49 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
          continue;
        }
  
+      SDValue CurrentVec;
        if (Idx < (int)NumElts) {
+        // This shuffle index refers to the inner shuffle N0. Lookup the inner
+        // shuffle mask to identify which vector is actually referenced.
          Idx = OtherSV->getMaskElt(Idx);
-        if (IsSV1Undef && Idx >= (int) NumElts)
-          Idx = -1;  // Propagate Undef.
-      } else
-        Idx = HasSameOp0 ? Idx - NumElts : Idx;
+        if (Idx < 0) {
+          // Propagate Undef.
+          Mask.push_back(Idx);
+          continue;
+        }
+
+        CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
+                                           : OtherSV->getOperand(1);
+      } else {
+        // This shuffle index references an element within N1.
+        CurrentVec = N1;
+      }
  
-      Mask.push_back(Idx);
+      // Simple case where 'CurrentVec' is UNDEF.
+      if (CurrentVec.getOpcode() == ISD::UNDEF) {
+        Mask.push_back(-1);
+        continue;
+      }
+
+      // Canonicalize the shuffle index. We don't know yet if CurrentVec
+      // will be the first or second operand of the combined shuffle.
+      Idx = Idx % NumElts;
+      if (!SV0.getNode() || SV0 == CurrentVec) {
+        // Ok. CurrentVec is the left hand side.
+        // Update the mask accordingly.
+        SV0 = CurrentVec;
+        Mask.push_back(Idx);
+        continue;
+      }
+
+      // Bail out if we cannot convert the shuffle pair into a single shuffle.
+      if (SV1.getNode() && SV1 != CurrentVec)
+        return SDValue();
+
+      // Ok. CurrentVec is the right hand side.
+      // Update the mask accordingly.
+      SV1 = CurrentVec;
+      Mask.push_back(Idx + NumElts);
      }
  
      // Check if all indices in Mask are Undef. In case, propagate Undef.
@@ -11226,14 +11377,37 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
      if (isUndefMask)
        return DAG.getUNDEF(VT);
  
+    if (!SV0.getNode())
+      SV0 = DAG.getUNDEF(VT);
+    if (!SV1.getNode())
+      SV1 = DAG.getUNDEF(VT);
+
      // Avoid introducing shuffles with illegal mask.
-    if (TLI.isShuffleMaskLegal(Mask, VT)) {
-      if (IsSV1Undef)
-        //   shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2)
-        //   shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2)
-        return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]);
-      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
+    if (!TLI.isShuffleMaskLegal(Mask, VT)) {
+      // Compute the commuted shuffle mask and test again.
+      for (unsigned i = 0; i != NumElts; ++i) {
+        int idx = Mask[i];
+        if (idx < 0)
+          continue;
+        else if (idx < (int)NumElts)
+          Mask[i] = idx + NumElts;
+        else
+          Mask[i] = idx - NumElts;
+      }
+
+      if (!TLI.isShuffleMaskLegal(Mask, VT))
+        return SDValue();
+ 
+      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+      //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+      std::swap(SV0, SV1);
      }
+
+    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
+    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
+    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
+    return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
    }
  
    return SDValue();
@@ -11289,7 +11463,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
          if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
            Indices.push_back(i);
          else if (cast<ConstantSDNode>(Elt)->isNullValue())
-          Indices.push_back(NumElts);
+          Indices.push_back(NumElts+i);
          else
            return SDValue();
        }
@@ -11545,7 +11719,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
      // It is safe to replace the two loads if they have different alignments,
      // but the new load must be the minimum (most restrictive) alignment of the
      // inputs.
-    bool isInvariant = LLD->getAlignment() & RLD->getAlignment();
+    bool isInvariant = LLD->isInvariant() & RLD->isInvariant();
      unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
      if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
        Load = DAG.getLoad(TheSelect->getValueType(0),
@@ -11985,49 +12159,89 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
    return SDValue();
  }
  
-SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
-  if (Level >= AfterLegalizeDAG)
-    return SDValue();
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+///     =>
+///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+/// As a result, we precompute A/2 prior to the iteration loop.
+SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
+                                          unsigned Iterations) {
+  EVT VT = Arg.getValueType();
+  SDLoc DL(Arg);
+  SDValue ThreeHalves = DAG.getConstantFP(1.5, VT);
  
-  // Expose the DAG combiner to the target combiner implementations.
-  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
-  unsigned Iterations = 0;
-  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations)) {
-    if (Iterations) {
-      // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
-      // For the reciprocal sqrt, we need to find the zero of the function:
-      //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
-      //     =>
-      //   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
-      // As a result, we precompute A/2 prior to the iteration loop.
-      EVT VT = Op.getValueType();
-      SDLoc DL(Op);
-      SDValue FPThreeHalves = DAG.getConstantFP(1.5, VT);
+  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
+  // this entire sequence requires only one FP constant.
+  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+  AddToWorklist(HalfArg.getNode());
  
-      AddToWorklist(Est.getNode());
+  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
+  AddToWorklist(HalfArg.getNode());
  
-      // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
-      // this entire sequence requires only one FP constant.
-      SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, FPThreeHalves, Op);
-      AddToWorklist(HalfArg.getNode());
+  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+  for (unsigned i = 0; i < Iterations; ++i) {
+    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+    AddToWorklist(NewEst.getNode());
  
-      HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Op);
-      AddToWorklist(HalfArg.getNode());
+    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+    AddToWorklist(NewEst.getNode());
  
-      // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
-      for (unsigned i = 0; i < Iterations; ++i) {
-        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
-        AddToWorklist(NewEst.getNode());
+    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+    AddToWorklist(NewEst.getNode());
  
-        NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
-        AddToWorklist(NewEst.getNode());
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+    AddToWorklist(Est.getNode());
+  }
+  return Est;
+}
  
-        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPThreeHalves, NewEst);
-        AddToWorklist(NewEst.getNode());
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+///     =>
+///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
+SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
+                                          unsigned Iterations) {
+  EVT VT = Arg.getValueType();
+  SDLoc DL(Arg);
+  SDValue MinusThree = DAG.getConstantFP(-3.0, VT);
+  SDValue MinusHalf = DAG.getConstantFP(-0.5, VT);
  
-        Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
-        AddToWorklist(Est.getNode());
-      }
+  // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
+  for (unsigned i = 0; i < Iterations; ++i) {
+    SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
+    AddToWorklist(HalfEst.getNode());
+
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+    AddToWorklist(Est.getNode());
+
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
+    AddToWorklist(Est.getNode());
+
+    Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
+    AddToWorklist(Est.getNode());
+
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
+    AddToWorklist(Est.getNode());
+  }
+  return Est;
+}
+
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+  if (Level >= AfterLegalizeDAG)
+    return SDValue();
+
+  // Expose the DAG combiner to the target combiner implementations.
+  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+  unsigned Iterations = 0;
+  bool UseOneConstNR = false;
+  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
+    AddToWorklist(Est.getNode());
+    if (Iterations) {
+      Est = UseOneConstNR ?
+        BuildRsqrtNROneConst(Op, Est, Iterations) :
+        BuildRsqrtNRTwoConst(Op, Est, Iterations);
      }
      return Est;
    }
@@ -12199,7 +12413,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
      }
  
      // Don't bother if we've been before.
-    if (!Visited.insert(Chain.getNode()))
+    if (!Visited.insert(Chain.getNode()).second)
        continue;
  
      switch (Chain.getOpcode()) {
@@ -12287,7 +12501,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
  
      for (SDNode::use_iterator UI = M->use_begin(),
           UIE = M->use_end(); UI != UIE; ++UI)
-      if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) {
+      if (UI.getUse().getValueType() == MVT::Other &&
+          Visited.insert(*UI).second) {
          if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
            // We've not visited this use, and we care about it (it could have an
            // ordering dependency with the original node).