return true;
}
+/// \brief Return true if the specified node is a BUILD_VECTOR node whose
+/// operands are all ConstantFPSDNodes or undef.
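+/// For example, (build_vector 1.0, undef, 2.0) satisfies this predicate.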
+bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ SDValue Op = N->getOperand(i);
+ if (Op.getOpcode() == ISD::UNDEF)
+ continue;
+ if (!isa<ConstantFPSDNode>(Op))
+ return false;
+ }
+ return true;
+}
+
/// isScalarToVector - Return true if the specified node is a
/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
/// element is not an undef.
// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in the
// shuffle mask M that point at N1 to point at N2, and indices that point at
// N2 to point at N1.
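+// E.g., with 4 elements, the mask <0,5,2,7> commutes to <4,1,6,3>.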
static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) {
std::swap(N1, N2);
- int NElts = M.size();
- for (int i = 0; i != NElts; ++i) {
- if (M[i] >= NElts)
- M[i] -= NElts;
- else if (M[i] >= 0)
- M[i] += NElts;
- }
+ ShuffleVectorSDNode::commuteMask(M);
}
SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (N1.getOpcode() == ISD::UNDEF)
commuteShuffle(N1, N2, MaskVec);
+ // If shuffling a splat, try to blend the splat instead. We do this here so
+ // that even when this arises during lowering we don't have to re-handle it.
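+ // E.g., with N1 = (build_vector x, x, x, x) and mask <3,1,6,7>, lanes 0 and 1
+ // read a splatted value, so they can be rewritten in place as <0,1,6,7>,
+ // turning the shuffle into a blend of N1 and N2.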
+ auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ if (!Splat)
+ return;
+
+ for (int i = 0; i < (int)NElts; ++i) {
+ if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts))
+ continue;
+
+ // If this input comes from undef, mark it as such.
+ if (UndefElements[MaskVec[i] - Offset]) {
+ MaskVec[i] = -1;
+ continue;
+ }
+
+ // If we can blend a non-undef lane, use that instead.
+ if (!UndefElements[i])
+ MaskVec[i] = i + Offset;
+ }
+ };
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ BlendSplat(N1BV, 0);
+ if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2))
+ BlendSplat(N2BV, NElts);
+
// Canonicalize all index into lhs, -> shuffle lhs, undef
// Canonicalize all index into rhs, -> shuffle rhs, undef
bool AllLHS = true, AllRHS = true;
return N1;
}
- // If the shuffle itself creates a constant splat, build the vector
- // directly.
+ // If the shuffle itself creates a splat, build the vector directly.
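+ // E.g., (shuffle (build_vector x, y, x, x), undef, <0,0,0,0>) becomes
+ // (build_vector x, x, x, x), since every mask lane reads operand 0 of the
+ // build vector.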
if (AllSame && SameNumElts) {
- const SDValue &Splatted = BV->getOperand(MaskVec[0]);
- if (isa<ConstantSDNode>(Splatted) || isa<ConstantFPSDNode>(Splatted)) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i != NElts; ++i)
- Ops.push_back(Splatted);
-
- SDValue NewBV =
- getNode(ISD::BUILD_VECTOR, dl, BV->getValueType(0), Ops);
-
- // We may have jumped through bitcasts, so the type of the
- // BUILD_VECTOR may not match the type of the shuffle.
- if (BV->getValueType(0) != VT)
- NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
- return NewBV;
- }
+ const SDValue &Splatted = BV->getOperand(MaskVec[0]);
+ SmallVector<SDValue, 8> Ops(NElts, Splatted);
+
+ EVT BuildVT = BV->getValueType(0);
+ SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops);
+
+ // We may have jumped through bitcasts, so the type of the
+ // BUILD_VECTOR may not match the type of the shuffle.
+ if (BuildVT != VT)
+ NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
+ return NewBV;
}
}
}
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
MVT VT = SV.getSimpleValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> MaskVec;
-
- for (unsigned i = 0; i != NumElems; ++i) {
- int Idx = SV.getMaskElt(i);
- if (Idx >= 0) {
- if (Idx < (int)NumElems)
- Idx += NumElems;
- else
- Idx -= NumElems;
- }
- MaskVec.push_back(Idx);
- }
+ SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end());
+ ShuffleVectorSDNode::commuteMask(MaskVec);
SDValue Op0 = SV.getOperand(0);
SDValue Op1 = SV.getOperand(1);
}
}
- // Constant fold unary operations with a vector integer operand.
+ // Constant fold unary operations with a vector integer or float operand.
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) {
if (BV->isConstant()) {
switch (Opcode) {
// FIXME: Entirely reasonable to perform folding of other unary
// operations here as the need arises.
break;
+ case ISD::TRUNCATE:
+ // Constant build vector truncation can be done with the original scalar
+ // operands, using a new build vector with the truncated value type.
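+ // E.g., truncating (v2i32 build_vector c0, c1) to v2i16 rebuilds it as
+ // (v2i16 build_vector c0, c1); BUILD_VECTOR implicitly truncates operands
+ // that are wider than its element type.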
+ return getNode(ISD::BUILD_VECTOR, DL, VT, BV->ops());
+ case ISD::FNEG:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FFLOOR:
+ case ISD::FP_EXTEND:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
+ // Let the above scalar folding handle the folding of each element.
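+ // E.g., (fneg (v2f32 build_vector 1.0, -2.0)) folds element by element to
+ // (v2f32 build_vector -1.0, 2.0).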
SmallVector<SDValue, 8> Ops;
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue OpN = BV->getOperand(i);
- // Let the above scalar folding handle the conversion of each
- // element.
- OpN = getNode(ISD::SINT_TO_FP, DL, VT.getVectorElementType(),
- OpN);
+ OpN = getNode(Opcode, DL, VT.getVectorElementType(), OpN);
+ if (OpN.getOpcode() != ISD::UNDEF &&
+ OpN.getOpcode() != ISD::Constant &&
+ OpN.getOpcode() != ISD::ConstantFP)
+ break;
Ops.push_back(OpN);
}
- return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ if (Ops.size() == VT.getVectorNumElements())
+ return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
+ break;
}
}
}
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+
+ // BUILD_VECTOR requires all inputs to be of the same type; find the
+ // maximum type and extend them all.
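+ // E.g., if one BUILD_VECTOR carries i32 operands and the other i16, every
+ // operand is promoted to i32 before the combined BUILD_VECTOR is formed.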
+ EVT SVT = VT.getScalarType();
+ for (SDValue Op : Elts)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT.bitsGT(VT.getScalarType()))
+ for (SDValue &Op : Elts)
+ Op = TLI->isZExtFree(Op.getValueType(), SVT)
+ ? getZExtOrTrunc(Op, DL, SVT)
+ : getSExtOrTrunc(Op, DL, SVT);
+
return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
break;
CSEMap.InsertNode(N, IP);
} else {
-
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
}
return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT);
}
- Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
+ assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?");
+ EVT IntVT = VT.getScalarType();
+ if (!IntVT.isInteger())
+ IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits());
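+ // E.g., for a v4f32 destination the splat is computed in i32 and bitcast
+ // back to f32 before being splatted into the final BUILD_VECTOR below.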
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value);
if (NumBits > 8) {
// Use a multiplication with 0x010101... to extend the input to the
// required length.
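+ // E.g., for NumBits == 32, 0xAB * 0x01010101 == 0xABABABAB.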
APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
- Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
+ Value = DAG.getNode(ISD::MUL, dl, IntVT, Value,
+ DAG.getConstant(Magic, IntVT));
+ }
+
+ if (VT != Value.getValueType() && !VT.isInteger())
+ Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value);
+ if (VT != Value.getValueType()) {
+ assert(VT.getVectorElementType() == Value.getValueType() &&
+ "value type should be one vector element here");
+ SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value);
+ Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps);
}
return Value;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize =
- MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool OptSize = MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
- MachinePointerInfo DstPtrInfo,
+ bool isTailCall, MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
- std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
- unsigned Align, bool isVol,
+ unsigned Align, bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
- std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ .setDiscardResult()
+ .setTailCall(isTailCall);
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
- unsigned Align, bool isVol,
+ unsigned Align, bool isVol, bool isTailCall,
MachinePointerInfo DstPtrInfo) {
assert(Align && "The SDAG layer expects explicit alignment and reserves 0");
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
TLI->getPointerTy()), std::move(Args), 0)
- .setDiscardResult();
+ .setDiscardResult()
+ .setTailCall(isTailCall);
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
return SDValue(N, 0);
}
+SDValue
+SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
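+ // The memory VT, extension/indexing flags, and address space all take part
+ // in the folding-set ID so that gathers differing only in memory semantics
+ // are kept distinct by CSE.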
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
+ MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MaskedGatherSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ MaskedGatherSDNode *N =
+ new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(),
+ Ops, VTs, VT, MMO);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ cast<MaskedScatterSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ SDNode *N =
+ new (NodeAllocator) MaskedScatterSDNode(dl.getIROrder(), dl.getDebugLoc(),
+ Ops, VTs, VT, MMO);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
SDValue Chain, SDValue Ptr,
SDValue SV,
assert(N->getNumOperands() == NumOps &&
"Update with wrong number of operands");
- // Check to see if there is no change.
- bool AnyChange = false;
- for (unsigned i = 0; i != NumOps; ++i) {
- if (Ops[i] != N->getOperand(i)) {
- AnyChange = true;
- break;
- }
- }
-
- // No operands changed, just return the input node.
- if (!AnyChange) return N;
+ // If no operands changed, just return the input node.
+ if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin()))
+ return N;
// See if the modified node already exists.
void *InsertPos = nullptr;
/// For IROrder, we keep the smaller of the two
SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) {
DebugLoc NLoc = N->getDebugLoc();
- if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) &&
- (OLoc.getDebugLoc() != NLoc)) {
+ if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
}
unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder());
SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
unsigned R, bool IsIndirect, uint64_t Off,
DebugLoc DL, unsigned O) {
+ assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
}
SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
const Value *C, uint64_t Off,
DebugLoc DL, unsigned O) {
+ assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O);
}
SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
unsigned FI, uint64_t Off,
DebugLoc DL, unsigned O) {
+ assert(cast<MDLocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O);
}
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
- llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
- TLI->getDataLayout());
+ llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne,
+ *TLI->getDataLayout());
unsigned AlignBits = KnownZero.countTrailingOnes();
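+ // E.g., four trailing known-zero bits imply 16-byte alignment.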
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)