bool CombineToPostIndexedLoadStore(SDNode *N);
bool SliceUpLoad(SDNode *N);
+ /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+ /// load.
+ ///
+ /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
+ /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
+ /// \param EltNo index of the vector element to load.
+ /// \param OriginalLoad load that EVE came from to be replaced.
+ /// \returns EVE on success SDValue() on failure.
+ SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- ConstantSDNode *CN = BV->getConstantSplatValue();
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
- if (CN && CN->getValueType(0) == N.getValueType().getScalarType())
+ // FIXME: We blindly ignore splats which include undef which is overly
+ // pessimistic.
+ if (CN && UndefElements.none() &&
+ CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
- SDValue Ops[] = { N1, N0 };
- SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
- Ops, 2);
+ SDValue Ops[] = {N1, N0};
+ SDNode *CSENode;
+ if (const BinaryWithFlagsSDNode *BinNode =
+ dyn_cast<BinaryWithFlagsSDNode>(N)) {
+ CSENode = DAG.getNodeIfExists(
+ N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
+ BinNode->hasNoSignedWrap(), BinNode->isExact());
+ } else {
+ CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
+ }
if (CSENode)
return SDValue(CSENode, 0);
}
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+ DAG.computeKnownBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+ DAG.computeKnownBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+ DAG.computeKnownBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+ DAG.computeKnownBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
"Inputs to shuffles are not the same type");
-
+
// Check that both shuffles use the same mask. The masks are known to be of
// the same length because the result vector type is the same.
// Check also that shuffles have only one use to avoid introducing extra
// two ways to fold this node into a shuffle.
SmallVector<int,4> Mask1;
SmallVector<int,4> Mask2;
-
+
for (unsigned i = 0; i != NumElts && CanFold; ++i) {
int M0 = SV0->getMaskElt(i);
int M1 = SV1->getMaskElt(i);
-
+
// Both shuffle indexes are undef. Propagate Undef.
if (M0 < 0 && M1 < 0) {
Mask1.push_back(M0);
CanFold = false;
break;
}
-
+
Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
}
return SDValue();
}
+ if (!TLI.isDesirableToCommuteWithShift(LHS))
+ return SDValue();
+
// Fold the constants, shifting the binop RHS by the shift amount.
SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
N->getValueType(0),
// If setcc produces all-one true value then:
// (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
if (N1CV && N1CV->isConstant()) {
- if (N0.getOpcode() == ISD::AND &&
- TLI.getBooleanContents(true) ==
- TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ if (N0.getOpcode() == ISD::AND) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
- if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
+ if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
+ TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
if (C.getNode())
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
+ DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
+ // We can't do this reliably if integer based booleans have different contents
+ // to floating point based booleans. This is because we can't tell whether we
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
if (VT.isInteger() &&
- (VT0 == MVT::i1 ||
- (VT0.isInteger() &&
- TLI.getBooleanContents(false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
+ (VT0 == MVT::i1 || (VT0.isInteger() &&
+ TLI.getBooleanContents(false, false) ==
+ TLI.getBooleanContents(false, true) &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
SDValue XORNode;
if (VT == VT0)
// fold selects based on a setcc into other things, such as min/max/abs
if (N0.getOpcode() == ISD::SETCC) {
- // FIXME:
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
- TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ if ((!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+ TLI.isOperationLegal(ISD::SELECT_CC, VT))
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1),
N1, N2, N0.getOperand(2));
return std::make_pair(Lo, Hi);
}
+// This function assumes all the vselect's arguments are CONCAT_VECTOR
+// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
+ SDLoc dl(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ MVT VT = N->getSimpleValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ RHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ Cond.getOpcode() == ISD::BUILD_VECTOR);
+
+ // We're sure we have an even number of elements due to the
+ // concat_vectors we have as arguments to vselect.
+ // Skip BV elements until we find one that's not an UNDEF
+ // After we find an UNDEF element, keep looping until we get to half the
+ // length of the BV and see if all the non-undef nodes are the same.
+ ConstantSDNode *BottomHalf = nullptr;
+ for (int i = 0; i < NumElems / 2; ++i) {
+ if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (BottomHalf == nullptr)
+ BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != BottomHalf)
+ return SDValue();
+ }
+
+ // Do the same for the second half of the BuildVector
+ ConstantSDNode *TopHalf = nullptr;
+ for (int i = NumElems / 2; i < NumElems; ++i) {
+ if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (TopHalf == nullptr)
+ TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != TopHalf)
+ return SDValue();
+ }
+
+ assert(TopHalf && BottomHalf &&
+ "One half of the selector was all UNDEFs and the other was all the "
+ "same value. This should have been addressed before this function.");
+ return DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, VT,
+ BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+}
+
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return N2;
+ // The ConvertSelectToConcatVector function is assuming both the above
+ // checks for (vselect (build_vector all{ones,zeros) ...) have been made
+ // and addressed.
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N2.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ SDValue CV = ConvertSelectToConcatVector(N, DAG);
+ if (CV.getNode())
+ return CV;
+ }
+
return SDValue();
}
// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext
// dag node into a ConstantSDNode or a build_vector of constants.
// This function is called by the DAGCombiner when visiting sext/zext/aext
-// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
// Vector extends are not folded if operations are legal; this is to
// avoid introducing illegal build_vector dag nodes.
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
(!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
return nullptr;
-
+
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
}
if (N0.getOpcode() == ISD::SETCC) {
+ EVT N0VT = N0.getOperand(0).getValueType();
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations &&
- TLI.getBooleanContents(true) ==
- TargetLowering::ZeroOrNegativeOneBooleanContent) {
- EVT N0VT = N0.getOperand(0).getValueType();
+ TLI.getBooleanContents(N0VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
// if this is the case.
// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero in KnownZero.
// This function computes KnownZero to avoid a duplicated call to
-// ComputeMaskedBits in the caller.
+// computeKnownBits in the caller.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
APInt &KnownZero) {
APInt KnownOne;
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
- DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
return true;
}
else
return false;
- DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
return false;
}
}
+ // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
+ if (N0.getOpcode() == ISD::SELECT) {
+ EVT SrcVT = N0.getValueType();
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
+ TLI.isTruncateFree(SrcVT, VT)) {
+ SDLoc SL(N0);
+ SDValue Cond = N0.getOperand(0);
+ SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
+ }
+ }
+
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile() &&
+ // Do not remove the cast if the types differ in endian layout.
+ TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
+ TLI.hasBigEndianPartOrdering(VT) &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- // (See also visitSELECT)
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- // (See also visitSELECT)
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
// (fneg (fmul c, x)) -> (fmul -c, x)
if (N0.getOpcode() == ISD::FMUL) {
ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
- if (CFP1)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N0.getOperand(1)));
+ if (CFP1) {
+ APFloat CVal = CFP1->getValueAPF();
+ CVal.changeSign();
+ if (Level >= AfterLegalizeDAG &&
+ (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
+ TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
+ return DAG.getNode(
+ ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+ }
}
return SDValue();
return SDValue();
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ // Canonicalize insert_vector_elt dag nodes.
+ // Example:
+ // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
+ // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
+ //
+ // Do this only if the child insert_vector node has one use; also
+ // do this only if indices are both constants and Idx1 < Idx0.
+ if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
+ && isa<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned OtherElt =
+ cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
+ if (Elt < OtherElt) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+ InVec.getOperand(0), InVal, EltNo);
+ AddToWorkList(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
+ VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
+ }
+ }
+
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
+SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+ EVT ResultVT = EVE->getValueType(0);
+ EVT VecEltVT = InVecVT.getVectorElementType();
+ unsigned Align = OriginalLoad->getAlignment();
+ unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ VecEltVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+ return SDValue();
+
+ Align = NewAlign;
+
+ SDValue NewPtr = OriginalLoad->getBasePtr();
+ SDValue Offset;
+ EVT PtrType = NewPtr.getValueType();
+ MachinePointerInfo MPI;
+ if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+ int Elt = ConstEltNo->getZExtValue();
+ unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+ if (TLI.isBigEndian())
+ PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
+ Offset = DAG.getConstant(PtrOff, PtrType);
+ MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ } else {
+ Offset = DAG.getNode(
+ ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
+ DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
+ if (TLI.isBigEndian())
+ Offset = DAG.getNode(
+ ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
+ DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
+ MPI = OriginalLoad->getPointerInfo();
+ }
+ NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractvalue is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (ResultVT.bitsGT(VecEltVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
+ ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
+ NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
+ OriginalLoad->isNonTemporal(), Align,
+ OriginalLoad->getTBAAInfo());
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(
+ VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
+ OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
+ OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
+ Chain = Load.getValue(1);
+ if (ResultVT.bitsLT(VecEltVT))
+ Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(EVE);
+ ++OpsNarrowed;
+ return SDValue(EVE, 0);
+}
+
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
}
}
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ // If the result of load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ }
+
+ // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
+ if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
+ ISD::isNormalLoad(InVec.getNode())) {
+ SDValue Index = N->getOperand(1);
+ if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
+ OrigLoad);
+ }
+
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- bool NewLoad = false;
- bool BCNumEltsChanged = false;
- EVT ExtVT = VT.getVectorElementType();
- EVT LVT = ExtVT;
-
- // If the result of load has to be truncated, then it's not necessarily
- // profitable.
- if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
- return SDValue();
-
- if (InVec.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- EVT BCVT = InVec.getOperand(0).getValueType();
- if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
- return SDValue();
- if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
- BCNumEltsChanged = true;
- InVec = InVec.getOperand(0);
- ExtVT = BCVT.getVectorElementType();
- NewLoad = true;
- }
LoadSDNode *LN0 = nullptr;
const ShuffleVectorSDNode *SVN = nullptr;
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+ EltNo = DAG.getConstant(Elt, EltNo.getValueType());
}
}
if (Elt == -1)
return DAG.getUNDEF(LVT);
- unsigned Align = LN0->getAlignment();
- if (NewLoad) {
- // Check the resultant load doesn't need a higher alignment than the
- // original load.
- unsigned NewAlign =
- TLI.getDataLayout()
- ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
- return SDValue();
-
- Align = NewAlign;
- }
-
- SDValue NewPtr = LN0->getBasePtr();
- unsigned PtrOff = 0;
-
- if (Elt) {
- PtrOff = LVT.getSizeInBits() * Elt / 8;
- EVT PtrType = NewPtr.getValueType();
- if (TLI.isBigEndian())
- PtrOff = VT.getSizeInBits() / 8 - PtrOff;
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
- DAG.getConstant(PtrOff, PtrType));
- }
-
- // The replacement we need to do here is a little tricky: we need to
- // replace an extractelement of a load with a load.
- // Use ReplaceAllUsesOfValuesWith to do the replacement.
- // Note that this replacement assumes that the extractvalue is the only
- // use of the load; that's okay because we don't want to perform this
- // transformation in other cases anyway.
- SDValue Load;
- SDValue Chain;
- if (NVT.bitsGT(LVT)) {
- // If the result type of vextract is wider than the load, then issue an
- // extending load instead.
- ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
- ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
- NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
- LVT, LN0->isVolatile(), LN0->isNonTemporal(),
- Align, LN0->getTBAAInfo());
- Chain = Load.getValue(1);
- } else {
- Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), Align, LN0->getTBAAInfo());
- Chain = Load.getValue(1);
- if (NVT.bitsLT(LVT))
- Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
- else
- Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
- }
- WorkListRemover DeadNodes(*this);
- SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
- SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
- // Since we're explcitly calling ReplaceAllUses, add the new node to the
- // worklist explicitly as well.
- AddToWorkList(Load.getNode());
- AddUsersToWorkList(Load.getNode()); // Add users too
- // Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorkList(N);
- return SDValue(N, 0);
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
}
return SDValue();
}
}
- // If everything is good, we can make a shuffle operation.
+ // If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
SmallVector<SDValue, 8> Opnds;
unsigned BuildVecNumElts = N0.getNumOperands();
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
+ EVT SclTy0 = N0.getOperand(0)->getValueType(0);
+ EVT SclTy1 = N1.getOperand(0)->getValueType(0);
+ if (SclTy0.isFloatingPoint()) {
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N0.getOperand(i));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N1.getOperand(i));
+ } else {
+ // If BUILD_VECTOR are from built from integer, they may have different
+ // operand types. Get the smaller type and truncate all operands to it.
+ EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+ N0.getOperand(i)));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+ N1.getOperand(i)));
+ }
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
}
// If this shuffle node is simply a swizzle of another shuffle node,
- // and it reverses the swizzle of the previous shuffle then we can
- // optimize shuffle(shuffle(x, undef), undef) -> x.
+ // then try to simplify it.
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
N1.getOpcode() == ISD::UNDEF) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
- // Shuffle nodes can only reverse shuffles with a single non-undef value.
- if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
- return SDValue();
-
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(OtherSV->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
+ SmallVector<int, 4> Mask;
+ // Compute the combined shuffle mask.
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
assert(Idx < (int)NumElts && "Index references undef operand");
// shuffle. Adopt the incoming index.
if (Idx >= 0)
Idx = OtherSV->getMaskElt(Idx);
+ Mask.push_back(Idx);
+ }
+
+ bool CommuteOperands = false;
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
+ // To be valid, the combine shuffle mask should only reference elements
+ // from one of the two vectors in input to the inner shufflevector.
+ bool IsValidMask = true;
+ for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+ // See if the combined mask only reference undefs or elements coming
+ // from the first shufflevector operand.
+ IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
+
+ if (!IsValidMask) {
+ IsValidMask = true;
+ for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+ // Check that all the elements come from the second shuffle operand.
+ IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
+ CommuteOperands = IsValidMask;
+ }
- // The combined shuffle must map each index to itself.
- if (Idx >= 0 && (unsigned)Idx != i)
+ // Early exit if the combined shuffle mask is not valid.
+ if (!IsValidMask)
return SDValue();
}
- return OtherSV->getOperand(0);
+ // See if this pair of shuffles can be safely folded according to either
+ // of the following rules:
+ // shuffle(shuffle(x, y), undef) -> x
+ // shuffle(shuffle(x, undef), undef) -> x
+ // shuffle(shuffle(x, y), undef) -> y
+ bool IsIdentityMask = true;
+ unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
+ for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
+ // Skip Undefs.
+ if (Mask[i] < 0)
+ continue;
+
+ // The combined shuffle must map each index to itself.
+ IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
+ }
+
+ if (IsIdentityMask) {
+ if (CommuteOperands)
+ // optimize shuffle(shuffle(x, y), undef) -> y.
+ return OtherSV->getOperand(1);
+
+ // optimize shuffle(shuffle(x, undef), undef) -> x
+ // optimize shuffle(shuffle(x, y), undef) -> x
+ return OtherSV->getOperand(0);
+ }
+
+ // It may still be beneficial to combine the two shuffles if the
+ // resulting shuffle is legal.
+ if (TLI.isTypeLegal(VT) && TLI.isShuffleMaskLegal(Mask, VT)) {
+ if (!CommuteOperands)
+ // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
+ &Mask[0]);
+
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
+ &Mask[0]);
+ }
+ }
+
+ // Try to fold according to rules:
+ // shuffle(shuffle A, B, M0), B, M1) -> shuffle(A, B, M2)
+ // shuffle(shuffle A, B, M0), A, M1) -> shuffle(A, B, M2)
+ // Don't try to fold shuffles with illegal type.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ TLI.isTypeLegal(VT)) {
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ SDValue SV0 = OtherSV->getOperand(0);
+ SDValue SV1 = OtherSV->getOperand(1);
+ bool HasSameOp0 = N1 == SV0;
+ if (!HasSameOp0 && N1 != SV1)
+ // Early exit.
+ return SDValue();
+
+ SmallVector<int, 4> Mask;
+ // Compute the combined shuffle mask for a shuffle with SV0 as the first
+ // operand, and SV1 as the second operand.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx < 0) {
+ // Propagate Undef.
+ Mask.push_back(Idx);
+ continue;
+ }
+
+ if (Idx < (int)NumElts)
+ Idx = OtherSV->getMaskElt(Idx);
+ else
+ Idx = HasSameOp0 ? Idx - NumElts : Idx;
+
+ Mask.push_back(Idx);
+ }
+
+ // Avoid introducing shuffles with illegal mask.
+ if (TLI.isShuffleMaskLegal(Mask, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
}
return SDValue();
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
}
+ // Type legalization might introduce new shuffles in the DAG.
+ // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
+ // -> (shuffle (VBinOp (A, B)), Undef, Mask).
+ if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
+ isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
+ LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
+ RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
+
+ if (SVN0->getMask().equals(SVN1->getMask())) {
+ EVT VT = N->getValueType(0);
+ SDValue UndefVector = LHS.getOperand(1);
+ SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ LHS.getOperand(0), RHS.getOperand(0));
+ AddUsersToWorkList(N);
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
+ &SVN0->getMask()[0]);
+ }
+ }
+
return SDValue();
}
// fold select C, 16, 0 -> shl C, 4
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
- TLI.getBooleanContents(N0.getValueType().isVector()) ==
- TargetLowering::ZeroOrOneBooleanContent) {
+ TLI.getBooleanContents(N0.getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent) {
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.