namespace {
static cl::opt<bool>
CombinerAA("combiner-alias-analysis", cl::Hidden,
- cl::desc("Turn on alias analysis during testing"));
+ cl::desc("Enable DAG combiner alias-analysis heuristics"));
static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
- cl::desc("Include global information in alias analysis"));
+ cl::desc("Enable DAG combiner's use of IR alias analysis"));
+
+ static cl::opt<bool>
+ UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
+ cl::desc("Enable DAG combiner's use of TBAA"));
+
+#ifndef NDEBUG
+ static cl::opt<std::string>
+ CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
+ cl::desc("Only use DAG-combiner alias analysis in this"
+ " function"));
+#endif
/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
/// now.
///
void AddUsersToWorkList(SDNode *N) {
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- UI != UE; ++UI)
- AddToWorkList(*UI);
+ for (SDNode *Node : N->uses())
+ AddToWorkList(Node);
}
/// visit - call the node-specific routine that knows how to fold each
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
+ SDValue visitRotate(SDNode *N);
SDValue visitCTLZ(SDNode *N);
SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitINSERT_SUBVECTOR(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
- SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+ SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
bool NotExtCompare = false);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
SDLoc DL, bool foldBooleans = true);
+
+ bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) const;
+ bool isOneUseSetCC(SDValue N) const;
+
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
/// \return True if some memory operations were changed.
bool MergeConsecutiveStores(StoreSDNode *N);
+ /// \brief Try to transform a truncation where C is a constant:
+ /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
+ ///
+ /// \p N needs to be a truncation and its first operand an AND. Other
+ /// requirements are checked by the function (e.g. that trunc is
+ /// single-use) and if missed an empty SDValue is returned.
+ SDValue distributeTruncateThroughAnd(SDNode *N);
+
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
explicit WorkListRemover(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ void NodeDeleted(SDNode *N, SDNode *E) override {
DC.removeFromWorkList(N);
}
};
}
}
-
// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
-// that selects between the values 1 and 0, making it equivalent to a setcc.
-// Also, set the incoming LHS, RHS, and CC references to the appropriate
-// nodes based on the type of node we are checking. This simplifies life a
-// bit for the callers.
-static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
- SDValue &CC) {
+// that selects between the target values used for true and false, making it
+// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
+// the appropriate nodes based on the type of node we are checking. This
+// simplifies life a bit for the callers.
+bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+ SDValue &CC) const {
if (N.getOpcode() == ISD::SETCC) {
LHS = N.getOperand(0);
RHS = N.getOperand(1);
CC = N.getOperand(2);
return true;
}
- if (N.getOpcode() == ISD::SELECT_CC &&
- N.getOperand(2).getOpcode() == ISD::Constant &&
- N.getOperand(3).getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
- cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
- LHS = N.getOperand(0);
- RHS = N.getOperand(1);
- CC = N.getOperand(4);
- return true;
- }
- return false;
+
+ if (N.getOpcode() != ISD::SELECT_CC ||
+ !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
+ !TLI.isConstFalseVal(N.getOperand(3).getNode()))
+ return false;
+
+ LHS = N.getOperand(0);
+ RHS = N.getOperand(1);
+ CC = N.getOperand(4);
+ return true;
}
// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
// one use. If this is true, it allows the users to invert the operation for
// free when it is profitable to do so.
-static bool isOneUseSetCC(SDValue N) {
+bool DAGCombiner::isOneUseSetCC(SDValue N) const {
SDValue N0, N1, N2;
if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
return true;
return false;
}
-// \brief Returns the SDNode if it is a constant BuildVector or constant int.
+/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
+/// elements are all the same constant or undefined.
+static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
+ BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
+ if (!C)
+ return false;
+
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs) &&
+ EltVT.getSizeInBits() >= SplatBitSize);
+}
+
+// \brief Returns the SDNode if it is a constant BuildVector or constant.
static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
return N.getNode();
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
if(BV && BV->isConstant())
return BV;
- return NULL;
+ return nullptr;
+}
+
+// \brief Returns the SDNode if it is a constant splat BuildVector or constant
+// int.
+static ConstantSDNode *isConstOrConstSplat(SDValue N) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N))
+ return BV->getConstantSplatValue();
+
+ return nullptr;
}
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDLoc dl(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
- if (NewOp.getNode() == 0)
+ if (!NewOp.getNode())
return SDValue();
AddToWorkList(NewOp.getNode());
SDLoc dl(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
- if (NewOp.getNode() == 0)
+ if (!NewOp.getNode())
return SDValue();
AddToWorkList(NewOp.getNode());
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
- if (NN0.getNode() == 0)
+ if (!NN0.getNode())
return SDValue();
bool Replace1 = false;
NN1 = NN0;
else {
NN1 = PromoteOperand(N1, PVT, Replace1);
- if (NN1.getNode() == 0)
+ if (!NN1.getNode())
return SDValue();
}
N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
- if (N0.getNode() == 0)
+ if (!N0.getNode())
return SDValue();
AddToWorkList(N0.getNode());
SDValue RV = combine(N);
- if (RV.getNode() == 0)
+ if (!RV.getNode())
continue;
++NodesCombined;
case ISD::SHL: return visitSHL(N);
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
+ case ISD::ROTR:
+ case ISD::ROTL: return visitRotate(N);
case ISD::CTLZ: return visitCTLZ(N);
case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
}
return SDValue();
}
SDValue RV = visit(N);
// If nothing happened, try a target-specific DAG combine.
- if (RV.getNode() == 0) {
+ if (!RV.getNode()) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned NULL!");
}
// If nothing happened still, try promoting the operation.
- if (RV.getNode() == 0) {
+ if (!RV.getNode()) {
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
// If N is a commutative binary node, try commuting it to enable more
// sdisel CSE.
- if (RV.getNode() == 0 &&
- SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
N0.getOperand(1));
// reassociate add
SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
- if (RADD.getNode() != 0)
+ if (RADD.getNode())
return RADD;
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
+ if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+ }
}
}
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
return SDValue();
}
-/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
-/// elements are all the same constant or undefined.
-static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
- BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
- if (!C)
- return false;
-
- APInt SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- EVT EltVT = N->getValueType(0).getVectorElementType();
- return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
- HasAnyUndefs) &&
- EltVT.getSizeInBits() >= SplatBitSize);
-}
-
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
- N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0;
+ N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
: APInt();
- N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0;
+ N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
: APInt();
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
- SDValue Sh(0,0), Y(0,0);
+ SDValue Sh(nullptr,0), Y(nullptr,0);
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
(isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
// reassociate mul
SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
- if (RMUL.getNode() != 0)
+ if (RMUL.getNode())
return RMUL;
return SDValue();
// The type-legalizer generates this pattern when loading illegal
// vector types from memory. In many cases this allows additional shuffle
// optimizations.
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
- N0.getOperand(1).getOpcode() == ISD::UNDEF &&
- N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ // There are other cases where moving the shuffle after the xor/and/or
+ // is profitable even if shuffles don't perform a swizzle.
+ // If both shuffles use the same mask, and both shuffles have the same first
+ // or second operand, then it might still be profitable to move the shuffle
+ // after the xor/and/or operation.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
- assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
+ assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
"Inputs to shuffles are not the same type");
-
- unsigned NumElts = VT.getVectorNumElements();
-
+
// Check that both shuffles use the same mask. The masks are known to be of
// the same length because the result vector type is the same.
- bool SameMask = true;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx0 = SVN0->getMaskElt(i);
- int Idx1 = SVN1->getMaskElt(i);
- if (Idx0 != Idx1) {
- SameMask = false;
- break;
+ // Check also that shuffles have only one use to avoid introducing extra
+ // instructions.
+ if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
+ SVN0->getMask().equals(SVN1->getMask())) {
+ SDValue ShOp = N0->getOperand(1);
+
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
+ if (!LegalTypes)
+ ShOp = DAG.getConstant(0, VT);
+ else
+ ShOp = SDValue();
}
- }
- if (SameMask) {
- SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- N0.getOperand(0), N1.getOperand(0));
- AddToWorkList(Op.getNode());
- return DAG.getVectorShuffle(VT, SDLoc(N), Op,
- DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+ // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
+ // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
+ // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
+ if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
+ SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ N0->getOperand(0), N1->getOperand(0));
+ AddToWorkList(NewNode.getNode());
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
+ &SVN0->getMask()[0]);
+ }
+
+ // Don't try to fold this node if it requires introducing a
+ // build vector of all zeros that might be illegal at this stage.
+ ShOp = N0->getOperand(0);
+ if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
+ if (!LegalTypes)
+ ShOp = DAG.getConstant(0, VT);
+ else
+ ShOp = SDValue();
+ }
+
+ // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
+ // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
+ // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
+ if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
+ SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ N0->getOperand(1), N1->getOperand(1));
+ AddToWorkList(NewNode.getNode());
+ return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
+ &SVN0->getMask()[0]);
+ }
}
}
return DAG.getConstant(0, VT);
// reassociate and
SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
- if (RAND.getNode() != 0)
+ if (RAND.getNode())
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
if (!TLI.isOperationLegal(ISD::BSWAP, VT))
return SDValue();
- SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+ SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr);
// Look for either
// (or (or (and), (and)), (or (and), (and)))
// (or (or (or (and), (and)), (and)), (and))
return N0;
if (ISD::isBuildVectorAllOnes(N1.getNode()))
return N1;
+
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
+ // Do this only if the resulting shuffle is legal.
+ if (isa<ShuffleVectorSDNode>(N0) &&
+ isa<ShuffleVectorSDNode>(N1) &&
+ N0->getOperand(1) == N1->getOperand(1) &&
+ ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
+ bool CanFold = true;
+ unsigned NumElts = VT.getVectorNumElements();
+ const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
+ const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
+ // We construct two shuffle masks:
+ // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
+ // and N1 as the second operand.
+ // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
+ // and N0 as the second operand.
+ // We do this because OR is commutable and therefore there might be
+ // two ways to fold this node into a shuffle.
+ SmallVector<int,4> Mask1;
+ SmallVector<int,4> Mask2;
+
+ for (unsigned i = 0; i != NumElts && CanFold; ++i) {
+ int M0 = SV0->getMaskElt(i);
+ int M1 = SV1->getMaskElt(i);
+
+ // Both shuffle indexes are undef. Propagate Undef.
+ if (M0 < 0 && M1 < 0) {
+ Mask1.push_back(M0);
+ Mask2.push_back(M0);
+ continue;
+ }
+
+ if (M0 < 0 || M1 < 0 ||
+ (M0 < (int)NumElts && M1 < (int)NumElts) ||
+ (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
+ CanFold = false;
+ break;
+ }
+
+ Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
+ Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
+ }
+
+ if (CanFold) {
+ // Fold this sequence only if the resulting shuffle is 'legal'.
+ if (TLI.isShuffleMaskLegal(Mask1, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
+ N1->getOperand(0), &Mask1[0]);
+ if (TLI.isShuffleMaskLegal(Mask2, VT))
+ return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
+ N0->getOperand(0), &Mask2[0]);
+ }
+ }
}
// fold (or x, undef) -> -1
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
SDValue BSwap = MatchBSwapHWord(N, N0, N1);
- if (BSwap.getNode() != 0)
+ if (BSwap.getNode())
return BSwap;
BSwap = MatchBSwapHWordLow(N, N0, N1);
- if (BSwap.getNode() != 0)
+ if (BSwap.getNode())
return BSwap;
// reassociate or
SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
- if (ROR.getNode() != 0)
+ if (ROR.getNode())
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) == 0.
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
-// reduces to a rotate in direction shift2 by Pos and a rotate in direction
-// shift1 by Neg. The range [0, OpSize) means that we only need to consider
-// shift amounts with defined behavior.
+// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
+// in direction shift1 by Neg. The range [0, OpSize) means that we only need
+// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
// If OpSize is a power of 2 then:
//
//
// for all Neg and Pos. Note that the (or ...) then invokes undefined
// behavior if Pos == 0 (and consequently Neg == OpSize).
- //
+ //
// We could actually use [A] whenever OpSize is a power of 2, but the
// only extra cases that it would match are those uninteresting ones
// where Neg and Pos are never in range at the same time. E.g. for
// always invokes undefined behavior for 32-bit X.
//
// Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
- unsigned LoBits = 0;
+ unsigned MaskLoBits = 0;
if (Neg.getOpcode() == ISD::AND &&
isPowerOf2_64(OpSize) &&
Neg.getOperand(1).getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
Neg = Neg.getOperand(0);
- LoBits = Log2_64(OpSize);
+ MaskLoBits = Log2_64(OpSize);
}
// Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
return 0;
SDValue NegOp1 = Neg.getOperand(1);
+ // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
+ // Pos'. The truncation is redundant for the purpose of the equality.
+ if (MaskLoBits &&
+ Pos.getOpcode() == ISD::AND &&
+ Pos.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
+ Pos = Pos.getOperand(0);
+
// The condition we need is now:
//
// (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
return false;
// Now we just need to check that OpSize & Mask == Width & Mask.
- if (LoBits)
- return Width.getLoBits(LoBits) == 0;
+ if (MaskLoBits)
+ // Opsize & Mask is 0 since Mask is Opsize - 1.
+ return Width.getLoBits(MaskLoBits) == 0;
return Width == OpSize;
}
}
}
- return 0;
+ return nullptr;
}
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// Must be a legal type. Expanded 'n promoted things won't work with rotates.
EVT VT = LHS.getValueType();
- if (!TLI.isTypeLegal(VT)) return 0;
+ if (!TLI.isTypeLegal(VT)) return nullptr;
// The target must have at least one rotate flavor.
bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
- if (!HasROTL && !HasROTR) return 0;
+ if (!HasROTL && !HasROTR) return nullptr;
// Match "(X shl/srl V1) & V2" where V2 may not be present.
SDValue LHSShift; // The shift.
SDValue LHSMask; // AND value if any.
if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
- return 0; // Not part of a rotate.
+ return nullptr; // Not part of a rotate.
SDValue RHSShift; // The shift.
SDValue RHSMask; // AND value if any.
if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
- return 0; // Not part of a rotate.
+ return nullptr; // Not part of a rotate.
if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
- return 0; // Not shifting the same value.
+ return nullptr; // Not shifting the same value.
if (LHSShift.getOpcode() == RHSShift.getOpcode())
- return 0; // Shifts must disagree.
+ return nullptr; // Shifts must disagree.
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
if ((LShVal + RShVal) != OpSizeInBits)
- return 0;
+ return nullptr;
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
- return 0;
+ return nullptr;
// If the shift amount is sign/zext/any-extended just peel it off.
SDValue LExtOp0 = LHSShiftAmt;
if (TryR)
return TryR;
- return 0;
+ return nullptr;
}
SDValue DAGCombiner::visitXOR(SDNode *N) {
return N0;
// reassociate xor
SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
- if (RXOR.getNode() != 0)
+ if (RXOR.getNode())
return RXOR;
// fold !(x cc y) -> (x !cc y)
/// visitShiftByConstant - Handle transforms common to the three shifts, when
/// the shift amount is a constant.
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+ // We can't and shouldn't fold opaque constants.
+ if (Amt->isOpaque())
+ return SDValue();
+
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
break;
}
- // We require the RHS of the binop to be a constant as well.
+ // We require the RHS of the binop to be a constant and not opaque as well.
ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
- if (!BinOpCst) return SDValue();
+ if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant.
// If it is not a shift, it pessimizes some common cases like:
SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
N->getValueType(0),
LHS->getOperand(1), N->getOperand(1));
+ assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
// Create the new shift.
SDValue NewShift = DAG.getNode(N->getOpcode(),
return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
+SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
+ assert(N->getOpcode() == ISD::TRUNCATE);
+ assert(N->getOperand(0).getOpcode() == ISD::AND);
+
+ // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
+ if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
+ SDValue N01 = N->getOperand(0).getOperand(1);
+
+ if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
+ EVT TruncVT = N->getValueType(0);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ APInt TruncC = N01C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
+
+ return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
+ DAG.getConstant(TruncC, TruncVT));
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitRotate(SDNode *N) {
+ // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
+ if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), NewOp1);
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSHL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
+ // If setcc produces all-one true value then:
+ // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
+ if (N1CV && N1CV->isConstant()) {
+ if (N0.getOpcode() == ISD::AND &&
+ TLI.getBooleanContents(true) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
+
+ if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
+ SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
+ if (C.getNode())
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
+ }
+ } else {
+ N1C = isConstOrConstSplat(N1);
+ }
+ }
}
// fold (shl c1, c2) -> c1<<c2
return DAG.getConstant(0, VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
- N1.getOperand(0).getOpcode() == ISD::AND &&
- N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
- SDValue N101 = N1.getOperand(0).getOperand(1);
- if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- EVT TruncVT = N1.getValueType();
- SDValue N100 = N1.getOperand(0).getOperand(0);
- APInt TruncC = N101C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getSizeInBits());
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
- DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
- DAG.getNode(ISD::TRUNCATE,
- SDLoc(N),
- TruncVT, N100),
- DAG.getConstant(TruncC, TruncVT)));
- }
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SHL &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ if (N1C && N0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N0C1->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
}
// fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND) &&
- N0.getOperand(0).getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
- uint64_t c1 =
- cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- EVT InnerShiftVT = N0.getOperand(0).getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
- if (c2 >= OpSizeInBits - InnerShiftSize) {
- if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
- N0.getOperand(0)->getOperand(0)),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ N0.getOperand(0).getOpcode() == ISD::SHL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0Op0.getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
+ DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+ N0Op0->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
}
}
// Only fold this if the inner zext has no other uses to avoid increasing
// the total number of instructions.
if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
- N0.getOperand(0).getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
- uint64_t c1 =
- cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
- if (c1 < VT.getSizeInBits()) {
- uint64_t c2 = N1C->getZExtValue();
- if (c1 == c2) {
- SDValue NewOp0 = N0.getOperand(0);
- EVT CountVT = NewOp0.getOperand(1).getValueType();
- SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
- NewOp0, DAG.getConstant(c2, CountVT));
- AddToWorkList(NewSHL.getNode());
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
+ if (c1 < VT.getScalarSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 == c2) {
+ SDValue NewOp0 = N0.getOperand(0);
+ EVT CountVT = NewOp0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
+ NewOp0, DAG.getConstant(c2, CountVT));
+ AddToWorkList(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ }
}
}
}
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- if (c1 < VT.getSizeInBits()) {
- uint64_t c2 = N1C->getZExtValue();
- APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
- VT.getSizeInBits() - c1);
- SDValue Shift;
- if (c2 > c1) {
- Mask = Mask.shl(c2-c1);
- Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c2-c1, N1.getValueType()));
- } else {
- Mask = Mask.lshr(c1-c2);
- Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1-c2, N1.getValueType()));
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N0C1->getZExtValue();
+ if (c1 < OpSizeInBits) {
+ uint64_t c2 = N1C->getZExtValue();
+ APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask = Mask.shl(c2 - c1);
+ Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c2 - c1, N1.getValueType()));
+ } else {
+ Mask = Mask.lshr(c1 - c2);
+ Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c1 - c2, N1.getValueType()));
+ }
+ return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
+ DAG.getConstant(Mask, VT));
}
- return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
- DAG.getConstant(Mask, VT));
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ unsigned BitSize = VT.getScalarSizeInBits();
SDValue HiBitsMask =
- DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
- VT.getSizeInBits() -
- N1C->getZExtValue()),
- VT);
+ DAG.getConstant(APInt::getHighBitsSet(BitSize,
+ BitSize - N1C->getZExtValue()), VT);
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
HiBitsMask);
}
if (N1C) {
- SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+ SDValue NewSHL = visitShiftByConstant(N, N1C);
if (NewSHL.getNode())
return NewSHL;
}
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ N1C = isConstOrConstSplat(N1);
}
// fold (sra c1, c2) -> (sra c1, c2)
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRA) {
- if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
- if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+ if (Sum >= OpSizeInBits)
+ Sum = OpSizeInBits - 1;
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(Sum, N1C->getValueType(0)));
+ DAG.getConstant(Sum, N1.getValueType()));
}
}
// result_size - n != m.
// If truncate is free for the target sext(shl) is likely to result in better
// code.
- if (N0.getOpcode() == ISD::SHL) {
+ if (N0.getOpcode() == ISD::SHL && N1C) {
// Get the two constanst of the shifts, CN0 = m, CN = n.
- const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (N01C && N1C) {
+ const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
+ if (N01C) {
+ LLVMContext &Ctx = *DAG.getContext();
// Determine what the truncate's result bitsize and type would be.
- EVT TruncVT =
- EVT::getIntegerVT(*DAG.getContext(),
- OpSizeInBits - N1C->getZExtValue());
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
+
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
- N1.getOperand(0).getOpcode() == ISD::AND &&
- N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
- SDValue N101 = N1.getOperand(0).getOperand(1);
- if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- EVT TruncVT = N1.getValueType();
- SDValue N100 = N1.getOperand(0).getOperand(0);
- APInt TruncC = N101C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
- return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
- DAG.getNode(ISD::AND, SDLoc(N),
- TruncVT,
- DAG.getNode(ISD::TRUNCATE,
- SDLoc(N),
- TruncVT, N100),
- DAG.getConstant(TruncC, TruncVT)));
- }
- }
-
- // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
+ }
+
+ // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
// if c1 is equal to the number of bits the trunc removes
if (N0.getOpcode() == ISD::TRUNCATE &&
(N0.getOperand(0).getOpcode() == ISD::SRL ||
N0.getOperand(0).getOpcode() == ISD::SRA) &&
N0.getOperand(0).hasOneUse() &&
N0.getOperand(0).getOperand(1).hasOneUse() &&
- N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
- EVT LargeVT = N0.getOperand(0).getValueType();
- ConstantSDNode *LargeShiftAmt =
- cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
-
- if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
- LargeShiftAmt->getZExtValue()) {
- SDValue Amt =
- DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
- getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
- N0.getOperand(0).getOperand(0), Amt);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+ N1C) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ unsigned LargeShiftVal = LargeShift->getZExtValue();
+ EVT LargeVT = N0Op0.getValueType();
+
+ if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
+ getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
+ SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
+ N0Op0.getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+ }
}
}
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
if (N1C) {
- SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+ SDValue NewSRA = visitShiftByConstant(N, N1C);
if (NewSRA.getNode())
return NewSRA;
}
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ N1C = isConstOrConstSplat(N1);
}
// fold (srl c1, c2) -> c1 >>u c2
return DAG.getConstant(0, VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SRL &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ if (N1C && N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N01C->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
}
// fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
}
// fold (srl (shl x, c), c) -> (and x, cst2)
- if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
- N0.getValueSizeInBits() <= 64) {
- uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL >> ShAmt, VT));
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
+ unsigned BitSize = N0.getScalarValueSizeInBits();
+ if (BitSize <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, VT));
+ }
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
EVT SmallVT = N0.getOperand(0).getValueType();
- if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ unsigned BitSize = SmallVT.getScalarSizeInBits();
+ if (N1C->getZExtValue() >= BitSize)
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
N0.getOperand(0),
DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
AddToWorkList(SmallShift.getNode());
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
DAG.getConstant(Mask, VT));
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
// bit, which is unmodified by sra.
- if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
if (N0.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
if (N1C && N0.getOpcode() == ISD::CTLZ &&
- N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
APInt KnownZero, KnownOne;
DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
// fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
- N1.getOperand(0).getOpcode() == ISD::AND &&
- N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
- SDValue N101 = N1.getOperand(0).getOperand(1);
- if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
- EVT TruncVT = N1.getValueType();
- SDValue N100 = N1.getOperand(0).getOperand(0);
- APInt TruncC = N101C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getSizeInBits());
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0,
- DAG.getNode(ISD::AND, SDLoc(N),
- TruncVT,
- DAG.getNode(ISD::TRUNCATE,
- SDLoc(N),
- TruncVT, N100),
- DAG.getConstant(TruncC, TruncVT)));
- }
+ N1.getOperand(0).getOpcode() == ISD::AND) {
+ SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
+ if (NewOp1.getNode())
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
}
// fold operands of srl based on knowledge that the low bits are not
return SDValue(N, 0);
if (N1C) {
- SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ SDValue NewSRL = visitShiftByConstant(N, N1C);
if (NewSRL.getNode())
return NewSRL;
}
std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT LoVT, HiVT;
- llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
// Split the inputs.
SDValue Lo, Hi, LL, LH, RL, RH;
- llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
return SDValue();
SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
- llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
- llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
- llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
+ std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
SDLoc(N));
}
+// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext
+// dag node into a ConstantSDNode or a build_vector of constants.
+// This function is called by the DAGCombiner when visiting sext/zext/aext
+// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+// Vector extends are not folded if operations are legal; this is to
+// avoid introducing illegal build_vector dag nodes.
+static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
+ SelectionDAG &DAG, bool LegalTypes,
+ bool LegalOperations) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
+ Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
+
+ // fold (sext c1) -> c1
+ // fold (zext c1) -> c1
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
+
+ // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
+ // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
+ // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
+ EVT SVT = VT.getScalarType();
+ if (!(VT.isVector() &&
+ (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
+ return nullptr;
+
+ // We can fold this node into a build_vector.
+ unsigned VTBits = SVT.getSizeInBits();
+ unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
+ unsigned ShAmt = VTBits - EVTBits;
+ SmallVector<SDValue, 8> Elts;
+ unsigned NumElts = N0->getNumOperands();
+ SDLoc DL(N);
+
+ for (unsigned i=0; i != NumElts; ++i) {
+ SDValue Op = N0->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF) {
+ Elts.push_back(DAG.getUNDEF(SVT));
+ continue;
+ }
+
+ ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
+ const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
+ if (Opcode == ISD::SIGN_EXTEND)
+ Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
+ SVT));
+ else
+ Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
+ SVT));
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], NumElts).getNode();
+}
+
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension are possible and the above
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- // fold (sext c1) -> c1
- if (isa<ConstantSDNode>(N0))
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0);
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
// on vectors in one instruction. We only perform this transformation on
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
bool DoXform = true;
TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
- if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+ if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
}
}
- // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
unsigned ElementWidth = VT.getScalarType().getSizeInBits();
SDValue NegOne =
DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
NegOne, DAG.getConstant(0, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
if (SCC.getNode()) return SCC;
- if (!VT.isVector() &&
- (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) {
- return DAG.getSelect(SDLoc(N), VT,
- DAG.getSetCC(SDLoc(N),
- getSetCCResultType(VT),
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get()),
- NegOne, DAG.getConstant(0, VT));
+
+ if (!VT.isVector()) {
+ EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
+ SDLoc DL(N);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ SDValue SetCC = DAG.getSetCC(DL,
+ SetCCVT,
+ N0.getOperand(0), N0.getOperand(1), CC);
+ EVT SelectVT = getSetCCResultType(VT);
+ return DAG.getSelect(DL, VT,
+ DAG.getSExtOrTrunc(SetCC, DL, SelectVT),
+ NegOne, DAG.getConstant(0, VT));
+
+ }
}
}
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- // fold (zext c1) -> c1
- if (isa<ConstantSDNode>(N0))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
// on vectors in one instruction. We only perform this transformation on
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
bool DoXform = true;
TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
- if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+ if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- // fold (aext c1) -> c1
- if (isa<ConstantSDNode>(N0))
- return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0);
+ if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
+ LegalOperations))
+ return SDValue(Res, 0);
+
// fold (aext (aext x)) -> (aext x)
// fold (aext (zext x)) -> (zext x)
// fold (aext (sext x)) -> (sext x)
// on vectors in one instruction. We only perform this transformation on
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
bool DoXform = true;
!ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ ISD::LoadExtType ExtType = LN0->getExtensionType();
EVT MemVT = LN0->getMemoryVT();
- SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N),
- VT, LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- CombineTo(N, ExtLoad);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad),
- ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
}
if (N0.getOpcode() == ISD::SETCC) {
default: break;
case ISD::Constant: {
const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
- assert(CV != 0 && "Const value should be ConstSDNode.");
+ assert(CV && "Const value should be ConstSDNode.");
const APInt &CVal = CV->getAPIntValue();
APInt NewVal = CVal & Mask;
if (NewVal != CVal)
if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
N0.getOperand(1), false);
- if (BSwap.getNode() != 0)
+ if (BSwap.getNode())
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
BSwap, N1);
}
// creates this pattern) and before operation legalization after which
// we need to be more careful about the vector instructions that we generate.
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- LegalTypes && !LegalOperations && N0->hasOneUse()) {
+ LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
EVT VecTy = N0.getOperand(0).getValueType();
EVT ExTy = N0.getValueType();
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
- LD1->getPointerInfo().getAddrSpace() !=
- LD2->getPointerInfo().getAddrSpace())
+ LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
(N1.getOperand(0).hasOneUse() &&
N1.getOperand(0).getOpcode() == ISD::SRL))) {
- SDNode *Trunc = 0;
+ SDNode *Trunc = nullptr;
if (N1.getOpcode() == ISD::TRUNCATE) {
// Look pass the truncate.
Trunc = N1.getNode();
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
if (isa<ConstantSDNode>(Offset))
- for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
- E = BasePtr.getNode()->use_end(); I != E; ++I) {
- SDNode *Use = *I;
+ for (SDNode *Use : BasePtr.getNode()->uses()) {
if (Use == Ptr.getNode())
continue;
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
- for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
- E = Ptr.getNode()->use_end(); I != E; ++I) {
- SDNode *Use = *I;
+ for (SDNode *Use : Ptr.getNode()->uses()) {
if (Use == N)
continue;
if (N->hasPredecessorHelper(Use, Visited, Worklist))
if (Ptr.getNode()->hasOneUse())
return false;
- for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
- E = Ptr.getNode()->use_end(); I != E; ++I) {
- SDNode *Op = *I;
+ for (SDNode *Op : Ptr.getNode()->uses()) {
if (Op == N ||
(Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
continue;
// Check for #1.
bool TryNext = false;
- for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
- EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
- SDNode *Use = *II;
+ for (SDNode *Use : BasePtr.getNode()->uses()) {
if (Use == Ptr.getNode())
continue;
// transformation.
if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
bool RealUse = false;
- for (SDNode::use_iterator III = Use->use_begin(),
- EEE = Use->use_end(); III != EEE; ++III) {
- SDNode *UseUse = *III;
+ for (SDNode *UseUse : Use->uses()) {
if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
RealUse = true;
}
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
- if (UseAA) {
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
+ if (UseAA && LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
// This is used to get some contextual information about legal types, etc.
SelectionDAG *DAG;
- LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL,
- unsigned Shift = 0, SelectionDAG *DAG = NULL)
+ LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
+ unsigned Shift = 0, SelectionDAG *DAG = nullptr)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
LoadedSlice(const LoadedSlice &LS)
/// \brief Get the offset in bytes of this slice in the original chunk of
/// bits.
- /// \pre DAG != NULL.
+ /// \pre DAG != nullptr.
uint64_t getOffsetFromBase() const {
assert(DAG && "Missing context.");
bool IsBigEndian =
};
}
-/// \brief Sorts LoadedSlice according to their offset.
-struct LoadedSliceSorter {
- bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) {
- assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
- return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
- }
-};
-
/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// Sort the slices so that elements that are likely to be next to each
// other in memory are next to each other in the list.
- std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter());
+ std::sort(LoadedSlices.begin(), LoadedSlices.end(),
+ [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
+ assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
+ return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
+ });
const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
// First (resp. Second) is the first (resp. Second) potentially candidate
// to be placed in a paired load.
- const LoadedSlice *First = NULL;
- const LoadedSlice *Second = NULL;
+ const LoadedSlice *First = nullptr;
+ const LoadedSlice *Second = nullptr;
for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
// Set the beginning of the pair.
First = Second) {
unsigned RequiredAlignment = 0;
if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
// move to the next pair, this type is hopeless.
- Second = NULL;
+ Second = nullptr;
continue;
}
// Check if we meet the alignment requirement.
assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
--GlobalLSCost.Loads;
// Move to the next pair.
- Second = NULL;
+ Second = nullptr;
}
}
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
- if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
// legalization.
MVT VT = MVT::getIntegerVT(NumBytes*8);
if (!DC->isTypeLegal(VT))
- return 0;
+ return nullptr;
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
unsigned SequenceNum;
};
-/// Sorts store nodes in a link according to their offset from a shared
-// base ptr.
-struct ConsecutiveMemoryChainSorter {
- bool operator()(MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase;
- }
-};
-
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
break;
} else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
if (Ldn->isVolatile()) {
- Index = NULL;
+ Index = nullptr;
break;
}
NextInChain = Ldn->getChain().getNode();
continue;
} else {
- Index = NULL;
+ Index = nullptr;
break;
}
}
// Sort the memory operands according to their distance from the base pointer.
std::sort(StoreNodes.begin(), StoreNodes.end(),
- ConsecutiveMemoryChainSorter());
+ [](MemOpLink LHS, MemOpLink RHS) {
+ return LHS.OffsetFromBase < RHS.OffsetFromBase ||
+ (LHS.OffsetFromBase == RHS.OffsetFromBase &&
+ LHS.SequenceNum > RHS.SequenceNum);
+ });
// Scan the memory operations on the chain and find the first non-consecutive
// store memory address.
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
- if (UseAA) {
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
+ if (UseAA && ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
// We only perform this optimization before the op legalization phase because
// we may introduce new vector instructions which are not backed by TD
// patterns. For example on AVX, extracting elements from a wide vector
- // without using extract_subvector.
+ // without using extract_subvector. However, if we can find an underlying
+ // scalar value, then we can always use that.
if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
- && ConstEltNo && !LegalOperations) {
+ && ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
int NumElem = VT.getVectorNumElements();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
return DAG.getUNDEF(NVT);
// Select the right vector half to extract from.
+ SDValue SVInVec;
if (OrigElt < NumElem) {
- InVec = InVec->getOperand(0);
+ SVInVec = InVec->getOperand(0);
} else {
- InVec = InVec->getOperand(1);
+ SVInVec = InVec->getOperand(1);
OrigElt -= NumElem;
}
- EVT IndexTy = TLI.getVectorIdxTy();
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
- InVec, DAG.getConstant(OrigElt, IndexTy));
+ if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue InOp = SVInVec.getOperand(OrigElt);
+ if (InOp.getValueType() != NVT) {
+ assert(InOp.getValueType().isInteger() && NVT.isInteger());
+ InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
+ }
+
+ return InOp;
+ }
+
+ // FIXME: We should handle recursing on other vector shuffles and
+ // scalar_to_vector here as well.
+
+ if (!LegalOperations) {
+ EVT IndexTy = TLI.getVectorIdxTy();
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
+ SVInVec, DAG.getConstant(OrigElt, IndexTy));
+ }
}
// Perform only after legalization to ensure build_vector / vector_shuffle
NewLoad = true;
}
- LoadSDNode *LN0 = NULL;
- const ShuffleVectorSDNode *SVN = NULL;
+ LoadSDNode *LN0 = nullptr;
+ const ShuffleVectorSDNode *SVN = nullptr;
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
// constant index, bail out.
if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
- VecIn1 = VecIn2 = SDValue(0, 0);
+ VecIn1 = VecIn2 = SDValue(nullptr, 0);
break;
}
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
- if (VecIn1.getNode() == 0) {
+ if (!VecIn1.getNode()) {
VecIn1 = ExtractedFromVec;
- } else if (VecIn2.getNode() == 0) {
+ } else if (!VecIn2.getNode()) {
VecIn2 = ExtractedFromVec;
} else {
// Too many inputs.
- VecIn1 = VecIn2 = SDValue(0, 0);
+ VecIn1 = VecIn2 = SDValue(nullptr, 0);
break;
}
}
// Attempt to transform a single input vector to the correct type.
if ((VT != VecIn1.getValueType())) {
// We don't support shuffeling between TWO values of different types.
- if (VecIn2.getNode() != 0)
+ if (VecIn2.getNode())
return SDValue();
// We only support widening of vectors which are half the size of the
}
}
+ // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
+ // -> (BUILD_VECTOR A, B, ..., C, D, ...)
+ if (N->getNumOperands() == 2 &&
+ N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SmallVector<SDValue, 8> Opnds;
+ unsigned BuildVecNumElts = N0.getNumOperands();
+
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N0.getOperand(i));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N1.getOperand(i));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
+ Opnds.size());
+ }
+
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
return SDValue();
}
+SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N2 = N->getOperand(2);
+
+ // If the input vector is a concatenation, and the insert replaces
+ // one of the halves, we can optimize into a single concat_vectors.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+ N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) {
+ APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
+ EVT VT = N->getValueType(0);
+
+ // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+ // (concat_vectors Z, Y)
+ if (InsIdx == 0)
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+ N->getOperand(1), N0.getOperand(1));
+
+ // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
+ // (concat_vectors X, Z)
+ if (InsIdx == VT.getVectorNumElements()/2)
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
+ N0.getOperand(0), N->getOperand(1));
+ }
+
+ return SDValue();
+}
+
/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
/// an AND to a vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
// select_cc setlt X, 1, -X, X ->
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
if (N1C) {
- ConstantSDNode *SubC = NULL;
+ ConstantSDNode *SubC = nullptr;
if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
(N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, const void *&CV) {
// Assume it is a primitive operation.
- Base = Ptr; Offset = 0; GV = 0; CV = 0;
+ Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
// If it's an adding a simple constant then integrate the offset.
if (Base.getOpcode() == ISD::ADD) {
bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+#ifndef NDEBUG
+ if (CombinerAAOnlyFunc.getNumOccurrences() &&
+ CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
+ UseAA = false;
+#endif
if (UseAA && SrcValue1 && SrcValue2) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
AliasAnalysis::AliasResult AAResult =
- AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
- AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
+ AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1,
+ UseTBAA ? TBAAInfo1 : nullptr),
+ AliasAnalysis::Location(SrcValue2, Overlap2,
+ UseTBAA ? TBAAInfo2 : nullptr));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
if (Depth > 6 || Aliases.size() == 2) {
Aliases.clear();
Aliases.push_back(OriginalChain);
- break;
+ return;
}
// Don't bother if we've been before.
break;
}
}
+
+ // We need to be careful here to also search for aliases through the
+ // value operand of a store, etc. Consider the following situation:
+ // Token1 = ...
+ // L1 = load Token1, %52
+ // S1 = store Token1, L1, %51
+ // L2 = load Token1, %52+8
+ // S2 = store Token1, L2, %51+8
+ // Token2 = Token(S1, S2)
+ // L3 = load Token2, %53
+ // S3 = store Token2, L3, %52
+ // L4 = load Token2, %53+8
+ // S4 = store Token2, L4, %52+8
+ // If we search for aliases of S3 (which loads address %52), and we look
+ // only through the chain, then we'll miss the trivial dependence on L1
+ // (which also loads from %52). We then might change all loads and
+ // stores to use Token1 as their chain operand, which could result in
+ // copying %53 into %52 before copying %52 into %51 (which should
+ // happen first).
+ //
+ // The problem is, however, that searching for such data dependencies
+ // can become expensive, and the cost is not directly related to the
+ // chain depth. Instead, we'll rule out such configurations here by
+ // insisting that we've visited all chain users (except for users
+ // of the original chain, which is not necessary). When doing this,
+ // we need to look through nodes we don't care about (otherwise, things
+ // like register copies will interfere with trivial cases).
+
+ SmallVector<const SDNode *, 16> Worklist;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(),
+ IE = Visited.end(); I != IE; ++I)
+ if (*I != OriginalChain.getNode())
+ Worklist.push_back(*I);
+
+ while (!Worklist.empty()) {
+ const SDNode *M = Worklist.pop_back_val();
+
+ // We have already visited M, and want to make sure we've visited any uses
+ // of M that we care about. For uses that we've not visisted, and don't
+ // care about, queue them to the worklist.
+
+ for (SDNode::use_iterator UI = M->use_begin(),
+ UIE = M->use_end(); UI != UIE; ++UI)
+ if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) {
+ if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
+ // We've not visited this use, and we care about it (it could have an
+ // ordering dependency with the original node).
+ Aliases.clear();
+ Aliases.push_back(OriginalChain);
+ return;
+ }
+
+ // We've not visited this use, but we don't care about it. Mark it as
+ // visited and enqueue it to the worklist.
+ Worklist.push_back(*UI);
+ }
+ }
}
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking