SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
- SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
+ SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
return false;
}
-// \brief Returns the SDNode if it is a constant BuildVector or constant int.
+/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose elements
+/// are all the same constant or undefined, with a splat no wider than one element.
+static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
+ BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
+ if (!C)
+ return false;
+
+ APInt SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs) &&
+ EltVT.getSizeInBits() >= SplatBitSize);
+}
+
+/// \brief Returns the SDNode if it is a constant BuildVector or constant int.
static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
if (isa<ConstantSDNode>(N))
return N.getNode();
return NULL;
}
+/// \brief Looks through N for a single integer constant.
+///
+/// If N is itself a ConstantSDNode, that node is returned. If N is a
+/// BUILD_VECTOR, the result of its zero-argument isConstantSplat() query is
+/// returned unchanged (presumably null when the vector is not a constant
+/// splat -- confirm against the BuildVectorSDNode declaration). Any other
+/// node yields null.
+static ConstantSDNode *isConstOrConstSplat(SDValue N) {
+  ConstantSDNode *Scalar = dyn_cast<ConstantSDNode>(N);
+  if (Scalar)
+    return Scalar;
+
+  BuildVectorSDNode *Vec = dyn_cast<BuildVectorSDNode>(N);
+  return Vec ? Vec->isConstantSplat() : nullptr;
+}
+
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
SDValue N0, SDValue N1) {
EVT VT = N0.getValueType();
return SDValue();
}
-/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
-/// elements are all the same constant or undefined.
-static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
- BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
- if (!C)
- return false;
-
- APInt SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- EVT EltVT = N->getValueType(0).getVectorElementType();
- return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
- HasAnyUndefs) &&
- EltVT.getSizeInBits() >= SplatBitSize);
-}
-
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
/// visitShiftByConstant - Handle transforms common to the three shifts, when
/// the shift amount is a constant.
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
- assert(isa<ConstantSDNode>(N->getOperand(1)) &&
- "Expected an ConstantSDNode operand.");
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
// We can't and shouldn't fold opaque constants.
- if (cast<ConstantSDNode>(N->getOperand(1))->isOpaque())
+ if (Amt->isOpaque())
return SDValue();
SDNode *LHS = N->getOperand(0).getNode();
if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
SDValue N01 = N->getOperand(0).getOperand(1);
- if (ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01)) {
+ if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
EVT TruncVT = N->getValueType(0);
SDValue N00 = N->getOperand(0).getOperand(0);
APInt TruncC = N01C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
return DAG.getNode(ISD::AND, SDLoc(N), TruncVT,
DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00),
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
if (VT.isVector()) {
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
// If setcc produces all-one true value then:
// (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
- if (N1CV && N1CV->isConstant() &&
- TLI.getBooleanContents(true) ==
- TargetLowering::ZeroOrNegativeOneBooleanContent &&
- N0.getOpcode() == ISD::AND) {
- SDValue N00 = N0->getOperand(0);
- SDValue N01 = N0->getOperand(1);
- BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
-
- if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
- SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
- if (C.getNode())
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
+ if (N1CV && N1CV->isConstant()) {
+ if (N0.getOpcode() == ISD::AND &&
+ TLI.getBooleanContents(true) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
+
+ if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
+ SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
+ if (C.getNode())
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
+ }
+ } else {
+ N1C = isConstOrConstSplat(N1);
}
}
}
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SHL &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ if (N1C && N0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N0C1->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
}
// fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND) &&
- N0.getOperand(0).getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
- uint64_t c1 =
- cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- EVT InnerShiftVT = N0.getOperand(0).getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
- if (c2 >= OpSizeInBits - InnerShiftSize) {
- if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
- DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
- N0.getOperand(0)->getOperand(0)),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ N0.getOperand(0).getOpcode() == ISD::SHL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0Op0.getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
+ DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
+ N0Op0->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
}
}
// Only fold this if the inner zext has no other uses to avoid increasing
// the total number of instructions.
if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
- N0.getOperand(0).getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
- uint64_t c1 =
- cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
- if (c1 < VT.getSizeInBits()) {
- uint64_t c2 = N1C->getZExtValue();
- if (c1 == c2) {
- SDValue NewOp0 = N0.getOperand(0);
- EVT CountVT = NewOp0.getOperand(1).getValueType();
- SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
- NewOp0, DAG.getConstant(c2, CountVT));
- AddToWorkList(NewSHL.getNode());
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ N0.getOperand(0).getOpcode() == ISD::SRL) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
+ if (c1 < VT.getScalarSizeInBits()) {
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 == c2) {
+ SDValue NewOp0 = N0.getOperand(0);
+ EVT CountVT = NewOp0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
+ NewOp0, DAG.getConstant(c2, CountVT));
+ AddToWorkList(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
+ }
}
}
}
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
- if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- if (c1 < VT.getSizeInBits()) {
- uint64_t c2 = N1C->getZExtValue();
- APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
- VT.getSizeInBits() - c1);
- SDValue Shift;
- if (c2 > c1) {
- Mask = Mask.shl(c2-c1);
- Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c2-c1, N1.getValueType()));
- } else {
- Mask = Mask.lshr(c1-c2);
- Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1-c2, N1.getValueType()));
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N0C1->getZExtValue();
+ if (c1 < OpSizeInBits) {
+ uint64_t c2 = N1C->getZExtValue();
+ APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
+ SDValue Shift;
+ if (c2 > c1) {
+ Mask = Mask.shl(c2 - c1);
+ Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c2 - c1, N1.getValueType()));
+ } else {
+ Mask = Mask.lshr(c1 - c2);
+ Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c1 - c2, N1.getValueType()));
+ }
+ return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
+ DAG.getConstant(Mask, VT));
}
- return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
- DAG.getConstant(Mask, VT));
}
}
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ unsigned BitSize = VT.getScalarSizeInBits();
SDValue HiBitsMask =
- DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
- VT.getSizeInBits() -
- N1C->getZExtValue()),
- VT);
+ DAG.getConstant(APInt::getHighBitsSet(BitSize,
+ BitSize - N1C->getZExtValue()), VT);
return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
HiBitsMask);
}
if (N1C) {
- SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+ SDValue NewSHL = visitShiftByConstant(N, N1C);
if (NewSHL.getNode())
return NewSHL;
}
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ N1C = isConstOrConstSplat(N1);
}
// fold (sra c1, c2) -> (sra c1, c2)
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRA) {
- if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
- if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+ if (Sum >= OpSizeInBits)
+ Sum = OpSizeInBits - 1;
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(Sum, N1C->getValueType(0)));
+ DAG.getConstant(Sum, N1.getValueType()));
}
}
// result_size - n != m.
// If truncate is free for the target sext(shl) is likely to result in better
// code.
- if (N0.getOpcode() == ISD::SHL) {
+ if (N0.getOpcode() == ISD::SHL && N1C) {
// Get the two constanst of the shifts, CN0 = m, CN = n.
- const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (N01C && N1C) {
+ const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
+ if (N01C) {
+ LLVMContext &Ctx = *DAG.getContext();
// Determine what the truncate's result bitsize and type would be.
- EVT TruncVT =
- EVT::getIntegerVT(*DAG.getContext(),
- OpSizeInBits - N1C->getZExtValue());
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
+
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
}
- // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+ // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
// if c1 is equal to the number of bits the trunc removes
if (N0.getOpcode() == ISD::TRUNCATE &&
(N0.getOperand(0).getOpcode() == ISD::SRL ||
N0.getOperand(0).getOpcode() == ISD::SRA) &&
N0.getOperand(0).hasOneUse() &&
N0.getOperand(0).getOperand(1).hasOneUse() &&
- N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
- EVT LargeVT = N0.getOperand(0).getValueType();
- ConstantSDNode *LargeShiftAmt =
- cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
-
- if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
- LargeShiftAmt->getZExtValue()) {
- SDValue Amt =
- DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
- getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
- SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
- N0.getOperand(0).getOperand(0), Amt);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+ N1C) {
+ SDValue N0Op0 = N0.getOperand(0);
+ if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
+ unsigned LargeShiftVal = LargeShift->getZExtValue();
+ EVT LargeVT = N0Op0.getValueType();
+
+ if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
+ getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
+ SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
+ N0Op0.getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
+ }
}
}
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
if (N1C) {
- SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+ SDValue NewSRA = visitShiftByConstant(N, N1C);
if (NewSRA.getNode())
return NewSRA;
}
if (VT.isVector()) {
SDValue FoldedVOp = SimplifyVBinOp(N);
if (FoldedVOp.getNode()) return FoldedVOp;
+
+ N1C = isConstOrConstSplat(N1);
}
// fold (srl c1, c2) -> c1 >>u c2
return DAG.getConstant(0, VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SRL &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 >= OpSizeInBits)
- return DAG.getConstant(0, VT);
- return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, N1.getValueType()));
+ if (N1C && N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
+ uint64_t c1 = N01C->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
}
// fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
}
// fold (srl (shl x, c), c) -> (and x, cst2)
- if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
- N0.getValueSizeInBits() <= 64) {
- uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
- return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
- DAG.getConstant(~0ULL >> ShAmt, VT));
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
+ unsigned BitSize = N0.getScalarValueSizeInBits();
+ if (BitSize <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, VT));
+ }
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
EVT SmallVT = N0.getOperand(0).getValueType();
- if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ unsigned BitSize = SmallVT.getScalarSizeInBits();
+ if (N1C->getZExtValue() >= BitSize)
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
N0.getOperand(0),
DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
AddToWorkList(SmallShift.getNode());
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
+ APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
DAG.getConstant(Mask, VT));
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
// bit, which is unmodified by sra.
- if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
if (N0.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
// fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
if (N1C && N0.getOpcode() == ISD::CTLZ &&
- N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
APInt KnownZero, KnownOne;
DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
return SDValue(N, 0);
if (N1C) {
- SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ SDValue NewSRL = visitShiftByConstant(N, N1C);
if (NewSRL.getNode())
return NewSRL;
}
}
; CHECK-LABEL: test_sllw_1:
-; CHECK: psllw $0, %xmm0
-; CHECK-NEXT: ret
+; CHECK-NOT: psllw $0, %xmm0
+; CHECK: ret
define <8 x i16> @test_sllw_2(<8 x i16> %InVec) {
entry:
}
; CHECK-LABEL: test_slld_1:
-; CHECK: pslld $0, %xmm0
-; CHECK-NEXT: ret
+; CHECK-NOT: pslld $0, %xmm0
+; CHECK: ret
define <4 x i32> @test_slld_2(<4 x i32> %InVec) {
entry:
}
; CHECK-LABEL: test_sllq_1:
-; CHECK: psllq $0, %xmm0
-; CHECK-NEXT: ret
+; CHECK-NOT: psllq $0, %xmm0
+; CHECK: ret
define <2 x i64> @test_sllq_2(<2 x i64> %InVec) {
entry:
}
; CHECK-LABEL: test_sraw_1:
-; CHECK: psraw $0, %xmm0
-; CHECK-NEXT: ret
+; CHECK-NOT: psraw $0, %xmm0
+; CHECK: ret
define <8 x i16> @test_sraw_2(<8 x i16> %InVec) {
entry:
}
; CHECK-LABEL: test_srad_1:
-; CHECK: psrad $0, %xmm0
-; CHECK-NEXT: ret
+; CHECK-NOT: psrad $0, %xmm0
+; CHECK: ret
define <4 x i32> @test_srad_2(<4 x i32> %InVec) {
entry:
}
; CHECK-LABEL: test_srlw_1:
-; CHECK: psrlw $0, %xmm0
-; CHECK-NEXT: ret
+; CHECK-NOT: psrlw $0, %xmm0
+; CHECK: ret
define <8 x i16> @test_srlw_2(<8 x i16> %InVec) {
entry:
}
; CHECK-LABEL: test_srld_1:
-; CHECK: psrld $0, %xmm0
-; CHECK-NEXT: ret
+; CHECK-NOT: psrld $0, %xmm0
+; CHECK: ret
define <4 x i32> @test_srld_2(<4 x i32> %InVec) {
entry:
}
; CHECK-LABEL: test_srlq_1:
-; CHECK: psrlq $0, %xmm0
-; CHECK-NEXT: ret
+; CHECK-NOT: psrlq $0, %xmm0
+; CHECK: ret
define <2 x i64> @test_srlq_2(<2 x i64> %InVec) {
entry:
; CHECK-LABEL: test_srlq_3:
; CHECK: psrlq $63, %xmm0
; CHECK-NEXT: ret
+
+
+; Two ashr by splat amounts (2, then 4) are expected to merge into one psrad $6.
+; CHECK-LABEL: sra_sra_v4i32:
+; CHECK: psrad $6, %xmm0
+; CHECK-NEXT: retq
+define <4 x i32> @sra_sra_v4i32(<4 x i32> %x) nounwind {
+  %inner = ashr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
+  %outer = ashr <4 x i32> %inner, <i32 4, i32 4, i32 4, i32 4>
+  ret <4 x i32> %outer
+}
+
+; Two lshr by splat amounts (2, then 4) are expected to merge into one psrld $6.
+; CHECK-LABEL: @srl_srl_v4i32
+; CHECK: psrld $6, %xmm0
+; CHECK-NEXT: ret
+define <4 x i32> @srl_srl_v4i32(<4 x i32> %x) nounwind {
+  %inner = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
+  %outer = lshr <4 x i32> %inner, <i32 4, i32 4, i32 4, i32 4>
+  ret <4 x i32> %outer
+}
+
+; shl by splat 4 followed by lshr by the same splat is expected to become a
+; plain mask (andps) with no shift instructions left.
+; CHECK-LABEL: @srl_shl_v4i32
+; CHECK: andps
+; CHECK-NEXT: retq
+define <4 x i32> @srl_shl_v4i32(<4 x i32> %x) nounwind {
+  %shl = shl <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
+  %srl = lshr <4 x i32> %shl, <i32 4, i32 4, i32 4, i32 4>
+  ret <4 x i32> %srl
+}
+
+; lshr by splat 31 only reads the sign bit, so the preceding variable ashr is
+; expected to disappear, leaving just psrld $31.
+; CHECK-LABEL: @srl_sra_31_v4i32
+; CHECK: psrld $31, %xmm0
+; CHECK-NEXT: ret
+define <4 x i32> @srl_sra_31_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+  %signfill = ashr <4 x i32> %x, %y
+  %signbit = lshr <4 x i32> %signfill, <i32 31, i32 31, i32 31, i32 31>
+  ret <4 x i32> %signbit
+}
+
+; Two shl by splat amounts (2, then 4) are expected to merge into one pslld $6.
+; CHECK-LABEL: @shl_shl_v4i32
+; CHECK: pslld $6, %xmm0
+; CHECK-NEXT: ret
+define <4 x i32> @shl_shl_v4i32(<4 x i32> %x) nounwind {
+  %inner = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
+  %outer = shl <4 x i32> %inner, <i32 4, i32 4, i32 4, i32 4>
+  ret <4 x i32> %outer
+}
+
+; ashr by splat 4 followed by shl by the same splat is expected to become a
+; plain mask (andps) with no shift instructions left.
+; CHECK-LABEL: @shl_sra_v4i32
+; CHECK: andps
+; CHECK-NEXT: ret
+define <4 x i32> @shl_sra_v4i32(<4 x i32> %x) nounwind {
+  %sra = ashr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
+  %shl = shl <4 x i32> %sra, <i32 4, i32 4, i32 4, i32 4>
+  ret <4 x i32> %shl
+}
+
+; lshr by splat 2 followed by shl by splat 5 is expected to become a single
+; shift by the difference (pslld $3) plus a mask (pand).
+; CHECK-LABEL: @shl_srl_v4i32
+; CHECK: pslld $3, %xmm0
+; CHECK-NEXT: pand
+; CHECK-NEXT: ret
+define <4 x i32> @shl_srl_v4i32(<4 x i32> %x) nounwind {
+  %srl = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
+  %shl = shl <4 x i32> %srl, <i32 5, i32 5, i32 5, i32 5>
+  ret <4 x i32> %shl
+}
+
+; lshr by splat 2 on the narrow type, zext, then shl by the same splat amount
+; is expected to lower to a single andps.
+; CHECK-LABEL: @shl_zext_srl_v4i32
+; CHECK: andps
+; CHECK-NEXT: ret
+define <4 x i32> @shl_zext_srl_v4i32(<4 x i16> %x) nounwind {
+  %narrow = lshr <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
+  %wide = zext <4 x i16> %narrow to <4 x i32>
+  %res = shl <4 x i32> %wide, <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x i32> %res
+}
+
+; lshr by splat 16, trunc to i16, then ashr by splat 3 is expected to collapse
+; into one arithmetic shift on the wide type (psrad $19). The function anchor
+; is a LABEL directive, like the sibling tests, so the match below is confined
+; to this function's output.
+; CHECK-LABEL: @sra_trunc_srl_v4i32
+; CHECK: psrad $19, %xmm0
+; CHECK-NEXT: retq
+define <4 x i16> @sra_trunc_srl_v4i32(<4 x i32> %x) nounwind {
+  %srl = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
+  %trunc = trunc <4 x i32> %srl to <4 x i16>
+  %sra = ashr <4 x i16> %trunc, <i16 3, i16 3, i16 3, i16 3>
+  ret <4 x i16> %sra
+}
+
+; shl by splat 2 on the narrow type, zext, then shl by splat 17 is expected to
+; become a mask (pand) followed by one combined pslld $19.
+; CHECK-LABEL: @shl_zext_shl_v4i32
+; CHECK: pand
+; CHECK-NEXT: pslld $19, %xmm0
+; CHECK-NEXT: ret
+define <4 x i32> @shl_zext_shl_v4i32(<4 x i16> %x) nounwind {
+  %narrow = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
+  %wide = zext <4 x i16> %narrow to <4 x i32>
+  %res = shl <4 x i32> %wide, <i32 17, i32 17, i32 17, i32 17>
+  ret <4 x i32> %res
+}
+
+; Baseline: a lone ashr by splat 3 should still select psrad $3.
+; CHECK-LABEL: @sra_v4i32
+; CHECK: psrad $3, %xmm0
+; CHECK-NEXT: ret
+define <4 x i32> @sra_v4i32(<4 x i32> %x) nounwind {
+  %res = ashr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %res
+}
+
+; Baseline: a lone lshr by splat 3 should still select psrld $3.
+; CHECK-LABEL: @srl_v4i32
+; CHECK: psrld $3, %xmm0
+; CHECK-NEXT: ret
+define <4 x i32> @srl_v4i32(<4 x i32> %x) nounwind {
+  %srl = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %srl
+}
+
+; Baseline: a lone shl by splat 3 should still select pslld $3.
+; CHECK-LABEL: @shl_v4i32
+; CHECK: pslld $3, %xmm0
+; CHECK-NEXT: ret
+define <4 x i32> @shl_v4i32(<4 x i32> %x) nounwind {
+  %shl = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %shl
+}