/// Returns a vector_shuffle mask for an movs{s|d}, movd
/// operation of specified width.
-static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
+static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 8> Mask;
}
}
-static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
+static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT,
SDValue V1, unsigned TargetMask,
SelectionDAG &DAG) {
switch(Opc) {
}
}
-static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
+static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT,
SDValue V1, SDValue V2, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
// Build a vector of constants
// Use an UNDEF node if MaskElt == -1.
// Spilt 64-bit constants in the 32-bit mode.
-static SDValue getConstVector(ArrayRef<int> Values, EVT VT,
+static SDValue getConstVector(ArrayRef<int> Values, MVT VT,
SelectionDAG &DAG,
SDLoc dl, bool IsMask = false) {
SmallVector<SDValue, 32> Ops;
bool Split = false;
- EVT ConstVecVT = VT;
+ MVT ConstVecVT = VT;
unsigned NumElts = VT.getVectorNumElements();
bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
if (!In64BitMode && VT.getScalarType() == MVT::i64) {
Split = true;
}
- EVT EltVT = ConstVecVT.getScalarType();
+ MVT EltVT = ConstVecVT.getVectorElementType();
for (unsigned i = 0; i < NumElts; ++i) {
bool IsUndef = Values[i] < 0 && IsMask;
SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+ assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
- * ElemsPerChunk);
+ // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
+ IdxVal &= ~(ElemsPerChunk - 1);
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
- makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
- ElemsPerChunk));
+ makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+ SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
// Insert the relevant vectorWidth bits.
unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+ assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
- * ElemsPerChunk);
+ // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
+ IdxVal &= ~(ElemsPerChunk - 1);
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+ SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}
// FALLTHROUGH
case MVT::v16i8:
case MVT::v32i8: {
- assert((VT.getSizeInBits() == 128 || Subtarget->hasAVX2()) &&
+ assert((VT.is128BitVector() || Subtarget->hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
DAG.getConstant(Rotation * Scale, DL, MVT::i8)));
}
- assert(VT.getSizeInBits() == 128 &&
+ assert(VT.is128BitVector() &&
"Rotate-based lowering only supports 128-bit lowering!");
assert(Mask.size() <= 16 &&
"Can shuffle at most 16 bytes in a 128-bit vector!");
if (Subtarget->hasSSE41()) {
// Not worth offseting 128-bit vectors if scale == 2, a pattern using
// PUNPCK will catch this in a later shuffle match.
- if (Offset && Scale == 2 && VT.getSizeInBits() == 128)
+ if (Offset && Scale == 2 && VT.is128BitVector())
return SDValue();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
return DAG.getBitcast(VT, InputV);
}
- assert(VT.getSizeInBits() == 128 && "Only 128-bit vectors can be extended.");
+ assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
// For any extends we can cheat for larger element sizes and use shuffle
// instructions that can fold with a load and/or copy.
// to 64-bits.
if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) {
assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
- assert(VT.getSizeInBits() == 128 && "Unexpected vector width!");
+ assert(VT.is128BitVector() && "Unexpected vector width!");
int LoIdx = Offset * EltBits;
SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
// FIXME: This should probably be generalized for 512-bit vectors as well.
- assert(VT.getSizeInBits() == 256 && "Only for 256-bit vector shuffles!");
+ assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
int LaneSize = Mask.size() / 2;
// If there are only inputs from one 128-bit lane, splitting will in fact be
/// \brief Try to lower a vector shuffle as a 128-bit shuffles.
static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT,
- ArrayRef<int> Mask,
- SDValue V1, SDValue V2,
- SelectionDAG &DAG) {
+ ArrayRef<int> Mask,
+ SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
assert(VT.getScalarSizeInBits() == 64 &&
"Unexpected element type size for 128bit shuffle.");
// To handle 256 bit vector requires VLX and most probably
// function lowerV2X128VectorShuffle() is better solution.
- assert(VT.getSizeInBits() == 512 &&
- "Unexpected vector size for 128bit shuffle.");
+ assert(VT.is512BitVector() && "Unexpected vector size for 128bit shuffle.");
SmallVector<int, 4> WidenedMask;
if (!canWidenShuffleElements(Mask, WidenedMask))
/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
ArrayRef<int> Mask = SVOp->getMask();
assert(Subtarget->hasAVX512() &&
"Cannot lower 512-bit vectors w/o basic ISA!");
- EVT ExtVT;
+ MVT ExtVT;
switch (VT.SimpleTy) {
default:
llvm_unreachable("Expected a vector of i1 elements");
}
// For each vector width, delegate to a specialized lowering routine.
- if (VT.getSizeInBits() == 128)
+ if (VT.is128BitVector())
return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
- if (VT.getSizeInBits() == 256)
+ if (VT.is256BitVector())
return lower256BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
- if (VT.getSizeInBits() == 512)
+ if (VT.is512BitVector())
return lower512BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
if (Is1BitVector)
MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
+ assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
- //if (IdxVal >= NumElems/2)
- // IdxVal -= NumElems/2;
- IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk;
+ // Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2
+ // this can be done with a mask.
+ IdxVal &= ElemsPerChunk - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getConstant(IdxVal, dl, MVT::i32));
}
// Insert the element into the desired chunk.
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
- unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
+ assert(isPowerOf2_32(NumEltsIn128));
+ // Since NumEltsIn128 is a power of 2 we can use mask instead of modulo.
+ unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, dl, MVT::i32));
// memory. In practice, we ''widen'' MemVT.
EVT WideVecVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- loadRegZize / MemVT.getScalarType().getSizeInBits());
+ loadRegZize / MemVT.getScalarSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
}
switch (Op.getOpcode()) {
- default: break;
- case X86ISD::PCMPEQM:
- case X86ISD::PCMPGTM:
- case X86ISD::CMPM:
- case X86ISD::CMPMU:
- return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
- case X86ISD::VFPCLASS:
- return DAG.getNode(ISD::OR, dl, VT, Op, VMask);
- case X86ISD::VTRUNC:
- case X86ISD::VTRUNCS:
- case X86ISD::VTRUNCUS:
- // We can't use ISD::VSELECT here because it is not always "Legal"
- // for the destination type. For example vpmovqb require only AVX512
- // and vselect that can operate on byte element type require BWI
- OpcodeSelect = X86ISD::SELECT;
- break;
+ default: break;
+ case X86ISD::PCMPEQM:
+ case X86ISD::PCMPGTM:
+ case X86ISD::CMPM:
+ case X86ISD::CMPMU:
+ return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
+ case X86ISD::VFPCLASS:
+ return DAG.getNode(ISD::OR, dl, VT, Op, VMask);
+ case X86ISD::VTRUNC:
+ case X86ISD::VTRUNCS:
+ case X86ISD::VTRUNCUS:
+ // We can't use ISD::VSELECT here because it is not always "Legal"
+ // for the destination type. For example vpmovqb require only AVX512
+ // and vselect that can operate on byte element type require BWI
+ OpcodeSelect = X86ISD::SELECT;
+ break;
}
if (PreservedSrc.getOpcode() == ISD::UNDEF)
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
llvm_unreachable("Valid scale values are 1, 2, 4, 8");
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
- EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- EVT MaskVT =
+ MVT MaskVT =
MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
SmallVector<SDValue, 8> Elts;
EVT SVT = VT.getScalarType();
unsigned SVTBits = SVT.getSizeInBits();
- const APInt &One = APInt(SVTBits, 1);
+ APInt One(SVTBits, 1);
unsigned NumElems = VT.getVectorNumElements();
for (unsigned i=0; i !=NumElems; ++i) {
}
ConstantSDNode *ND = cast<ConstantSDNode>(Op);
- const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue());
+ APInt C(SVTBits, ND->getAPIntValue().getZExtValue());
uint64_t ShAmt = C.getZExtValue();
if (ShAmt >= SVTBits) {
Elts.push_back(DAG.getUNDEF(SVT));
if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
isa<ConstantSDNode>(Amt2)) {
// Replace this node with two shifts followed by a MOVSS/MOVSD.
- EVT CastVT = MVT::v4i32;
+ MVT CastVT = MVT::v4i32;
SDValue Splat1 =
DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
// Extract the two vectors
SDValue V1 = Extract128BitVector(R, 0, DAG, dl);
// +ve/-ve Amt = rotate left/right.
// Split 256-bit integers.
- if (VT.getSizeInBits() == 256)
+ if (VT.is256BitVector())
return Lower256IntArith(Op, DAG);
- assert(VT.getSizeInBits() == 128 && "Only rotate 128-bit vectors!");
+ assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
// Attempt to rotate by immediate.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
unsigned Depth) const {
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
if (Op.getOpcode() == X86ISD::SETCC_CARRY)
- return Op.getValueType().getScalarType().getSizeInBits();
+ return Op.getValueType().getScalarSizeInBits();
// Fallback case.
return 1;
// doesn't preclude something switching to the shorter encoding post-RA.
//
// FIXME: Should teach these routines about AVX vector widths.
- if (FloatDomain && VT.getSizeInBits() == 128) {
+ if (FloatDomain && VT.is128BitVector()) {
if (Mask.equals({0, 0}) || Mask.equals({1, 1})) {
bool Lo = Mask.equals({0, 0});
unsigned Shuffle;
// We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
// variants as none of these have single-instruction variants that are
// superior to the UNPCK formulation.
- if (!FloatDomain && VT.getSizeInBits() == 128 &&
+ if (!FloatDomain && VT.is128BitVector() &&
(Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) ||
Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) ||
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
- unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = Cond.getValueType().getScalarSizeInBits();
// Don't optimize vector selects that map to mask-registers.
if (BitWidth == 1)
if (VT.getScalarType() == MVT::i16)
return SDValue();
// Dynamic blending was only available from SSE4.1 onward.
- if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41())
+ if (VT.is128BitVector() && !Subtarget->hasSSE41())
return SDValue();
// Byte blends are only available in AVX2
- if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 &&
- !Subtarget->hasAVX2())
+ if (VT == MVT::v32i8 && !Subtarget->hasAVX2())
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
case ISD::ANY_EXTEND:
return Op;
case ISD::ZERO_EXTEND: {
- unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
+ unsigned InBits = NarrowVT.getScalarSizeInBits();
APInt Mask = APInt::getAllOnesValue(InBits);
- Mask = Mask.zext(VT.getScalarType().getSizeInBits());
+ Mask = Mask.zext(VT.getScalarSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
Op, DAG.getConstant(Mask, DL, VT));
}
}
}
- if (Subtarget->hasAVX() && VT.isVector() && VT.getSizeInBits() == 256)
+ if (Subtarget->hasAVX() && VT.is256BitVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;