for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v8i64);
if (MaskNode->getOpcode() == X86ISD::VBROADCAST) {
unsigned NumEltsInMask = MaskNode->getNumOperands();
MaskNode = MaskNode->getOperand(0);
- auto *CN = dyn_cast<ConstantSDNode>(MaskNode);
- if (CN) {
+ if (auto *CN = dyn_cast<ConstantSDNode>(MaskNode)) {
APInt MaskEltValue = CN->getAPIntValue();
for (unsigned i = 0; i < NumEltsInMask; ++i)
RawMask.push_back(MaskEltValue.getLoBits(MaskLoBits).getZExtValue());
if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
return false;
- auto *C = dyn_cast<Constant>(MaskCP->getConstVal());
- if (C) {
+ if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodeVPERMVMask(C, VT, Mask);
if (Mask.empty())
return false;
if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
return false;
- auto *C = dyn_cast<Constant>(MaskCP->getConstVal());
- if (C) {
+ if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
DecodeVPERMV3Mask(C, VT, Mask);
if (Mask.empty())
return false;
return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
}
+/// Lower shuffles where an entire half of a 256-bit vector is UNDEF.
+/// This allows for fast cases such as subvector extraction/insertion
+/// or shuffling smaller vector types which can lower more efficiently.
+static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(VT.getSizeInBits() == 256 && "Expected 256-bit vector");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
+
+ bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts);
+ bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts);
+ if (!UndefLower && !UndefUpper)
+ return SDValue();
+
+ // Upper half is undef and lower half is whole upper subvector.
+ // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ if (UndefUpper &&
+ isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+ DAG.getIntPtrConstant(HalfNumElts, DL));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
+ // Lower half is undef and upper half is whole lower subvector.
+ // e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ if (UndefLower &&
+ isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+ DAG.getIntPtrConstant(HalfNumElts, DL));
+ }
+
+ // AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
+ if (UndefLower && Subtarget->hasAVX2() &&
+ (VT == MVT::v4f64 || VT == MVT::v4i64))
+ return SDValue();
+
+ // If the shuffle only uses the lower halves of the input operands,
+ // then extract them and perform the 'half' shuffle at half width.
+ // e.g. vector_shuffle <X, X, X, X, u, u, u, u> or <X, X, u, u>
+ int HalfIdx1 = -1, HalfIdx2 = -1;
+ SmallVector<int, 8> HalfMask;
+ unsigned Offset = UndefLower ? HalfNumElts : 0;
+ for (unsigned i = 0; i != HalfNumElts; ++i) {
+ int M = Mask[i + Offset];
+ if (M < 0) {
+ HalfMask.push_back(M);
+ continue;
+ }
+
+ // Determine which of the 4 half vectors this element is from.
+ // i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
+ int HalfIdx = M / HalfNumElts;
+
+ // Only shuffle using the lower halves of the inputs.
+ // TODO: Investigate usefulness of shuffling with upper halves.
+ if (HalfIdx != 0 && HalfIdx != 2)
+ return SDValue();
+
+ // Determine the element index into its half vector source.
+ int HalfElt = M % HalfNumElts;
+
+ // We can shuffle with up to 2 half vectors, set the new 'half'
+ // shuffle mask accordingly.
+ if (-1 == HalfIdx1 || HalfIdx1 == HalfIdx) {
+ HalfMask.push_back(HalfElt);
+ HalfIdx1 = HalfIdx;
+ continue;
+ }
+ if (-1 == HalfIdx2 || HalfIdx2 == HalfIdx) {
+ HalfMask.push_back(HalfElt + HalfNumElts);
+ HalfIdx2 = HalfIdx;
+ continue;
+ }
+
+ // Too many half vectors referenced.
+ return SDValue();
+ }
+ assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
+
+ auto GetHalfVector = [&](int HalfIdx) {
+ if (HalfIdx < 0)
+ return DAG.getUNDEF(HalfVT);
+ SDValue V = (HalfIdx < 2 ? V1 : V2);
+ HalfIdx = (HalfIdx % 2) * HalfNumElts;
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
+ DAG.getIntPtrConstant(HalfIdx, DL));
+ };
+
+ SDValue Half1 = GetHalfVector(HalfIdx1);
+ SDValue Half2 = GetHalfVector(HalfIdx2);
+ SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
+ DAG.getIntPtrConstant(Offset, DL));
+}
+
/// \brief Test whether the specified input (0 or 1) is in-place blended by the
/// given mask.
///
DL, VT, V1, V2, Mask, Subtarget, DAG))
return Insertion;
+ // Handle special cases where the lower or upper half is UNDEF.
+ if (SDValue V =
+ lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return V;
+
// There is a really nice hard cut-over between AVX1 and AVX2 that means we
// can check for those subtargets here and avoid much of the subtarget
// querying in the per-vector-type lowering routines. With AVX1 we have
Src2, Src1);
return DAG.getBitcast(VT, Res);
}
+ case CONVERT_MASK_TO_VEC: {
+ SDValue Mask = Op.getOperand(1);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getNode(IntrData->Opc0, dl, VT, VMask);
+ }
default:
break;
}
Op.getOpcode() == ISD::SRA && !Subtarget->hasXOP())
return ArithmeticShiftRight64(ShiftAmt);
- if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) {
+ if (VT == MVT::v16i8 ||
+ (Subtarget->hasInt256() && VT == MVT::v32i8) ||
+ VT == MVT::v64i8) {
unsigned NumElts = VT.getVectorNumElements();
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
R, ShiftAmt, DAG);
SHL = DAG.getBitcast(VT, SHL);
// Zero out the rightmost bits.
- SmallVector<SDValue, 32> V(
- NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
+ DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
R, ShiftAmt, DAG);
SRL = DAG.getBitcast(VT, SRL);
// Zero out the leftmost bits.
- SmallVector<SDValue, 32> V(
- NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
}
if (Op.getOpcode() == ISD::SRA) {
// ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
- SmallVector<SDValue, 32> V(NumElts,
- DAG.getConstant(128 >> ShiftAmt, dl,
- MVT::i8));
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
+
+ SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
// Use the original mask here, do not modify the mask twice
- Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);
+ Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);
// The value that should be stored
MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
-/// same as extracting the high 128-bit part of 256-bit vector and then
-/// inserting the result into the low part of a new 256-bit vector
-static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
-
- // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
- for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j)
- if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
- SVOp->getMaskElt(j) >= 0)
- return false;
-
- return true;
-}
-
-/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
-/// same as extracting the low 128-bit part of 256-bit vector and then
-/// inserting the result into the high part of a new 256-bit vector
-static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
-
- // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
- for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j)
- if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
- SVOp->getMaskElt(j) >= 0)
- return false;
-
- return true;
-}
-
/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
+/// FIXME: This could be expanded to support 512 bit vectors as well.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget* Subtarget) {
return DCI.CombineTo(N, InsV);
}
- //===--------------------------------------------------------------------===//
- // Combine some shuffles into subvector extracts and inserts:
- //
-
- // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
- if (isShuffleHigh128VectorInsertLow(SVOp)) {
- SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl);
- SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl);
- return DCI.CombineTo(N, InsV);
- }
-
- // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
- if (isShuffleLow128VectorInsertHigh(SVOp)) {
- SDValue V = Extract128BitVector(V1, 0, DAG, dl);
- SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl);
- return DCI.CombineTo(N, InsV);
- }
-
return SDValue();
}
// If we're negating a FMUL node on a target with FMA, then we can avoid the
// use of a constant by performing (-0 - A*B) instead.
- // FIXME: Check rounding control flags as well once it becomes available.
+ // FIXME: Check rounding control flags as well once it becomes available.
if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) {
SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
if (Class == &X86::GR8RegClass || Class == &X86::GR16RegClass ||
Class == &X86::GR32RegClass || Class == &X86::GR64RegClass) {
unsigned Size = VT.getSizeInBits();
- MVT::SimpleValueType SimpleTy = Size == 1 || Size == 8 ? MVT::i8
- : Size == 16 ? MVT::i16
- : Size == 32 ? MVT::i32
- : Size == 64 ? MVT::i64
- : MVT::Other;
- unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, SimpleTy);
+ if (Size == 1) Size = 8;
+ unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
if (DestReg > 0) {
Res.first = DestReg;
- Res.second = SimpleTy == MVT::i8 ? &X86::GR8RegClass
- : SimpleTy == MVT::i16 ? &X86::GR16RegClass
- : SimpleTy == MVT::i32 ? &X86::GR32RegClass
+ Res.second = Size == 8 ? &X86::GR8RegClass
+ : Size == 16 ? &X86::GR16RegClass
+ : Size == 32 ? &X86::GR32RegClass
: &X86::GR64RegClass;
assert(Res.second->contains(Res.first) && "Register in register class");
} else {