- SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), DL,
- EltVT);
- SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), DL,
- EltVT);
- SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), DL,
- EltVT);
+ auto GetShift = [&](unsigned OpCode, SDValue V, int Shifter) {
+ MVT VT = V.getSimpleValueType();
+ SmallVector<SDValue, 32> Shifters(
+ VT.getVectorNumElements(),
+ DAG.getConstant(Shifter, DL, VT.getVectorElementType()));
+ return DAG.getNode(OpCode, DL, VT, V,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Shifters));
+ };
+ auto GetMask = [&](SDValue V, APInt Mask) {
+ MVT VT = V.getSimpleValueType();
+ SmallVector<SDValue, 32> Masks(
+ VT.getVectorNumElements(),
+ DAG.getConstant(Mask, DL, VT.getVectorElementType()));
+ return DAG.getNode(ISD::AND, DL, VT, V,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Masks));
+ };
+
+ // We don't want to incur the implicit masks required to SRL vNi8 vectors on
+ // x86, so set the SRL type to have elements at least i16 wide. This is
+ // correct because all of our SRLs are followed immediately by a mask anyways
+ // that handles any bits that sneak into the high bits of the byte elements.
+ MVT SrlVT = Len > 8 ? VT : MVT::getVectorVT(MVT::i16, VecSize / 16);