static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
-static SDValue Insert128BitVector(SDValue Result,
- SDValue Vec,
- SDValue Idx,
- SelectionDAG &DAG,
- DebugLoc dl);
-
-static SDValue Extract128BitVector(SDValue Vec,
- SDValue Idx,
- SelectionDAG &DAG,
- DebugLoc dl);
-
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 instruction or a
/// simple subregister reference. Idx is an index in the 128 bits we
const TargetLowering &TLI, DebugLoc dl) {
assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
- unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
// Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
// and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32 ||
- Op.getValueType() == MVT::v8i32)
+ if (VT == MVT::v4i32 || VT == MVT::v8i32)
return Op;
- return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(),
+ return getZeroVector(VT, Subtarget->hasSSE2(),
Subtarget->hasAVX2(), DAG, dl);
}
// vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
// vpcmpeqd on 256-bit vectors.
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
- if (Op.getValueType() == MVT::v4i32 ||
- (Op.getValueType() == MVT::v8i32 && Subtarget->hasAVX2()))
+ if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasAVX2()))
return Op;
- return getOnesVector(Op.getValueType(), Subtarget->hasAVX2(), DAG, dl);
+ return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
}
SDValue LD = isVectorBroadcast(Op, Subtarget);
DAG.getUNDEF(Item.getValueType()),
&Mask[0]);
}
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
return CommuteVectorShuffle(SVOp, DAG);
if (isShift) {
- // No better options. Use a vshl / vsrl.
+ // No better options. Use a vshldq / vsrldq.
EVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
EVT EltVT = Op0.getValueType().getVectorElementType();
assert(EltVT == MVT::f32 || EltVT == MVT::f64);
- unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
bool Swap = false;
// SSE Condition code mapping:
if (SSECC == 8) {
if (SetCCOpcode == ISD::SETUEQ) {
SDValue UNORD, EQ;
- UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
- EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
+ UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(3, MVT::i8));
+ EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(0, MVT::i8));
return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
} else if (SetCCOpcode == ISD::SETONE) {
SDValue ORD, NEQ;
- ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
- NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
+ ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(7, MVT::i8));
+ NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(4, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
}
llvm_unreachable("Illegal FP comparison");
}
// Handle all other FP comparisons here.
- return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
+ return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(SSECC, MVT::i8));
}
// Break 256-bit integer vector compare into smaller ones.
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
+ unsigned Opc = 0;
bool Swap = false, Invert = false, FlipSigns = false;
- switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
- default: break;
- case MVT::i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
- case MVT::i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
- case MVT::i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
- case MVT::i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
- }
-
switch (SetCCOpcode) {
default: break;
case ISD::SETNE: Invert = true;
- case ISD::SETEQ: Opc = EQOpc; break;
+ case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
- case ISD::SETGT: Opc = GTOpc; break;
+ case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
case ISD::SETGE: Swap = true;
- case ISD::SETLE: Opc = GTOpc; Invert = true; break;
+ case ISD::SETLE: Opc = X86ISD::PCMPGT; Invert = true; break;
case ISD::SETULT: Swap = true;
- case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
+ case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break;
case ISD::SETUGE: Swap = true;
- case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
+ case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break;
}
if (Swap)
std::swap(Op0, Op1);
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
- if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42())
+ if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42())
return SDValue();
- if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41())
+ if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41())
return SDValue();
// Since SSE has no unsigned integer comparisons, we need to flip the sign
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// getTargetVShiftNOde - Handle vector element shifts where the shift amount
+// may or may not be a constant. Takes immediate version of shift as input.
+static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue SrcOp, SDValue ShAmt,
+ SelectionDAG &DAG) {
+ assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
+
+ if (isa<ConstantSDNode>(ShAmt)) {
+ switch (Opc) {
+ default: llvm_unreachable("Unknown target vector shift node");
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI:
+ return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+ }
+ }
+
+ // Change opcode to non-immediate version
+ switch (Opc) {
+ default: llvm_unreachable("Unknown target vector shift node");
+ case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
+ case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
+ case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
+ }
+
+ // Need to build a vector containing shift amount
+ // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
+ SDValue ShOps[4];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShOps[2] = DAG.getUNDEF(MVT::i32);
+ ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
+ return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+}
+
SDValue
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
case Intrinsic::x86_avx2_psrav_d_256:
return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_pcmpeq_b:
+ case Intrinsic::x86_sse2_pcmpeq_w:
+ case Intrinsic::x86_sse2_pcmpeq_d:
+ case Intrinsic::x86_sse41_pcmpeqq:
+ case Intrinsic::x86_avx2_pcmpeq_b:
+ case Intrinsic::x86_avx2_pcmpeq_w:
+ case Intrinsic::x86_avx2_pcmpeq_d:
+ case Intrinsic::x86_avx2_pcmpeq_q:
+ return DAG.getNode(X86ISD::PCMPEQ, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_pcmpgt_b:
+ case Intrinsic::x86_sse2_pcmpgt_w:
+ case Intrinsic::x86_sse2_pcmpgt_d:
+ case Intrinsic::x86_sse42_pcmpgtq:
+ case Intrinsic::x86_avx2_pcmpgt_b:
+ case Intrinsic::x86_avx2_pcmpgt_w:
+ case Intrinsic::x86_avx2_pcmpgt_d:
+ case Intrinsic::x86_avx2_pcmpgt_q:
+ return DAG.getNode(X86ISD::PCMPGT, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
// ptest and testp intrinsics. The intrinsic these come from are designed to
// return an integer value, not just an instruction so lower it to the ptest
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- // Fix vector shift instructions where the last operand is a non-immediate
- // i32 value.
- case Intrinsic::x86_avx2_pslli_w:
- case Intrinsic::x86_avx2_pslli_d:
- case Intrinsic::x86_avx2_pslli_q:
- case Intrinsic::x86_avx2_psrli_w:
- case Intrinsic::x86_avx2_psrli_d:
- case Intrinsic::x86_avx2_psrli_q:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
+ // SSE/AVX shift intrinsics
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
+ // Fix vector shift instructions where the last operand is a non-immediate
+ // i32 value.
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
return SDValue();
unsigned NewIntNo = 0;
- EVT ShAmtVT = MVT::v4i32;
switch (IntNo) {
- case Intrinsic::x86_sse2_pslli_w:
- NewIntNo = Intrinsic::x86_sse2_psll_w;
- break;
- case Intrinsic::x86_sse2_pslli_d:
- NewIntNo = Intrinsic::x86_sse2_psll_d;
- break;
- case Intrinsic::x86_sse2_pslli_q:
- NewIntNo = Intrinsic::x86_sse2_psll_q;
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntNo = Intrinsic::x86_mmx_psll_w;
break;
- case Intrinsic::x86_sse2_psrli_w:
- NewIntNo = Intrinsic::x86_sse2_psrl_w;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntNo = Intrinsic::x86_mmx_psll_d;
break;
- case Intrinsic::x86_sse2_psrli_d:
- NewIntNo = Intrinsic::x86_sse2_psrl_d;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntNo = Intrinsic::x86_mmx_psll_q;
break;
- case Intrinsic::x86_sse2_psrli_q:
- NewIntNo = Intrinsic::x86_sse2_psrl_q;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntNo = Intrinsic::x86_mmx_psrl_w;
break;
- case Intrinsic::x86_sse2_psrai_w:
- NewIntNo = Intrinsic::x86_sse2_psra_w;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntNo = Intrinsic::x86_mmx_psrl_d;
break;
- case Intrinsic::x86_sse2_psrai_d:
- NewIntNo = Intrinsic::x86_sse2_psra_d;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntNo = Intrinsic::x86_mmx_psrl_q;
break;
- case Intrinsic::x86_avx2_pslli_w:
- NewIntNo = Intrinsic::x86_avx2_psll_w;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntNo = Intrinsic::x86_mmx_psra_w;
break;
- case Intrinsic::x86_avx2_pslli_d:
- NewIntNo = Intrinsic::x86_avx2_psll_d;
- break;
- case Intrinsic::x86_avx2_pslli_q:
- NewIntNo = Intrinsic::x86_avx2_psll_q;
- break;
- case Intrinsic::x86_avx2_psrli_w:
- NewIntNo = Intrinsic::x86_avx2_psrl_w;
- break;
- case Intrinsic::x86_avx2_psrli_d:
- NewIntNo = Intrinsic::x86_avx2_psrl_d;
- break;
- case Intrinsic::x86_avx2_psrli_q:
- NewIntNo = Intrinsic::x86_avx2_psrl_q;
- break;
- case Intrinsic::x86_avx2_psrai_w:
- NewIntNo = Intrinsic::x86_avx2_psra_w;
- break;
- case Intrinsic::x86_avx2_psrai_d:
- NewIntNo = Intrinsic::x86_avx2_psra_d;
- break;
- default: {
- ShAmtVT = MVT::v2i32;
- switch (IntNo) {
- case Intrinsic::x86_mmx_pslli_w:
- NewIntNo = Intrinsic::x86_mmx_psll_w;
- break;
- case Intrinsic::x86_mmx_pslli_d:
- NewIntNo = Intrinsic::x86_mmx_psll_d;
- break;
- case Intrinsic::x86_mmx_pslli_q:
- NewIntNo = Intrinsic::x86_mmx_psll_q;
- break;
- case Intrinsic::x86_mmx_psrli_w:
- NewIntNo = Intrinsic::x86_mmx_psrl_w;
- break;
- case Intrinsic::x86_mmx_psrli_d:
- NewIntNo = Intrinsic::x86_mmx_psrl_d;
- break;
- case Intrinsic::x86_mmx_psrli_q:
- NewIntNo = Intrinsic::x86_mmx_psrl_q;
- break;
- case Intrinsic::x86_mmx_psrai_w:
- NewIntNo = Intrinsic::x86_mmx_psra_w;
- break;
- case Intrinsic::x86_mmx_psrai_d:
- NewIntNo = Intrinsic::x86_mmx_psra_d;
- break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- }
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntNo = Intrinsic::x86_mmx_psra_d;
break;
- }
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
}
// The vector shift intrinsics with scalars uses 32b shift amounts but
// the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
// to be zero.
- SDValue ShOps[4];
- ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, MVT::i32);
- if (ShAmtVT == MVT::v4i32) {
- ShOps[2] = DAG.getUNDEF(MVT::i32);
- ShOps[3] = DAG.getUNDEF(MVT::i32);
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
- } else {
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt,
+ DAG.getConstant(0, MVT::i32));
// FIXME this must be lowered to get rid of the invalid type.
- }
EVT VT = Op.getValueType();
ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
// AhiBlo = __builtin_ia32_psllqi256( AhiBlo, 32 );
// return AloBlo + AloBhi + AhiBlo;
- SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
- A, DAG.getConstant(32, MVT::i32));
- SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
- B, DAG.getConstant(32, MVT::i32));
+ SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A,
+ DAG.getConstant(32, MVT::i32));
+ SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B,
+ DAG.getConstant(32, MVT::i32));
SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32),
A, B);
SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32),
Ahi, B);
- AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
- AloBhi, DAG.getConstant(32, MVT::i32));
- AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
- AhiBlo, DAG.getConstant(32, MVT::i32));
+ AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi,
+ DAG.getConstant(32, MVT::i32));
+ AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo,
+ DAG.getConstant(32, MVT::i32));
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
return Res;
// AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
// return AloBlo + AloBhi + AhiBlo;
- SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- A, DAG.getConstant(32, MVT::i32));
- SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- B, DAG.getConstant(32, MVT::i32));
+ SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A,
+ DAG.getConstant(32, MVT::i32));
+ SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B,
+ DAG.getConstant(32, MVT::i32));
SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
A, B);
SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
Ahi, B);
- AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- AloBhi, DAG.getConstant(32, MVT::i32));
- AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- AhiBlo, DAG.getConstant(32, MVT::i32));
+ AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi,
+ DAG.getConstant(32, MVT::i32));
+ AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo,
+ DAG.getConstant(32, MVT::i32));
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
return Res;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
uint64_t ShiftAmt = C->getZExtValue();
- if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SHL) {
- // Make a large shift.
- SDValue SHL =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
- // Zero out the rightmost bits.
- SmallVector<SDValue, 16> V(16, DAG.getConstant(uint8_t(-1U << ShiftAmt),
- MVT::i8));
- return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
- }
-
- if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRL) {
- // Make a large shift.
- SDValue SRL =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
- // Zero out the leftmost bits.
- SmallVector<SDValue, 16> V(16, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
- MVT::i8));
- return DAG.getNode(ISD::AND, dl, VT, SRL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+ if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ (Subtarget->hasAVX2() &&
+ (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ if (Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ if (Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
+ return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
}
- if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRA) {
- if (ShiftAmt == 7) {
- // R s>> 7 === R s< 0
- SDValue Zeros = getZeroVector(VT, /* HasSSE2 */true,
- /* HasAVX2 */false, DAG, dl);
- return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
+ if (VT == MVT::v16i8) {
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
+ SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
+ // Zero out the rightmost bits.
+ SmallVector<SDValue, 16> V(16,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt),
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
}
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
+ // Zero out the leftmost bits.
+ SmallVector<SDValue, 16> V(16,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ if (ShiftAmt == 7) {
+ // R s>> 7 === R s< 0
+ SDValue Zeros = getZeroVector(VT, /* HasSSE2 */true,
+ /* HasAVX2 */false, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
- // R s>> a === ((R u>> a) ^ m) - m
- SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
- SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
- MVT::i8));
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
- Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
- Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
- return Res;
+ // R s>> a === ((R u>> a) ^ m) - m
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
+ MVT::i8));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
+ }
}
if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
- SDValue SHL =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
- SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U << ShiftAmt),
- MVT::i8));
+ SmallVector<SDValue, 32> V(32,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt),
+ MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
- SDValue SRL =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
+ SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
- SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
- MVT::i8));
+ SmallVector<SDValue, 32> V(32,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+ MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
}
// R s>> 7 === R s< 0
SDValue Zeros = getZeroVector(VT, true /* HasSSE2 */,
true /* HasAVX2 */, DAG, dl);
- return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
}
// R s>> a === ((R u>> a) ^ m) - m
// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
- Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
- Op.getOperand(1), DAG.getConstant(23, MVT::i32));
+ Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
+ DAG.getConstant(23, MVT::i32));
ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
// a = a << 5;
- Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
- Op.getOperand(1), DAG.getConstant(5, MVT::i32));
+ Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
+ DAG.getConstant(5, MVT::i32));
+ Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
// Turn 'a' into a mask suitable for VSELECT
SDValue VSelM = DAG.getConstant(0x80, VT);
SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
- OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
- OpVSel, VSelM);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
SDValue CM1 = DAG.getConstant(0x0f, VT);
SDValue CM2 = DAG.getConstant(0x3f, VT);
// r = VSELECT(r, psllw(r & (char16)15, 4), a);
SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
- M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
- DAG.getConstant(4, MVT::i32));
+ M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
+ DAG.getConstant(4, MVT::i32), DAG);
+ M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
- OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
- OpVSel, VSelM);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
// r = VSELECT(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
- M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
- DAG.getConstant(2, MVT::i32));
+ M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
+ DAG.getConstant(2, MVT::i32), DAG);
+ M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
- OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
- OpVSel, VSelM);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
// return VSELECT(r, r+r, a);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.getSizeInBits() == 256) {
- int NumElems = VT.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
// Constant shift amount
SmallVector<SDValue, 4> Amt1Csts;
SmallVector<SDValue, 4> Amt2Csts;
- for (int i = 0; i < NumElems/2; ++i)
+ for (unsigned i = 0; i != NumElems/2; ++i)
Amt1Csts.push_back(Amt->getOperand(i));
- for (int i = NumElems/2; i < NumElems; ++i)
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
Amt2Csts.push_back(Amt->getOperand(i));
Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
EVT VT = Op.getValueType();
- if (Subtarget->hasSSE2() && VT.isVector()) {
- unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
- ExtraVT.getScalarType().getSizeInBits();
- SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
+ if (!Subtarget->hasSSE2() || !VT.isVector())
+ return SDValue();
- unsigned SHLIntrinsicsID = 0;
- unsigned SRAIntrinsicsID = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default:
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v8i32:
+ case MVT::v16i16:
+ if (!Subtarget->hasAVX())
return SDValue();
- case MVT::v4i32:
- SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
- SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
- break;
- case MVT::v8i16:
- SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
- SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
- break;
- case MVT::v8i32:
- case MVT::v16i16:
- if (!Subtarget->hasAVX())
- return SDValue();
- if (!Subtarget->hasAVX2()) {
- // needs to be split
- int NumElems = VT.getVectorNumElements();
- SDValue Idx0 = DAG.getConstant(0, MVT::i32);
- SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
-
- // Extract the LHS vectors
- SDValue LHS = Op.getOperand(0);
- SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
- SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
-
- MVT EltVT = VT.getVectorElementType().getSimpleVT();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
-
- EVT ExtraEltVT = ExtraVT.getVectorElementType();
- int ExtraNumElems = ExtraVT.getVectorNumElements();
- ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
- ExtraNumElems/2);
- SDValue Extra = DAG.getValueType(ExtraVT);
-
- LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
- LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);;
- }
- if (VT == MVT::v8i32) {
- SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_d;
- SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_d;
- } else {
- SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_w;
- SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_w;
- }
+ if (!Subtarget->hasAVX2()) {
+ // needs to be split
+ int NumElems = VT.getVectorNumElements();
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ int ExtraNumElems = ExtraVT.getVectorNumElements();
+ ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
+ ExtraNumElems/2);
+ SDValue Extra = DAG.getValueType(ExtraVT);
+
+ LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
+ LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);;
+ }
+ // fall through
+ case MVT::v4i32:
+ case MVT::v8i16: {
+ SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
+ Op.getOperand(0), ShAmt, DAG);
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
}
-
- SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(SHLIntrinsicsID, MVT::i32),
- Op.getOperand(0), ShAmt);
-
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(SRAIntrinsicsID, MVT::i32),
- Tmp1, ShAmt);
}
-
- return SDValue();
}
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
+ case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::VSRL: return "X86ISD::VSRL";
- case X86ISD::CMPPD: return "X86ISD::CMPPD";
- case X86ISD::CMPPS: return "X86ISD::CMPPS";
- case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
- case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW";
- case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD";
- case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ";
- case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB";
- case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
- case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
- case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
+ case X86ISD::VSRA: return "X86ISD::VSRA";
+ case X86ISD::VSHLI: return "X86ISD::VSHLI";
+ case X86ISD::VSRLI: return "X86ISD::VSRLI";
+ case X86ISD::VSRAI: return "X86ISD::VSRAI";
+ case X86ISD::CMPP: return "X86ISD::CMPP";
+ case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
+ case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
case X86ISD::ADC: return "X86ISD::ADC";
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
- if (VT == MVT::v2i64)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v4i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v4i64)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v16i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
- ValOp, BaseShAmt);
- break;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
+ }
case ISD::SRA:
- if (VT == MVT::v4i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v16i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32),
- ValOp, BaseShAmt);
- break;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
+ }
case ISD::SRL:
- if (VT == MVT::v2i64)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v4i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v4i64)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v16i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
- ValOp, BaseShAmt);
- break;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
+ }
}
- return SDValue();
}
Mask = Mask.getOperand(0);
EVT MaskVT = Mask.getValueType();
- // Validate that the Mask operand is a vector sra node. The sra node
- // will be an intrinsic.
- if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
- return SDValue();
-
+ // Validate that the Mask operand is a vector sra node.
// FIXME: what to do for bytes, since there is a psignb/pblendvb, but
// there is no psrai.b
- switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- break;
- default: return SDValue();
- }
+ if (Mask.getOpcode() != X86ISD::VSRAI)
+ return SDValue();
// Check that the SRA is all signbits.
- SDValue SraC = Mask.getOperand(2);
+ SDValue SraC = Mask.getOperand(1);
unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
if ((SraAmt + 1) != EltBits)
Y = Y.getOperand(0);
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
- X.getValueType() == MaskVT && X.getValueType() == Y.getValueType() &&
- (EltBits == 8 || EltBits == 16 || EltBits == 32)) {
- SDValue Sign = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X,
- Mask.getOperand(1));
- return DAG.getNode(ISD::BITCAST, DL, VT, Sign);
+ X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
+ assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
+ "Unsupported VT for PSIGN");
+ Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
// PBLENDVB only available on SSE 4.1
if (!Subtarget->hasSSE41())