return SDValue();
}
+// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
+// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
+// constraint of matching input/output vector elements.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ SDNode *N = Op.getNode();
+ EVT VT = Op.getValueType();
+ unsigned NumElts = Op.getNumOperands();
+
+ // Check supported types and sub-targets.
+ //
+ // Only v2f32 -> v2f64 needs special handling.
+ if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+ return SDValue();
+
+ SDValue VecIn;
+ EVT VecInVT;
+ SmallVector<int, 8> Mask;
+ EVT SrcVT = MVT::Other;
+
+ // Check the patterns could be translated into X86vfpext.
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue In = N->getOperand(i);
+ unsigned Opcode = In.getOpcode();
+
+ // Skip if the element is undefined.
+ if (Opcode == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ // Quit if one of the elements is not defined from 'fpext'.
+ if (Opcode != ISD::FP_EXTEND)
+ return SDValue();
+
+ // Check how the source of 'fpext' is defined.
+ SDValue L2In = In.getOperand(0);
+ EVT L2InVT = L2In.getValueType();
+
+ // Check the original type
+ if (SrcVT == MVT::Other)
+ SrcVT = L2InVT;
+ else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
+ return SDValue();
+
+ // Check whether the value being 'fpext'ed is extracted from the same
+ // source.
+ Opcode = L2In.getOpcode();
+
+ // Quit if it's not extracted with a constant index.
+ if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(L2In.getOperand(1)))
+ return SDValue();
+
+ SDValue ExtractedFromVec = L2In.getOperand(0);
+
+ if (VecIn.getNode() == 0) {
+ VecIn = ExtractedFromVec;
+ VecInVT = ExtractedFromVec.getValueType();
+ } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+ return SDValue();
+
+ Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+ }
+
+ // Quit if all operands of BUILD_VECTOR are undefined.
+ if (!VecIn.getNode())
+ return SDValue();
+
+ // Fill the remaining mask as undef.
+ for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
+ Mask.push_back(-1);
+
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+ DAG.getVectorShuffle(VecInVT, DL,
+ VecIn, DAG.getUNDEF(VecInVT),
+ &Mask[0]));
+}
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
if (Broadcast.getNode())
return Broadcast;
+ SDValue FpExt = LowerVectorFpExtend(Op, DAG);
+ if (FpExt.getNode())
+ return FpExt;
+
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
unsigned Opcode = 0;
unsigned NumOperands = 0;
- switch (Op.getNode()->getOpcode()) {
+
+ // Truncate operations may prevent the merge of the SETCC instruction
+ // and the arithmetic intruction before it. Attempt to truncate the operands
+ // of the arithmetic instruction and use a reduced bit-width instruction.
+ bool NeedTruncation = false;
+ SDValue ArithOp = Op;
+ if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
+ SDValue Arith = Op->getOperand(0);
+ // Both the trunc and the arithmetic op need to have one user each.
+ if (Arith->hasOneUse())
+ switch (Arith.getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ NeedTruncation = true;
+ ArithOp = Arith;
+ }
+ }
+ }
+
+ // NOTICE: In the code below we use ArithOp to hold the arithmetic operation
+ // which may be the result of a CAST. We use the variable 'Op', which is the
+ // non-casted variable when we check for possible users.
+ switch (ArithOp.getOpcode()) {
case ISD::ADD:
// Due to an isel shortcoming, be conservative if this add is likely to be
// selected as part of a load-modify-store instruction. When the root node
goto default_case;
if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
if (C->getAPIntValue() == 1) {
Opcode = X86ISD::INC;
if (User->getOpcode() != ISD::BRCOND &&
User->getOpcode() != ISD::SETCC &&
- (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) {
NonFlagUse = true;
break;
}
goto default_case;
// Otherwise use a regular EFLAGS-setting instruction.
- switch (Op.getNode()->getOpcode()) {
+ switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
- case ISD::SUB:
- Opcode = X86ISD::SUB;
- break;
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::OR: Opcode = X86ISD::OR; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
break;
}
+ // If we found that truncation is beneficial, perform the truncation and
+ // update 'Op'.
+ if (NeedTruncation) {
+ EVT VT = Op.getValueType();
+ SDValue WideVal = Op->getOperand(0);
+ EVT WideVT = WideVal.getValueType();
+ unsigned ConvertedOp = 0;
+ // Use a target machine opcode to prevent further DAGCombine
+ // optimizations that may separate the arithmetic operations
+ // from the setcc node.
+ switch (WideVal.getOpcode()) {
+ default: break;
+ case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
+ case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
+ case ISD::AND: ConvertedOp = X86ISD::AND; break;
+ case ISD::OR: ConvertedOp = X86ISD::OR; break;
+ case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
+ }
+
+ if (ConvertedOp) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
+ SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
+ SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
+ Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1);
+ }
+ }
+ }
+
if (Opcode == 0)
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
- if (Opcode == X86ISD::CMP) {
- SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0),
- Op.getOperand(1));
- // We can't replace usage of SUB with CMP.
- // The SUB node will be removed later because there is no use of it.
- return SDValue(New.getNode(), 0);
- }
-
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0; i != NumOperands; ++i)
DebugLoc dl = Op.getDebugLoc();
if (isFP) {
- unsigned SSECC = 8;
+#ifndef NDEBUG
EVT EltVT = Op0.getValueType().getVectorElementType();
- assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT;
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+#endif
+ unsigned SSECC;
bool Swap = false;
// SSE Condition code mapping:
// 6 - NLE
// 7 - ORD
switch (SetCCOpcode) {
- default: break;
+ default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
- case ISD::SETULE: Swap = true;
+ case ISD::SETULE: Swap = true; // Fallthrough
case ISD::SETUGE: SSECC = 5; break;
- case ISD::SETULT: Swap = true;
+ case ISD::SETULT: Swap = true; // Fallthrough
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
+ case ISD::SETUEQ:
+ case ISD::SETONE: SSECC = 8; break;
}
if (Swap)
std::swap(Op0, Op1);
// In the two special cases we can't handle, emit two comparisons.
if (SSECC == 8) {
+ unsigned CC0, CC1;
+ unsigned CombineOpc;
if (SetCCOpcode == ISD::SETUEQ) {
- SDValue UNORD, EQ;
- UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(3, MVT::i8));
- EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(0, MVT::i8));
- return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
- }
- if (SetCCOpcode == ISD::SETONE) {
- SDValue ORD, NEQ;
- ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(7, MVT::i8));
- NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
- DAG.getConstant(4, MVT::i8));
- return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
+ CC0 = 3; CC1 = 0; CombineOpc = ISD::OR;
+ } else {
+ assert(SetCCOpcode == ISD::SETONE);
+ CC0 = 7; CC1 = 4; CombineOpc = ISD::AND;
}
- llvm_unreachable("Illegal FP comparison");
+
+ SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC0, MVT::i8));
+ SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(CC1, MVT::i8));
+ return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
}
// Handle all other FP comparisons here.
return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc = 0;
+ unsigned Opc;
bool Swap = false, Invert = false, FlipSigns = false;
switch (SetCCOpcode) {
- default: break;
+ default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
- if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42())
- return SDValue();
- if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41())
- return SDValue();
+ if (VT == MVT::v2i64) {
+ if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
+ return SDValue();
+ if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
+ return SDValue();
+ }
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
SDValue ShOps[4];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
- ShOps[2] = DAG.getUNDEF(MVT::i32);
- ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
// The return type has to be a 128-bit type with the same element
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomige_sd:
case Intrinsic::x86_sse2_ucomineq_sd: {
- unsigned Opc = 0;
- ISD::CondCode CC = ISD::SETCC_INVALID;
+ unsigned Opc;
+ ISD::CondCode CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_comieq_ss:
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+
// Arithmetic intrinsics.
case Intrinsic::x86_sse2_pmulu_dq:
case Intrinsic::x86_avx2_pmulu_dq:
return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
+ // SSE3/AVX horizontal add/sub intrinsics
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
case Intrinsic::x86_avx_hadd_ps_256:
case Intrinsic::x86_avx_hadd_pd_256:
- return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse3_hsub_ps:
case Intrinsic::x86_sse3_hsub_pd:
case Intrinsic::x86_avx_hsub_ps_256:
case Intrinsic::x86_avx_hsub_pd_256:
- return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phadd_w_128:
case Intrinsic::x86_ssse3_phadd_d_128:
case Intrinsic::x86_avx2_phadd_w:
case Intrinsic::x86_avx2_phadd_d:
- return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_ssse3_phsub_w_128:
case Intrinsic::x86_ssse3_phsub_d_128:
case Intrinsic::x86_avx2_phsub_w:
- case Intrinsic::x86_avx2_phsub_d:
- return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_phsub_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse3_hadd_ps:
+ case Intrinsic::x86_sse3_hadd_pd:
+ case Intrinsic::x86_avx_hadd_ps_256:
+ case Intrinsic::x86_avx_hadd_pd_256:
+ Opcode = X86ISD::FHADD;
+ break;
+ case Intrinsic::x86_sse3_hsub_ps:
+ case Intrinsic::x86_sse3_hsub_pd:
+ case Intrinsic::x86_avx_hsub_ps_256:
+ case Intrinsic::x86_avx_hsub_pd_256:
+ Opcode = X86ISD::FHSUB;
+ break;
+ case Intrinsic::x86_ssse3_phadd_w_128:
+ case Intrinsic::x86_ssse3_phadd_d_128:
+ case Intrinsic::x86_avx2_phadd_w:
+ case Intrinsic::x86_avx2_phadd_d:
+ Opcode = X86ISD::HADD;
+ break;
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ case Intrinsic::x86_avx2_phsub_w:
+ case Intrinsic::x86_avx2_phsub_d:
+ Opcode = X86ISD::HSUB;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // AVX2 variable shift intrinsics
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q_256:
- return DAG.getNode(ISD::SHL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q_256:
- return DAG.getNode(ISD::SRL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_avx2_psrav_d:
- case Intrinsic::x86_avx2_psrav_d_256:
- return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrav_d_256: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ Opcode = ISD::SHL;
+ break;
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ Opcode = ISD::SRL;
+ break;
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ Opcode = ISD::SRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_ssse3_psign_b_128:
case Intrinsic::x86_ssse3_psign_w_128:
case Intrinsic::x86_ssse3_psign_d_128:
case Intrinsic::x86_avx2_psign_d:
return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_sse41_insertps:
return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case Intrinsic::x86_avx_vperm2f128_ps_256:
case Intrinsic::x86_avx_vperm2f128_pd_256:
case Intrinsic::x86_avx_vperm2f128_si_256:
case Intrinsic::x86_avx2_vperm2i128:
return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
- unsigned X86CC = 0;
+ unsigned X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
- return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
- return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d:
- return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_psra_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ Opcode = X86ISD::VSHL;
+ break;
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ Opcode = X86ISD::VSRL;
+ break;
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ Opcode = X86ISD::VSRA;
+ break;
+ }
+ return DAG.getNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ }
+
+ // SSE/AVX immediate shift intrinsics
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
- return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
- return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(),
+ case Intrinsic::x86_avx2_psrai_d: {
+ unsigned Opcode;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ Opcode = X86ISD::VSHLI;
+ break;
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ Opcode = X86ISD::VSRLI;
+ break;
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ Opcode = X86ISD::VSRAI;
+ break;
+ }
+ return getTargetVShiftNode(Opcode, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
+ }
+
// Fix vector shift instructions where the last operand is a non-immediate
// i32 value.
case Intrinsic::x86_mmx_pslli_w:
if (isa<ConstantSDNode>(ShAmt))
return SDValue();
- unsigned NewIntNo = 0;
+ unsigned NewIntNo;
switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_mmx_pslli_w:
NewIntNo = Intrinsic::x86_mmx_psll_w;
break;
case Intrinsic::x86_mmx_psrai_d:
NewIntNo = Intrinsic::x86_mmx_psra_d;
break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
}
// The vector shift intrinsics with scalars uses 32b shift amounts but
SDValue(PCMP.getNode(), 1));
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+
case Intrinsic::x86_sse42_pcmpistri128:
case Intrinsic::x86_sse42_pcmpestri128: {
unsigned Opcode;
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
}
+ case Intrinsic::x86_fma_vfmadd_ps:
+ case Intrinsic::x86_fma_vfmadd_pd:
+ case Intrinsic::x86_fma_vfmsub_ps:
+ case Intrinsic::x86_fma_vfmsub_pd:
+ case Intrinsic::x86_fma_vfnmadd_ps:
+ case Intrinsic::x86_fma_vfnmadd_pd:
+ case Intrinsic::x86_fma_vfnmsub_ps:
+ case Intrinsic::x86_fma_vfnmsub_pd:
+ case Intrinsic::x86_fma_vfmaddsub_ps:
+ case Intrinsic::x86_fma_vfmaddsub_pd:
+ case Intrinsic::x86_fma_vfmsubadd_ps:
+ case Intrinsic::x86_fma_vfmsubadd_pd:
+ case Intrinsic::x86_fma_vfmadd_ps_256:
+ case Intrinsic::x86_fma_vfmadd_pd_256:
+ case Intrinsic::x86_fma_vfmsub_ps_256:
+ case Intrinsic::x86_fma_vfmsub_pd_256:
+ case Intrinsic::x86_fma_vfnmadd_ps_256:
+ case Intrinsic::x86_fma_vfnmadd_pd_256:
+ case Intrinsic::x86_fma_vfnmsub_ps_256:
+ case Intrinsic::x86_fma_vfnmsub_pd_256:
+ case Intrinsic::x86_fma_vfmaddsub_ps_256:
+ case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ case Intrinsic::x86_fma_vfmsubadd_ps_256:
+ case Intrinsic::x86_fma_vfmsubadd_pd_256: {
+ // Only lower intrinsics if FMA is enabled. FMA4 still uses patterns.
+ if (!Subtarget->hasFMA())
+ return SDValue();
+
+ unsigned Opc;
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_fma_vfmadd_ps:
+ case Intrinsic::x86_fma_vfmadd_pd:
+ case Intrinsic::x86_fma_vfmadd_ps_256:
+ case Intrinsic::x86_fma_vfmadd_pd_256:
+ Opc = X86ISD::FMADD;
+ break;
+ case Intrinsic::x86_fma_vfmsub_ps:
+ case Intrinsic::x86_fma_vfmsub_pd:
+ case Intrinsic::x86_fma_vfmsub_ps_256:
+ case Intrinsic::x86_fma_vfmsub_pd_256:
+ Opc = X86ISD::FMSUB;
+ break;
+ case Intrinsic::x86_fma_vfnmadd_ps:
+ case Intrinsic::x86_fma_vfnmadd_pd:
+ case Intrinsic::x86_fma_vfnmadd_ps_256:
+ case Intrinsic::x86_fma_vfnmadd_pd_256:
+ Opc = X86ISD::FNMADD;
+ break;
+ case Intrinsic::x86_fma_vfnmsub_ps:
+ case Intrinsic::x86_fma_vfnmsub_pd:
+ case Intrinsic::x86_fma_vfnmsub_ps_256:
+ case Intrinsic::x86_fma_vfnmsub_pd_256:
+ Opc = X86ISD::FNMSUB;
+ break;
+ case Intrinsic::x86_fma_vfmaddsub_ps:
+ case Intrinsic::x86_fma_vfmaddsub_pd:
+ case Intrinsic::x86_fma_vfmaddsub_ps_256:
+ case Intrinsic::x86_fma_vfmaddsub_pd_256:
+ Opc = X86ISD::FMADDSUB;
+ break;
+ case Intrinsic::x86_fma_vfmsubadd_ps:
+ case Intrinsic::x86_fma_vfmsubadd_pd:
+ case Intrinsic::x86_fma_vfmsubadd_ps_256:
+ case Intrinsic::x86_fma_vfmsubadd_pd_256:
+ Opc = X86ISD::FMSUBADD;
+ break;
+ }
+
+ return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ }
}
}
Results.push_back(Swap.getValue(1));
}
-void X86TargetLowering::
+static void
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG, unsigned NewOp) const {
+ SelectionDAG &DAG, unsigned NewOp) {
DebugLoc dl = Node->getDebugLoc();
assert (Node->getValueType(0) == MVT::i64 &&
"Only know how to expand i64 atomics");
return;
}
case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
- return;
case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
- return;
case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
- return;
case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
- return;
case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
- return;
- case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
+ case ISD::ATOMIC_SWAP: {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case ISD::ATOMIC_LOAD_ADD:
+ Opc = X86ISD::ATOMADD64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ Opc = X86ISD::ATOMAND64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ Opc = X86ISD::ATOMNAND64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ Opc = X86ISD::ATOMOR64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ Opc = X86ISD::ATOMSUB64_DAG;
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ Opc = X86ISD::ATOMXOR64_DAG;
+ break;
+ case ISD::ATOMIC_SWAP:
+ Opc = X86ISD::ATOMSWAP64_DAG;
+ break;
+ }
+ ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
return;
+ }
case ISD::ATOMIC_LOAD:
ReplaceATOMIC_LOAD(N, Results, DAG);
}
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FMAXC: return "X86ISD::FMAXC";
+ case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
+ case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
// String/text processing lowering.
case X86::PCMPISTRM128REG:
case X86::VPCMPISTRM128REG:
- return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
case X86::VPCMPISTRM128MEM:
- return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
case X86::VPCMPESTRM128REG:
- return EmitPCMP(MI, BB, 5, false /* in mem */);
case X86::PCMPESTRM128MEM:
- case X86::VPCMPESTRM128MEM:
- return EmitPCMP(MI, BB, 5, true /* in mem */);
+ case X86::VPCMPESTRM128MEM: {
+ unsigned NumArgs;
+ bool MemArg;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::PCMPISTRM128REG:
+ case X86::VPCMPISTRM128REG:
+ NumArgs = 3; MemArg = false; break;
+ case X86::PCMPISTRM128MEM:
+ case X86::VPCMPISTRM128MEM:
+ NumArgs = 3; MemArg = true; break;
+ case X86::PCMPESTRM128REG:
+ case X86::VPCMPESTRM128REG:
+ NumArgs = 5; MemArg = false; break;
+ case X86::PCMPESTRM128MEM:
+ case X86::VPCMPESTRM128MEM:
+ NumArgs = 5; MemArg = true; break;
+ }
+ return EmitPCMP(MI, BB, NumArgs, MemArg);
+ }
// Thread synchronization.
case X86::MONITOR:
return EmitMonitor(MI, BB);
// Atomic Lowering.
- case X86::ATOMAND32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
- X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMOR32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
- X86::OR32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMXOR32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
- X86::XOR32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass);
- case X86::ATOMNAND32:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
- X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32,
- X86::NOT32r, X86::EAX,
- &X86::GR32RegClass, true);
case X86::ATOMMIN32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
case X86::ATOMMAX32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
case X86::ATOMUMIN32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
case X86::ATOMUMAX32:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);
+ case X86::ATOMMIN16:
+ case X86::ATOMMAX16:
+ case X86::ATOMUMIN16:
+ case X86::ATOMUMAX16:
+ case X86::ATOMMIN64:
+ case X86::ATOMMAX64:
+ case X86::ATOMUMIN64:
+ case X86::ATOMUMAX64: {
+ unsigned Opc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break;
+ case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break;
+ case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break;
+ case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break;
+ case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break;
+ case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break;
+ case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break;
+ case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break;
+ case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break;
+ case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break;
+ case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break;
+ case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break;
+ // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+ }
+ return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc);
+ }
+
+ case X86::ATOMAND32:
+ case X86::ATOMOR32:
+ case X86::ATOMXOR32:
+ case X86::ATOMNAND32: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND32:
+ RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break;
+ case X86::ATOMOR32:
+ RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break;
+ case X86::ATOMXOR32:
+ RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break;
+ case X86::ATOMNAND32:
+ RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV32rm, X86::LCMPXCHG32,
+ X86::NOT32r, X86::EAX,
+ &X86::GR32RegClass, Invert);
+ }
case X86::ATOMAND16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
- X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
case X86::ATOMOR16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
- X86::OR16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
case X86::ATOMXOR16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
- X86::XOR16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
- X86::NOT16r, X86::AX,
- &X86::GR16RegClass);
- case X86::ATOMNAND16:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
- X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16,
+ case X86::ATOMNAND16: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND16:
+ RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break;
+ case X86::ATOMOR16:
+ RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break;
+ case X86::ATOMXOR16:
+ RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break;
+ case X86::ATOMNAND16:
+ RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV16rm, X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
- &X86::GR16RegClass, true);
- case X86::ATOMMIN16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
- case X86::ATOMMAX16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
- case X86::ATOMUMIN16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
- case X86::ATOMUMAX16:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);
+ &X86::GR16RegClass, Invert);
+ }
case X86::ATOMAND8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
- X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
case X86::ATOMOR8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
- X86::OR8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
case X86::ATOMXOR8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
- X86::XOR8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
- X86::NOT8r, X86::AL,
- &X86::GR8RegClass);
- case X86::ATOMNAND8:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
- X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8,
+ case X86::ATOMNAND8: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND8:
+ RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break;
+ case X86::ATOMOR8:
+ RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break;
+ case X86::ATOMXOR8:
+ RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break;
+ case X86::ATOMNAND8:
+ RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV8rm, X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
- &X86::GR8RegClass, true);
- // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
+ &X86::GR8RegClass, Invert);
+ }
+
// This group is for 64-bit host.
case X86::ATOMAND64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
- X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
case X86::ATOMOR64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
- X86::OR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
case X86::ATOMXOR64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
- X86::XOR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
- X86::NOT64r, X86::RAX,
- &X86::GR64RegClass);
- case X86::ATOMNAND64:
- return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
- X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64,
+ case X86::ATOMNAND64: {
+ bool Invert = false;
+ unsigned RegOpc, ImmOpc;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND64:
+ RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break;
+ case X86::ATOMOR64:
+ RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break;
+ case X86::ATOMXOR64:
+ RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break;
+ case X86::ATOMNAND64:
+ RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break;
+ }
+ return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc,
+ X86::MOV64rm, X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
- &X86::GR64RegClass, true);
- case X86::ATOMMIN64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
- case X86::ATOMMAX64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
- case X86::ATOMUMIN64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
- case X86::ATOMUMAX64:
- return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);
+ &X86::GR64RegClass, Invert);
+ }
// This group does 64-bit operations on a 32-bit host.
case X86::ATOMAND6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::AND32rr, X86::AND32rr,
- X86::AND32ri, X86::AND32ri,
- false);
case X86::ATOMOR6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::OR32rr, X86::OR32rr,
- X86::OR32ri, X86::OR32ri,
- false);
case X86::ATOMXOR6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::XOR32rr, X86::XOR32rr,
- X86::XOR32ri, X86::XOR32ri,
- false);
case X86::ATOMNAND6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::AND32rr, X86::AND32rr,
- X86::AND32ri, X86::AND32ri,
- true);
case X86::ATOMADD6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::ADD32rr, X86::ADC32rr,
- X86::ADD32ri, X86::ADC32ri,
- false);
case X86::ATOMSUB6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::SUB32rr, X86::SBB32rr,
- X86::SUB32ri, X86::SBB32ri,
- false);
- case X86::ATOMSWAP6432:
- return EmitAtomicBit6432WithCustomInserter(MI, BB,
- X86::MOV32rr, X86::MOV32rr,
- X86::MOV32ri, X86::MOV32ri,
- false);
+ case X86::ATOMSWAP6432: {
+ bool Invert = false;
+ unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
+ case X86::ATOMAND6432:
+ RegOpcL = RegOpcH = X86::AND32rr;
+ ImmOpcL = ImmOpcH = X86::AND32ri;
+ break;
+ case X86::ATOMOR6432:
+ RegOpcL = RegOpcH = X86::OR32rr;
+ ImmOpcL = ImmOpcH = X86::OR32ri;
+ break;
+ case X86::ATOMXOR6432:
+ RegOpcL = RegOpcH = X86::XOR32rr;
+ ImmOpcL = ImmOpcH = X86::XOR32ri;
+ break;
+ case X86::ATOMNAND6432:
+ RegOpcL = RegOpcH = X86::AND32rr;
+ ImmOpcL = ImmOpcH = X86::AND32ri;
+ Invert = true;
+ break;
+ case X86::ATOMADD6432:
+ RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr;
+ ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri;
+ break;
+ case X86::ATOMSUB6432:
+ RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr;
+ ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri;
+ break;
+ case X86::ATOMSWAP6432:
+ RegOpcL = RegOpcH = X86::MOV32rr;
+ ImmOpcL = ImmOpcH = X86::MOV32ri;
+ break;
+ }
+ return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH,
+ ImmOpcL, ImmOpcH, Invert);
+ }
+
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
// PSHUFD
static const int ShufMask1[] = {0, 2, 0, 0};
- OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT), ShufMask1);
- OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT), ShufMask1);
+ SDValue Undef = DAG.getUNDEF(VT);
+ OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1);
// MOVLHPS
static const int ShufMask2[] = {0, 1, 4, 5};
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1};
- OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, DAG.getUNDEF(MVT::v16i8),
- ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, DAG.getUNDEF(MVT::v16i8),
- ShufMask1);
+ SDValue Undef = DAG.getUNDEF(MVT::v16i8);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1);
OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
return SDValue();
}
+/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and
+/// X86ISD::FMAX nodes.
+static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
+
+ // Only perform optimizations if UnsafeMath is used.
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
+ // into FMINC and MMAXC, which are Commutative operations.
+ unsigned NewOp = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("unknown opcode");
+ case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
+ case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
+ }
+
+ return DAG.getNode(NewOp, N->getDebugLoc(), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1));
+}
+
+
/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
// FAND(0.0, x) -> 0.0
// concat the vectors to original VT
unsigned NumElems = OpVT.getVectorNumElements();
+ SDValue Undef = DAG.getUNDEF(OpVT);
+
SmallVector<int,8> ShufMask1(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask1[i] = i;
- SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
- &ShufMask1[0]);
+ SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]);
SmallVector<int,8> ShufMask2(NumElems, -1);
for (unsigned i = 0; i != NumElems/2; ++i)
ShufMask2[i] = i + NumElems/2;
- SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
- &ShufMask2[0]);
+ SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
VT.getVectorNumElements()/2);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FMIN:
+ case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);