setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
if (Subtarget->hasDQI()) {
- setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ }
+ }
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
}
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
+
+ // If this is an invoke in a 32-bit function using an MSVC personality, assume
+ // the function clobbers all registers. If an exception is thrown, the runtime
+ // will not restore CSRs.
+ // FIXME: Model this more precisely so that we can register allocate across
+ // the normal edge and spill and fill across the exceptional edge.
+ if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
+ const Function *CallerFn = MF.getFunction();
+ EHPersonality Pers =
+ CallerFn->hasPersonalityFn()
+ ? classifyEHPersonality(CallerFn->getPersonalityFn())
+ : EHPersonality::Unknown;
+ if (isMSVCEHPersonality(Pers))
+ Mask = RegInfo->getNoPreservedMask();
+ }
+
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
// Mask only contains negative index if an element is zero.
- if (std::any_of(Mask.begin(), Mask.end(),
+ if (std::any_of(Mask.begin(), Mask.end(),
[](int M){ return M == SM_SentinelZero; }))
return false;
break;
SDValue Rnd;
if (Op.getNumOperands() == 6)
Rnd = Op.getOperand(5);
- else
+ else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Rnd),
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
- case VPERM_3OP_MASKZ:
+ case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK:
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
return DAG.getMergeValues(Ops, dl);
}
-// Return true if the requred (according to Opcode) shift-imm form is natively
+// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
}
// The shift amount is a variable, but it is the same for all vector lanes.
-// These instrcutions are defined together with shift-immediate.
+// These instructions are defined together with shift-immediate.
static
bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
-// Return true if the requred (according to Opcode) variable-shift form is
+// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
}
}
+ // v4i32 Non Uniform Shifts.
+ // If the shift amount is constant we can shift each lane using the SSE2
+ // immediate shifts, else we need to zero-extend each lane to the lower i64
+ // and shift using the SSE2 variable shifts.
+ // The separate results can then be blended together.
+ if (VT == MVT::v4i32) {
+ unsigned Opc = Op.getOpcode();
+ SDValue Amt0, Amt1, Amt2, Amt3;
+ if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
+ Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
+ Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1});
+ Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
+ Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
+ } else {
+ // ISD::SHL is handled above but we include it here for completeness.
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown target vector shift node");
+ case ISD::SHL:
+ Opc = X86ISD::VSHL;
+ break;
+ case ISD::SRL:
+ Opc = X86ISD::VSRL;
+ break;
+ case ISD::SRA:
+ Opc = X86ISD::VSRA;
+ break;
+ }
+ // The SSE2 shifts use the lower i64 as the same shift amount for
+ // all lanes and the upper i64 is ignored. These shuffle masks
+ // optimally zero-extend each lanes on SSE2/SSE41/AVX targets.
+ SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
+ Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
+ Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
+ Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1});
+ }
+
+ SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
+ SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
+ SDValue R2 = DAG.getNode(Opc, dl, VT, R, Amt2);
+ SDValue R3 = DAG.getNode(Opc, dl, VT, R, Amt3);
+ SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1});
+ SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7});
+ return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
+ }
+
if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
unsigned ShiftOpcode = Op->getOpcode();
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
+ case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
+ case X86ISD::FP_TO_SINT_RND: return "X86ISD::FP_TO_SINT_RND";
+ case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND";
}
return nullptr;
}