bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
+ // Win64 functions have extra shadow space for argument homing. Don't do the
+ // sibcall if the caller and callee have mismatched expectations for this
+ // space.
+ if (IsCalleeWin64 != IsCallerWin64)
+ return false;
+
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
// FALLTHROUGH
case MVT::v16i8:
case MVT::v32i8: {
+ assert((VT.getSizeInBits() == 128 || Subtarget->hasAVX2()) &&
+ "256-bit byte-blends require AVX2 support!");
+
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
// Recurse back into this routine to re-compute state now that this isn't
// a 3 and 1 problem.
- return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
- Mask);
+ return lowerV8I16GeneralSingleInputVectorShuffle(DL, V, Mask, Subtarget,
+ DAG);
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
if (!Subtarget->hasSSE41())
return SDValue();
- // Some types for vselect were previously set to Expand, not Legal or
- // Custom. Return an empty SDValue so we fall-through to Expand, after
- // the Custom lowering phase.
- MVT VT = Op.getSimpleValueType();
- switch (VT.SimpleTy) {
+ // Only some types will be legal on some subtargets. If we can emit a legal
+ // VSELECT-matching blend, return Op, and but if we need to expand, return
+ // a null value.
+ switch (Op.getSimpleValueType().SimpleTy) {
default:
- break;
+ // Most of the vector types have blends past SSE4.1.
+ return Op;
+
+ case MVT::v32i8:
+ // The byte blends for AVX vectors were introduced only in AVX2.
+ if (Subtarget->hasAVX2())
+ return Op;
+
+ return SDValue();
+
case MVT::v8i16:
case MVT::v16i16:
+ // AVX-512 BWI and VLX features support VSELECT with i16 elements.
if (Subtarget->hasBWI() && Subtarget->hasVLX())
- break;
+ return Op;
+
+ // FIXME: We should custom lower this by fixing the condition and using i8
+ // blends.
return SDValue();
}
-
- // We couldn't create a "Blend with immediate" node.
- // This node should still be legal, but we'll have to emit a blendv*
- // instruction.
- return Op;
}
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
}
}
- // If we know that this node is legal then we know that it is going to be
- // matched by one of the SSE/AVX BLEND instructions. These instructions only
- // depend on the highest bit in each word. Try to use SimplifyDemandedBits
- // to simplify previous instructions.
+ // We should generate an X86ISD::BLENDI from a vselect if its argument
+ // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
+ // constants. This specific pattern gets generated when we split a
+ // selector for a 512 bit vector in a machine without AVX512 (but with
+ // 256-bit vectors), during legalization:
+ //
+ // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
+ //
+ // Iff we find this pattern and the build_vectors are built from
+ // constants, we translate the vselect into a shuffle_vector that we
+ // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
+ if ((N->getOpcode() == ISD::VSELECT ||
+ N->getOpcode() == X86ISD::SHRUNKBLEND) &&
+ !DCI.isBeforeLegalize()) {
+ SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
+ if (Shuffle.getNode())
+ return Shuffle;
+ }
+
+ // If this is a *dynamic* select (non-constant condition) and we can match
+ // this node with one of the variable blend instructions, restructure the
+ // condition so that the blends can use the high bit of each element and use
+ // SimplifyDemandedBits to simplify the condition operand.
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
- // We explicitly check against SSE4.1, v8i16 and v16i16 because, although
- // vselect nodes may be marked as Custom, they might only be legal when
- // Cond is a build_vector of constants. This will be taken care in
- // a later condition.
- (TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) &&
- Subtarget->hasSSE41() && VT != MVT::v16i16 && VT != MVT::v8i16) &&
- // Don't optimize vector of constants. Those are handled by
- // the generic code and all the bits must be properly set for
- // the generic optimizer.
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
if (BitWidth == 1)
return SDValue();
+ // We can only handle the cases where VSELECT is directly legal on the
+ // subtarget. We custom lower VSELECT nodes with constant conditions and
+ // this makes it hard to see whether a dynamic VSELECT will correctly
+ // lower, so we both check the operation's status and explicitly handle the
+ // cases where a *dynamic* blend will fail even though a constant-condition
+ // blend could be custom lowered.
+ // FIXME: We should find a better way to handle this class of problems.
+ // Potentially, we should combine constant-condition vselect nodes
+ // pre-legalization into shuffles and not mark as many types as custom
+ // lowered.
+ if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+ // FIXME: We don't support i16-element blends currently. We could and
+ // should support them by making *all* the bits in the condition be set
+ // rather than just the high bit and using an i8-element blend.
+ if (VT.getScalarType() == MVT::i16)
+ return SDValue();
+ // Dynamic blending was only available from SSE4.1 onward.
+ if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41())
+ return SDValue();
+ // Byte blends are only available in AVX2
+ if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 &&
+ !Subtarget->hasAVX2())
+ return SDValue();
+
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
}
}
- // We should generate an X86ISD::BLENDI from a vselect if its argument
- // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
- // constants. This specific pattern gets generated when we split a
- // selector for a 512 bit vector in a machine without AVX512 (but with
- // 256-bit vectors), during legalization:
- //
- // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
- //
- // Iff we find this pattern and the build_vectors are built from
- // constants, we translate the vselect into a shuffle_vector that we
- // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
- if ((N->getOpcode() == ISD::VSELECT ||
- N->getOpcode() == X86ISD::SHRUNKBLEND) &&
- !DCI.isBeforeLegalize()) {
- SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
- if (Shuffle.getNode())
- return Shuffle;
- }
-
return SDValue();
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-std::pair<unsigned, const TargetRegisterClass*>
-X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+std::pair<unsigned, const TargetRegisterClass *>
+X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ const std::string &Constraint,
MVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass*> Res;
- Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {