X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=1bc68f3eaf771c2944dbbc7083c894f0d4e27c92;hb=b1961a389602e7ef23668d2388ea76d9d852f198;hp=3ab7af2edc223cff82c880b6426b0a906e673d09;hpb=eabb1227f6672d936435fa1fc870e22132872df8;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3ab7af2edc2..1bc68f3eaf7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3381,6 +3381,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC); bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC); + // Win64 functions have extra shadow space for argument homing. Don't do the + // sibcall if the caller and callee have mismatched expectations for this + // space. + if (IsCalleeWin64 != IsCallerWin64) + return false; + if (DAG.getTarget().Options.GuaranteedTailCallOpt) { if (IsTailCallConvention(CalleeCC) && CCMatch) return true; @@ -6169,6 +6175,9 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, // FALLTHROUGH case MVT::v16i8: case MVT::v32i8: { + assert((VT.getSizeInBits() == 128 || Subtarget->hasAVX2()) && + "256-bit byte-blends require AVX2 support!"); + // Scale the blend by the number of bytes per element. int Scale = VT.getScalarSizeInBits() / 8; @@ -7850,8 +7859,8 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( // Recurse back into this routine to re-compute state now that this isn't // a 3 and 1 problem. - return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16), - Mask); + return lowerV8I16GeneralSingleInputVectorShuffle(DL, V, Mask, Subtarget, + DAG); }; if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3)) return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4); @@ -10117,24 +10126,31 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->hasSSE41()) return SDValue(); - // Some types for vselect were previously set to Expand, not Legal or - // Custom. Return an empty SDValue so we fall-through to Expand, after - // the Custom lowering phase. - MVT VT = Op.getSimpleValueType(); - switch (VT.SimpleTy) { + // Only some types will be legal on some subtargets. If we can emit a legal + // VSELECT-matching blend, return Op, and but if we need to expand, return + // a null value. + switch (Op.getSimpleValueType().SimpleTy) { default: - break; + // Most of the vector types have blends past SSE4.1. + return Op; + + case MVT::v32i8: + // The byte blends for AVX vectors were introduced only in AVX2. + if (Subtarget->hasAVX2()) + return Op; + + return SDValue(); + case MVT::v8i16: case MVT::v16i16: + // AVX-512 BWI and VLX features support VSELECT with i16 elements. if (Subtarget->hasBWI() && Subtarget->hasVLX()) - break; + return Op; + + // FIXME: We should custom lower this by fixing the condition and using i8 + // blends. return SDValue(); } - - // We couldn't create a "Blend with immediate" node. - // This node should still be legal, but we'll have to emit a blendv* - // instruction. - return Op; } static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { @@ -20731,21 +20747,31 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } } - // If we know that this node is legal then we know that it is going to be - // matched by one of the SSE/AVX BLEND instructions. These instructions only - // depend on the highest bit in each word. Try to use SimplifyDemandedBits - // to simplify previous instructions. + // We should generate an X86ISD::BLENDI from a vselect if its argument + // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of + // constants. This specific pattern gets generated when we split a + // selector for a 512 bit vector in a machine without AVX512 (but with + // 256-bit vectors), during legalization: + // + // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS) + // + // Iff we find this pattern and the build_vectors are built from + // constants, we translate the vselect into a shuffle_vector that we + // know will be matched by LowerVECTOR_SHUFFLEtoBlend. + if ((N->getOpcode() == ISD::VSELECT || + N->getOpcode() == X86ISD::SHRUNKBLEND) && + !DCI.isBeforeLegalize()) { + SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget); + if (Shuffle.getNode()) + return Shuffle; + } + + // If this is a *dynamic* select (non-constant condition) and we can match + // this node with one of the variable blend instructions, restructure the + // condition so that the blends can use the high bit of each element and use + // SimplifyDemandedBits to simplify the condition operand. if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() && !DCI.isBeforeLegalize() && - // We explicitly check against SSE4.1, v8i16 and v16i16 because, although - // vselect nodes may be marked as Custom, they might only be legal when - // Cond is a build_vector of constants. This will be taken care in - // a later condition. - (TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) && - Subtarget->hasSSE41() && VT != MVT::v16i16 && VT != MVT::v8i16) && - // Don't optimize vector of constants. Those are handled by - // the generic code and all the bits must be properly set for - // the generic optimizer. !ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits(); @@ -20753,6 +20779,31 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (BitWidth == 1) return SDValue(); + // We can only handle the cases where VSELECT is directly legal on the + // subtarget. We custom lower VSELECT nodes with constant conditions and + // this makes it hard to see whether a dynamic VSELECT will correctly + // lower, so we both check the operation's status and explicitly handle the + // cases where a *dynamic* blend will fail even though a constant-condition + // blend could be custom lowered. + // FIXME: We should find a better way to handle this class of problems. + // Potentially, we should combine constant-condition vselect nodes + // pre-legalization into shuffles and not mark as many types as custom + // lowered. + if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + // FIXME: We don't support i16-element blends currently. We could and + // should support them by making *all* the bits in the condition be set + // rather than just the high bit and using an i8-element blend. + if (VT.getScalarType() == MVT::i16) + return SDValue(); + // Dynamic blending was only available from SSE4.1 onward. + if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41()) + return SDValue(); + // Byte blends are only available in AVX2 + if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 && + !Subtarget->hasAVX2()) + return SDValue(); + assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1); @@ -20801,25 +20852,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } } - // We should generate an X86ISD::BLENDI from a vselect if its argument - // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of - // constants. This specific pattern gets generated when we split a - // selector for a 512 bit vector in a machine without AVX512 (but with - // 256-bit vectors), during legalization: - // - // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS) - // - // Iff we find this pattern and the build_vectors are built from - // constants, we translate the vselect into a shuffle_vector that we - // know will be matched by LowerVECTOR_SHUFFLEtoBlend. - if ((N->getOpcode() == ISD::VSELECT || - N->getOpcode() == X86ISD::SHRUNKBLEND) && - !DCI.isBeforeLegalize()) { - SDValue Shuffle = transformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget); - if (Shuffle.getNode()) - return Shuffle; - } - return SDValue(); } @@ -23911,8 +23943,9 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -std::pair -X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, +std::pair +X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + const std::string &Constraint, MVT VT) const { // First, see if this is a constraint that directly corresponds to an LLVM // register class. @@ -24018,7 +24051,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // Use the default implementation in TargetLowering to convert the register // constraint into a member of a register class. std::pair Res; - Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // Not found as a standard register? if (!Res.second) {