X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=86138a7834301cd43f931064432deb9967058149;hp=1afb82c735cf0ede3b6d7ddd3c9e7ca674757a76;hb=824e14ddab883edf9af866352bfe6b34577f2e2d;hpb=7a8ae347235c8eb239ad9ce6628ed70d2e318d46 diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1afb82c735c..86138a78343 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -198,23 +199,29 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote); } - // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 - // are Legal, f80 is custom lowered. - setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); - setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); - // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have // this operation. setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); - if (X86ScalarSSEf32) { - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); - // f32 and f64 cases are Legal, f80 case is not - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + if (!Subtarget->useSoftFloat()) { + // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 + // are Legal, f80 is custom lowered. + setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); + setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); + + if (X86ScalarSSEf32) { + setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); + // f32 and f64 cases are Legal, f80 case is not + setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + } else { + setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); + setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + } } else { - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); + setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand); + setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand); } // Handle FP_TO_UINT by promoting the destination to a larger signed @@ -414,9 +421,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); setOperationAction(ISD::SETCC , MVT::f80 , Custom); + setOperationAction(ISD::SETCCE , MVT::i8 , Custom); + setOperationAction(ISD::SETCCE , MVT::i16 , Custom); + setOperationAction(ISD::SETCCE , MVT::i32 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::SELECT , MVT::i64 , Custom); setOperationAction(ISD::SETCC , MVT::i64 , Custom); + setOperationAction(ISD::SETCCE , MVT::i64 , Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support @@ -476,13 +487,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } - if (Subtarget->isTarget64BitLP64()) { - setExceptionPointerRegister(X86::RAX); - setExceptionSelectorRegister(X86::RDX); - } else { - setExceptionPointerRegister(X86::EAX); - setExceptionSelectorRegister(X86::EDX); - } setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); @@ -1446,7 +1450,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); setOperationAction(ISD::SETCC, MVT::v16i1, Custom); setOperationAction(ISD::SETCC, MVT::v8i1, Custom); @@ -1607,8 +1611,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v32i16, Legal); setOperationAction(ISD::MULHS, MVT::v32i16, Legal); setOperationAction(ISD::MULHU, MVT::v32i16, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); @@ -1769,12 +1773,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); + setTargetDAGCombine(ISD::FNEG); setTargetDAGCombine(ISD::FMA); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::MLOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::MSTORE); + setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); @@ -2179,7 +2185,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, else if (VA.getLocInfo() == CCValAssign::ZExt) ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); else if (VA.getLocInfo() == CCValAssign::AExt) { - if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1) + if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); else ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); @@ -2721,8 +2727,6 @@ SDValue X86TargetLowering::LowerFormalArguments( MFI->CreateFixedObject(1, StackSize, true)); } - MachineModuleInfo &MMI = MF.getMMI(); - // Figure out if XMM registers are in use. assert(!(Subtarget->useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && @@ -2878,22 +2882,20 @@ SDValue X86TargetLowering::LowerFormalArguments( FuncInfo->setArgumentStackSize(StackSize); - if (MMI.hasWinEHFuncInfo(Fn)) { - if (Is64Bit) { - int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); - SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); - MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; - SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64); - Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, - MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), UnwindHelpFI), - /*isVolatile=*/true, - /*isNonTemporal=*/false, /*Alignment=*/0); - } else { - // Functions using Win32 EH are considered to have opaque SP adjustments - // to force local variables to be addressed from the frame or base - // pointers. - MFI->setHasOpaqueSPAdjustment(true); + if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { + EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); + if (Personality == EHPersonality::CoreCLR) { + assert(Is64Bit); + // TODO: Add a mechanism to frame lowering that will allow us to indicate + // that we'd prefer this slot be allocated towards the bottom of the frame + // (i.e. near the stack pointer after allocating the frame). Every + // funclet needs a copy of this slot in its (mostly empty) frame, and the + // offset from the bottom of this and each funclet's frame must be the + // same, so the size of funclets' (mostly empty) frames is dictated by + // how far this slot is from the bottom (since they allocate just enough + // space to accomodate holding this slot at the correct offset). + int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); + EHInfo->PSPSymFrameIdx = PSPSymFI; } } @@ -2958,7 +2960,7 @@ static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, /// Returns a vector_shuffle mask for an movs{s|d}, movd /// operation of specified width. -static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, +static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); SmallVector Mask; @@ -3120,7 +3122,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, break; case CCValAssign::AExt: if (Arg.getValueType().isVector() && - Arg.getValueType().getScalarType() == MVT::i1) + Arg.getValueType().getVectorElementType() == MVT::i1) Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); else if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. @@ -3844,7 +3846,7 @@ static bool isTargetShuffle(unsigned Opcode) { } } -static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, +static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT, SDValue V1, unsigned TargetMask, SelectionDAG &DAG) { switch(Opc) { @@ -3859,7 +3861,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, } } -static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, +static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); @@ -3959,7 +3961,22 @@ static bool isX86CCUnsigned(unsigned X86CC) { case X86::COND_BE: return true; case X86::COND_AE: return true; } - llvm_unreachable("covered switch fell through?!"); +} + +static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { + switch (SetCCOpcode) { + default: llvm_unreachable("Invalid integer condition!"); + case ISD::SETEQ: return X86::COND_E; + case ISD::SETGT: return X86::COND_G; + case ISD::SETGE: return X86::COND_GE; + case ISD::SETLT: return X86::COND_L; + case ISD::SETLE: return X86::COND_LE; + case ISD::SETNE: return X86::COND_NE; + case ISD::SETULT: return X86::COND_B; + case ISD::SETUGT: return X86::COND_A; + case ISD::SETULE: return X86::COND_BE; + case ISD::SETUGE: return X86::COND_AE; + } } /// Do a one-to-one translation of a ISD::CondCode to the X86-specific @@ -3985,19 +4002,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP, } } - switch (SetCCOpcode) { - default: llvm_unreachable("Invalid integer condition!"); - case ISD::SETEQ: return X86::COND_E; - case ISD::SETGT: return X86::COND_G; - case ISD::SETGE: return X86::COND_GE; - case ISD::SETLT: return X86::COND_L; - case ISD::SETLE: return X86::COND_LE; - case ISD::SETNE: return X86::COND_NE; - case ISD::SETULT: return X86::COND_B; - case ISD::SETUGT: return X86::COND_A; - case ISD::SETULE: return X86::COND_BE; - case ISD::SETUGE: return X86::COND_AE; - } + return TranslateIntegerX86CC(SetCCOpcode); } // First determine if it is required or is profitable to flip the operands. @@ -4207,8 +4212,8 @@ bool X86::isVEXTRACT256Index(SDNode *N) { static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) { assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width"); - if (!isa(N->getOperand(1).getNode())) - llvm_unreachable("Illegal extract subvector for VEXTRACT"); + assert(isa(N->getOperand(1).getNode()) && + "Illegal extract subvector for VEXTRACT"); uint64_t Index = cast(N->getOperand(1).getNode())->getZExtValue(); @@ -4222,8 +4227,8 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) { static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) { assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width"); - if (!isa(N->getOperand(2).getNode())) - llvm_unreachable("Illegal insert subvector for VINSERT"); + assert(isa(N->getOperand(2).getNode()) && + "Illegal insert subvector for VINSERT"); uint64_t Index = cast(N->getOperand(2).getNode())->getZExtValue(); @@ -4259,40 +4264,30 @@ unsigned X86::getInsertVINSERT256Immediate(SDNode *N) { return getInsertVINSERTImmediate(N, 256); } -/// Returns true if V is a constant integer zero. -static bool isZero(SDValue V) { - ConstantSDNode *C = dyn_cast(V); - return C && C->isNullValue(); -} - /// Returns true if Elt is a constant zero or a floating point constant +0.0. bool X86::isZeroNode(SDValue Elt) { - if (isZero(Elt)) - return true; - if (ConstantFPSDNode *CFP = dyn_cast(Elt)) - return CFP->getValueAPF().isPosZero(); - return false; + return isNullConstant(Elt) || isNullFPConstant(Elt); } // Build a vector of constants // Use an UNDEF node if MaskElt == -1. // Spilt 64-bit constants in the 32-bit mode. -static SDValue getConstVector(ArrayRef Values, EVT VT, +static SDValue getConstVector(ArrayRef Values, MVT VT, SelectionDAG &DAG, SDLoc dl, bool IsMask = false) { SmallVector Ops; bool Split = false; - EVT ConstVecVT = VT; + MVT ConstVecVT = VT; unsigned NumElts = VT.getVectorNumElements(); bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64); - if (!In64BitMode && VT.getScalarType() == MVT::i64) { + if (!In64BitMode && VT.getVectorElementType() == MVT::i64) { ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2); Split = true; } - EVT EltVT = ConstVecVT.getScalarType(); + MVT EltVT = ConstVecVT.getVectorElementType(); for (unsigned i = 0; i < NumElts; ++i) { bool IsUndef = Values[i] < 0 && IsMask; SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) : @@ -4341,7 +4336,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops); - } else if (VT.getScalarType() == MVT::i1) { + } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); @@ -4456,7 +4451,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, Vec, ZeroIndex); // The blend instruction, and therefore its mask, depend on the data type. - MVT ScalarType = ResultVT.getScalarType().getSimpleVT(); + MVT ScalarType = ResultVT.getVectorElementType().getSimpleVT(); if (ScalarType.isFloatingPoint()) { // Choose either vblendps (float) or vblendpd (double). unsigned ScalarSize = ScalarType.getSizeInBits(); @@ -4493,6 +4488,81 @@ static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256); } +/// Insert i1-subvector to i1-vector. +static SDValue Insert1BitVector(SDValue Op, SelectionDAG &DAG) { + + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + SDValue SubVec = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + + if (!isa(Idx)) + return SDValue(); + + unsigned IdxVal = cast(Idx)->getZExtValue(); + if (IdxVal == 0 && Vec.isUndef()) // the operation is legal + return Op; + + MVT OpVT = Op.getSimpleValueType(); + MVT SubVecVT = SubVec.getSimpleValueType(); + unsigned NumElems = OpVT.getVectorNumElements(); + unsigned SubVecNumElems = SubVecVT.getVectorNumElements(); + + assert(IdxVal + SubVecNumElems <= NumElems && + IdxVal % SubVecVT.getSizeInBits() == 0 && + "Unexpected index value in INSERT_SUBVECTOR"); + + // There are 3 possible cases: + // 1. Subvector should be inserted in the lower part (IdxVal == 0) + // 2. Subvector should be inserted in the upper part + // (IdxVal + SubVecNumElems == NumElems) + // 3. Subvector should be inserted in the middle (for example v2i1 + // to v16i1, index 2) + + SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); + SDValue Undef = DAG.getUNDEF(OpVT); + SDValue WideSubVec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, SubVec, ZeroIdx); + if (Vec.isUndef()) + return DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + + if (ISD::isBuildVectorAllZeros(Vec.getNode())) { + unsigned ShiftLeft = NumElems - SubVecNumElems; + unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; + WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec, + DAG.getConstant(ShiftLeft, dl, MVT::i8)); + return ShiftRight ? DAG.getNode(X86ISD::VSRLI, dl, OpVT, WideSubVec, + DAG.getConstant(ShiftRight, dl, MVT::i8)) : WideSubVec; + } + + if (IdxVal == 0) { + // Zero lower bits of the Vec + SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); + Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); + Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); + // Merge them together + return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec); + } + + // Simple case when we put subvector in the upper part + if (IdxVal + SubVecNumElems == NumElems) { + // Zero upper bits of the Vec + WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8); + Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); + Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); + return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec); + } + // Subvector should be inserted in the middle - use shuffle + SmallVector Mask; + for (unsigned i = 0; i < NumElems; ++i) + Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ? + i : i + NumElems); + return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask); +} + /// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128 /// instructions. This is used because creating CONCAT_VECTOR nodes of /// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower @@ -4651,7 +4721,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { // If we have a build-vector, then things are easy. - EVT VT = MaskNode.getValueType(); + MVT VT = MaskNode.getSimpleValueType(); assert(VT.isVector() && "Can't produce a non-vector with a build_vector!"); if (!VT.isInteger()) @@ -4751,8 +4821,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, SmallVector RawMask; if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { // If we have a build-vector, then things are easy. - assert(MaskNode.getValueType().isInteger() && - MaskNode.getValueType().getVectorNumElements() == + assert(MaskNode.getSimpleValueType().isInteger() && + MaskNode.getSimpleValueType().getVectorNumElements() == VT.getVectorNumElements()); for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) { @@ -4812,8 +4882,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { // If we have a build-vector, then things are easy. - assert(MaskNode.getValueType().isInteger() && - MaskNode.getValueType().getVectorNumElements() == + assert(MaskNode.getSimpleValueType().isInteger() && + MaskNode.getSimpleValueType().getVectorNumElements() == VT.getVectorNumElements()); SmallVector RawMask; @@ -5883,7 +5953,7 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, /// node. static SDValue LowerToAddSub(const BuildVectorSDNode *BV, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = BV->getValueType(0); + MVT VT = BV->getSimpleValueType(0); if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) return SDValue(); @@ -5945,12 +6015,12 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV, // Update InVec0 and InVec1. if (InVec0.getOpcode() == ISD::UNDEF) { InVec0 = Op0.getOperand(0); - if (InVec0.getValueType() != VT) + if (InVec0.getSimpleValueType() != VT) return SDValue(); } if (InVec1.getOpcode() == ISD::UNDEF) { InVec1 = Op1.getOperand(0); - if (InVec1.getValueType() != VT) + if (InVec1.getSimpleValueType() != VT) return SDValue(); } @@ -5986,7 +6056,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV, static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = BV->getValueType(0); + MVT VT = BV->getSimpleValueType(0); unsigned NumElts = VT.getVectorNumElements(); unsigned NumUndefsLO = 0; unsigned NumUndefsHI = 0; @@ -6128,7 +6198,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned NumElems = Op.getNumOperands(); // Generate vectors for predicate vectors. - if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512()) + if (VT.getVectorElementType() == MVT::i1 && Subtarget->hasAVX512()) return LowerBUILD_VECTORvXi1(Op, DAG); // Vectors containing all zeros can be matched by pxor and xorps later @@ -6164,7 +6234,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned NumZero = 0; unsigned NumNonZero = 0; - unsigned NonZeros = 0; + uint64_t NonZeros = 0; bool IsAllConstants = true; SmallSet Values; for (unsigned i = 0; i < NumElems; ++i) { @@ -6178,7 +6248,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (X86::isZeroNode(Elt)) NumZero++; else { - NonZeros |= (1 << i); + assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range. + NonZeros |= ((uint64_t)1 << i); NumNonZero++; } } @@ -6202,7 +6273,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { // Handle SSE only. assert(VT == MVT::v2i64 && "Expected an SSE value type!"); - EVT VecVT = MVT::v4i32; + MVT VecVT = MVT::v4i32; // Truncate the value (which may itself be a constant) to i32, and // convert it to a vector with movd (S2V+shuffle to zero extend). @@ -6334,7 +6405,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // One half is zero or undef. unsigned Idx = countTrailingZeros(NonZeros); SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, - Op.getOperand(Idx)); + Op.getOperand(Idx)); return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); } return SDValue(); @@ -6342,13 +6413,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If element VT is < 32 bits, convert it to inserts into a zero vector. if (EVTBits == 8 && NumElems == 16) - if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, - Subtarget, *this)) + if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, + DAG, Subtarget, *this)) return V; if (EVTBits == 16 && NumElems == 8) - if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, - Subtarget, *this)) + if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, + DAG, Subtarget, *this)) return V; // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS @@ -6476,8 +6547,8 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); if (Op.getNumOperands() == 4) { - MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(), - ResVT.getVectorNumElements()/2); + MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), + ResVT.getVectorNumElements()/2); SDValue V3 = Op.getOperand(2); SDValue V4 = Op.getOperand(3); return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl), @@ -6496,8 +6567,27 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, assert(isPowerOf2_32(NumOfOperands) && "Unexpected number of operands in CONCAT_VECTORS"); + SDValue Undef = DAG.getUNDEF(ResVT); if (NumOfOperands > 2) { - MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(), + // Specialize the cases when all, or all but one, of the operands are undef. + unsigned NumOfDefinedOps = 0; + unsigned OpIdx = 0; + for (unsigned i = 0; i < NumOfOperands; i++) + if (!Op.getOperand(i).isUndef()) { + NumOfDefinedOps++; + OpIdx = i; + } + if (NumOfDefinedOps == 0) + return Undef; + if (NumOfDefinedOps == 1) { + unsigned SubVecNumElts = + Op.getOperand(OpIdx).getValueType().getVectorNumElements(); + SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, + Op.getOperand(OpIdx), IdxVal); + } + + MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), ResVT.getVectorNumElements()/2); SmallVector Ops; for (unsigned i = 0; i < NumOfOperands/2; i++) @@ -6510,31 +6600,38 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } + // 2 operands SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); + unsigned NumElems = ResVT.getVectorNumElements(); + assert(V1.getValueType() == V2.getValueType() && + V1.getValueType().getVectorNumElements() == NumElems/2 && + "Unexpected operands in CONCAT_VECTORS"); + + if (ResVT.getSizeInBits() >= 16) + return Op; // The operation is legal with KUNPCK + bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode()); bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode()); - + SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl); if (IsZeroV1 && IsZeroV2) - return getZeroVector(ResVT, Subtarget, DAG, dl); + return ZeroVec; SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - SDValue Undef = DAG.getUNDEF(ResVT); - unsigned NumElems = ResVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8); + if (V2.isUndef()) + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); + if (IsZeroV2) + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx); + + SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl); + if (V1.isUndef()) + V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal); - V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, ZeroIdx); - V2 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V2, ShiftBits); if (IsZeroV1) - return V2; + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal); V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); - // Zero the upper bits of V1 - V1 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V1, ShiftBits); - V1 = DAG.getNode(X86ISD::VSRLI, dl, ResVT, V1, ShiftBits); - if (IsZeroV2) - return V1; - return DAG.getNode(ISD::OR, dl, ResVT, V1, V2); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal); } static SDValue LowerCONCAT_VECTORS(SDValue Op, @@ -6790,7 +6887,7 @@ static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, ArrayRef Mask, static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { - MVT EltVT = VT.getScalarType(); + MVT EltVT = VT.getVectorElementType(); int NumEltBits = EltVT.getSizeInBits(); MVT IntEltVT = MVT::getIntegerVT(NumEltBits); SDValue Zero = DAG.getConstant(0, DL, IntEltVT); @@ -6834,7 +6931,7 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { assert(VT.isInteger() && "Only supports integer vector types!"); - MVT EltVT = VT.getScalarType(); + MVT EltVT = VT.getVectorElementType(); int NumEltBits = EltVT.getSizeInBits(); SDValue Zero = DAG.getConstant(0, DL, EltVT); SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, @@ -7348,8 +7445,9 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1, SDValue &V = (M < Size ? V1 : V2); M = M % Size; - // All mask elements must be in the lower half. - if (M >= HalfSize) + // The extracted elements must start at a valid index and all mask + // elements must be in the lower half. + if (i > M || M >= HalfSize) return SDValue(); if (Idx < 0 || (Src == V && Idx == (M - i))) { @@ -7867,6 +7965,59 @@ static SDValue lowerVectorShuffleAsElementInsertion( return V2; } +/// \brief Try to lower broadcast of a single - truncated - integer element, +/// coming from a scalar_to_vector/build_vector node \p V0 with larger elements. +/// +/// This assumes we have AVX2. +static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0, + int BroadcastIdx, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + assert(Subtarget->hasAVX2() && + "We can only lower integer broadcasts with AVX2!"); + + EVT EltVT = VT.getVectorElementType(); + EVT V0VT = V0.getValueType(); + + assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!"); + assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!"); + + EVT V0EltVT = V0VT.getVectorElementType(); + if (!V0EltVT.isInteger()) + return SDValue(); + + const unsigned EltSize = EltVT.getSizeInBits(); + const unsigned V0EltSize = V0EltVT.getSizeInBits(); + + // This is only a truncation if the original element type is larger. + if (V0EltSize <= EltSize) + return SDValue(); + + assert(((V0EltSize % EltSize) == 0) && + "Scalar type sizes must all be powers of 2 on x86!"); + + const unsigned V0Opc = V0.getOpcode(); + const unsigned Scale = V0EltSize / EltSize; + const unsigned V0BroadcastIdx = BroadcastIdx / Scale; + + if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) && + V0Opc != ISD::BUILD_VECTOR) + return SDValue(); + + SDValue Scalar = V0.getOperand(V0BroadcastIdx); + + // If we're extracting non-least-significant bits, shift so we can truncate. + // Hopefully, we can fold away the trunc/srl/load into the broadcast. + // Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer + // vpbroadcast+vmovd+shr to vpshufb(m)+vmovd. + if (const int OffsetIdx = BroadcastIdx % Scale) + Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar, + DAG.getConstant(OffsetIdx * EltSize, DL, Scalar.getValueType())); + + return DAG.getNode(X86ISD::VBROADCAST, DL, VT, + DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar)); +} + /// \brief Try to lower broadcast of a single element. /// /// For convenience, this code also bundles all of the subtarget feature set @@ -7912,7 +8063,7 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, int BeginIdx = (int)ConstantIdx->getZExtValue(); int EndIdx = - BeginIdx + (int)VInner.getValueType().getVectorNumElements(); + BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements(); if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) { BroadcastIdx -= BeginIdx; V = VInner; @@ -7930,18 +8081,10 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, // First, look through bitcast: if the original value has a larger element // type than the shuffle, the broadcast element is in essence truncated. // Make that explicit to ease folding. - if (V.getOpcode() == ISD::BITCAST && VT.isInteger()) { - EVT EltVT = VT.getVectorElementType(); - SDValue V0 = V.getOperand(0); - EVT V0VT = V0.getValueType(); - - if (V0VT.isInteger() && V0VT.getVectorElementType().bitsGT(EltVT) && - ((V0.getOpcode() == ISD::BUILD_VECTOR || - (V0.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)))) { - V = DAG.getNode(ISD::TRUNCATE, DL, EltVT, V0.getOperand(BroadcastIdx)); - BroadcastIdx = 0; - } - } + if (V.getOpcode() == ISD::BITCAST && VT.isInteger()) + if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast( + DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG)) + return TruncBroadcast; // Also check the simpler case, where we can directly reuse the scalar. if (V.getOpcode() == ISD::BUILD_VECTOR || @@ -8660,7 +8803,7 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, static SDValue lowerV8I16GeneralSingleInputVectorShuffle( SDLoc DL, MVT VT, SDValue V, MutableArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - assert(VT.getScalarType() == MVT::i16 && "Bad input type!"); + assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!"); MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); assert(Mask.size() == 8 && "Shuffle mask length doen't match!"); @@ -9734,7 +9877,7 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1, int NumElements = VT.getVectorNumElements(); int SplitNumElements = NumElements / 2; - MVT ScalarVT = VT.getScalarType(); + MVT ScalarVT = VT.getVectorElementType(); MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2); // Rather than splitting build-vectors, just build two narrower build @@ -9746,7 +9889,7 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1, MVT OrigVT = V.getSimpleValueType(); int OrigNumElements = OrigVT.getVectorNumElements(); int OrigSplitNumElements = OrigNumElements / 2; - MVT OrigScalarVT = OrigVT.getScalarType(); + MVT OrigScalarVT = OrigVT.getVectorElementType(); MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2); SDValue LoV, HiV; @@ -10952,7 +11095,7 @@ static SDValue lower1BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Subtarget->hasAVX512() && "Cannot lower 512-bit vectors w/o basic ISA!"); - EVT ExtVT; + MVT ExtVT; switch (VT.SimpleTy) { default: llvm_unreachable("Expected a vector of i1 elements"); @@ -11010,7 +11153,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, MVT VT = Op.getSimpleValueType(); int NumElements = VT.getVectorNumElements(); SDLoc dl(Op); - bool Is1BitVector = (VT.getScalarType() == MVT::i1); + bool Is1BitVector = (VT.getVectorElementType() == MVT::i1); assert((VT.getSizeInBits() != 64 || Is1BitVector) && "Can't lower MMX shuffles"); @@ -11140,7 +11283,7 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, unsigned &MaskValue) { MaskValue = 0; unsigned NumElems = BuildVector->getNumOperands(); - + // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. // We don't handle the >2 lanes case right now. unsigned NumLanes = (NumElems - 1) / 8 + 1; @@ -11157,9 +11300,9 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, int Lane1Cond = -1, Lane2Cond = -1; if (isa(EltCond)) - Lane1Cond = !isZero(EltCond); + Lane1Cond = !isNullConstant(EltCond); if (isa(SndLaneEltCond)) - Lane2Cond = !isZero(SndLaneEltCond); + Lane2Cond = !isNullConstant(SndLaneEltCond); unsigned LaneMask = 0; if (Lane1Cond == Lane2Cond || Lane2Cond < 0) @@ -11200,7 +11343,8 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) { SDValue CondElt = CondBV->getOperand(i); Mask.push_back( - isa(CondElt) ? i + (isZero(CondElt) ? Size : 0) : -1); + isa(CondElt) ? i + (isNullConstant(CondElt) ? Size : 0) + : -1); } return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask); } @@ -11265,9 +11409,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { } if (VT.getSizeInBits() == 16) { - unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); // If Idx is 0, it's cheaper to do a move instead of a pextrw. - if (Idx == 0) + if (isNullConstant(Op.getOperand(1))) return DAG.getNode( ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, @@ -11290,8 +11433,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { return SDValue(); SDNode *User = *Op.getNode()->use_begin(); if ((User->getOpcode() != ISD::STORE || - (isa(Op.getOperand(1)) && - cast(Op.getOperand(1))->isNullValue())) && + isNullConstant(Op.getOperand(1))) && (User->getOpcode() != ISD::BITCAST || User->getValueType(0) != MVT::i32)) return SDValue(); @@ -11408,8 +11550,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // TODO: handle v16i8. if (VT.getSizeInBits() == 16) { SDValue Vec = Op.getOperand(0); - unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); - if (Idx == 0) + if (isNullConstant(Op.getOperand(1))) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Vec), @@ -11441,8 +11582,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught // to match extract_elt for f64. - unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); - if (Idx == 0) + if (isNullConstant(Op.getOperand(1))) return Op; // UNPCKHPD the element to the lowest double word, then movsd. @@ -11720,37 +11860,9 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, if (OpVT.is512BitVector() && SubVecVT.is256BitVector()) return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl); - if (OpVT.getVectorElementType() == MVT::i1) { - if (IdxVal == 0 && Vec.getOpcode() == ISD::UNDEF) // the operation is legal - return Op; - SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - SDValue Undef = DAG.getUNDEF(OpVT); - unsigned NumElems = OpVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8); - - if (IdxVal == OpVT.getVectorNumElements() / 2) { - // Zero upper bits of the Vec - Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); - Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); - - SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, - SubVec, ZeroIdx); - Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits); - return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2); - } - if (IdxVal == 0) { - SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, - SubVec, ZeroIdx); - // Zero upper bits of the Vec2 - Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits); - Vec2 = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec2, ShiftBits); - // Zero lower bits of the Vec - Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); - Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); - // Merge them together - return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2); - } - } + if (OpVT.getVectorElementType() == MVT::i1) + return Insert1BitVector(Op, DAG); + return SDValue(); } @@ -12509,7 +12621,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. - EVT DestVT = Op.getValueType(); + MVT DestVT = Op.getSimpleValueType(); if (DestVT.bitsLT(MVT::f64)) return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, @@ -12546,12 +12658,12 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); SDValue V = Op->getOperand(0); - EVT VecIntVT = V.getValueType(); + MVT VecIntVT = V.getSimpleValueType(); bool Is128 = VecIntVT == MVT::v4i32; - EVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; + MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; // If we convert to something else than the supported type, e.g., to v4f64, // abort early. - if (VecFloatVT != Op->getValueType(0)) + if (VecFloatVT != Op->getSimpleValueType(0)) return SDValue(); unsigned NumElts = VecIntVT.getVectorNumElements(); @@ -12589,7 +12701,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, SDValue Low, High; if (Subtarget.hasSSE41()) { - EVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; + MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow); SDValue VecBitcast = DAG.getBitcast(VecI16VT, V); @@ -12657,11 +12769,10 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget); case MVT::v16i8: case MVT::v16i16: - if (Subtarget->hasAVX512()) - return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0)); + assert(Subtarget->hasAVX512()); + return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0)); } - llvm_unreachable(nullptr); } SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, @@ -12670,7 +12781,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); - if (Op.getValueType().isVector()) + if (Op.getSimpleValueType().isVector()) return lowerUINT_TO_FP_vec(Op, DAG); // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't @@ -12957,7 +13068,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); - if (VT.is512BitVector() || InVT.getScalarType() == MVT::i1) + if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1) return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In); // Optimize vectors in AVX mode: @@ -13102,10 +13213,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { } // vpmovqb/w/d, vpmovdb/w, vpmovwb - if (((!InVT.is512BitVector() && Subtarget->hasVLX()) || InVT.is512BitVector()) && - (InVT.getVectorElementType() != MVT::i16 || Subtarget->hasBWI())) + if (Subtarget->hasAVX512()) { + // word to byte only under BWI + if (InVT == MVT::v16i16 && !Subtarget->hasBWI()) // v16i16 -> v16i8 + return DAG.getNode(X86ISD::VTRUNC, DL, VT, + DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In)); return DAG.getNode(X86ISD::VTRUNC, DL, VT, In); - + } if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. if (Subtarget->hasInt256()) { @@ -13492,7 +13606,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget, return SDValue(); } - EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; + MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; // Cast all vectors into TestVT for PTEST. for (unsigned i = 0, e = VecIns.size(); i < e; ++i) @@ -13632,14 +13746,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, if (ConstantSDNode *C = dyn_cast(ArithOp.getNode()->getOperand(1))) { // An add of one will be selected as an INC. - if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) { + if (C->isOne() && !Subtarget->slowIncDec()) { Opcode = X86ISD::INC; NumOperands = 1; break; } // An add of negative one (subtract of one) will be selected as a DEC. - if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) { + if (C->isAllOnesValue() && !Subtarget->slowIncDec()) { Opcode = X86ISD::DEC; NumOperands = 1; break; @@ -13768,13 +13882,11 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, /// equivalent. SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl, SelectionDAG &DAG) const { - if (ConstantSDNode *C = dyn_cast(Op1)) { - if (C->getAPIntValue() == 0) - return EmitTest(Op0, X86CC, dl, DAG); + if (isNullConstant(Op1)) + return EmitTest(Op0, X86CC, dl, DAG); - if (Op0.getValueType() == MVT::i1) - llvm_unreachable("Unexpected comparison operation for MVT::i1 operands"); - } + assert(!(isa(Op1) && Op0.getValueType() == MVT::i1) && + "Unexpected comparison operation for MVT::i1 operands"); if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { @@ -13897,11 +14009,6 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const { return 2; } -static bool isAllOnes(SDValue V) { - ConstantSDNode *C = dyn_cast(V); - return C && C->isAllOnesValue(); -} - /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, @@ -13917,8 +14024,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, if (Op1.getOpcode() == ISD::SHL) std::swap(Op0, Op1); if (Op0.getOpcode() == ISD::SHL) { - if (ConstantSDNode *And00C = dyn_cast(Op0.getOperand(0))) - if (And00C->getZExtValue() == 1) { + if (isOneConstant(Op0.getOperand(0))) { // If we looked past a truncate, check that it's only truncating away // known zeros. unsigned BitWidth = Op0.getValueSizeInBits(); @@ -14055,7 +14161,7 @@ static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - assert(Op0.getValueType().getVectorElementType() == MVT::i1 && + assert(Op0.getSimpleValueType().getVectorElementType() == MVT::i1 && "Unexpected type for boolean compare operation"); ISD::CondCode SetCCOpcode = cast(CC)->get(); SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0, @@ -14099,8 +14205,8 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 8 && - Op.getValueType().getScalarType() == MVT::i1 && + assert(Op0.getSimpleValueType().getVectorElementType().getSizeInBits() >= 8 && + Op.getSimpleValueType().getVectorElementType() == MVT::i1 && "Cannot set masked compare for this operation"); ISD::CondCode SetCCOpcode = cast(CC)->get(); @@ -14147,7 +14253,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG) for (unsigned i = 0; i < n; ++i) { ConstantSDNode *Elt = dyn_cast(BV->getOperand(i)); - if (!Elt || Elt->isOpaque() || Elt->getValueType(0) != EVT) + if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT) return SDValue(); // Avoid underflow. @@ -14238,13 +14344,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (VT.is256BitVector() && !Subtarget->hasInt256()) return Lower256IntVSETCC(Op, DAG); - EVT OpVT = Op1.getValueType(); + MVT OpVT = Op1.getSimpleValueType(); if (OpVT.getVectorElementType() == MVT::i1) return LowerBoolVSETCC_AVX512(Op, DAG); bool MaskResult = (VT.getVectorElementType() == MVT::i1); if (Subtarget->hasAVX512()) { - if (Op1.getValueType().is512BitVector() || + if (Op1.getSimpleValueType().is512BitVector() || (Subtarget->hasBWI() && Subtarget->hasVLX()) || (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32)) return LowerIntVSETCC_AVX512(Op, DAG, Subtarget); @@ -14477,11 +14583,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && - Op1.getOpcode() == ISD::Constant && - cast(Op1)->isNullValue() && + isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); - if (NewSetCC.getNode()) { + if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) { if (VT == MVT::i1) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC); return NewSetCC; @@ -14490,17 +14594,14 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. - if (Op1.getOpcode() == ISD::Constant && - (cast(Op1)->getZExtValue() == 1 || - cast(Op1)->isNullValue()) && + if ((isOneConstant(Op1) || isNullConstant(Op1)) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { // If the input is a setcc, then reuse the input setcc or use a new one with // the inverted condition. if (Op0.getOpcode() == X86ISD::SETCC) { X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); - bool Invert = (CC == ISD::SETNE) ^ - cast(Op1)->isNullValue(); + bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1); if (!Invert) return Op0; @@ -14513,8 +14614,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SetCC; } } - if ((Op0.getValueType() == MVT::i1) && (Op1.getOpcode() == ISD::Constant) && - (cast(Op1)->getZExtValue() == 1) && + if ((Op0.getValueType() == MVT::i1) && isOneConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true); @@ -14535,6 +14635,23 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SetCC; } +SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Carry = Op.getOperand(2); + SDValue Cond = Op.getOperand(3); + SDLoc DL(Op); + + assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); + X86::CondCode CC = TranslateIntegerX86CC(cast(Cond)->get()); + + assert(Carry.getOpcode() != ISD::CARRY_FALSE); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry); + return DAG.getNode(X86ISD::SETCC, DL, Op.getValueType(), + DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1)); +} + // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); @@ -14577,7 +14694,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); SDLoc DL(Op); - EVT VT = Op1.getValueType(); + MVT VT = Op1.getSimpleValueType(); SDValue CC; // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops @@ -14586,7 +14703,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (Cond.getOpcode() == ISD::SETCC && ((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) || (Subtarget->hasSSE1() && VT == MVT::f32)) && - VT == Cond.getOperand(0).getValueType() && Cond->hasOneUse()) { + VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1); int SSECC = translateX86FSETCC( cast(Cond.getOperand(2))->get(), CondOp0, CondOp1); @@ -14620,12 +14737,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // Convert to vectors, do a VSELECT, and convert back to scalar. // All of the conversions should be optimized away. - EVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; + MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1); SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2); SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); - EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; + MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; VCmp = DAG.getBitcast(VCmpVT, VCmp); SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2); @@ -14639,7 +14756,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } - if (VT.isVector() && VT.getScalarType() == MVT::i1) { + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { SDValue Op1Scalar; if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode())) Op1Scalar = ConvertI1VectorToInteger(Op1, DAG); @@ -14685,22 +14802,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y if (Cond.getOpcode() == X86ISD::SETCC && Cond.getOperand(1).getOpcode() == X86ISD::CMP && - isZero(Cond.getOperand(1).getOperand(1))) { + isNullConstant(Cond.getOperand(1).getOperand(1))) { SDValue Cmp = Cond.getOperand(1); unsigned CondCode =cast(Cond.getOperand(0))->getZExtValue(); - if ((isAllOnes(Op1) || isAllOnes(Op2)) && + if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { - SDValue Y = isAllOnes(Op2) ? Op1 : Op2; + SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2; SDValue CmpOp0 = Cmp.getOperand(0); // Apply further optimizations for special cases // (select (x != 0), -1, 0) -> neg & sbb // (select (x == 0), 0, -1) -> neg & sbb - if (ConstantSDNode *YC = dyn_cast(Y)) - if (YC->isNullValue() && - (isAllOnes(Op1) == (CondCode == X86::COND_NE))) { + if (isNullConstant(Y) && + (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, DAG.getConstant(0, DL, @@ -14720,11 +14836,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp); - if (isAllOnes(Op1) != (CondCode == X86::COND_E)) + if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) Res = DAG.getNOT(DL, Res, Res.getValueType()); - ConstantSDNode *N2C = dyn_cast(Op2); - if (!N2C || !N2C->isNullValue()) + if (!isNullConstant(Op2)) Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); return Res; } @@ -14732,11 +14847,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // Look past (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && - Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { - ConstantSDNode *C = dyn_cast(Cond.getOperand(1)); - if (C && C->getAPIntValue() == 1) - Cond = Cond.getOperand(0); - } + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && + isOneConstant(Cond.getOperand(1))) + Cond = Cond.getOperand(0); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. @@ -14802,8 +14915,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // We know the result of AND is compared against zero. Try to match // it to BT. if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { - SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG); - if (NewSetCC.getNode()) { + if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); addTest = false; @@ -14825,11 +14937,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { unsigned CondCode = cast(CC)->getZExtValue(); if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && - (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) { + (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && + (isNullConstant(Op1) || isNullConstant(Op2))) { SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, DL, MVT::i8), Cond); - if (isAllOnes(Op1) != (CondCode == X86::COND_B)) + if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) return DAG.getNOT(DL, Res, Res.getValueType()); return Res; } @@ -14915,8 +15028,8 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, MVT InVT = In.getSimpleValueType(); assert(VT.getSizeInBits() == InVT.getSizeInBits()); - MVT InSVT = InVT.getScalarType(); - assert(VT.getScalarType().getScalarSizeInBits() > InSVT.getScalarSizeInBits()); + MVT InSVT = InVT.getVectorElementType(); + assert(VT.getVectorElementType().getSizeInBits() > InSVT.getSizeInBits()); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) return SDValue(); @@ -14935,7 +15048,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, // As SRAI is only available on i16/i32 types, we expand only up to i32 // and handle i64 separately. - while (CurrVT != VT && CurrVT.getScalarType() != MVT::i32) { + while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) { Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr); MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2); CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2); @@ -14945,7 +15058,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SDValue SignExt = Curr; if (CurrVT != InVT) { unsigned SignExtShift = - CurrVT.getScalarSizeInBits() - InSVT.getScalarSizeInBits(); + CurrVT.getVectorElementType().getSizeInBits() - InSVT.getSizeInBits(); SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr, DAG.getConstant(SignExtShift, dl, MVT::i8)); } @@ -15005,7 +15118,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); - MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), + MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements()/2); OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo); @@ -15129,7 +15242,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, // memory. In practice, we ''widen'' MemVT. EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), - loadRegZize / MemVT.getScalarType().getSizeInBits()); + loadRegZize / MemVT.getScalarSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && "Invalid vector type"); @@ -15219,11 +15332,9 @@ static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) { static bool isXor1OfSetCC(SDValue Op) { if (Op.getOpcode() != ISD::XOR) return false; - ConstantSDNode *N1C = dyn_cast(Op.getOperand(1)); - if (N1C && N1C->getAPIntValue() == 1) { + if (isOneConstant(Op.getOperand(1))) return Op.getOperand(0).getOpcode() == X86ISD::SETCC && - Op.getOperand(0).hasOneUse(); - } + Op.getOperand(0).hasOneUse(); return false; } @@ -15239,8 +15350,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (Cond.getOpcode() == ISD::SETCC) { // Check for setcc([su]{add,sub,mul}o == 0). if (cast(Cond.getOperand(2))->get() == ISD::SETEQ && - isa(Cond.getOperand(1)) && - cast(Cond.getOperand(1))->isNullValue() && + isNullConstant(Cond.getOperand(1)) && Cond.getOperand(0).getResNo() == 1 && (Cond.getOperand(0).getOpcode() == ISD::SADDO || Cond.getOperand(0).getOpcode() == ISD::UADDO || @@ -15267,11 +15377,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // Look pass (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && - Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { - ConstantSDNode *C = dyn_cast(Cond.getOperand(1)); - if (C && C->getAPIntValue() == 1) - Cond = Cond.getOperand(0); - } + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && + isOneConstant(Cond.getOperand(1))) + Cond = Cond.getOperand(0); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. @@ -15315,16 +15423,14 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { switch (CondOpcode) { case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; case ISD::SADDO: - if (ConstantSDNode *C = dyn_cast(RHS)) - if (C->isOne()) { + if (isOneConstant(RHS)) { X86Opcode = X86ISD::INC; X86Cond = X86::COND_O; break; } X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; case ISD::SSUBO: - if (ConstantSDNode *C = dyn_cast(RHS)) - if (C->isOne()) { + if (isOneConstant(RHS)) { X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O; break; } @@ -15486,8 +15592,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // We know the result of AND is compared against zero. Try to match // it to BT. if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { - SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); - if (NewSetCC.getNode()) { + if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); addTest = false; @@ -15882,7 +15987,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, // The return type has to be a 128-bit type with the same element // type as the input type. MVT EltVT = VT.getVectorElementType(); - EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); + MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); ShAmt = DAG.getBitcast(ShVT, ShAmt); return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); @@ -15895,26 +16000,22 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), - MVT::i1, VT.getVectorNumElements()); - SDValue VMask = SDValue(); + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue VMask; unsigned OpcodeSelect = ISD::VSELECT; SDLoc dl(Op); - assert(MaskVT.isSimple() && "invalid mask type"); - - if (isAllOnes(Mask)) + if (isAllOnesConstant(Mask)) return Op; - if (MaskVT.bitsGT(Mask.getValueType())) { - EVT newMaskVT = EVT::getIntegerVT(*DAG.getContext(), - MaskVT.getSizeInBits()); + if (MaskVT.bitsGT(Mask.getSimpleValueType())) { + MVT newMaskVT = MVT::getIntegerVT(MaskVT.getSizeInBits()); VMask = DAG.getBitcast(MaskVT, DAG.getNode(ISD::ANY_EXTEND, dl, newMaskVT, Mask)); } else { - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, @@ -15956,10 +16057,10 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - if (isAllOnes(Mask)) + if (isAllOnesConstant(Mask)) return Op; - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); // The mask should be of type MVT::i1 SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask); @@ -16032,7 +16133,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SelectionDAG &DAG) { SDLoc dl(Op); unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); if (IntrData) { switch(IntrData->Type) { @@ -16217,7 +16318,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget // imm should be adapted to ISD::INSERT_SUBVECTOR behavior assert(isa(Src3) && "Expected a ConstantSDNode here!"); unsigned Imm = cast(Src3)->getZExtValue(); - Imm *= Src2.getValueType().getVectorNumElements(); + Imm *= Src2.getSimpleValueType().getVectorNumElements(); Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32); } @@ -16240,7 +16341,27 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask, PassThru, Subtarget, DAG); } case VPERM_3OP_MASKZ: - case VPERM_3OP_MASK: + case VPERM_3OP_MASK:{ + // Src2 is the PassThru + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + MVT VT = Op.getSimpleValueType(); + SDValue PassThru = SDValue(); + + // set PassThru element + if (IntrData->Type == VPERM_3OP_MASKZ) + PassThru = getZeroVector(VT, Subtarget, DAG, dl); + else + PassThru = Src2; + + // Swap Src1 and Src2 in the node creation + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, + dl, Op.getValueType(), + Src2, Src1, Src3), + Mask, PassThru, Subtarget, DAG); + } case FMA_OP_MASK3: case FMA_OP_MASKZ: case FMA_OP_MASK: { @@ -16248,11 +16369,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDValue PassThru = SDValue(); // set PassThru element - if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ) + if (IntrData->Type == FMA_OP_MASKZ) PassThru = getZeroVector(VT, Subtarget, DAG, dl); else if (IntrData->Type == FMA_OP_MASK3) PassThru = Src3; @@ -16284,7 +16405,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src3 = Op.getOperand(3); SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4)); SDValue Mask = Op.getOperand(5); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDValue PassThru = Src1; // Set PassThru element. if (IntrData->Type == TERLOG_OP_MASKZ) @@ -16297,13 +16418,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget case FPCLASS: { // FPclass intrinsics with mask SDValue Src1 = Op.getOperand(1); - EVT VT = Src1.getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); + MVT VT = Src1.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); SDValue Imm = Op.getOperand(2); SDValue Mask = Op.getOperand(3); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm); SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask, DAG.getTargetConstant(0, dl, MaskVT), @@ -16333,12 +16453,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget // (v2i1 (and (PCMPEQM %a, %b), // (extract_subvector // (v8i1 (bitcast %mask)), 0))), 0)))) - EVT VT = Op.getOperand(1).getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); + MVT VT = Op.getOperand(1).getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); SDValue Cmp; if (IntrData->Type == CMP_MASK_CC) { SDValue CC = Op.getOperand(3); @@ -16423,20 +16542,25 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Mask = Op.getOperand(3); SDValue DataToCompress = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); - if (isAllOnes(Mask)) // return data as is + if (isAllOnesConstant(Mask)) // return data as is return Op.getOperand(1); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress), Mask, PassThru, Subtarget, DAG); } + case BROADCASTM: { + SDValue Mask = Op.getOperand(1); + MVT MaskVT = MVT::getVectorVT(MVT::i1, Mask.getSimpleValueType().getSizeInBits()); + Mask = DAG.getBitcast(MaskVT, Mask); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask); + } case BLEND: { SDValue Mask = Op.getOperand(3); - EVT VT = Op.getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); SDLoc dl(Op); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, DAG.getBitcast(BitcastVT, Mask), @@ -16657,23 +16781,17 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget * Subtarget) { SDLoc dl(Op); - ConstantSDNode *C = dyn_cast(ScaleOp); - if (!C) - llvm_unreachable("Invalid scale type"); - unsigned ScaleVal = C->getZExtValue(); - if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8) - llvm_unreachable("Valid scale values are 1, 2, 4, 8"); - + auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); - EVT MaskVT = MVT::getVectorVT(MVT::i1, + MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else { - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. @@ -16696,25 +16814,19 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain) { SDLoc dl(Op); - ConstantSDNode *C = dyn_cast(ScaleOp); - if (!C) - llvm_unreachable("Invalid scale type"); - unsigned ScaleVal = C->getZExtValue(); - if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8) - llvm_unreachable("Valid scale values are 1, 2, 4, 8"); - + auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); - EVT MaskVT = MVT::getVectorVT(MVT::i1, + MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else { - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. @@ -16732,12 +16844,11 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain) { SDLoc dl(Op); - ConstantSDNode *C = dyn_cast(ScaleOp); - assert(C && "Invalid scale type"); + auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); - EVT MaskVT = + MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); @@ -16919,6 +17030,24 @@ static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget, return Chain; } +static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + SDValue Chain = Op.getOperand(0); + SDValue RegNode = Op.getOperand(2); + WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); + if (!EHInfo) + report_fatal_error("EH registrations only live in functions using WinEH"); + + // Cast the operand to an alloca, and remember the frame index. + auto *FINode = dyn_cast(RegNode); + if (!FINode) + report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca"); + EHInfo->EHRegNodeFrameIndex = FINode->getIndex(); + + // Return the chain operand without making any DAG nodes. + return Chain; +} + /// \brief Lower intrinsics for TRUNCATE_TO_MEM case /// return truncate Store/MaskedStore Node static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue & Op, @@ -16930,19 +17059,17 @@ static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue & Op, SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); - EVT VT = DataToTruncate.getValueType(); - EVT SVT = EVT::getVectorVT(*DAG.getContext(), - ElementType, VT.getVectorNumElements()); + MVT VT = DataToTruncate.getSimpleValueType(); + MVT SVT = MVT::getVectorVT(ElementType, VT.getVectorNumElements()); - if (isAllOnes(Mask)) // return just a truncate store + if (isAllOnesConstant(Mask)) // return just a truncate store return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MachinePointerInfo(), SVT, false, false, SVT.getScalarSizeInBits()/8); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), - MVT::i1, VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, @@ -16966,14 +17093,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, if (!IntrData) { if (IntNo == llvm::Intrinsic::x86_seh_restoreframe) return LowerSEHRESTOREFRAME(Op, Subtarget, DAG); + else if (IntNo == llvm::Intrinsic::x86_seh_ehregnode) + return MarkEHRegistrationNode(Op, DAG); return SDValue(); } SDLoc dl(Op); switch(IntrData->Type) { - default: - llvm_unreachable("Unknown Intrinsic Type"); - break; + default: llvm_unreachable("Unknown Intrinsic Type"); case RDSEED: case RDRAND: { // Emit the node with the right value type. @@ -17078,8 +17205,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); - EVT VT = DataToCompress.getValueType(); - if (isAllOnes(Mask)) // return just a store + MVT VT = DataToCompress.getSimpleValueType(); + if (isAllOnesConstant(Mask)) // return just a store return DAG.getStore(Chain, dl, DataToCompress, Addr, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); @@ -17103,9 +17230,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDValue PassThru = Op.getOperand(3); SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); - if (isAllOnes(Mask)) // return just a load + if (isAllOnesConstant(Mask)) // return just a load return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits()/8); @@ -17229,6 +17356,21 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); } +unsigned X86TargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR) + return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX; + + return Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX; +} + +unsigned X86TargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + // Funclet personalities don't use selectors (the runtime does the selection). + assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))); + return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX; +} + SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); @@ -17557,7 +17699,7 @@ static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) { static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); - EVT OpVT = VT; + MVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); SDLoc dl(Op); @@ -17857,7 +17999,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, SDValue AhiBlo = Ahi; SDValue AloBhi = Bhi; // Bit cast to 32-bit vectors for MULUDQ - EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : + MVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : (VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32; A = DAG.getBitcast(MulVT, A); B = DAG.getBitcast(MulVT, B); @@ -17934,7 +18076,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); - EVT VT = Op0.getValueType(); + MVT VT = Op0.getSimpleValueType(); SDLoc dl(Op); assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) || @@ -18254,7 +18396,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, unsigned SplatIdx = (unsigned)SVN->getSplatIndex(); SDValue InVec = Amt.getOperand(0); if (InVec.getOpcode() == ISD::BUILD_VECTOR) { - assert((SplatIdx < InVec.getValueType().getVectorNumElements()) && + assert((SplatIdx < InVec.getSimpleValueType().getVectorNumElements()) && "Unexpected shuffle index found!"); BaseShAmt = InVec.getOperand(SplatIdx); } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { @@ -18371,9 +18513,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, (Subtarget->hasInt256() && VT == MVT::v16i16)) && ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { SmallVector Elts; - EVT SVT = VT.getScalarType(); + MVT SVT = VT.getVectorElementType(); unsigned SVTBits = SVT.getSizeInBits(); - const APInt &One = APInt(SVTBits, 1); + APInt One(SVTBits, 1); unsigned NumElems = VT.getVectorNumElements(); for (unsigned i=0; i !=NumElems; ++i) { @@ -18384,7 +18526,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, } ConstantSDNode *ND = cast(Op); - const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue()); + APInt C(SVTBits, ND->getAPIntValue().getZExtValue()); uint64_t ShAmt = C.getZExtValue(); if (ShAmt >= SVTBits) { Elts.push_back(DAG.getUNDEF(SVT)); @@ -18463,7 +18605,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, if (CanBeSimplified && isa(Amt1) && isa(Amt2)) { // Replace this node with two shifts followed by a MOVSS/MOVSD. - EVT CastVT = MVT::v4i32; + MVT CastVT = MVT::v4i32; SDValue Splat1 = DAG.getConstant(cast(Amt1)->getAPIntValue(), dl, VT); SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1); @@ -18731,7 +18873,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, if (VT.is256BitVector()) { unsigned NumElems = VT.getVectorNumElements(); MVT EltVT = VT.getVectorElementType(); - EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); // Extract the two vectors SDValue V1 = Extract128BitVector(R, 0, DAG, dl); @@ -18814,8 +18956,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { case ISD::SADDO: // A subtract of one will be selected as a INC. Note that INC doesn't // set CF, so we can't do this for UADDO. - if (ConstantSDNode *C = dyn_cast(RHS)) - if (C->isOne()) { + if (isOneConstant(RHS)) { BaseOp = X86ISD::INC; Cond = X86::COND_O; break; @@ -18830,8 +18971,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { case ISD::SSUBO: // A subtract of one will be selected as a DEC. Note that DEC doesn't // set CF, so we can't do this for USUBO. - if (ConstantSDNode *C = dyn_cast(RHS)) - if (C->isOne()) { + if (isOneConstant(RHS)) { BaseOp = X86ISD::DEC; Cond = X86::COND_O; break; @@ -19160,7 +19300,8 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, // chunks, thus directly computes the pop count for v2i64 and v4i64. if (EltVT == MVT::i64) { SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); - V = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, V, Zeros); + MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64); + V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros); return DAG.getBitcast(VT, V); } @@ -19176,9 +19317,10 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, // Do the horizontal sums into two v2i64s. Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL); - Low = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, + MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64); + Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, DAG.getBitcast(ByteVecVT, Low), Zeros); - High = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, + High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, DAG.getBitcast(ByteVecVT, High), Zeros); // Merge them together. @@ -19368,7 +19510,7 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget, static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - assert(Op.getValueType().isVector() && + assert(Op.getSimpleValueType().isVector() && "We only do custom lowering for vector population count."); return LowerVectorCTPOP(Op, Subtarget, DAG); } @@ -19414,7 +19556,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getNode()->getSimpleValueType(0); + MVT VT = Op.getNode()->getSimpleValueType(0); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) @@ -19498,7 +19640,7 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, "MGATHER/MSCATTER are supported on AVX-512 arch only"); MaskedScatterSDNode *N = cast(Op.getNode()); - EVT VT = N->getValue().getValueType(); + MVT VT = N->getValue().getSimpleValueType(); assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op"); SDLoc dl(Op); @@ -19507,7 +19649,7 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, if (N->getNumValues() == 1) { SDValue Index = N->getIndex(); if (!Subtarget->hasVLX() && !VT.is512BitVector() && - !Index.getValueType().is512BitVector()) + !Index.getSimpleValueType().is512BitVector()) Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); SDVTList VTs = DAG.getVTList(N->getMask().getValueType(), MVT::Other); @@ -19527,13 +19669,13 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget, "MGATHER/MSCATTER are supported on AVX-512 arch only"); MaskedGatherSDNode *N = cast(Op.getNode()); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op"); SDLoc dl(Op); SDValue Index = N->getIndex(); if (!Subtarget->hasVLX() && !VT.is512BitVector() && - !Index.getValueType().is512BitVector()) { + !Index.getSimpleValueType().is512BitVector()) { Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), Index }; @@ -19629,6 +19771,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::SETCCE: return LowerSETCCE(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); @@ -19697,14 +19840,43 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); + case X86ISD::AVG: { + // Legalize types for X86ISD::AVG by expanding vectors. + assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + + auto InVT = N->getValueType(0); + auto InVTSize = InVT.getSizeInBits(); + const unsigned RegSize = + (InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128; + assert((!Subtarget->hasAVX512() || RegSize < 512) && + "512-bit vector requires AVX512"); + assert((!Subtarget->hasAVX2() || RegSize < 256) && + "256-bit vector requires AVX2"); + + auto ElemVT = InVT.getVectorElementType(); + auto RegVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, + RegSize / ElemVT.getSizeInBits()); + assert(RegSize % InVT.getSizeInBits() == 0); + unsigned NumConcat = RegSize / InVT.getSizeInBits(); + + SmallVector Ops(NumConcat, DAG.getUNDEF(InVT)); + Ops[0] = N->getOperand(0); + SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops); + Ops[0] = N->getOperand(1); + SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops); + + SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1); + Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res, + DAG.getIntPtrConstant(0, dl))); + return; + } // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. case X86ISD::FMINC: case X86ISD::FMIN: case X86ISD::FMAXC: case X86ISD::FMAX: { EVT VT = N->getValueType(0); - if (VT != MVT::v2f32) - llvm_unreachable("Unexpected type (!= v2f32) on FMIN/FMAX."); + assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX."); SDValue UNDEF = DAG.getUNDEF(VT); SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, N->getOperand(0), UNDEF); @@ -19804,7 +19976,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT T = N->getValueType(0); assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair"); bool Regs64bit = T == MVT::i128; - EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32; + MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32; SDValue cpInL, cpInH; cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), DAG.getConstant(0, dl, HalfT)); @@ -20059,6 +20231,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; + case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST"; case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT"; case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV"; @@ -20324,11 +20497,11 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, return false; // Not for i1 vectors - if (VT.getScalarType() == MVT::i1) + if (VT.getSimpleVT().getScalarType() == MVT::i1) return false; // Very little shuffling can be done for 64-bit vectors right now. - if (VT.getSizeInBits() == 64) + if (VT.getSimpleVT().getSizeInBits() == 64) return false; // We only care that the types being shuffled are legal. The lowering can @@ -21355,14 +21528,60 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock * X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { + assert(!Subtarget->isTargetMachO()); DebugLoc DL = MI->getDebugLoc(); + MachineInstr *ResumeMI = Subtarget->getFrameLowering()->emitStackProbe( + *BB->getParent(), *BB, MI, DL, false); + MachineBasicBlock *ResumeBB = ResumeMI->getParent(); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return ResumeBB; +} - assert(!Subtarget->isTargetMachO()); +MachineBasicBlock * +X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI, + MachineBasicBlock *BB) const { + MachineFunction *MF = BB->getParent(); + const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); + MachineBasicBlock *TargetMBB = MI->getOperand(0).getMBB(); + DebugLoc DL = MI->getDebugLoc(); - Subtarget->getFrameLowering()->emitStackProbeCall(*BB->getParent(), *BB, MI, - DL); + assert(!isAsynchronousEHPersonality( + classifyEHPersonality(MF->getFunction()->getPersonalityFn())) && + "SEH does not use catchret!"); - MI->eraseFromParent(); // The pseudo instruction is gone now. + // Only 32-bit EH needs to worry about manually restoring stack pointers. + if (!Subtarget->is32Bit()) + return BB; + + // C++ EH creates a new target block to hold the restore code, and wires up + // the new block to the return destination with a normal JMP_4. + MachineBasicBlock *RestoreMBB = + MF->CreateMachineBasicBlock(BB->getBasicBlock()); + assert(BB->succ_size() == 1); + MF->insert(std::next(BB->getIterator()), RestoreMBB); + RestoreMBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(RestoreMBB); + MI->getOperand(0).setMBB(RestoreMBB); + + auto RestoreMBBI = RestoreMBB->begin(); + BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::EH_RESTORE)); + BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB); + return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI, + MachineBasicBlock *BB) const { + MachineFunction *MF = BB->getParent(); + const Constant *PerFn = MF->getFunction()->getPersonalityFn(); + bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn)); + // Only 32-bit SEH requires special handling for catchpad. + if (IsSEH && Subtarget->is32Bit()) { + const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE)); + } + MI->eraseFromParent(); return BB; } @@ -21384,6 +21603,8 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, // FIXME: The 32-bit calls have non-standard calling conventions. Use a // proper register mask. const uint32_t *RegMask = + Subtarget->is64Bit() ? + Subtarget->getRegisterInfo()->getDarwinTLSCallPreservedMask() : Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); if (Subtarget->is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, @@ -21746,6 +21967,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return BB; case X86::WIN_ALLOCA: return EmitLoweredWinAlloca(MI, BB); + case X86::CATCHRET: + return EmitLoweredCatchRet(MI, BB); + case X86::CATCHPAD: + return EmitLoweredCatchPad(MI, BB); case X86::SEG_ALLOCA_32: case X86::SEG_ALLOCA_64: return EmitLoweredSegAlloca(MI, BB); @@ -22032,7 +22257,7 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( unsigned Depth) const { // SETCC_CARRY sets the dest to ~0 for true or 0 for false. if (Op.getOpcode() == X86ISD::SETCC_CARRY) - return Op.getValueType().getScalarType().getSizeInBits(); + return Op.getValueType().getScalarSizeInBits(); // Fallback case. return 1; @@ -22613,8 +22838,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, case X86ISD::UNPCKH: // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword // shuffle into a preceding word shuffle. - if (V.getSimpleValueType().getScalarType() != MVT::i8 && - V.getSimpleValueType().getScalarType() != MVT::i16) + if (V.getSimpleValueType().getVectorElementType() != MVT::i8 && + V.getSimpleValueType().getVectorElementType() != MVT::i16) return SDValue(); // Search for a half-shuffle which we can combine with. @@ -22774,6 +22999,41 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, Mask = getPSHUFShuffleMask(N); assert(Mask.size() == 4); break; + case X86ISD::UNPCKL: { + // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in + // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE + // moves upper half elements into the lower half part. For example: + // + // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1, + // undef:v16i8 + // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2 + // + // will be combined to: + // + // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1 + + // This is only for 128-bit vectors. From SSE4.1 onward this combine may not + // happen due to advanced instructions. + if (!VT.is128BitVector()) + return SDValue(); + + auto Op0 = N.getOperand(0); + auto Op1 = N.getOperand(1); + if (Op0.getOpcode() == ISD::UNDEF && + Op1.getNode()->getOpcode() == ISD::VECTOR_SHUFFLE) { + ArrayRef Mask = cast(Op1.getNode())->getMask(); + + unsigned NumElts = VT.getVectorNumElements(); + SmallVector ExpectedMask(NumElts, -1); + std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2, + NumElts / 2); + + auto ShufOp = Op1.getOperand(0); + if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask)) + return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp); + } + return SDValue(); + } default: return SDValue(); } @@ -22789,7 +23049,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, break; case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: - assert(VT.getScalarType() == MVT::i16 && "Bad word shuffle type!"); + assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!"); if (combineRedundantHalfShuffle(N, Mask, DAG, DCI)) return SDValue(); // We combined away this shuffle, so we're done. @@ -23131,9 +23391,9 @@ static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG, // store-load conversions. if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR && N0.getValueType() == MVT::v2i32 && - isa(N0.getOperand(1))) { + isNullConstant(N0.getOperand(1))) { SDValue N00 = N0->getOperand(0); - if (N0.getConstantOperandVal(1) == 0 && N00.getValueType() == MVT::i32) + if (N00.getValueType() == MVT::i32) return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00); } @@ -23201,9 +23461,9 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); - if (VT == MVT::i1 && dyn_cast(N->getOperand(1)) && + if (VT == MVT::i1 && isa(N->getOperand(1)) && InputVector.getOpcode() == ISD::BITCAST && - dyn_cast(InputVector.getOperand(0))) { + isa(InputVector.getOperand(0))) { uint64_t ExtractedElt = cast(N->getOperand(1))->getZExtValue(); uint64_t InputValue = @@ -23797,7 +24057,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() && !DCI.isBeforeLegalize() && !ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { - unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = Cond.getValueType().getScalarSizeInBits(); // Don't optimize vector selects that map to mask-registers. if (BitWidth == 1) @@ -23818,7 +24078,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // FIXME: We don't support i16-element blends currently. We could and // should support them by making *all* the bits in the condition be set // rather than just the high bit and using an i8-element blend. - if (VT.getScalarType() == MVT::i16) + if (VT.getVectorElementType() == MVT::i16) return SDValue(); // Dynamic blending was only available from SSE4.1 onward. if (VT.is128BitVector() && !Subtarget->hasSSE41()) @@ -23934,12 +24194,9 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { SetCC.getOpcode() == ISD::AND) { if (SetCC.getOpcode() == ISD::AND) { int OpIdx = -1; - ConstantSDNode *CS; - if ((CS = dyn_cast(SetCC.getOperand(0))) && - CS->getZExtValue() == 1) + if (isOneConstant(SetCC.getOperand(0))) OpIdx = 1; - if ((CS = dyn_cast(SetCC.getOperand(1))) && - CS->getZExtValue() == 1) + if (isOneConstant(SetCC.getOperand(1))) OpIdx = 0; if (OpIdx == -1) break; @@ -24018,8 +24275,7 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, X86::CondCode &CC1, SDValue &Flags, bool &isAnd) { if (Cond->getOpcode() == X86ISD::CMP) { - ConstantSDNode *CondOp1C = dyn_cast(Cond->getOperand(1)); - if (!CondOp1C || !CondOp1C->isNullValue()) + if (!isNullConstant(Cond->getOperand(1))) return false; Cond = Cond->getOperand(0); @@ -24624,7 +24880,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // Set N0 and N1 to hold the inputs to the new wide operation. N0 = N0->getOperand(0); if (RHSConstSplat) { - N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(), + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getVectorElementType(), SDValue(RHSConstSplat, 0)); SmallVector C(WideVT.getVectorNumElements(), N1); N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C); @@ -24639,9 +24895,9 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, case ISD::ANY_EXTEND: return Op; case ISD::ZERO_EXTEND: { - unsigned InBits = NarrowVT.getScalarType().getSizeInBits(); + unsigned InBits = NarrowVT.getScalarSizeInBits(); APInt Mask = APInt::getAllOnesValue(InBits); - Mask = Mask.zext(VT.getScalarType().getSizeInBits()); + Mask = Mask.zext(VT.getScalarSizeInBits()); return DAG.getNode(ISD::AND, DL, VT, Op, DAG.getConstant(Mask, DL, VT)); } @@ -24927,7 +25183,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (!Subtarget->hasSSE41()) return SDValue(); - EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8; + MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8; X = DAG.getBitcast(BlendVT, X); Y = DAG.getBitcast(BlendVT, Y); @@ -25056,7 +25312,7 @@ static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) { return SDValue(); // Make sure we are performing an xor against one. - if (!isa(N1) || !cast(N1)->isOne()) + if (!isOneConstant(N1)) return SDValue(); // SetCC on x86 zero extends so only act on this if it's a logical shift. @@ -25104,6 +25360,132 @@ static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// This function detects the AVG pattern between vectors of unsigned i8/i16, +/// which is c = (a + b + 1) / 2, and replace this operation with the efficient +/// X86ISD::AVG instruction. +static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, + const X86Subtarget *Subtarget, SDLoc DL) { + if (!VT.isVector() || !VT.isSimple()) + return SDValue(); + EVT InVT = In.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); + + EVT ScalarVT = VT.getVectorElementType(); + if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && + isPowerOf2_32(NumElems))) + return SDValue(); + + // InScalarVT is the intermediate type in AVG pattern and it should be greater + // than the original input type (i8/i16). + EVT InScalarVT = InVT.getVectorElementType(); + if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits()) + return SDValue(); + + if (Subtarget->hasAVX512()) { + if (VT.getSizeInBits() > 512) + return SDValue(); + } else if (Subtarget->hasAVX2()) { + if (VT.getSizeInBits() > 256) + return SDValue(); + } else { + if (VT.getSizeInBits() > 128) + return SDValue(); + } + + // Detect the following pattern: + // + // %1 = zext %a to + // %2 = zext %b to + // %3 = add nuw nsw %1, + // %4 = add nuw nsw %3, %2 + // %5 = lshr %N, + // %6 = trunc %5 to + // + // In AVX512, the last instruction can also be a trunc store. + + if (In.getOpcode() != ISD::SRL) + return SDValue(); + + // A lambda checking the given SDValue is a constant vector and each element + // is in the range [Min, Max]. + auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) { + BuildVectorSDNode *BV = dyn_cast(V); + if (!BV || !BV->isConstant()) + return false; + for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) { + ConstantSDNode *C = dyn_cast(V.getOperand(i)); + if (!C) + return false; + uint64_t Val = C->getZExtValue(); + if (Val < Min || Val > Max) + return false; + } + return true; + }; + + // Check if each element of the vector is left-shifted by one. + auto LHS = In.getOperand(0); + auto RHS = In.getOperand(1); + if (!IsConstVectorInRange(RHS, 1, 1)) + return SDValue(); + if (LHS.getOpcode() != ISD::ADD) + return SDValue(); + + // Detect a pattern of a + b + 1 where the order doesn't matter. + SDValue Operands[3]; + Operands[0] = LHS.getOperand(0); + Operands[1] = LHS.getOperand(1); + + // Take care of the case when one of the operands is a constant vector whose + // element is in the range [1, 256]. + if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) && + Operands[0].getOpcode() == ISD::ZERO_EXTEND && + Operands[0].getOperand(0).getValueType() == VT) { + // The pattern is detected. Subtract one from the constant vector, then + // demote it and emit X86ISD::AVG instruction. + SDValue One = DAG.getConstant(1, DL, InScalarVT); + SDValue Ones = DAG.getNode(ISD::BUILD_VECTOR, DL, InVT, + SmallVector(NumElems, One)); + Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], Ones); + Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]); + return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0), + Operands[1]); + } + + if (Operands[0].getOpcode() == ISD::ADD) + std::swap(Operands[0], Operands[1]); + else if (Operands[1].getOpcode() != ISD::ADD) + return SDValue(); + Operands[2] = Operands[1].getOperand(0); + Operands[1] = Operands[1].getOperand(1); + + // Now we have three operands of two additions. Check that one of them is a + // constant vector with ones, and the other two are promoted from i8/i16. + for (int i = 0; i < 3; ++i) { + if (!IsConstVectorInRange(Operands[i], 1, 1)) + continue; + std::swap(Operands[i], Operands[2]); + + // Check if Operands[0] and Operands[1] are results of type promotion. + for (int j = 0; j < 2; ++j) + if (Operands[j].getOpcode() != ISD::ZERO_EXTEND || + Operands[j].getOperand(0).getValueType() != VT) + return SDValue(); + + // The pattern is detected, emit X86ISD::AVG instruction. + return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0), + Operands[1].getOperand(0)); + } + + return SDValue(); +} + +static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + return detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG, Subtarget, + SDLoc(N)); +} + /// PerformLOADCombine - Do target-specific dag combines on LOAD nodes. static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -25368,6 +25750,16 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // First, pack all of the elements in one place. Next, store to memory // in fewer chunks. if (St->isTruncatingStore() && VT.isVector()) { + // Check if we can detect an AVG pattern from the truncation. If yes, + // replace the trunc store by a normal store with the result of X86ISD::AVG + // instruction. + SDValue Avg = + detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, Subtarget, dl); + if (Avg.getNode()) + return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(), + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); @@ -25503,7 +25895,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. if (Subtarget->is64Bit() || F64IsLegal) { - EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; + MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), @@ -25736,6 +26128,33 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Do target-specific dag combines on floating point negations. +static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + SDValue Arg = N->getOperand(0); + + // If we're negating a FMA node, then we can adjust the + // instruction to include the extra negation. + if (Arg.hasOneUse()) { + switch (Arg.getOpcode()) { + case X86ISD::FMADD: + return DAG.getNode(X86ISD::FNMSUB, SDLoc(N), VT, Arg.getOperand(0), + Arg.getOperand(1), Arg.getOperand(2)); + case X86ISD::FMSUB: + return DAG.getNode(X86ISD::FNMADD, SDLoc(N), VT, Arg.getOperand(0), + Arg.getOperand(1), Arg.getOperand(2)); + case X86ISD::FNMADD: + return DAG.getNode(X86ISD::FMSUB, SDLoc(N), VT, Arg.getOperand(0), + Arg.getOperand(1), Arg.getOperand(2)); + case X86ISD::FNMSUB: + return DAG.getNode(X86ISD::FMADD, SDLoc(N), VT, Arg.getOperand(0), + Arg.getOperand(1), Arg.getOperand(2)); + } + } + return SDValue(); +} + /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes. static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -26092,8 +26511,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == X86ISD::SETCC_CARRY) { - ConstantSDNode *C = dyn_cast(N0.getOperand(1)); - if (!C || C->getZExtValue() != 1) + if (!isOneConstant(N0.getOperand(1))) return SDValue(); return DAG.getNode(ISD::AND, dl, VT, DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, @@ -26146,16 +26564,14 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG, SDLoc DL(N); if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB) - if (ConstantSDNode *C = dyn_cast(LHS.getOperand(0))) - if (C->getAPIntValue() == 0 && LHS.hasOneUse()) { + if (isNullConstant(LHS.getOperand(0)) && LHS.hasOneUse()) { SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS, LHS.getOperand(1)); return DAG.getSetCC(DL, N->getValueType(0), addV, DAG.getConstant(0, DL, addV.getValueType()), CC); } if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB) - if (ConstantSDNode *C = dyn_cast(RHS.getOperand(0))) - if (C->getAPIntValue() == 0 && RHS.hasOneUse()) { + if (isNullConstant(RHS.getOperand(0)) && RHS.hasOneUse()) { SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS, RHS.getOperand(1)); return DAG.getSetCC(DL, N->getValueType(0), addV, @@ -26198,52 +26614,6 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index, - SelectionDAG &DAG) { - SDLoc dl(Load); - MVT VT = Load->getSimpleValueType(0); - MVT EVT = VT.getVectorElementType(); - SDValue Addr = Load->getOperand(1); - SDValue NewAddr = DAG.getNode( - ISD::ADD, dl, Addr.getSimpleValueType(), Addr, - DAG.getConstant(Index * EVT.getStoreSize(), dl, - Addr.getSimpleValueType())); - - SDValue NewLoad = - DAG.getLoad(EVT, dl, Load->getChain(), NewAddr, - DAG.getMachineFunction().getMachineMemOperand( - Load->getMemOperand(), 0, EVT.getStoreSize())); - return NewLoad; -} - -static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { - SDLoc dl(N); - MVT VT = N->getOperand(1)->getSimpleValueType(0); - assert((VT == MVT::v4f32 || VT == MVT::v4i32) && - "X86insertps is only defined for v4x32"); - - SDValue Ld = N->getOperand(1); - if (MayFoldLoad(Ld)) { - // Extract the countS bits from the immediate so we can get the proper - // address when narrowing the vector load to a specific element. - // When the second source op is a memory address, insertps doesn't use - // countS and just gets an f32 from that address. - unsigned DestIndex = - cast(N->getOperand(2))->getZExtValue() >> 6; - - Ld = NarrowVectorLoadToElement(cast(Ld), DestIndex, DAG); - - // Create this as a scalar to vector to match the instruction pattern. - SDValue LoadScalarToVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Ld); - // countS bits are ignored when loading from memory on insertps, which - // means we don't need to explicitly set them to 0. - return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0), - LoadScalarToVector, N->getOperand(2)); - } - return SDValue(); -} - static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) { SDValue V0 = N->getOperand(0); SDValue V1 = N->getOperand(1); @@ -26444,7 +26814,7 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have // a 32-bit target where SSE doesn't support i64->FP operations. - if (Op0.getOpcode() == ISD::LOAD) { + if (!Subtarget->useSoftFloat() && Op0.getOpcode() == ISD::LOAD) { LoadSDNode *Ld = cast(Op0.getNode()); EVT LdVT = Ld->getValueType(0); @@ -26626,8 +26996,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) { SDValue ExtractedV = V.getOperand(0); SDValue OrigV = ExtractedV.getOperand(0); - if (auto *ExtractIdx = dyn_cast(ExtractedV.getOperand(1))) - if (ExtractIdx->getZExtValue() == 0) { + if (isNullConstant(ExtractedV.getOperand(1))) { MVT OrigVT = OrigV.getSimpleValueType(); // Extract a subvector if necessary... if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) { @@ -26676,6 +27045,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, Subtarget); case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget); case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); + case ISD::FNEG: return PerformFNEGCombine(N, DAG, Subtarget); + case ISD::TRUNCATE: return PerformTRUNCATECombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget); case X86ISD::FMIN: @@ -26709,11 +27080,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VPERM2X128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget); - case X86ISD::INSERTPS: { - if (getTargetMachine().getOptLevel() > CodeGenOpt::None) - return PerformINSERTPSCombine(N, DAG, Subtarget); - break; - } case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG); }