X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=bfd99a5517914470ef06d0f779fe6ae5b4783f2c;hb=37f1bba13a323828a90703cec569e8c9f10fb4ef;hp=2101724588b9fb7b85c3f864b87825ba0bc101ff;hpb=2ec72426009ce0462398fb7b370cd7ed58213db5;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2101724588b..bfd99a55179 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -183,7 +183,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget->is64Bit()) { setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); - } else if (!TM.Options.UseSoftFloat) { + } else if (!Subtarget->useSoftFloat()) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); @@ -197,7 +197,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); - if (!TM.Options.UseSoftFloat) { + if (!Subtarget->useSoftFloat()) { // SSE has no i16 to fp conversion, only i32 if (X86ScalarSSEf32) { setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); @@ -240,7 +240,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget->is64Bit()) { setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); - } else if (!TM.Options.UseSoftFloat) { + } else if (!Subtarget->useSoftFloat()) { // Since AVX is a superset of SSE3, only check for SSE here. if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) // Expand FP_TO_UINT into a select. @@ -368,7 +368,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Special handling for half-precision floating point conversions. // If we don't have F16C support, then lower half float conversions // into library calls. - if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) { + if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) { setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } @@ -513,7 +513,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom); - if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) { + // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering. + setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); + setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom); + + if (!Subtarget->useSoftFloat() && X86ScalarSSEf64) { // f32 and f64 use SSE. // Set up the FP register classes. addRegisterClass(MVT::f32, &X86::FR32RegClass); @@ -547,7 +551,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // cases we handle. addLegalFPImmediate(APFloat(+0.0)); // xorpd addLegalFPImmediate(APFloat(+0.0f)); // xorps - } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) { + } else if (!Subtarget->useSoftFloat() && X86ScalarSSEf32) { // Use SSE for f32, x87 for f64. // Set up the FP register classes. addRegisterClass(MVT::f32, &X86::FR32RegClass); @@ -582,7 +586,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); } - } else if (!TM.Options.UseSoftFloat) { + } else if (!Subtarget->useSoftFloat()) { // f32 and f64 in x87. // Set up the FP register classes. addRegisterClass(MVT::f64, &X86::RFP64RegClass); @@ -616,7 +620,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, MVT::f32, Expand); // Long double always uses X87. - if (!TM.Options.UseSoftFloat) { + if (!Subtarget->useSoftFloat()) { addRegisterClass(MVT::f80, &X86::RFP80RegClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); @@ -746,12 +750,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // them legal. if (VT.getVectorElementType() == MVT::i1) setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); + + // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are + // split/scalarized right now. + if (VT.getVectorElementType() == MVT::f16) + setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); } } // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. - if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) { + if (!Subtarget->useSoftFloat() && Subtarget->hasMMX()) { addRegisterClass(MVT::x86mmx, &X86::VR64RegClass); // No operations on x86mmx supported, everything uses intrinsics. } @@ -769,7 +778,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v1i64, Expand); - if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) { + if (!Subtarget->useSoftFloat() && Subtarget->hasSSE1()) { addRegisterClass(MVT::v4f32, &X86::VR128RegClass); setOperationAction(ISD::FADD, MVT::v4f32, Legal); @@ -788,7 +797,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); } - if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) { + if (!Subtarget->useSoftFloat() && Subtarget->hasSSE2()) { addRegisterClass(MVT::v2f64, &X86::VR128RegClass); // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM @@ -802,6 +811,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ADD, MVT::v8i16, Legal); setOperationAction(ISD::ADD, MVT::v4i32, Legal); setOperationAction(ISD::ADD, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v16i8, Custom); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom); @@ -932,7 +942,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); } - if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) { + if (!Subtarget->useSoftFloat() && Subtarget->hasSSE41()) { for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { setOperationAction(ISD::FFLOOR, RoundedTy, Legal); setOperationAction(ISD::FCEIL, RoundedTy, Legal); @@ -1014,7 +1024,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRA, MVT::v4i32, Custom); } - if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) { + if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) { addRegisterClass(MVT::v32i8, &X86::VR256RegClass); addRegisterClass(MVT::v16i16, &X86::VR256RegClass); addRegisterClass(MVT::v8i32, &X86::VR256RegClass); @@ -1122,7 +1132,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v4i64, Custom); setOperationAction(ISD::MUL, MVT::v8i32, Legal); setOperationAction(ISD::MUL, MVT::v16i16, Legal); - // Don't lower v32i8 because there is no 128-bit byte mul + setOperationAction(ISD::MUL, MVT::v32i8, Custom); setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom); setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom); @@ -1171,7 +1181,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v4i64, Custom); setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); - // Don't lower v32i8 because there is no 128-bit byte mul + setOperationAction(ISD::MUL, MVT::v32i8, Custom); } // In the customized shift lowering, the legal cases in AVX2 will be @@ -1234,7 +1244,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } - if (!TM.Options.UseSoftFloat && Subtarget->hasAVX512()) { + if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) { addRegisterClass(MVT::v16i32, &X86::VR512RegClass); addRegisterClass(MVT::v16f32, &X86::VR512RegClass); addRegisterClass(MVT::v8i64, &X86::VR512RegClass); @@ -1296,23 +1306,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal); setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); + if (Subtarget->hasDQI()) { + setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); + } setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom); - + if (Subtarget->hasDQI()) { + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); + } setOperationAction(ISD::FFLOOR, MVT::v16f32, Legal); setOperationAction(ISD::FFLOOR, MVT::v8f64, Legal); setOperationAction(ISD::FCEIL, MVT::v16f32, Legal); @@ -1344,6 +1365,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v8f64, Custom); setOperationAction(ISD::SELECT, MVT::v8i64, Custom); setOperationAction(ISD::SELECT, MVT::v16f32, Custom); + setOperationAction(ISD::SELECT, MVT::v16i1, Custom); + setOperationAction(ISD::SELECT, MVT::v8i1, Custom); setOperationAction(ISD::ADD, MVT::v8i64, Legal); setOperationAction(ISD::ADD, MVT::v16i32, Legal); @@ -1373,10 +1396,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); } - + if (Subtarget->hasDQI()) { + setOperationAction(ISD::MUL, MVT::v2i64, Legal); + setOperationAction(ISD::MUL, MVT::v4i64, Legal); + setOperationAction(ISD::MUL, MVT::v8i64, Legal); + } // Custom lower several nodes. for (MVT VT : MVT::vector_valuetypes()) { unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (EltSize == 1) { + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + } + if (EltSize >= 32 && VT.getSizeInBits() <= 512) { + setOperationAction(ISD::MGATHER, VT, Custom); + setOperationAction(ISD::MSCATTER, VT, Custom); + } // Extract subvector is special because the value type // (result) is 256/128-bit but the source is 512-bit wide. if (VT.is128BitVector() || VT.is256BitVector()) { @@ -1389,7 +1425,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (!VT.is512BitVector()) continue; - if ( EltSize >= 32) { + if (EltSize >= 32) { setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); @@ -1413,7 +1449,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } }// has AVX-512 - if (!TM.Options.UseSoftFloat && Subtarget->hasBWI()) { + if (!Subtarget->useSoftFloat() && Subtarget->hasBWI()) { addRegisterClass(MVT::v32i16, &X86::VR512RegClass); addRegisterClass(MVT::v64i8, &X86::VR512RegClass); @@ -1433,6 +1469,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); + setOperationAction(ISD::SELECT, MVT::v32i1, Custom); + setOperationAction(ISD::SELECT, MVT::v64i1, Custom); for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { const MVT VT = (MVT::SimpleValueType)i; @@ -1450,7 +1488,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } } - if (!TM.Options.UseSoftFloat && Subtarget->hasVLX()) { + if (!Subtarget->useSoftFloat() && Subtarget->hasVLX()) { addRegisterClass(MVT::v4i1, &X86::VK4RegClass); addRegisterClass(MVT::v2i1, &X86::VK2RegClass); @@ -1460,6 +1498,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); + setOperationAction(ISD::SELECT, MVT::v4i1, Custom); + setOperationAction(ISD::SELECT, MVT::v2i1, Custom); setOperationAction(ISD::AND, MVT::v8i32, Legal); setOperationAction(ISD::OR, MVT::v8i32, Legal); @@ -1467,6 +1507,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::AND, MVT::v4i32, Legal); setOperationAction(ISD::OR, MVT::v4i32, Legal); setOperationAction(ISD::XOR, MVT::v4i32, Legal); + setOperationAction(ISD::SRA, MVT::v2i64, Custom); + setOperationAction(ISD::SRA, MVT::v4i64, Custom); } // We want to custom lower some of our intrinsics. @@ -1546,7 +1588,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); - setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); @@ -1755,6 +1796,10 @@ unsigned X86TargetLowering::getJumpTableEncoding() const { return TargetLowering::getJumpTableEncoding(); } +bool X86TargetLowering::useSoftFloat() const { + return Subtarget->useSoftFloat(); +} + const MCExpr * X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, @@ -1880,7 +1925,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop - RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), + RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl, MVT::i16)); // Copy the result values into the output registers. @@ -1895,8 +1940,12 @@ X86TargetLowering::LowerReturn(SDValue Chain, ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); else if (VA.getLocInfo() == CCValAssign::ZExt) ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); - else if (VA.getLocInfo() == CCValAssign::AExt) - ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); + else if (VA.getLocInfo() == CCValAssign::AExt) { + if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1) + ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); + else + ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); + } else if (VA.getLocInfo() == CCValAssign::BCvt) ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy); @@ -1951,9 +2000,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - // The x86-64 ABIs require that for returning structs by value we copy + // All x86 ABIs require that for returning structs by value we copy // the sret argument into %rax/%eax (depending on ABI) for the return. - // Win32 requires us to put the sret argument to %eax as well. // We saved the argument into a virtual register in the entry block, // so now we copy the value out and into %rax/%eax. // @@ -1962,8 +2010,6 @@ X86TargetLowering::LowerReturn(SDValue Chain, // false, then an sret argument may be implicitly inserted in the SelDAG. In // either case FuncInfo->setSRetReturnReg() will have been called. if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { - assert((Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) && - "No need for an sret register"); SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy()); unsigned RetValReg @@ -2058,7 +2104,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Copy all of the result registers out of their specified physreg. for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; - EVT CopyVT = VA.getValVT(); + EVT CopyVT = VA.getLocVT(); // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && @@ -2068,18 +2114,24 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // If we prefer to use the value in xmm registers, copy it out as f80 and // use a truncate to move it from fp stack reg to xmm reg. + bool RoundAfterCopy = false; if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && - isScalarFPTypeInSSEReg(VA.getValVT())) + isScalarFPTypeInSSEReg(VA.getValVT())) { CopyVT = MVT::f80; + RoundAfterCopy = (CopyVT != VA.getLocVT()); + } Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag).getValue(1); SDValue Val = Chain.getValue(0); - if (CopyVT != VA.getValVT()) + if (RoundAfterCopy) Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, // This truncation won't change the value. - DAG.getIntPtrConstant(1)); + DAG.getIntPtrConstant(1, dl)); + + if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1) + Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); InFlag = Chain.getValue(2); InVals.push_back(Val); @@ -2139,10 +2191,11 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, SDLoc dl) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, + /*isTailCall*/false, MachinePointerInfo(), MachinePointerInfo()); } @@ -2195,7 +2248,10 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // If value is passed by pointer we have address passed instead of the value // itself. - if (VA.getLocInfo() == CCValAssign::Indirect) + bool ExtendedInMem = VA.isExtInLoc() && + VA.getValVT().getScalarType() == MVT::i1; + + if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem) ValVT = VA.getLocVT(); else ValVT = VA.getValVT(); @@ -2213,9 +2269,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - return DAG.getLoad(ValVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); + return ExtendedInMem ? + DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val; } } @@ -2252,10 +2310,10 @@ static ArrayRef get64BitArgumentXMMs(MachineFunction &MF, const Function *Fn = MF.getFunction(); bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat); - assert(!(MF.getTarget().Options.UseSoftFloat && NoImplicitFloatOps) && + bool isSoftFloat = Subtarget->useSoftFloat(); + assert(!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"); - if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps || - !Subtarget->hasSSE1()) + if (isSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1()) // Kernel mode asks for SSE to be disabled, so there are no XMM argument // registers. return None; @@ -2363,7 +2421,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isExtInLoc()) { // Handle MMX values passed in XMM regs. - if (RegVT.isVector()) + if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); else ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); @@ -2381,24 +2439,21 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(ArgValue); } - if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) { - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - // The x86-64 ABIs require that for returning structs by value we copy - // the sret argument into %rax/%eax (depending on ABI) for the return. - // Win32 requires us to put the sret argument to %eax as well. - // Save the argument into a virtual register so that we can access it - // from the return points. - if (Ins[i].Flags.isSRet()) { - unsigned Reg = FuncInfo->getSRetReturnReg(); - if (!Reg) { - MVT PtrTy = getPointerTy(); - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); - FuncInfo->setSRetReturnReg(Reg); - } - SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]); - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); - break; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + // All x86 ABIs require that for returning structs by value we copy the + // sret argument into %rax/%eax (depending on ABI) for the return. Save + // the argument into a virtual register so that we can access it from the + // return points. + if (Ins[i].Flags.isSRet()) { + unsigned Reg = FuncInfo->getSRetReturnReg(); + if (!Reg) { + MVT PtrTy = getPointerTy(); + Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); + FuncInfo->setSRetReturnReg(Reg); } + SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); + break; } } @@ -2426,7 +2481,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, bool IsWinEHParent = WinEHParent && WinEHParent == Fn; // Figure out if XMM registers are in use. - assert(!(MF.getTarget().Options.UseSoftFloat && + assert(!(Subtarget->useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"); @@ -2486,7 +2541,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned Offset = FuncInfo->getVarArgsGPOffset(); for (SDValue Val : LiveGPRs) { SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, - DAG.getIntPtrConstant(Offset)); + DAG.getIntPtrConstant(Offset, dl)); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo::getFixedStack( @@ -2502,9 +2557,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SaveXMMOps.push_back(Chain); SaveXMMOps.push_back(ALVal); SaveXMMOps.push_back(DAG.getIntPtrConstant( - FuncInfo->getRegSaveFrameIndex())); + FuncInfo->getRegSaveFrameIndex(), dl)); SaveXMMOps.push_back(DAG.getIntPtrConstant( - FuncInfo->getVarArgsFPOffset())); + FuncInfo->getVarArgsFPOffset(), dl)); SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(), LiveXMMRegs.end()); MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, @@ -2604,7 +2659,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; - SDValue Neg2 = DAG.getConstant(-2, MVT::i64); + SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64); Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, MachinePointerInfo::getFixedStack(UnwindHelpFI), /*isVolatile=*/true, @@ -2621,7 +2676,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); - SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); if (Flags.isByVal()) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); @@ -2767,7 +2822,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!IsSibcall) Chain = DAG.getCALLSEQ_START( - Chain, DAG.getIntPtrConstant(NumBytesToPush, true), dl); + Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl); SDValue RetAddrFrIdx; // Load return address for tail calls. @@ -2804,7 +2859,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); break; case CCValAssign::AExt: - if (RegVT.is128BitVector()) { + if (Arg.getValueType().isVector() && + Arg.getValueType().getScalarType() == MVT::i1) + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); + else if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); @@ -2899,7 +2957,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, && "SSE registers cannot be used when SSE is disabled"); RegsToPass.push_back(std::make_pair(unsigned(X86::AL), - DAG.getConstant(NumXMMRegs, MVT::i8))); + DAG.getConstant(NumXMMRegs, dl, + MVT::i8))); } if (isVarArg && IsMustTail) { @@ -2943,7 +3002,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Flags.isByVal()) { // Copy relative to framepointer. - SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); + SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), @@ -3069,8 +3128,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!IsSibcall && isTailCall) { Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NumBytesToPop, true), - DAG.getIntPtrConstant(0, true), InFlag, dl); + DAG.getIntPtrConstant(NumBytesToPop, dl, true), + DAG.getIntPtrConstant(0, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } @@ -3078,7 +3137,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(Callee); if (isTailCall) - Ops.push_back(DAG.getConstant(FPDiff, MVT::i32)); + Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32)); // Add argument registers to the end of the list so that they are known live // into the call. @@ -3102,6 +3161,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // This isn't right, although it's probably harmless on x86; liveouts // should be computed from returns not tail calls. Consider a void // function making a tail call to a function returning int. + MF.getFrameInfo()->setHasTailCall(); return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops); } @@ -3127,8 +3187,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Returns a flag for retval copy to use. if (!IsSibcall) { Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NumBytesToPop, true), - DAG.getIntPtrConstant(NumBytesForCalleeToPop, + DAG.getIntPtrConstant(NumBytesToPop, dl, true), + DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl, true), InFlag, dl); InFlag = Chain.getValue(1); @@ -3515,7 +3575,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, case X86ISD::PSHUFLW: case X86ISD::VPERMILPI: case X86ISD::VPERMI: - return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); + return DAG.getNode(Opc, dl, VT, V1, + DAG.getConstant(TargetMask, dl, MVT::i8)); } } @@ -3625,13 +3686,13 @@ static bool isX86CCUnsigned(unsigned X86CC) { /// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86 /// specific condition code, returning the condition code and the LHS/RHS of the /// comparison to make. -static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, +static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) { if (!isFP) { if (ConstantSDNode *RHSC = dyn_cast(RHS)) { if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { // X > -1 -> X == 0, jump !sign. - RHS = DAG.getConstant(0, RHS.getValueType()); + RHS = DAG.getConstant(0, DL, RHS.getValueType()); return X86::COND_NS; } if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { @@ -3640,7 +3701,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP, } if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) { // X < 1 -> X <= 0 - RHS = DAG.getConstant(0, RHS.getValueType()); + RHS = DAG.getConstant(0, DL, RHS.getValueType()); return X86::COND_LE; } } @@ -3943,26 +4004,26 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, SDValue Vec; if (VT.is128BitVector()) { // SSE if (Subtarget->hasSSE2()) { // SSE2 - SDValue Cst = DAG.getConstant(0, MVT::i32); + SDValue Cst = DAG.getConstant(0, dl, MVT::i32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } else { // SSE1 - SDValue Cst = DAG.getConstantFP(+0.0, MVT::f32); + SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32); Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst); } } else if (VT.is256BitVector()) { // AVX if (Subtarget->hasInt256()) { // AVX2 - SDValue Cst = DAG.getConstant(0, MVT::i32); + SDValue Cst = DAG.getConstant(0, dl, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops); } else { // 256-bit logic and arithmetic instructions in AVX are all // floating-point, no support for integer ops. Emit fp zeroed vectors. - SDValue Cst = DAG.getConstantFP(+0.0, MVT::f32); + SDValue Cst = DAG.getConstantFP(+0.0, dl, MVT::f32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops); } } else if (VT.is512BitVector()) { // AVX-512 - SDValue Cst = DAG.getConstant(0, MVT::i32); + SDValue Cst = DAG.getConstant(0, dl, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops); @@ -3972,7 +4033,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, && "Unexpected vector type"); assert((Subtarget->hasVLX() || VT.getVectorNumElements() >= 8) && "Unexpected vector type"); - SDValue Cst = DAG.getConstant(0, MVT::i1); + SDValue Cst = DAG.getConstant(0, dl, MVT::i1); SmallVector Ops(VT.getVectorNumElements(), Cst); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } else @@ -4010,7 +4071,7 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal, makeArrayRef(Vec->op_begin() + NormalizedIdxVal, ElemsPerChunk)); - SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); } @@ -4054,7 +4115,7 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec, unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth) * ElemsPerChunk); - SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); } @@ -4076,7 +4137,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, if (IdxVal == 0 && Result.getValueType().is256BitVector() && Result.getOpcode() != ISD::UNDEF) { EVT ResultVT = Result.getValueType(); - SDValue ZeroIndex = DAG.getIntPtrConstant(0); + SDValue ZeroIndex = DAG.getIntPtrConstant(0, dl); SDValue Undef = DAG.getUNDEF(ResultVT); SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef, Vec, ZeroIndex); @@ -4088,7 +4149,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, unsigned ScalarSize = ScalarType.getSizeInBits(); assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type"); unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f; - SDValue Mask = DAG.getConstant(MaskVal, MVT::i8); + SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8); return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask); } @@ -4104,7 +4165,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, // will be created by InsertSubVector(). MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32; - SDValue Mask = DAG.getConstant(0x0f, MVT::i8); + SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8); Vec256 = DAG.getNode(ISD::BITCAST, dl, CastVT, Vec256); Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask); return DAG.getNode(ISD::BITCAST, dl, ResultVT, Vec256); @@ -4145,7 +4206,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, SDLoc dl) { assert(VT.isVector() && "Expected a vector type"); - SDValue Cst = DAG.getConstant(~0U, MVT::i32); + SDValue Cst = DAG.getConstant(~0U, dl, MVT::i32); SDValue Vec; if (VT.is256BitVector()) { if (HasInt256) { // AVX2 @@ -4322,7 +4383,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, return false; SDValue Ptr = MaskLoad->getBasePtr(); - if (Ptr->getOpcode() == X86ISD::Wrapper) + if (Ptr->getOpcode() == X86ISD::Wrapper || + Ptr->getOpcode() == X86ISD::WrapperRIP) Ptr = Ptr->getOperand(0); auto *MaskCP = dyn_cast(Ptr); @@ -4475,7 +4537,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, } V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V, Op.getOperand(i), - DAG.getIntPtrConstant(i)); + DAG.getIntPtrConstant(i, dl)); } } @@ -4503,7 +4565,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, if (ThisIsNonZero) { ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i)); ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, - ThisElt, DAG.getConstant(8, MVT::i8)); + ThisElt, DAG.getConstant(8, dl, MVT::i8)); if (LastIsNonZero) ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt); } else @@ -4511,7 +4573,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, if (ThisElt.getNode()) V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt, - DAG.getIntPtrConstant(i/2)); + DAG.getIntPtrConstant(i/2, dl)); } } @@ -4543,7 +4605,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, } V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, Op.getOperand(i), - DAG.getIntPtrConstant(i)); + DAG.getIntPtrConstant(i, dl)); } } @@ -4650,9 +4712,10 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask; assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); - SDValue Result = DAG.getNode(X86ISD::INSERTPS, SDLoc(Op), MVT::v4f32, V1, V2, - DAG.getIntPtrConstant(InsertPSMask)); - return DAG.getNode(ISD::BITCAST, SDLoc(Op), VT, Result); + SDLoc DL(Op); + SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, + DAG.getIntPtrConstant(InsertPSMask, DL)); + return DAG.getNode(ISD::BITCAST, DL, VT, Result); } /// Return a vector logical shift node. @@ -4665,7 +4728,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType()); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); - SDValue ShiftVal = DAG.getConstant(NumBits/8, ScalarShiftTy); + SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy); return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); } @@ -4722,9 +4785,11 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) { if ((Offset % RequiredAlign) & 3) return SDValue(); int64_t StartOffset = Offset & ~(RequiredAlign-1); - if (StartOffset) - Ptr = DAG.getNode(ISD::ADD, SDLoc(Ptr), Ptr.getValueType(), - Ptr,DAG.getConstant(StartOffset, Ptr.getValueType())); + if (StartOffset) { + SDLoc DL(Ptr); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(StartOffset, DL, Ptr.getValueType())); + } int EltNo = (Offset - StartOffset) >> 2; unsigned NumElems = VT.getVectorNumElements(); @@ -5117,7 +5182,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) { unsigned Idx = InsertIndices[i]; NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx), - DAG.getIntPtrConstant(Idx)); + DAG.getIntPtrConstant(Idx, DL)); } return NV; @@ -5133,13 +5198,13 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (ISD::isBuildVectorAllZeros(Op.getNode())) { - SDValue Cst = DAG.getTargetConstant(0, MVT::i1); + SDValue Cst = DAG.getTargetConstant(0, dl, MVT::i1); SmallVector Ops(VT.getVectorNumElements(), Cst); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } if (ISD::isBuildVectorAllOnes(Op.getNode())) { - SDValue Cst = DAG.getTargetConstant(1, MVT::i1); + SDValue Cst = DAG.getTargetConstant(1, dl, MVT::i1); SmallVector Ops(VT.getVectorNumElements(), Cst); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -5169,15 +5234,15 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { if (AllContants) { SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, - DAG.getConstant(Immediate, MVT::i16)); + DAG.getConstant(Immediate, dl, MVT::i16)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } if (NumNonConsts == 1 && NonConstIdx != 0) { SDValue DstVec; if (NumConsts) { - SDValue VecAsImm = DAG.getConstant(Immediate, + SDValue VecAsImm = DAG.getConstant(Immediate, dl, MVT::getIntegerVT(VT.getSizeInBits())); DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm); } @@ -5185,7 +5250,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { DstVec = DAG.getUNDEF(VT); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, Op.getOperand(NonConstIdx), - DAG.getIntPtrConstant(NonConstIdx)); + DAG.getIntPtrConstant(NonConstIdx, dl)); } if (!IsSplat && (NonConstIdx != 0)) llvm_unreachable("Unsupported BUILD_VECTOR operation"); @@ -5193,19 +5258,19 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { SDValue Select; if (IsSplat) Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0), - DAG.getConstant(-1, SelectVT), - DAG.getConstant(0, SelectVT)); + DAG.getConstant(-1, dl, SelectVT), + DAG.getConstant(0, dl, SelectVT)); else Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0), - DAG.getConstant((Immediate | 1), SelectVT), - DAG.getConstant(Immediate, SelectVT)); + DAG.getConstant((Immediate | 1), dl, SelectVT), + DAG.getConstant(Immediate, dl, SelectVT)); return DAG.getNode(ISD::BITCAST, dl, VT, Select); } /// \brief Return true if \p N implements a horizontal binop and return the /// operands for the horizontal binop into V0 and V1. /// -/// This is a helper function of PerformBUILD_VECTORCombine. +/// This is a helper function of LowerToHorizontalOp(). /// This function checks that the build_vector \p N in input implements a /// horizontal operation. Parameter \p Opcode defines the kind of horizontal /// operation to match. @@ -5268,11 +5333,17 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, unsigned I1 = cast(Op1.getOperand(1))->getZExtValue(); if (i * 2 < NumElts) { - if (V0.getOpcode() == ISD::UNDEF) + if (V0.getOpcode() == ISD::UNDEF) { V0 = Op0.getOperand(0); + if (V0.getValueType() != VT) + return false; + } } else { - if (V1.getOpcode() == ISD::UNDEF) + if (V1.getOpcode() == ISD::UNDEF) { V1 = Op0.getOperand(0); + if (V1.getValueType() != VT) + return false; + } if (i * 2 == NumElts) ExpectedVExtractIdx = BaseIdx; } @@ -5296,7 +5367,7 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, /// \brief Emit a sequence of two 128-bit horizontal add/sub followed by /// a concat_vector. /// -/// This is a helper function of PerformBUILD_VECTORCombine. +/// This is a helper function of LowerToHorizontalOp(). /// This function expects two 256-bit vectors called V0 and V1. /// At first, each vector is split into two separate 128-bit vectors. /// Then, the resulting 128-bit vectors are used to implement two @@ -5362,12 +5433,16 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); } -/// \brief Try to fold a build_vector that performs an 'addsub' into the -/// sequence of 'vadd + vsub + blendi'. -static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { - SDLoc DL(BV); +/// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB +/// node. +static SDValue LowerToAddSub(const BuildVectorSDNode *BV, + const X86Subtarget *Subtarget, SelectionDAG &DAG) { EVT VT = BV->getValueType(0); + if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && + (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) + return SDValue(); + + SDLoc DL(BV); unsigned NumElts = VT.getVectorNumElements(); SDValue InVec0 = DAG.getUNDEF(VT); SDValue InVec1 = DAG.getUNDEF(VT); @@ -5422,10 +5497,16 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG, SubFound = true; // Update InVec0 and InVec1. - if (InVec0.getOpcode() == ISD::UNDEF) + if (InVec0.getOpcode() == ISD::UNDEF) { InVec0 = Op0.getOperand(0); - if (InVec1.getOpcode() == ISD::UNDEF) + if (InVec0.getValueType() != VT) + return SDValue(); + } + if (InVec1.getOpcode() == ISD::UNDEF) { InVec1 = Op1.getOperand(0); + if (InVec1.getValueType() != VT) + return SDValue(); + } // Make sure that operands in input to each add/sub node always // come from a same pair of vectors. @@ -5455,23 +5536,12 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { - SDLoc DL(N); - EVT VT = N->getValueType(0); +/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible. +static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + EVT VT = BV->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); - BuildVectorSDNode *BV = cast(N); - SDValue InVec0, InVec1; - - // Try to match an ADDSUB. - if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || - (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) { - SDValue Value = matchAddSub(BV, DAG, Subtarget); - if (Value.getNode()) - return Value; - } - - // Try to match horizontal ADD/SUB. unsigned NumUndefsLO = 0; unsigned NumUndefsHI = 0; unsigned Half = NumElts/2; @@ -5490,6 +5560,8 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, if (NumUndefsLO + NumUndefsHI + 1 >= NumElts) return SDValue(); + SDLoc DL(BV); + SDValue InVec0, InVec1; if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) { // Try to match an SSE3 float HADD/HSUB. if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) @@ -5634,6 +5706,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl); } + BuildVectorSDNode *BV = cast(Op.getNode()); + if (SDValue AddSub = LowerToAddSub(BV, Subtarget, DAG)) + return AddSub; + if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG)) + return HorizontalOp; if (SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG)) return Broadcast; @@ -5704,7 +5781,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (VT.is512BitVector()) { SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, - Item, DAG.getIntPtrConstant(0)); + Item, DAG.getIntPtrConstant(0, dl)); } assert((VT.is128BitVector() || VT.is256BitVector()) && "Expected an SSE value type!"); @@ -5898,7 +5975,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { for (unsigned i = 1; i < NumElems; ++i) { if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue; Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result, - Op.getOperand(i), DAG.getIntPtrConstant(i)); + Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); } return Result; } @@ -5996,10 +6073,10 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, if (IsZeroV1 && IsZeroV2) return getZeroVector(ResVT, Subtarget, DAG, dl); - SDValue ZeroIdx = DAG.getIntPtrConstant(0); + SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); SDValue Undef = DAG.getUNDEF(ResVT); unsigned NumElems = ResVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems/2, MVT::i8); + SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8); V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, ZeroIdx); V2 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V2, ShiftBits); @@ -6167,7 +6244,7 @@ static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef Mask, /// example. /// /// NB: We rely heavily on "undef" masks preserving the input lane. -static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, +static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, SDLoc DL, SelectionDAG &DAG) { assert(Mask.size() == 4 && "Only 4-lane shuffle masks"); assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!"); @@ -6180,7 +6257,7 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, Imm |= (Mask[1] == -1 ? 1 : Mask[1]) << 2; Imm |= (Mask[2] == -1 ? 2 : Mask[2]) << 4; Imm |= (Mask[3] == -1 ? 3 : Mask[3]) << 6; - return DAG.getConstant(Imm, MVT::i8); + return DAG.getConstant(Imm, DL, MVT::i8); } /// \brief Try to emit a blend instruction for a shuffle using bit math. @@ -6194,8 +6271,9 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, assert(VT.isInteger() && "Only supports integer vector types!"); MVT EltVT = VT.getScalarType(); int NumEltBits = EltVT.getSizeInBits(); - SDValue Zero = DAG.getConstant(0, EltVT); - SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), EltVT); + SDValue Zero = DAG.getConstant(0, DL, EltVT); + SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, + EltVT); SmallVector MaskOps; for (int i = 0, Size = Mask.size(); i < Size; ++i) { if (Mask[i] != -1 && Mask[i] != i && Mask[i] != i + Size) @@ -6241,7 +6319,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, case MVT::v4f64: case MVT::v8f32: return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, - DAG.getConstant(BlendMask, MVT::i8)); + DAG.getConstant(BlendMask, DL, MVT::i8)); case MVT::v4i64: case MVT::v8i32: @@ -6265,7 +6343,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2); return DAG.getNode(ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2, - DAG.getConstant(BlendMask, MVT::i8))); + DAG.getConstant(BlendMask, DL, MVT::i8))); } // FALLTHROUGH case MVT::v8i16: { @@ -6282,7 +6360,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V2); return DAG.getNode(ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::BLENDI, DL, MVT::v8i16, V1, V2, - DAG.getConstant(BlendMask, MVT::i8))); + DAG.getConstant(BlendMask, DL, MVT::i8))); } case MVT::v16i16: { @@ -6296,7 +6374,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, if (RepeatedMask[i] >= 16) BlendMask |= 1u << i; return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, - DAG.getConstant(BlendMask, MVT::i8)); + DAG.getConstant(BlendMask, DL, MVT::i8)); } } // FALLTHROUGH @@ -6328,7 +6406,8 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, for (int j = 0; j < Scale; ++j) VSELECTMask.push_back( Mask[i] < 0 ? DAG.getUNDEF(MVT::i8) - : DAG.getConstant(Mask[i] < Size ? -1 : 0, MVT::i8)); + : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, + MVT::i8)); V1 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V1); V2 = DAG.getNode(ISD::BITCAST, DL, BlendVT, V2); @@ -6522,7 +6601,8 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, return DAG.getNode(ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo, - DAG.getConstant(Rotation * Scale, MVT::i8))); + DAG.getConstant(Rotation * Scale, DL, + MVT::i8))); } assert(VT.getSizeInBits() == 128 && @@ -6539,9 +6619,9 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Hi); SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v2i64, Lo, - DAG.getConstant(LoByteShift, MVT::i8)); + DAG.getConstant(LoByteShift, DL, MVT::i8)); SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v2i64, Hi, - DAG.getConstant(HiByteShift, MVT::i8)); + DAG.getConstant(HiByteShift, DL, MVT::i8)); return DAG.getNode(ISD::BITCAST, DL, VT, DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift)); } @@ -6600,8 +6680,9 @@ static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1, MVT EltVT = VT.getScalarType(); int NumEltBits = EltVT.getSizeInBits(); MVT IntEltVT = MVT::getIntegerVT(NumEltBits); - SDValue Zero = DAG.getConstant(0, IntEltVT); - SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), IntEltVT); + SDValue Zero = DAG.getConstant(0, DL, IntEltVT); + SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, + IntEltVT); if (EltVT.isFloatingPoint()) { Zero = DAG.getNode(ISD::BITCAST, DL, EltVT, Zero); AllOnes = DAG.getNode(ISD::BITCAST, DL, EltVT, AllOnes); @@ -6698,7 +6779,8 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1, "Illegal integer vector type"); V = DAG.getNode(ISD::BITCAST, DL, ShiftVT, V); - V = DAG.getNode(OpCode, DL, ShiftVT, V, DAG.getConstant(ShiftAmt, MVT::i8)); + V = DAG.getNode(OpCode, DL, ShiftVT, V, + DAG.getConstant(ShiftAmt, DL, MVT::i8)); return DAG.getNode(ISD::BITCAST, DL, VT, V); }; @@ -6752,19 +6834,19 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG))); + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } if (AnyExt && EltBits == 16 && Scale > 2) { int PSHUFDMask[4] = {0, -1, 0, -1}; InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, InputV), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)); + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)); int PSHUFHWMask[4] = {1, -1, -1, -1}; return DAG.getNode( ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, InputV), - getV4X86ShuffleImm8ForMask(PSHUFHWMask, DAG))); + getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG))); } // If this would require more than 2 unpack instructions to expand, use @@ -6775,7 +6857,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( SDValue PSHUFBMask[16]; for (int i = 0; i < 16; ++i) PSHUFBMask[i] = - DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, MVT::i8); + DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, DL, MVT::i8); InputV = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, InputV); return DAG.getNode(ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, @@ -6917,8 +6999,13 @@ static SDValue getScalarValueForVectorElement(SDValue V, int Idx, return SDValue(); if (V.getOpcode() == ISD::BUILD_VECTOR || - (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) - return DAG.getNode(ISD::BITCAST, SDLoc(V), EltVT, V.getOperand(Idx)); + (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) { + // Ensure the scalar operand is the same size as the destination. + // FIXME: Add support for scalar truncation where possible. + SDValue S = V.getOperand(Idx); + if (EltVT.getSizeInBits() == S.getSimpleValueType().getSizeInBits()) + return DAG.getNode(ISD::BITCAST, SDLoc(V), EltVT, S); + } return SDValue(); } @@ -7028,7 +7115,7 @@ static SDValue lowerVectorShuffleAsElementInsertion( V2 = DAG.getNode( X86ISD::VSHLDQ, DL, MVT::v2i64, V2, DAG.getConstant( - V2Index * EltVT.getSizeInBits()/8, + V2Index * EltVT.getSizeInBits()/8, DL, DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64))); V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2); } @@ -7188,7 +7275,7 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2, // Insert the V2 element into the desired position. SDLoc DL(Op); return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, - DAG.getConstant(InsertPSMask, MVT::i8)); + DAG.getConstant(InsertPSMask, DL, MVT::i8)); } /// \brief Try to lower a shuffle as a permute of the inputs followed by an @@ -7338,11 +7425,11 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // If we have AVX, we can use VPERMILPS which will allow folding a load // into the shuffle. return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1, - DAG.getConstant(SHUFPDMask, MVT::i8)); + DAG.getConstant(SHUFPDMask, DL, MVT::i8)); } - return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V1, - DAG.getConstant(SHUFPDMask, MVT::i8)); + return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V1, + DAG.getConstant(SHUFPDMask, DL, MVT::i8)); } assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!"); assert(Mask[1] >= 2 && "Non-canonicalized blend!"); @@ -7385,8 +7472,8 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2); unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); - return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2, - DAG.getConstant(SHUFPDMask, MVT::i8)); + return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2, + DAG.getConstant(SHUFPDMask, DL, MVT::i8)); } /// \brief Handle lowering of 2-lane 64-bit integer shuffles. @@ -7421,8 +7508,8 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1}; return DAG.getNode( ISD::BITCAST, DL, MVT::v2i64, - DAG.getNode(X86ISD::PSHUFD, SDLoc(Op), MVT::v4i32, V1, - getV4X86ShuffleImm8ForMask(WidenedMask, DAG))); + DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, + getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG))); } assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!"); assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!"); @@ -7556,7 +7643,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT, int V1Index = V2AdjIndex; int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0}; V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1, - getV4X86ShuffleImm8ForMask(BlendMask, DAG)); + getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG)); // Now proceed to reconstruct the final blend as we have the necessary // high or low half formed. @@ -7595,7 +7682,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT, (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4, (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4}; V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, - getV4X86ShuffleImm8ForMask(BlendMask, DAG)); + getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG)); // Now we do a normal shuffle of V1 by giving V1 as both operands to // a blend. @@ -7607,7 +7694,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(SDLoc DL, MVT VT, } } return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV, - getV4X86ShuffleImm8ForMask(NewMask, DAG)); + getV4X86ShuffleImm8ForMask(NewMask, DL, DAG)); } /// \brief Lower 4-lane 32-bit floating point shuffles. @@ -7647,13 +7734,13 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // If we have AVX, we can use VPERMILPS which will allow folding a load // into the shuffle. return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1, - getV4X86ShuffleImm8ForMask(Mask, DAG)); + getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); } // Otherwise, use a straight shuffle of a single input vector. We pass the // input vector to both operands to simulate this with a SHUFPS. return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1, - getV4X86ShuffleImm8ForMask(Mask, DAG)); + getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); } // There are special ways we can lower some single-element blends. However, we @@ -7739,7 +7826,7 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, Mask = UnpackHiMask; return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, - getV4X86ShuffleImm8ForMask(Mask, DAG)); + getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); } // Try to use shift instructions. @@ -7956,7 +8043,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]); V = DAG.getNode(FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL, MVT::v8i16, V, - getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DAG)); + getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); for (int &M : Mask) if (M != -1 && M == FixIdx) @@ -7983,7 +8070,8 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( V = DAG.getNode(ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG))); + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, + DAG))); // Adjust the mask to match the new locations of A and B. for (int &M : Mask) @@ -8219,15 +8307,16 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( // target half. if (!isNoopShuffleMask(PSHUFLMask)) V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, - getV4X86ShuffleImm8ForMask(PSHUFLMask, DAG)); + getV4X86ShuffleImm8ForMask(PSHUFLMask, DL, DAG)); if (!isNoopShuffleMask(PSHUFHMask)) V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, - getV4X86ShuffleImm8ForMask(PSHUFHMask, DAG)); + getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG)); if (!isNoopShuffleMask(PSHUFDMask)) V = DAG.getNode(ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getNode(ISD::BITCAST, DL, PSHUFDVT, V), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG))); + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, + DAG))); // At this point, each half should contain all its inputs, and we can then // just shuffle them into their final position. @@ -8241,7 +8330,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( // Do a half shuffle for the low mask. if (!isNoopShuffleMask(LoMask)) V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, - getV4X86ShuffleImm8ForMask(LoMask, DAG)); + getV4X86ShuffleImm8ForMask(LoMask, DL, DAG)); // Do a half shuffle with the high mask after shifting its values down. for (int &M : HiMask) @@ -8249,7 +8338,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( M -= 4; if (!isNoopShuffleMask(HiMask)) V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, - getV4X86ShuffleImm8ForMask(HiMask, DAG)); + getV4X86ShuffleImm8ForMask(HiMask, DL, DAG)); return V; } @@ -8279,8 +8368,8 @@ static SDValue lowerVectorShuffleAsPSHUFB(SDLoc DL, MVT VT, SDValue V1, : (Mask[i / Scale] - Size) * Scale + i % Scale; if (Zeroable[i / Scale]) V1Idx = V2Idx = ZeroMask; - V1Mask[i] = DAG.getConstant(V1Idx, MVT::i8); - V2Mask[i] = DAG.getConstant(V2Idx, MVT::i8); + V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8); + V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8); V1InUse |= (ZeroMask != V1Idx); V2InUse |= (ZeroMask != V2Idx); } @@ -8723,7 +8812,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, MVT MaskVTs[] = { MVT::v8i16, MVT::v4i32, MVT::v2i64 }; SDValue ByteClearMask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, - DAG.getConstant(0xFF, MaskVTs[NumEvenDrops - 1])); + DAG.getConstant(0xFF, DL, MaskVTs[NumEvenDrops - 1])); V1 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V1, ByteClearMask); if (!IsSingleInput) V2 = DAG.getNode(ISD::AND, DL, MVT::v16i8, V2, ByteClearMask); @@ -8769,7 +8858,7 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // Use a mask to drop the high bytes. VLoHalf = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V); VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf, - DAG.getConstant(0x00FF, MVT::v8i16)); + DAG.getConstant(0x00FF, DL, MVT::v8i16)); // This will be a single vector shuffle instead of a blend so nuke VHiHalf. VHiHalf = DAG.getUNDEF(MVT::v8i16); @@ -8915,9 +9004,9 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1, auto *BV = dyn_cast(V); if (!BV) { LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, DL)); HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigSplitVT, V, - DAG.getIntPtrConstant(OrigSplitNumElements)); + DAG.getIntPtrConstant(OrigSplitNumElements, DL)); } else { SmallVector LoOps, HiOps; @@ -9106,7 +9195,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT, // allow folding it into a memory operand. unsigned PERMMask = 3 | 2 << 4; SDValue Flipped = DAG.getNode(X86ISD::VPERM2X128, DL, VT, DAG.getUNDEF(VT), - V1, DAG.getConstant(PERMMask, MVT::i8)); + V1, DAG.getConstant(PERMMask, DL, MVT::i8)); return DAG.getVectorShuffle(VT, DL, V1, Flipped, FlippedBlendMask); } @@ -9143,9 +9232,10 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements() / 2); SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, DL)); SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, - OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0)); + OnlyUsesV1 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); } } @@ -9194,7 +9284,7 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, } return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, - DAG.getConstant(PermMask, MVT::i8)); + DAG.getConstant(PermMask, DL, MVT::i8)); } /// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then @@ -9330,13 +9420,13 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) | ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3); return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1, - DAG.getConstant(VPERMILPMask, MVT::i8)); + DAG.getConstant(VPERMILPMask, DL, MVT::i8)); } // With AVX2 we have direct support for this permutation. if (Subtarget->hasAVX2()) return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1, - getV4X86ShuffleImm8ForMask(Mask, DAG)); + getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); // Otherwise, fall back. return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, @@ -9366,7 +9456,7 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 5) << 1) | ((Mask[2] == 3) << 2) | ((Mask[3] == 7) << 3); return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V1, V2, - DAG.getConstant(SHUFPDMask, MVT::i8)); + DAG.getConstant(SHUFPDMask, DL, MVT::i8)); } if ((Mask[0] == -1 || (Mask[0] >= 4 && Mask[0] < 6)) && (Mask[1] == -1 || Mask[1] < 2) && @@ -9375,7 +9465,7 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, unsigned SHUFPDMask = (Mask[0] == 5) | ((Mask[1] == 1) << 1) | ((Mask[2] == 7) << 2) | ((Mask[3] == 3) << 3); return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f64, V2, V1, - DAG.getConstant(SHUFPDMask, MVT::i8)); + DAG.getConstant(SHUFPDMask, DL, MVT::i8)); } // Try to simplify this by merging 128-bit lanes to enable a lane-based @@ -9442,7 +9532,7 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ISD::BITCAST, DL, MVT::v4i64, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, DAG.getNode(ISD::BITCAST, DL, MVT::v8i32, V1), - getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG))); + getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); } } @@ -9450,7 +9540,7 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // lanes. if (isSingleInputShuffleMask(Mask)) return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1, - getV4X86ShuffleImm8ForMask(Mask, DAG)); + getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); // Try to use shift instructions. if (SDValue Shift = @@ -9520,7 +9610,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (isSingleInputShuffleMask(Mask)) return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1, - getV4X86ShuffleImm8ForMask(RepeatedMask, DAG)); + getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13})) @@ -9547,7 +9637,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, SDValue VPermMask[8]; for (int i = 0; i < 8; ++i) VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32) - : DAG.getConstant(Mask[i], MVT::i32); + : DAG.getConstant(Mask[i], DL, MVT::i32); if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask)) return DAG.getNode( X86ISD::VPERMILPV, DL, MVT::v8f32, V1, @@ -9620,7 +9710,7 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); if (isSingleInputShuffleMask(Mask)) return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1, - getV4X86ShuffleImm8ForMask(RepeatedMask, DAG)); + getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13})) @@ -9648,7 +9738,7 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, SDValue VPermMask[8]; for (int i = 0; i < 8; ++i) VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32) - : DAG.getConstant(Mask[i], MVT::i32); + : DAG.getConstant(Mask[i], DL, MVT::i32); return DAG.getNode( X86ISD::VPERMV, DL, MVT::v8i32, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1); @@ -9745,8 +9835,8 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, int M = i < 8 ? Mask[i] : Mask[i] - 8; assert(M >= 0 && M < 8 && "Invalid single-input mask!"); - PSHUFBMask[2 * i] = DAG.getConstant(2 * M, MVT::i8); - PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, MVT::i8); + PSHUFBMask[2 * i] = DAG.getConstant(2 * M, DL, MVT::i8); + PSHUFBMask[2 * i + 1] = DAG.getConstant(2 * M + 1, DL, MVT::i8); } return DAG.getNode( ISD::BITCAST, DL, MVT::v16i16, @@ -9837,7 +9927,8 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, PSHUFBMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i8) - : DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, MVT::i8); + : DAG.getConstant(Mask[i] < 16 ? Mask[i] : Mask[i] - 16, DL, + MVT::i8); return DAG.getNode( X86ISD::PSHUFB, DL, MVT::v32i8, V1, @@ -9872,7 +9963,7 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [NumElts](int M) { return M >= NumElts; }); - + if (NumV2Elements == 1 && Mask[0] >= NumElts) if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( DL, VT, V1, V2, Mask, Subtarget, DAG)) @@ -10435,11 +10526,11 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const rc = getRegClassFor(MVT::v16i1); unsigned MaxSift = rc->getSize()*8 - 1; Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec, - DAG.getConstant(MaxSift - IdxVal, MVT::i8)); + DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8)); Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec, - DAG.getConstant(MaxSift, MVT::i8)); + DAG.getConstant(MaxSift, dl, MVT::i8)); return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } SDValue @@ -10466,10 +10557,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT); SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT, getZeroVector(MaskVT, Subtarget, DAG, dl), - Idx, DAG.getConstant(0, getPointerTy())); + Idx, DAG.getConstant(0, dl, getPointerTy())); SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), - Perm, DAG.getConstant(0, getPointerTy())); + Perm, DAG.getConstant(0, dl, getPointerTy())); } return SDValue(); } @@ -10489,7 +10580,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // IdxVal -= NumElems/2; IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, - DAG.getConstant(IdxVal, MVT::i32)); + DAG.getConstant(IdxVal, dl, MVT::i32)); } assert(VecVT.is128BitVector() && "Unexpected vector length"); @@ -10531,7 +10622,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } if (VT.getSizeInBits() == 64) { @@ -10550,7 +10641,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0), DAG.getUNDEF(VVT), Mask); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } return SDValue(); @@ -10581,13 +10672,13 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt); if (Vec.getOpcode() == ISD::UNDEF) return DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec, - DAG.getConstant(IdxVal, MVT::i8)); + DAG.getConstant(IdxVal, dl, MVT::i8)); const TargetRegisterClass* rc = getRegClassFor(VecVT); unsigned MaxSift = rc->getSize()*8 - 1; EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec, - DAG.getConstant(MaxSift, MVT::i8)); + DAG.getConstant(MaxSift, dl, MVT::i8)); EltInVec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, EltInVec, - DAG.getConstant(MaxSift - IdxVal, MVT::i8)); + DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8)); return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec); } @@ -10620,11 +10711,11 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, if ((Subtarget->hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) || (Subtarget->hasAVX2() && EltVT == MVT::i32)) { SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); - N2 = DAG.getIntPtrConstant(1); + N2 = DAG.getIntPtrConstant(1, dl); return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, N2); } } - + // Get the desired 128-bit vector chunk. SDValue V = Extract128BitVector(N0, IdxVal, DAG, dl); @@ -10633,7 +10724,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128; V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1, - DAG.getConstant(IdxIn128, MVT::i32)); + DAG.getConstant(IdxIn128, dl, MVT::i32)); // Insert the changed part back into the bigger vector return Insert128BitVector(N0, V, IdxVal, DAG, dl); @@ -10655,7 +10746,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, if (N1.getValueType() != MVT::i32) N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) - N2 = DAG.getIntPtrConstant(IdxVal); + N2 = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(Opc, dl, VT, N0, N1, N2); } @@ -10679,11 +10770,11 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // But if optimizing for size and there's a load folding opportunity, // generate insertps because blendps does not have a 32-bit memory // operand form. - N2 = DAG.getIntPtrConstant(1); + N2 = DAG.getIntPtrConstant(1, dl); N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, N2); } - N2 = DAG.getIntPtrConstant(IdxVal << 4); + N2 = DAG.getIntPtrConstant(IdxVal << 4, dl); // Create this as a scalar to vector.. N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); @@ -10704,7 +10795,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, if (N1.getValueType() != MVT::i32) N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) - N2 = DAG.getIntPtrConstant(IdxVal); + N2 = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); } return SDValue(); @@ -10813,10 +10904,10 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, if (OpVT.getVectorElementType() == MVT::i1) { if (IdxVal == 0 && Vec.getOpcode() == ISD::UNDEF) // the operation is legal return Op; - SDValue ZeroIdx = DAG.getIntPtrConstant(0); + SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); SDValue Undef = DAG.getUNDEF(OpVT); unsigned NumElems = OpVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems/2, MVT::i8); + SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8); if (IdxVal == OpVT.getVectorNumElements() / 2) { // Zero upper bits of the Vec @@ -11031,7 +11122,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, // addition for it. if (Offset != 0) Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result, - DAG.getConstant(Offset, getPointerTy())); + DAG.getConstant(Offset, dl, getPointerTy())); return Result; } @@ -11146,7 +11237,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, is64Bit ? 257 : 256)); SDValue ThreadPointer = - DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0), + DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl), MachinePointerInfo(Ptr), false, false, false, 0); unsigned char OperandFlags = 0; @@ -11288,9 +11379,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue TlsArray = Subtarget->is64Bit() - ? DAG.getIntPtrConstant(0x58) + ? DAG.getIntPtrConstant(0x58, dl) : (Subtarget->isTargetWindowsGNU() - ? DAG.getIntPtrConstant(0x2C) + ? DAG.getIntPtrConstant(0x2C, dl) : DAG.getExternalSymbol("_tls_array", getPointerTy())); SDValue ThreadPointer = @@ -11307,7 +11398,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(), false, false, false, 0); - SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), + SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), dl, getPointerTy()); IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale); @@ -11344,10 +11435,10 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { // generic ISD nodes haven't. Insert an AND to be safe, it's optimized away // during isel. SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, - DAG.getConstant(VTBits - 1, MVT::i8)); + DAG.getConstant(VTBits - 1, dl, MVT::i8)); SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi, - DAG.getConstant(VTBits - 1, MVT::i8)) - : DAG.getConstant(0, VT); + DAG.getConstant(VTBits - 1, dl, MVT::i8)) + : DAG.getConstant(0, dl, VT); SDValue Tmp2, Tmp3; if (Op.getOpcode() == ISD::SHL_PARTS) { @@ -11362,12 +11453,12 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { // rely on the results of shld/shrd. Insert a test and select the appropriate // values for large shift amounts. SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, - DAG.getConstant(VTBits, MVT::i8)); + DAG.getConstant(VTBits, dl, MVT::i8)); SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32, - AndNode, DAG.getConstant(0, MVT::i8)); + AndNode, DAG.getConstant(0, dl, MVT::i8)); SDValue Hi, Lo; - SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8); + SDValue CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond }; SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond }; @@ -11546,7 +11637,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, } return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } // LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion. @@ -11554,7 +11645,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // FP constant to bias correct the final result. - SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), + SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); // Load the 32-bit value into an XMM register. @@ -11566,7 +11657,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load), - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); // Or the load with the bias. SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, @@ -11578,7 +11669,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, MVT::v2f64, Bias))); Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Or), - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); // Subtract the bias. SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); @@ -11588,7 +11679,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, if (DestVT.bitsLT(MVT::f64)) return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); if (DestVT.bitsGT(MVT::f64)) return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); @@ -11633,20 +11724,20 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // -- v >> 16 // Create the splat vector for 0x4b000000. - SDValue CstLow = DAG.getConstant(0x4b000000, MVT::i32); + SDValue CstLow = DAG.getConstant(0x4b000000, DL, MVT::i32); SDValue CstLowArray[] = {CstLow, CstLow, CstLow, CstLow, CstLow, CstLow, CstLow, CstLow}; SDValue VecCstLow = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, makeArrayRef(&CstLowArray[0], NumElts)); // Create the splat vector for 0x53000000. - SDValue CstHigh = DAG.getConstant(0x53000000, MVT::i32); + SDValue CstHigh = DAG.getConstant(0x53000000, DL, MVT::i32); SDValue CstHighArray[] = {CstHigh, CstHigh, CstHigh, CstHigh, CstHigh, CstHigh, CstHigh, CstHigh}; SDValue VecCstHigh = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, makeArrayRef(&CstHighArray[0], NumElts)); // Create the right shift. - SDValue CstShift = DAG.getConstant(16, MVT::i32); + SDValue CstShift = DAG.getConstant(16, DL, MVT::i32); SDValue CstShiftArray[] = {CstShift, CstShift, CstShift, CstShift, CstShift, CstShift, CstShift, CstShift}; SDValue VecCstShift = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, @@ -11663,7 +11754,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // Low will be bitcasted right away, so do not bother bitcasting back to its // original type. Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, - VecCstLowBitcast, DAG.getConstant(0xaa, MVT::i32)); + VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), // (uint4) 0x53000000, 0xaa); SDValue VecCstHighBitcast = @@ -11673,9 +11764,9 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // High will be bitcasted right away, so do not bother bitcasting back to // its original type. High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, - VecCstHighBitcast, DAG.getConstant(0xaa, MVT::i32)); + VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i32)); } else { - SDValue CstMask = DAG.getConstant(0xffff, MVT::i32); + SDValue CstMask = DAG.getConstant(0xffff, DL, MVT::i32); SDValue VecCstMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VecIntVT, CstMask, CstMask, CstMask, CstMask); // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000; @@ -11688,7 +11779,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // Create the vector constant for -(0x1.0p39f + 0x1.0p23f). SDValue CstFAdd = DAG.getConstantFP( - APFloat(APFloat::IEEEsingle, APInt(32, 0xD3000080)), MVT::f32); + APFloat(APFloat::IEEEsingle, APInt(32, 0xD3000080)), DL, MVT::f32); SDValue CstFAddArray[] = {CstFAdd, CstFAdd, CstFAdd, CstFAdd, CstFAdd, CstFAdd, CstFAdd, CstFAdd}; SDValue VecCstFAdd = DAG.getNode(ISD::BUILD_VECTOR, DL, VecFloatVT, @@ -11723,6 +11814,11 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, case MVT::v4i32: case MVT::v8i32: return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget); + case MVT::v16i8: + case MVT::v16i16: + if (Subtarget->hasAVX512()) + return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0)); } llvm_unreachable(nullptr); } @@ -11753,13 +11849,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); if (SrcVT == MVT::i32) { - SDValue WordOff = DAG.getConstant(4, getPointerTy()); + SDValue WordOff = DAG.getConstant(4, dl, getPointerTy()); SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, MachinePointerInfo(), false, false, 0); - SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), + SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32), OffsetSlot, MachinePointerInfo(), false, false, 0); SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); @@ -11791,8 +11887,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Check whether the sign bit is set. SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), MVT::i64), - Op.getOperand(0), DAG.getConstant(0, MVT::i64), - ISD::SETLT); + Op.getOperand(0), + DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. SDValue FudgePtr = DAG.getConstantPool( @@ -11800,8 +11896,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, getPointerTy()); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. - SDValue Zero = DAG.getIntPtrConstant(0); - SDValue Four = DAG.getIntPtrConstant(4); + SDValue Zero = DAG.getIntPtrConstant(0, dl); + SDValue Four = DAG.getIntPtrConstant(4, dl); SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, Zero, Four); FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset); @@ -11813,7 +11909,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, MVT::f32, false, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); - return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0)); + return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, + DAG.getIntPtrConstant(0, dl)); } std::pair @@ -11916,6 +12013,9 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); + if (VT.is512BitVector() || InVT.getScalarType() == MVT::i1) + return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In); + // Optimize vectors in AVX mode: // // v8i16 -> v8i32 @@ -11965,22 +12065,17 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) return DAG.getNode(X86ISD::VZEXT, DL, VT, In); - EVT ExtVT = (NumElts == 8)? MVT::v8i64 : MVT::v16i32; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // Now we have only mask extension assert(InVT.getVectorElementType() == MVT::i1); - SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType()); - const Constant *C = (dyn_cast(Cst))->getConstantIntValue(); - SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy()); - unsigned Alignment = cast(CP)->getAlignment(); - SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); - - SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, DL, ExtVT, In, Ld); + MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32; + SDValue One = + DAG.getConstant(APInt(ExtVT.getScalarSizeInBits(), 1), DL, ExtVT); + SDValue Zero = + DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), DL, ExtVT); + + SDValue V = DAG.getNode(ISD::VSELECT, DL, ExtVT, In, One, Zero); if (VT.is512BitVector()) - return Brcst; - return DAG.getNode(X86ISD::VTRUNC, DL, VT, Brcst); + return V; + return DAG.getNode(X86ISD::VTRUNC, DL, VT, V); } static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget *Subtarget, @@ -12032,6 +12127,23 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && "Invalid TRUNCATE operation"); + // move vector to mask - truncate solution for SKX + if (VT.getVectorElementType() == MVT::i1) { + if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 && + Subtarget->hasBWI()) + return Op; // legal, will go to VPMOVB2M, VPMOVW2M + if ((InVT.is256BitVector() || InVT.is128BitVector()) + && InVT.getScalarSizeInBits() <= 16 && + Subtarget->hasBWI() && Subtarget->hasVLX()) + return Op; // legal, will go to VPMOVB2M, VPMOVW2M + if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 && + Subtarget->hasDQI()) + return Op; // legal, will go to VPMOVD2M, VPMOVQ2M + if ((InVT.is256BitVector() || InVT.is128BitVector()) + && InVT.getScalarSizeInBits() >= 32 && + Subtarget->hasDQI() && Subtarget->hasVLX()) + return Op; // legal, will go to VPMOVB2M, VPMOVQ2M + } if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) { if (VT.getVectorElementType().getSizeInBits() >=8) return DAG.getNode(X86ISD::VTRUNC, DL, VT, In); @@ -12045,14 +12157,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { InVT = ExtVT; } - SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType()); - const Constant *C = (dyn_cast(Cst))->getConstantIntValue(); - SDValue CP = DAG.getConstantPool(C, getPointerTy()); - unsigned Alignment = cast(CP)->getAlignment(); - SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); - SDValue OneV = DAG.getNode(X86ISD::VBROADCAST, DL, InVT, Ld); + SDValue OneV = + DAG.getConstant(APInt::getSignBit(InVT.getScalarSizeInBits()), DL, InVT); SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In); return DAG.getNode(X86ISD::TESTM, DL, VT, And, And); } @@ -12065,13 +12171,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32), ShufMask); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, DL)); } SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, DL)); SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, - DAG.getIntPtrConstant(2)); + DAG.getIntPtrConstant(2, DL)); OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpLo); OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, OpHi); static const int ShufMask[] = {0, 2, 4, 6}; @@ -12085,16 +12191,16 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SmallVector pshufbMask; for (unsigned i = 0; i < 2; ++i) { - pshufbMask.push_back(DAG.getConstant(0x0, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x1, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x4, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x5, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x8, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0x9, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0xc, MVT::i8)); - pshufbMask.push_back(DAG.getConstant(0xd, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x0, DL, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x1, DL, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x4, DL, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x5, DL, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x8, DL, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x9, DL, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0xc, DL, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0xd, DL, MVT::i8)); for (unsigned j = 0; j < 8; ++j) - pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); + pshufbMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); } SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask); In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV); @@ -12104,15 +12210,15 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64), &ShufMask[0]); In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, DL)); return DAG.getNode(ISD::BITCAST, DL, VT, In); } SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, DL)); SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, - DAG.getIntPtrConstant(4)); + DAG.getIntPtrConstant(4, DL)); OpLo = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpLo); OpHi = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, OpHi); @@ -12151,7 +12257,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(ISD::BITCAST, DL, NVT, In), DAG.getUNDEF(NVT), &MaskVec[0]); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, DL)); } SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, @@ -12288,7 +12394,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { } // And if it is bigger, shrink it first. if (SrcVT.bitsGT(VT)) { - Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1)); + Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1, dl)); SrcVT = VT; } @@ -12347,8 +12453,8 @@ static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { // Lower ISD::FGETSIGN to (AND (X86ISD::FGETSIGNx86 ...) 1). SDValue xFGETSIGN = DAG.getNode(X86ISD::FGETSIGNx86, dl, VT, N0, - DAG.getConstant(1, VT)); - return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, VT)); + DAG.getConstant(1, dl, VT)); + return DAG.getNode(ISD::AND, dl, VT, xFGETSIGN, DAG.getConstant(1, dl, VT)); } // Check whether an OR'd tree is PTEST-able. @@ -12469,7 +12575,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, if (Op.getValueType() == MVT::i1) { SDValue ExtOp = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Op); return DAG.getNode(X86ISD::CMP, dl, MVT::i32, ExtOp, - DAG.getConstant(0, MVT::i8)); + DAG.getConstant(0, dl, MVT::i8)); } // CF and OF aren't always set the way we want. Determine which // of these we need. @@ -12492,9 +12598,8 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, case ISD::SUB: case ISD::MUL: case ISD::SHL: { - const BinaryWithFlagsSDNode *BinNode = - cast(Op.getNode()); - if (BinNode->hasNoSignedWrap()) + const auto *BinNode = cast(Op.getNode()); + if (BinNode->Flags.hasNoSignedWrap()) break; } default: @@ -12513,7 +12618,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, // return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op, // DAG.getConstant(0, MVT::i1)); return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, - DAG.getConstant(0, Op.getValueType())); + DAG.getConstant(0, dl, Op.getValueType())); } unsigned Opcode = 0; unsigned NumOperands = 0; @@ -12601,7 +12706,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, if (!Mask.isSignedIntN(32)) // Avoid large immediates. break; SDValue New = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0), - DAG.getConstant(Mask, VT)); + DAG.getConstant(Mask, dl, VT)); DAG.ReplaceAllUsesWith(Op, New); Op = New; } @@ -12687,7 +12792,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, if (Opcode == 0) // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, - DAG.getConstant(0, Op.getValueType())); + DAG.getConstant(0, dl, Op.getValueType())); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SmallVector Ops(Op->op_begin(), Op->op_begin() + NumOperands); @@ -12752,7 +12857,7 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, SDValue TruncFPSW = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Cmp); SDValue FNStSW = DAG.getNode(X86ISD::FNSTSW16r, dl, MVT::i16, TruncFPSW); SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW, - DAG.getConstant(8, MVT::i8)); + DAG.getConstant(8, dl, MVT::i8)); SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl); return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl); } @@ -12817,6 +12922,16 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, return SDValue(); } +/// If we have at least two divisions that use the same divisor, convert to +/// multplication by a reciprocal. This may need to be adjusted for a given +/// CPU if a division's cost is not at least twice the cost of a multiplication. +/// This is because we still need one division to calculate the reciprocal and +/// then we need two multiplies by that reciprocal as replacements for the +/// original divisions. +bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { + return NumUsers > 1; +} + static bool isAllOnes(SDValue V) { ConstantSDNode *C = dyn_cast(V); return C && C->isAllOnesValue(); @@ -12865,7 +12980,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, // Use BT if the immediate can't be encoded in a TEST instruction. if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) { LHS = AndLHS; - RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), LHS.getValueType()); + RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, LHS.getValueType()); } } @@ -12887,7 +13002,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(Cond, MVT::i8), BT); + DAG.getConstant(Cond, dl, MVT::i8), BT); } return SDValue(); @@ -12968,6 +13083,49 @@ static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2, CC)); } +static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue CC = Op.getOperand(2); + MVT VT = Op.getSimpleValueType(); + SDLoc dl(Op); + + assert(Op0.getValueType().getVectorElementType() == MVT::i1 && + "Unexpected type for boolean compare operation"); + ISD::CondCode SetCCOpcode = cast(CC)->get(); + SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0, + DAG.getConstant(-1, dl, VT)); + SDValue NotOp1 = DAG.getNode(ISD::XOR, dl, VT, Op1, + DAG.getConstant(-1, dl, VT)); + switch (SetCCOpcode) { + default: llvm_unreachable("Unexpected SETCC condition"); + case ISD::SETNE: + // (x != y) -> ~(x ^ y) + return DAG.getNode(ISD::XOR, dl, VT, + DAG.getNode(ISD::XOR, dl, VT, Op0, Op1), + DAG.getConstant(-1, dl, VT)); + case ISD::SETEQ: + // (x == y) -> (x ^ y) + return DAG.getNode(ISD::XOR, dl, VT, Op0, Op1); + case ISD::SETUGT: + case ISD::SETGT: + // (x > y) -> (x & ~y) + return DAG.getNode(ISD::AND, dl, VT, Op0, NotOp1); + case ISD::SETULT: + case ISD::SETLT: + // (x < y) -> (~x & y) + return DAG.getNode(ISD::AND, dl, VT, NotOp0, Op1); + case ISD::SETULE: + case ISD::SETLE: + // (x <= y) -> (~x | y) + return DAG.getNode(ISD::OR, dl, VT, NotOp0, Op1); + case ISD::SETUGE: + case ISD::SETGE: + // (x >=y) -> (x | ~y) + return DAG.getNode(ISD::OR, dl, VT, Op0, NotOp1); + } +} + static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { SDValue Op0 = Op.getOperand(0); @@ -13005,7 +13163,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG, return DAG.getNode(Opc, dl, VT, Op0, Op1); Opc = Unsigned ? X86ISD::CMPMU: X86ISD::CMPM; return DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(SSECC, MVT::i8)); + DAG.getConstant(SSECC, dl, MVT::i8)); } /// \brief Try to turn a VSETULT into a VSETULE by modifying its second @@ -13032,7 +13190,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG) if (Val == 0) return SDValue(); - ULTOp1.push_back(DAG.getConstant(Val - 1, EVT)); + ULTOp1.push_back(DAG.getConstant(Val - 1, dl, EVT)); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1); @@ -13072,22 +13230,25 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, } SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC0, MVT::i8)); + DAG.getConstant(CC0, dl, MVT::i8)); SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(CC1, MVT::i8)); + DAG.getConstant(CC1, dl, MVT::i8)); return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } // Handle all other FP comparisons here. return DAG.getNode(Opc, dl, VT, Op0, Op1, - DAG.getConstant(SSECC, MVT::i8)); + DAG.getConstant(SSECC, dl, MVT::i8)); } // Break 256-bit integer vector compare into smaller ones. if (VT.is256BitVector() && !Subtarget->hasInt256()) return Lower256IntVSETCC(Op, DAG); - bool MaskResult = (VT.getVectorElementType() == MVT::i1); EVT OpVT = Op1.getValueType(); + if (OpVT.getVectorElementType() == MVT::i1) + return LowerBoolVSETCC_AVX512(Op, DAG); + + bool MaskResult = (VT.getVectorElementType() == MVT::i1); if (Subtarget->hasAVX512()) { if (Op1.getValueType().is512BitVector() || (Subtarget->hasBWI() && Subtarget->hasVLX()) || @@ -13197,10 +13358,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // compare is always unsigned. SDValue SB; if (FlipSigns) { - SB = DAG.getConstant(0x80000000U, MVT::v4i32); + SB = DAG.getConstant(0x80000000U, dl, MVT::v4i32); } else { - SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32); - SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32); + SDValue Sign = DAG.getConstant(0x80000000U, dl, MVT::i32); + SDValue Zero = DAG.getConstant(0x00000000U, dl, MVT::i32); SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Sign, Zero, Sign, Zero); } @@ -13255,7 +13416,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // bits of the inputs before performing those operations. if (FlipSigns) { EVT EltVT = VT.getVectorElementType(); - SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT); + SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl, + VT); Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB); } @@ -13323,7 +13485,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { CCode = X86::GetOppositeBranchCondition(CCode); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(CCode, MVT::i8), + DAG.getConstant(CCode, dl, MVT::i8), Op0.getOperand(1)); if (VT == MVT::i1) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC); @@ -13335,18 +13497,18 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { (CC == ISD::SETEQ || CC == ISD::SETNE)) { ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true); - return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, MVT::i1), NewCC); + return DAG.getSetCC(dl, VT, Op0, DAG.getConstant(0, dl, MVT::i1), NewCC); } bool isFP = Op1.getSimpleValueType().isFloatingPoint(); - unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); + unsigned X86CC = TranslateX86CC(CC, dl, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) return SDValue(); SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG); EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), EFLAGS); + DAG.getConstant(X86CC, dl, MVT::i8), EFLAGS); if (VT == MVT::i1) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC); return SetCC; @@ -13411,12 +13573,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (SSECC != 8) { if (Subtarget->hasAVX512()) { SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1, - DAG.getConstant(SSECC, MVT::i8)); + DAG.getConstant(SSECC, DL, MVT::i8)); return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2); } SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, - DAG.getConstant(SSECC, MVT::i8)); + DAG.getConstant(SSECC, DL, MVT::i8)); // If we have AVX, we can use a variable vector select (VBLENDV) instead // of 3 logic instructions for size savings and potentially speed. @@ -13448,7 +13610,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, - VSel, DAG.getIntPtrConstant(0)); + VSel, DAG.getIntPtrConstant(0, DL)); } SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); @@ -13456,6 +13618,17 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + if (VT == MVT::v4i1 || VT == MVT::v2i1) { + SDValue zeroConst = DAG.getIntPtrConstant(0, DL); + Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1, + DAG.getUNDEF(MVT::v8i1), Op1, zeroConst); + Op2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1, + DAG.getUNDEF(MVT::v8i1), Op2, zeroConst); + SDValue newSelect = DAG.getNode(ISD::SELECT, DL, MVT::v8i1, + Cond, Op1, Op2); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst); + } + if (Cond.getOpcode() == ISD::SETCC) { SDValue NewCond = LowerSETCC(Cond, DAG); if (NewCond.getNode()) @@ -13486,21 +13659,22 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { (isAllOnes(Op1) == (CondCode == X86::COND_NE))) { SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, - DAG.getConstant(0, CmpOp0.getValueType()), + DAG.getConstant(0, DL, + CmpOp0.getValueType()), CmpOp0); SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, MVT::i8), + DAG.getConstant(X86::COND_B, DL, MVT::i8), SDValue(Neg.getNode(), 1)); return Res; } Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, - CmpOp0, DAG.getConstant(1, CmpOp0.getValueType())); + CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); Cmp = ConvertCmpIfNecessary(Cmp, DAG); SDValue Res = // Res = 0 or -1. DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, MVT::i8), Cmp); + DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp); if (isAllOnes(Op1) != (CondCode == X86::COND_E)) Res = DAG.getNOT(DL, Res, Res.getValueType()); @@ -13572,7 +13746,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { else Cond = X86Op.getValue(1); - CC = DAG.getConstant(X86Cond, MVT::i8); + CC = DAG.getConstant(X86Cond, DL, MVT::i8); addTest = false; } @@ -13594,7 +13768,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (addTest) { - CC = DAG.getConstant(X86::COND_NE, MVT::i8); + CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DL, DAG); } @@ -13609,7 +13783,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) { SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), - DAG.getConstant(X86::COND_B, MVT::i8), Cond); + DAG.getConstant(X86::COND_B, DL, MVT::i8), + Cond); if (isAllOnes(Op1) != (CondCode == X86::COND_B)) return DAG.getNOT(DL, Res, Res.getValueType()); return Res; @@ -13673,22 +13848,18 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget return DAG.getNode(X86ISD::VSEXT, dl, VT, In); } - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type"); - - MVT ExtVT = (NumElts == 8) ? MVT::v8i64 : MVT::v16i32; - Constant *C = ConstantInt::get(*DAG.getContext(), - APInt::getAllOnesValue(ExtVT.getScalarType().getSizeInBits())); - - SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy()); - unsigned Alignment = cast(CP)->getAlignment(); - SDValue Ld = DAG.getLoad(ExtVT.getScalarType(), dl, DAG.getEntryNode(), CP, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); - SDValue Brcst = DAG.getNode(X86ISD::VBROADCASTM, dl, ExtVT, In, Ld); + MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32; + SDValue NegOne = + DAG.getConstant(APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, + ExtVT); + SDValue Zero = + DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT); + + SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero); if (VT.is512BitVector()) - return Brcst; - return DAG.getNode(X86ISD::VTRUNC, dl, VT, Brcst); + return V; + return DAG.getNode(X86ISD::VTRUNC, dl, VT, V); } static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, @@ -13845,8 +14016,8 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, "Can only lower sext loads with a single scalar load!"); unsigned loadRegZize = RegSz; - if (Ext == ISD::SEXTLOAD && RegSz == 256) - loadRegZize /= 2; + if (Ext == ISD::SEXTLOAD && RegSz >= 256) + loadRegZize = 128; // Represent our vector as a sequence of elements which are the // largest scalar that we can load. @@ -13869,7 +14040,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, SmallVector Chains; SDValue Ptr = Ld->getBasePtr(); SDValue Increment = - DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, TLI.getPointerTy()); + DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl, TLI.getPointerTy()); SDValue Res = DAG.getUNDEF(LoadUnitVecVT); for (unsigned i = 0; i < NumLoads; ++i) { @@ -13885,7 +14056,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoadUnitVecVT, ScalarLoad); else Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, LoadUnitVecVT, Res, - ScalarLoad, DAG.getIntPtrConstant(i)); + ScalarLoad, DAG.getIntPtrConstant(i, dl)); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); } @@ -13925,7 +14096,8 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - MemVT.getVectorElementType().getSizeInBits(); Shuff = - DAG.getNode(ISD::SRA, dl, RegVT, Shuff, DAG.getConstant(Amt, RegVT)); + DAG.getNode(ISD::SRA, dl, RegVT, Shuff, + DAG.getConstant(Amt, dl, RegVT)); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); return Shuff; @@ -14092,7 +14264,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { else Cond = X86Op.getValue(1); - CC = DAG.getConstant(X86Cond, MVT::i8); + CC = DAG.getConstant(X86Cond, dl, MVT::i8); addTest = false; } else { unsigned CondOpc; @@ -14123,7 +14295,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { X86::CondCode CCode = (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); - CC = DAG.getConstant(CCode, MVT::i8); + CC = DAG.getConstant(CCode, dl, MVT::i8); SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order @@ -14141,7 +14313,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { X86::CondCode CCode = (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); - CC = DAG.getConstant(CCode, MVT::i8); + CC = DAG.getConstant(CCode, dl, MVT::i8); Cond = Cmp; addTest = false; } @@ -14154,7 +14326,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { X86::CondCode CCode = (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); - CC = DAG.getConstant(CCode, MVT::i8); + CC = DAG.getConstant(CCode, dl, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); addTest = false; } else if (Cond.getOpcode() == ISD::SETCC && @@ -14180,10 +14352,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); - CC = DAG.getConstant(X86::COND_NE, MVT::i8); + CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); - CC = DAG.getConstant(X86::COND_P, MVT::i8); + CC = DAG.getConstant(X86::COND_P, dl, MVT::i8); Cond = Cmp; addTest = false; } @@ -14210,10 +14382,10 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, Cond.getOperand(0), Cond.getOperand(1)); Cmp = ConvertCmpIfNecessary(Cmp, DAG); - CC = DAG.getConstant(X86::COND_NE, MVT::i8); + CC = DAG.getConstant(X86::COND_NE, dl, MVT::i8); Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), Chain, Dest, CC, Cmp); - CC = DAG.getConstant(X86::COND_NP, MVT::i8); + CC = DAG.getConstant(X86::COND_NP, dl, MVT::i8); Cond = Cmp; addTest = false; Dest = FalseBB; @@ -14241,7 +14413,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (addTest) { X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE; - CC = DAG.getConstant(X86Cond, MVT::i8); + CC = DAG.getConstant(X86Cond, dl, MVT::i8); Cond = EmitTest(Cond, X86Cond, dl, DAG); } Cond = ConvertCmpIfNecessary(Cond, DAG); @@ -14278,7 +14450,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // Chain the dynamic stack allocation so that it doesn't modify the stack // pointer when other instructions are using the stack. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true), + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), SDLoc(Node)); SDValue Size = Tmp2.getOperand(1); @@ -14290,11 +14462,11 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, VT)); + DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), SDValue(), + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), SDLoc(Node)); SDValue Ops[2] = { Tmp1, Tmp2 }; @@ -14350,7 +14522,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, if (Align) { SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0), - DAG.getConstant(-(uint64_t)Align, VT)); + DAG.getConstant(-(uint64_t)Align, dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP); } @@ -14385,22 +14557,22 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), DL, DAG.getConstant(FuncInfo->getVarArgsGPOffset(), - MVT::i32), + DL, MVT::i32), FIN, MachinePointerInfo(SV), false, false, 0); MemOps.push_back(Store); // Store fp_offset FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(4)); + FIN, DAG.getIntPtrConstant(4, DL)); Store = DAG.getStore(Op.getOperand(0), DL, - DAG.getConstant(FuncInfo->getVarArgsFPOffset(), + DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN, MachinePointerInfo(SV, 4), false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(4)); + FIN, DAG.getIntPtrConstant(4, DL)); SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy()); Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, @@ -14410,7 +14582,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(8)); + FIN, DAG.getIntPtrConstant(8, DL)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, @@ -14452,7 +14624,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (ArgMode == 2) { // Sanity Check: Make sure using fp_offset makes sense. - assert(!DAG.getTarget().Options.UseSoftFloat && + assert(!Subtarget->useSoftFloat() && !(DAG.getMachineFunction().getFunction()->hasFnAttribute( Attribute::NoImplicitFloat)) && Subtarget->hasSSE1()); @@ -14460,9 +14632,9 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Insert VAARG_64 node into the DAG // VAARG_64 returns two values: Variable Argument Address, Chain - SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, MVT::i32), - DAG.getConstant(ArgMode, MVT::i8), - DAG.getConstant(Align, MVT::i32)}; + SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32), + DAG.getConstant(ArgMode, dl, MVT::i8), + DAG.getConstant(Align, dl, MVT::i32)}; SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other); SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl, VTs, InstOps, MVT::i64, @@ -14493,8 +14665,8 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, SDLoc DL(Op); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, - DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, - false, + DAG.getIntPtrConstant(24, DL), 8, /*isVolatile*/false, + false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -14514,7 +14686,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, if (Opc == X86ISD::VSRAI) ShiftAmt = ElementType.getSizeInBits() - 1; else - return DAG.getConstant(0, VT); + return DAG.getConstant(0, dl, VT); } assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) @@ -14539,7 +14711,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, } ND = cast(CurrentOp); const APInt &C = ND->getAPIntValue(); - Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), ElementType)); + Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType)); } break; case X86ISD::VSRLI: @@ -14551,7 +14723,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, } ND = cast(CurrentOp); const APInt &C = ND->getAPIntValue(); - Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), ElementType)); + Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType)); } break; case X86ISD::VSRAI: @@ -14563,7 +14735,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, } ND = cast(CurrentOp); const APInt &C = ND->getAPIntValue(); - Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), ElementType)); + Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType)); } break; } @@ -14571,7 +14743,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts); } - return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8)); + return DAG.getNode(Opc, dl, VT, SrcOp, + DAG.getConstant(ShiftAmt, dl, MVT::i8)); } // getTargetVShiftNode - Handle vector element shifts where the shift amount @@ -14609,7 +14782,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, SmallVector ShOps; ShOps.push_back(ShAmt); if (SVT == MVT::i32) { - ShOps.push_back(DAG.getConstant(0, SVT)); + ShOps.push_back(DAG.getConstant(0, dl, SVT)); ShOps.push_back(DAG.getUNDEF(SVT)); } ShOps.push_back(DAG.getUNDEF(SVT)); @@ -14650,7 +14823,7 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, // are extracted by EXTRACT_SUBVECTOR. SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); switch (Op.getOpcode()) { default: break; @@ -14720,11 +14893,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src0 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); // There are 2 kinds of intrinsics in this group: - // (1) With supress-all-exceptions (sae) - 6 operands + // (1) With supress-all-exceptions (sae) or rounding mode- 6 operands // (2) With rounding mode and sae - 7 operands. if (Op.getNumOperands() == 6) { SDValue Sae = Op.getOperand(5); - return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, + unsigned Opc = IntrData->Opc1 ? IntrData->Opc1 : IntrData->Opc0; + return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Sae), Mask, Src0, Subtarget, DAG); } @@ -14800,30 +14974,46 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask.getValueType().getSizeInBits()); SDValue Cmp; if (IntrData->Type == CMP_MASK_CC) { - Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), - Op.getOperand(2), Op.getOperand(3)); + SDValue CC = Op.getOperand(3); + CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC); + // We specify 2 possible opcodes for intrinsics with rounding modes. + // First, we check if the intrinsic may have non-default rounding mode, + // (IntrData->Opc1 != 0), then we check the rounding mode operand. + if (IntrData->Opc1 != 0) { + SDValue Rnd = Op.getOperand(5); + if (cast(Rnd)->getZExtValue() != + X86::STATIC_ROUNDING::CUR_DIRECTION) + Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1), + Op.getOperand(2), CC, Rnd); + } + //default rounding mode + if(!Cmp.getNode()) + Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), + Op.getOperand(2), CC); + } else { assert(IntrData->Type == CMP_MASK && "Unexpected intrinsic type!"); Cmp = DAG.getNode(IntrData->Opc0, dl, MaskVT, Op.getOperand(1), - Op.getOperand(2)); + Op.getOperand(2)); } SDValue CmpMask = getVectorMaskingNode(Cmp, Mask, - DAG.getTargetConstant(0, MaskVT), + DAG.getTargetConstant(0, dl, + MaskVT), Subtarget, DAG); SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, DAG.getUNDEF(BitcastVT), CmpMask, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } case COMI: { // Comparison intrinsics ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); - unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG); + unsigned X86CC = TranslateX86CC(CC, dl, true, LHS, RHS, DAG); assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!"); SDValue Cond = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), Cond); + DAG.getConstant(X86CC, dl, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } case VSHIFT: @@ -14850,7 +15040,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDLoc dl(Op); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress, PassThru); @@ -14865,7 +15055,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDLoc dl(Op); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1), Op.getOperand(2)); } @@ -14955,7 +15145,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue RHS = Op.getOperand(2); unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST; SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS); - SDValue CC = DAG.getConstant(X86CC, MVT::i8); + SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } @@ -14964,7 +15154,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B; SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1)); SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2)); - SDValue CC = DAG.getConstant(X86CC, MVT::i8); + SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8); SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); @@ -15029,7 +15219,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), + DAG.getConstant(X86CC, dl, MVT::i8), SDValue(PCMP.getNode(), 1)); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } @@ -15056,17 +15246,17 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); ConstantSDNode *C = dyn_cast(ScaleOp); assert(C && "Invalid scale type"); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); EVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); if (MaskC) - MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT); + MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); - SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); if (Src.getOpcode() == ISD::UNDEF) Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); @@ -15082,15 +15272,15 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); ConstantSDNode *C = dyn_cast(ScaleOp); assert(C && "Invalid scale type"); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); - SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); EVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); if (MaskC) - MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT); + MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other); @@ -15105,15 +15295,15 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); ConstantSDNode *C = dyn_cast(ScaleOp); assert(C && "Invalid scale type"); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); - SDValue Disp = DAG.getTargetConstant(0, MVT::i32); + SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); + SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); EVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); if (MaskC) - MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT); + MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); //SDVTList VTs = DAG.getVTList(MVT::Other); @@ -15154,7 +15344,7 @@ static void getReadPerformanceCounter(SDNode *N, SDLoc DL, // The EAX register is loaded with the low-order 32 bits. The EDX register // is loaded with the supported high-order bits of the counter. SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, - DAG.getConstant(32, MVT::i8)); + DAG.getConstant(32, DL, MVT::i8)); Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp)); Results.push_back(Chain); return; @@ -15208,7 +15398,7 @@ static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode, // The EDX register is loaded with the high-order 32 bits of the MSR, and // the EAX register is loaded with the low-order 32 bits. SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, - DAG.getConstant(32, MVT::i8)); + DAG.getConstant(32, DL, MVT::i8)); Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp)); Results.push_back(Chain); return; @@ -15253,8 +15443,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. // Otherwise return the value from Rand, which is always 0, casted to i32. SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), - DAG.getConstant(1, Op->getValueType(1)), - DAG.getConstant(X86::COND_B, MVT::i32), + DAG.getConstant(1, dl, Op->getValueType(1)), + DAG.getConstant(X86::COND_B, dl, MVT::i32), SDValue(Result.getNode(), 1) }; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, DAG.getVTList(Op->getValueType(1), MVT::Glue), @@ -15272,8 +15462,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDValue Index = Op.getOperand(4); SDValue Mask = Op.getOperand(5); SDValue Scale = Op.getOperand(6); - return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, - Subtarget); + return getGatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, + Chain, Subtarget); } case SCATTER: { //scatter(base, mask, index, v1, scale); @@ -15283,14 +15473,13 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDValue Index = Op.getOperand(4); SDValue Src = Op.getOperand(5); SDValue Scale = Op.getOperand(6); - return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain); + return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, + Scale, Chain); } case PREFETCH: { SDValue Hint = Op.getOperand(6); - unsigned HintVal; - if (dyn_cast (Hint) == nullptr || - (HintVal = dyn_cast (Hint)->getZExtValue()) > 1) - llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1"); + unsigned HintVal = cast(Hint)->getZExtValue(); + assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1"); unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0); SDValue Chain = Op.getOperand(0); SDValue Mask = Op.getOperand(2); @@ -15302,7 +15491,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, // Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP). case RDTSC: { SmallVector Results; - getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget, Results); + getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget, + Results); return DAG.getMergeValues(Results, dl); } // Read Performance Monitoring Counters. @@ -15316,7 +15506,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0)); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86::COND_NE, MVT::i8), + DAG.getConstant(X86::COND_NE, dl, MVT::i8), InTrans); SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC); return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), @@ -15328,14 +15518,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::Other); SDVTList VTs = DAG.getVTList(Op.getOperand(3)->getValueType(0), MVT::Other); SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(2), - DAG.getConstant(-1, MVT::i8)); + DAG.getConstant(-1, dl, MVT::i8)); SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(3), Op.getOperand(4), GenCF.getValue(1)); SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0), Op.getOperand(5), MachinePointerInfo(), false, false, 0); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86::COND_B, MVT::i8), + DAG.getConstant(X86::COND_B, dl, MVT::i8), Res.getValue(1)); Results.push_back(SetCC); Results.push_back(Store); @@ -15359,7 +15549,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, Mask.getValueType().getSizeInBits()); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress, DAG.getUNDEF(VT)); @@ -15383,7 +15573,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, Mask.getValueType().getSizeInBits()); SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, false, 0); @@ -15411,7 +15601,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); - SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), PtrVT); + SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), @@ -15442,7 +15632,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // Set up a frame object for the return address. unsigned SlotSize = RegInfo->getSlotSize(); FrameAddrIndex = MF.getFrameInfo()->CreateFixedObject( - SlotSize, /*Offset=*/INT64_MIN, /*IsImmutable=*/false); + SlotSize, /*Offset=*/0, /*IsImmutable=*/false); FuncInfo->setFAIndex(FrameAddrIndex); } return DAG.getFrameIndex(FrameAddrIndex, VT); @@ -15479,7 +15669,7 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName, SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); - return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize()); + return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { @@ -15498,7 +15688,8 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX; SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, - DAG.getIntPtrConstant(RegInfo->getSlotSize())); + DAG.getIntPtrConstant(RegInfo->getSlotSize(), + dl)); StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); @@ -15553,12 +15744,12 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, // Load the pointer to the nested function into R11. unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; - OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), + OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), Addr, MachinePointerInfo(TrmpAddr), false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, - DAG.getConstant(2, MVT::i64)); + DAG.getConstant(2, dl, MVT::i64)); OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 2), false, false, 2); @@ -15567,13 +15758,13 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, // R10 is specified in X86CallingConv.td OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, - DAG.getConstant(10, MVT::i64)); - OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), + DAG.getConstant(10, dl, MVT::i64)); + OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), Addr, MachinePointerInfo(TrmpAddr, 10), false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, - DAG.getConstant(12, MVT::i64)); + DAG.getConstant(12, dl, MVT::i64)); OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 12), false, false, 2); @@ -15581,16 +15772,16 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, - DAG.getConstant(20, MVT::i64)); - OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), + DAG.getConstant(20, dl, MVT::i64)); + OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), Addr, MachinePointerInfo(TrmpAddr, 20), false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, - DAG.getConstant(22, MVT::i64)); - OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr, - MachinePointerInfo(TrmpAddr, 22), + DAG.getConstant(22, dl, MVT::i64)); + OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, dl, MVT::i8), + Addr, MachinePointerInfo(TrmpAddr, 22), false, false, 0); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); @@ -15643,32 +15834,32 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue Addr, Disp; Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, - DAG.getConstant(10, MVT::i32)); + DAG.getConstant(10, dl, MVT::i32)); Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr); // This is storing the opcode for MOV32ri. const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte. const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7; OutChains[0] = DAG.getStore(Root, dl, - DAG.getConstant(MOV32ri|N86Reg, MVT::i8), + DAG.getConstant(MOV32ri|N86Reg, dl, MVT::i8), Trmp, MachinePointerInfo(TrmpAddr), false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, - DAG.getConstant(1, MVT::i32)); + DAG.getConstant(1, dl, MVT::i32)); OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 1), false, false, 1); const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, - DAG.getConstant(5, MVT::i32)); - OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr, - MachinePointerInfo(TrmpAddr, 5), + DAG.getConstant(5, dl, MVT::i32)); + OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8), + Addr, MachinePointerInfo(TrmpAddr, 5), false, false, 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, - DAG.getConstant(6, MVT::i32)); + DAG.getConstant(6, dl, MVT::i32)); OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, MachinePointerInfo(TrmpAddr, 6), false, false, 1); @@ -15725,20 +15916,20 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue CWD1 = DAG.getNode(ISD::SRL, DL, MVT::i16, DAG.getNode(ISD::AND, DL, MVT::i16, - CWD, DAG.getConstant(0x800, MVT::i16)), - DAG.getConstant(11, MVT::i8)); + CWD, DAG.getConstant(0x800, DL, MVT::i16)), + DAG.getConstant(11, DL, MVT::i8)); SDValue CWD2 = DAG.getNode(ISD::SRL, DL, MVT::i16, DAG.getNode(ISD::AND, DL, MVT::i16, - CWD, DAG.getConstant(0x400, MVT::i16)), - DAG.getConstant(9, MVT::i8)); + CWD, DAG.getConstant(0x400, DL, MVT::i16)), + DAG.getConstant(9, DL, MVT::i8)); SDValue RetVal = DAG.getNode(ISD::AND, DL, MVT::i16, DAG.getNode(ISD::ADD, DL, MVT::i16, DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2), - DAG.getConstant(1, MVT::i16)), - DAG.getConstant(3, MVT::i16)); + DAG.getConstant(1, DL, MVT::i16)), + DAG.getConstant(3, DL, MVT::i16)); return DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); @@ -15764,14 +15955,15 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) { // If src is zero (i.e. bsr sets ZF), returns NumBits. SDValue Ops[] = { Op, - DAG.getConstant(NumBits+NumBits-1, OpVT), - DAG.getConstant(X86::COND_E, MVT::i8), + DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), + DAG.getConstant(X86::COND_E, dl, MVT::i8), Op.getValue(1) }; Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops); // Finally xor with NumBits-1. - Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT)); + Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, + DAG.getConstant(NumBits - 1, dl, OpVT)); if (VT == MVT::i8) Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op); @@ -15796,7 +15988,8 @@ static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op); // And xor with NumBits-1. - Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT)); + Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, + DAG.getConstant(NumBits - 1, dl, OpVT)); if (VT == MVT::i8) Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op); @@ -15816,8 +16009,8 @@ static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) { // If src is zero (i.e. bsf sets ZF), returns NumBits. SDValue Ops[] = { Op, - DAG.getConstant(NumBits, VT), - DAG.getConstant(X86::COND_E, MVT::i8), + DAG.getConstant(NumBits, dl, VT), + DAG.getConstant(X86::COND_E, dl, MVT::i8), Op.getValue(1) }; return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); @@ -15878,6 +16071,79 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, SDValue A = Op.getOperand(0); SDValue B = Op.getOperand(1); + // Lower v16i8/v32i8 mul as promotion to v8i16/v16i16 vector + // pairs, multiply and truncate. + if (VT == MVT::v16i8 || VT == MVT::v32i8) { + if (Subtarget->hasInt256()) { + if (VT == MVT::v32i8) { + MVT SubVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() / 2); + SDValue Lo = DAG.getIntPtrConstant(0, dl); + SDValue Hi = DAG.getIntPtrConstant(VT.getVectorNumElements() / 2, dl); + SDValue ALo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, A, Lo); + SDValue BLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, B, Lo); + SDValue AHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, A, Hi); + SDValue BHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, B, Hi); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + DAG.getNode(ISD::MUL, dl, SubVT, ALo, BLo), + DAG.getNode(ISD::MUL, dl, SubVT, AHi, BHi)); + } + + MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); + return DAG.getNode( + ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::MUL, dl, ExVT, + DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, A), + DAG.getNode(ISD::SIGN_EXTEND, dl, ExVT, B))); + } + + assert(VT == MVT::v16i8 && + "Pre-AVX2 support only supports v16i8 multiplication"); + MVT ExVT = MVT::v8i16; + + // Extract the lo parts and sign extend to i16 + SDValue ALo, BLo; + if (Subtarget->hasSSE41()) { + ALo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, A); + BLo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, B); + } else { + const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7}; + ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); + BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); + ALo = DAG.getNode(ISD::BITCAST, dl, ExVT, ALo); + BLo = DAG.getNode(ISD::BITCAST, dl, ExVT, BLo); + ALo = DAG.getNode(ISD::SRA, dl, ExVT, ALo, DAG.getConstant(8, dl, ExVT)); + BLo = DAG.getNode(ISD::SRA, dl, ExVT, BLo, DAG.getConstant(8, dl, ExVT)); + } + + // Extract the hi parts and sign extend to i16 + SDValue AHi, BHi; + if (Subtarget->hasSSE41()) { + const int ShufMask[] = {8, 9, 10, 11, 12, 13, 14, 15, + -1, -1, -1, -1, -1, -1, -1, -1}; + AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); + BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); + AHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, AHi); + BHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, BHi); + } else { + const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11, + -1, 12, -1, 13, -1, 14, -1, 15}; + AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask); + BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask); + AHi = DAG.getNode(ISD::BITCAST, dl, ExVT, AHi); + BHi = DAG.getNode(ISD::BITCAST, dl, ExVT, BHi); + AHi = DAG.getNode(ISD::SRA, dl, ExVT, AHi, DAG.getConstant(8, dl, ExVT)); + BHi = DAG.getNode(ISD::SRA, dl, ExVT, BHi, DAG.getConstant(8, dl, ExVT)); + } + + // Multiply, mask the lower 8bits of the lo/hi results and pack + SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo); + SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi); + RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT)); + RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT)); + return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); + } + // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. if (VT == MVT::v4i32) { assert(Subtarget->hasSSE2() && !Subtarget->hasSSE41() && @@ -16050,7 +16316,8 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, // unsigned multiply. if (IsSigned && !Subtarget->hasSSE41()) { SDValue ShAmt = - DAG.getConstant(31, DAG.getTargetLoweringInfo().getShiftAmountTy(VT)); + DAG.getConstant(31, dl, + DAG.getTargetLoweringInfo().getShiftAmountTy(VT)); SDValue T1 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1); SDValue T2 = DAG.getNode(ISD::AND, dl, VT, @@ -16066,6 +16333,53 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMergeValues(Ops, dl); } +// Return true if the requred (according to Opcode) shift-imm form is natively +// supported by the Subtarget +static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, + unsigned Opcode) { + if (VT.getScalarSizeInBits() < 16) + return false; + + if (VT.is512BitVector() && + (VT.getScalarSizeInBits() > 16 || Subtarget->hasBWI())) + return true; + + bool LShift = VT.is128BitVector() || + (VT.is256BitVector() && Subtarget->hasInt256()); + + bool AShift = LShift && (Subtarget->hasVLX() || + (VT != MVT::v2i64 && VT != MVT::v4i64)); + return (Opcode == ISD::SRA) ? AShift : LShift; +} + +// The shift amount is a variable, but it is the same for all vector lanes. +// These instrcutions are defined together with shift-immediate. +static +bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget, + unsigned Opcode) { + return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); +} + +// Return true if the requred (according to Opcode) variable-shift form is +// natively supported by the Subtarget +static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget, + unsigned Opcode) { + + if (!Subtarget->hasInt256() || VT.getScalarSizeInBits() < 16) + return false; + + // vXi16 supported only on AVX-512, BWI + if (VT.getScalarSizeInBits() == 16 && !Subtarget->hasBWI()) + return false; + + if (VT.is512BitVector() || Subtarget->hasVLX()) + return true; + + bool LShift = VT.is128BitVector() || VT.is256BitVector(); + bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64; + return (Opcode == ISD::SRA) ? AShift : LShift; +} + static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget) { MVT VT = Op.getSimpleValueType(); @@ -16073,26 +16387,16 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); + unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI : + (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI; + // Optimize shl/srl/sra with constant shift amount. if (auto *BVAmt = dyn_cast(Amt)) { if (auto *ShiftConst = BVAmt->getConstantSplatNode()) { uint64_t ShiftAmt = ShiftConst->getZExtValue(); - if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || - (Subtarget->hasInt256() && - (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) || - (Subtarget->hasAVX512() && - (VT == MVT::v8i64 || VT == MVT::v16i32))) { - if (Op.getOpcode() == ISD::SHL) - return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt, - DAG); - if (Op.getOpcode() == ISD::SRL) - return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, - DAG); - if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64) - return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt, - DAG); - } + if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) + return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) { unsigned NumElts = VT.getVectorNumElements(); @@ -16105,7 +16409,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL); // Zero out the rightmost bits. SmallVector V( - NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), MVT::i8)); + NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V)); } @@ -16116,7 +16420,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL); // Zero out the leftmost bits. SmallVector V( - NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, MVT::i8)); + NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SRL, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V)); } @@ -16130,7 +16434,8 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // R s>> a === ((R u>> a) ^ m) - m SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); SmallVector V(NumElts, - DAG.getConstant(128 >> ShiftAmt, MVT::i8)); + DAG.getConstant(128 >> ShiftAmt, dl, + MVT::i8)); SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V); Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); @@ -16172,19 +16477,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, if (ShAmt != ShiftAmt) return SDValue(); } - switch (Op.getOpcode()) { - default: - llvm_unreachable("Unknown shift opcode!"); - case ISD::SHL: - return getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, R, ShiftAmt, - DAG); - case ISD::SRL: - return getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, - DAG); - case ISD::SRA: - return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, ShiftAmt, - DAG); - } + return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); } return SDValue(); @@ -16197,12 +16490,13 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) || - VT == MVT::v4i32 || VT == MVT::v8i16 || - (Subtarget->hasInt256() && - ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) || - VT == MVT::v8i32 || VT == MVT::v16i16)) || - (Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) { + unsigned X86OpcI = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI : + (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI; + + unsigned X86OpcV = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHL : + (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRL : X86ISD::VSRA; + + if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) { SDValue BaseShAmt; EVT EltVT = VT.getVectorElementType(); @@ -16235,7 +16529,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, if (!BaseShAmt) // Avoid introducing an extract element from a shuffle. BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InVec, - DAG.getIntPtrConstant(SplatIdx)); + DAG.getIntPtrConstant(SplatIdx, dl)); } } @@ -16246,54 +16540,12 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, else if (EltVT.bitsLT(MVT::i32)) BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); - switch (Op.getOpcode()) { - default: - llvm_unreachable("Unknown shift opcode!"); - case ISD::SHL: - switch (VT.SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - case MVT::v16i32: - case MVT::v8i64: - return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG); - } - case ISD::SRA: - switch (VT.SimpleTy) { - default: return SDValue(); - case MVT::v4i32: - case MVT::v8i16: - case MVT::v8i32: - case MVT::v16i16: - case MVT::v16i32: - case MVT::v8i64: - return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG); - } - case ISD::SRL: - switch (VT.SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - case MVT::v16i32: - case MVT::v8i64: - return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG); - } - } + return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, DAG); } } // Special case in 32-bit mode, where i64 is expanded into high and low parts. - if (!Subtarget->is64Bit() && - (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) || - (Subtarget->hasAVX512() && VT == MVT::v8i64)) && + if (!Subtarget->is64Bit() && VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST && Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { Amt = Amt.getOperand(0); @@ -16307,18 +16559,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, if (Vals[j] != Amt.getOperand(i + j)) return SDValue(); } - switch (Op.getOpcode()) { - default: - llvm_unreachable("Unknown shift opcode!"); - case ISD::SHL: - return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1)); - case ISD::SRL: - return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1)); - case ISD::SRA: - return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1)); - } + return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1)); } - return SDValue(); } @@ -16338,23 +16580,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget)) return V; - if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64)) + if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode())) return Op; - // AVX2 has VPSLLV/VPSRAV/VPSRLV. - if (Subtarget->hasInt256()) { - if (Op.getOpcode() == ISD::SRL && - (VT == MVT::v2i64 || VT == MVT::v4i32 || - VT == MVT::v4i64 || VT == MVT::v8i32)) - return Op; - if (Op.getOpcode() == ISD::SHL && - (VT == MVT::v2i64 || VT == MVT::v4i32 || - VT == MVT::v4i64 || VT == MVT::v8i32)) - return Op; - if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32)) - return Op; - } - // 2i64 vector logical shifts can efficiently avoid scalarization - do the // shifts per-lane and then shuffle the partial results back together. if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) { @@ -16393,7 +16621,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, Elts.push_back(DAG.getUNDEF(SVT)); continue; } - Elts.push_back(DAG.getConstant(One.shl(ShAmt), SVT)); + Elts.push_back(DAG.getConstant(One.shl(ShAmt), dl, SVT)); } SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts); return DAG.getNode(ISD::MUL, dl, VT, R, BV); @@ -16401,9 +16629,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, // Lower SHL with variable shift amount. if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { - Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT)); + Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT)); - Op = DAG.getNode(ISD::ADD, dl, VT, Op, DAG.getConstant(0x3f800000U, VT)); + Op = DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getConstant(0x3f800000U, dl, VT)); Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); return DAG.getNode(ISD::MUL, dl, VT, Op, R); @@ -16467,10 +16696,10 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, // Replace this node with two shifts followed by a MOVSS/MOVSD. EVT CastVT = MVT::v4i32; SDValue Splat1 = - DAG.getConstant(cast(Amt1)->getAPIntValue(), VT); + DAG.getConstant(cast(Amt1)->getAPIntValue(), dl, VT); SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1); SDValue Splat2 = - DAG.getConstant(cast(Amt2)->getAPIntValue(), VT); + DAG.getConstant(cast(Amt2)->getAPIntValue(), dl, VT); SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2); if (TargetOpcode == X86ISD::MOVSD) CastVT = MVT::v2i64; @@ -16483,24 +16712,15 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, } if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) { - assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq."); - - // a = a << 5; - Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, VT)); - Op = DAG.getNode(ISD::BITCAST, dl, VT, Op); + // Turn 'a' into a mask suitable for VSELECT: a = a << 5; + Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(5, dl, VT)); - // Turn 'a' into a mask suitable for VSELECT - SDValue VSelM = DAG.getConstant(0x80, VT); + SDValue VSelM = DAG.getConstant(0x80, dl, VT); SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op); OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM); - SDValue CM1 = DAG.getConstant(0x0f, VT); - SDValue CM2 = DAG.getConstant(0x3f, VT); - - // r = VSELECT(r, psllw(r & (char16)15, 4), a); - SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1); - M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 4, DAG); - M = DAG.getNode(ISD::BITCAST, dl, VT, M); + // r = VSELECT(r, shl(r, 4), a); + SDValue M = DAG.getNode(ISD::SHL, dl, VT, R, DAG.getConstant(4, dl, VT)); R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R); // a += a @@ -16508,10 +16728,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op); OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM); - // r = VSELECT(r, psllw(r & (char16)63, 2), a); - M = DAG.getNode(ISD::AND, dl, VT, R, CM2); - M = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, MVT::v8i16, M, 2, DAG); - M = DAG.getNode(ISD::BITCAST, dl, VT, M); + // r = VSELECT(r, shl(r, 2), a); + M = DAG.getNode(ISD::SHL, dl, VT, R, DAG.getConstant(2, dl, VT)); R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R); // a += a @@ -16636,7 +16854,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, - DAG.getConstant(X86::COND_O, MVT::i32), + DAG.getConstant(X86::COND_O, DL, MVT::i32), SDValue(Sum.getNode(), 2)); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); @@ -16649,7 +16867,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1), - DAG.getConstant(Cond, MVT::i32), + DAG.getConstant(Cond, DL, MVT::i32), SDValue(Sum.getNode(), 1)); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); @@ -16761,21 +16979,21 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // otherwise, we might be able to be more agressive on relaxed idempotent // rmw. In practice, they do not look useful, so we don't try to be // especially clever. - if (SynchScope == SingleThread) { + if (SynchScope == SingleThread) // FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at // the IR level, so we must wrap it in an intrinsic. return nullptr; - } else if (hasMFENCE(*Subtarget)) { - Function *MFence = llvm::Intrinsic::getDeclaration(M, - Intrinsic::x86_sse2_mfence); - Builder.CreateCall(MFence); - } else { + + if (!hasMFENCE(*Subtarget)) // FIXME: it might make sense to use a locked operation here but on a // different cache-line to prevent cache-line bouncing. In practice it // is probably a small win, and x86 processors without mfence are rare // enough that we do not bother. return nullptr; - } + + Function *MFence = + llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence); + Builder.CreateCall(MFence, {}); // Finally we can emit the atomic load. LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr, @@ -16801,13 +17019,13 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); SDValue Chain = Op.getOperand(0); - SDValue Zero = DAG.getConstant(0, MVT::i32); + SDValue Zero = DAG.getConstant(0, dl, MVT::i32); SDValue Ops[] = { - DAG.getRegister(X86::ESP, MVT::i32), // Base - DAG.getTargetConstant(1, MVT::i8), // Scale - DAG.getRegister(0, MVT::i32), // Index - DAG.getTargetConstant(0, MVT::i32), // Disp - DAG.getRegister(0, MVT::i32), // Segment. + DAG.getRegister(X86::ESP, MVT::i32), // Base + DAG.getTargetConstant(1, dl, MVT::i8), // Scale + DAG.getRegister(0, MVT::i32), // Index + DAG.getTargetConstant(0, dl, MVT::i32), // Disp + DAG.getRegister(0, MVT::i32), // Segment. Zero, Chain }; @@ -16840,7 +17058,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, SDValue Ops[] = { cpIn.getValue(0), Op.getOperand(1), Op.getOperand(3), - DAG.getTargetConstant(size, MVT::i8), + DAG.getTargetConstant(size, DL, MVT::i8), cpIn.getValue(1) }; SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast(Op)->getMemOperand(); @@ -16852,7 +17070,8 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS, MVT::i32, cpOut.getValue(2)); SDValue Success = DAG.getNode(X86ISD::SETCC, DL, Op->getValueType(1), - DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS); + DAG.getConstant(X86::COND_E, DL, MVT::i8), + EFLAGS); DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut); DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); @@ -16881,7 +17100,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, SmallVector Elts; for (unsigned i = 0, e = NumElts; i != e; ++i) Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec, - DAG.getIntPtrConstant(i))); + DAG.getIntPtrConstant(i, dl))); // Explicitly mark the extra elements as Undef. Elts.append(NumElts, DAG.getUNDEF(SVT)); @@ -16890,7 +17109,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts); SDValue ToV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, BV); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, dl)); } assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && @@ -16945,12 +17164,15 @@ static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, bool NeedsBitcast = EltVT == MVT::i32; MVT BitcastVT = VT.is256BitVector() ? MVT::v4i64 : MVT::v2i64; - SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), EltVT); - SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), EltVT); - SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), EltVT); + SDValue Cst55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, + EltVT); + SDValue Cst33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, + EltVT); + SDValue Cst0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, + EltVT); // v = v - ((v >> 1) & 0x55555555...) - SmallVector Ones(NumElts, DAG.getConstant(1, EltVT)); + SmallVector Ones(NumElts, DAG.getConstant(1, dl, EltVT)); SDValue OnesV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ones); SDValue Srl = DAG.getNode(ISD::SRL, dl, VT, Op, OnesV); if (NeedsBitcast) @@ -16969,7 +17191,7 @@ static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) SmallVector Mask33(NumElts, Cst33); SDValue M33 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Mask33); - SmallVector Twos(NumElts, DAG.getConstant(2, EltVT)); + SmallVector Twos(NumElts, DAG.getConstant(2, dl, EltVT)); SDValue TwosV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Twos); Srl = DAG.getNode(ISD::SRL, dl, VT, Sub, TwosV); @@ -16988,7 +17210,7 @@ static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, SDValue Add = DAG.getNode(ISD::ADD, dl, VT, AndLHS, AndRHS); // v = (v + (v >> 4)) & 0x0F0F0F0F... - SmallVector Fours(NumElts, DAG.getConstant(4, EltVT)); + SmallVector Fours(NumElts, DAG.getConstant(4, dl, EltVT)); SDValue FoursV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Fours); Srl = DAG.getNode(ISD::SRL, dl, VT, Add, FoursV); Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl); @@ -17021,7 +17243,7 @@ static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, Add = And; SmallVector Csts; for (unsigned i = 8; i <= Len/2; i *= 2) { - Csts.assign(NumElts, DAG.getConstant(i, EltVT)); + Csts.assign(NumElts, DAG.getConstant(i, dl, EltVT)); SDValue CstsV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Csts); Srl = DAG.getNode(ISD::SRL, dl, VT, Add, CstsV); Add = DAG.getNode(ISD::ADD, dl, VT, Add, Srl); @@ -17029,7 +17251,8 @@ static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget, } // The result is on the least significant 6-bits on i32 and 7-bits on i64. - SDValue Cst3F = DAG.getConstant(APInt(Len, Len == 32 ? 0x3F : 0x7F), EltVT); + SDValue Cst3F = DAG.getConstant(APInt(Len, Len == 32 ? 0x3F : 0x7F), dl, + EltVT); SmallVector Cst3FV(NumElts, Cst3F); SDValue M3F = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Cst3FV); if (NeedsBitcast) { @@ -17048,7 +17271,7 @@ static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Node); EVT T = Node->getValueType(0); SDValue negOp = DAG.getNode(ISD::SUB, dl, T, - DAG.getConstant(0, T), Node->getOperand(2)); + DAG.getConstant(0, dl, T), Node->getOperand(2)); return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, cast(Node)->getMemoryVT(), Node->getOperand(0), @@ -17154,13 +17377,105 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget, // Returned in bits 0:31 and 32:64 xmm0. SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, - CallResult.first, DAG.getIntPtrConstant(0)); + CallResult.first, DAG.getIntPtrConstant(0, dl)); SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, - CallResult.first, DAG.getIntPtrConstant(1)); + CallResult.first, DAG.getIntPtrConstant(1, dl)); SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); } +static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + assert(Subtarget->hasAVX512() && + "MGATHER/MSCATTER are supported on AVX-512 arch only"); + + MaskedScatterSDNode *N = cast(Op.getNode()); + EVT VT = N->getValue().getValueType(); + assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op"); + SDLoc dl(Op); + + // X86 scatter kills mask register, so its type should be added to + // the list of return values + if (N->getNumValues() == 1) { + SDValue Index = N->getIndex(); + if (!Subtarget->hasVLX() && !VT.is512BitVector() && + !Index.getValueType().is512BitVector()) + Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); + + SDVTList VTs = DAG.getVTList(N->getMask().getValueType(), MVT::Other); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), Index }; + + SDValue NewScatter = DAG.getMaskedScatter(VTs, VT, dl, Ops, N->getMemOperand()); + DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1)); + return SDValue(NewScatter.getNode(), 0); + } + return Op; +} + +static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + assert(Subtarget->hasAVX512() && + "MGATHER/MSCATTER are supported on AVX-512 arch only"); + + MaskedGatherSDNode *N = cast(Op.getNode()); + EVT VT = Op.getValueType(); + assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op"); + SDLoc dl(Op); + + SDValue Index = N->getIndex(); + if (!Subtarget->hasVLX() && !VT.is512BitVector() && + !Index.getValueType().is512BitVector()) { + Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), + N->getOperand(3), Index }; + DAG.UpdateNodeOperands(N, Ops); + } + return Op; +} + +SDValue X86TargetLowering::LowerGC_TRANSITION_START(SDValue Op, + SelectionDAG &DAG) const { + // TODO: Eventually, the lowering of these nodes should be informed by or + // deferred to the GC strategy for the function in which they appear. For + // now, however, they must be lowered to something. Since they are logically + // no-ops in the case of a null GC strategy (or a GC strategy which does not + // require special handling for these nodes), lower them as literal NOOPs for + // the time being. + SmallVector Ops; + + Ops.push_back(Op.getOperand(0)); + if (Op->getGluedNode()) + Ops.push_back(Op->getOperand(Op->getNumOperands() - 1)); + + SDLoc OpDL(Op); + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0); + + return NOOP; +} + +SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op, + SelectionDAG &DAG) const { + // TODO: Eventually, the lowering of these nodes should be informed by or + // deferred to the GC strategy for the function in which they appear. For + // now, however, they must be lowered to something. Since they are logically + // no-ops in the case of a null GC strategy (or a GC strategy which does not + // require special handling for these nodes), lower them as literal NOOPs for + // the time being. + SmallVector Ops; + + Ops.push_back(Op.getOperand(0)); + if (Op->getGluedNode()) + Ops.push_back(Op->getOperand(Op->getNumOperands() - 1)); + + SDLoc OpDL(Op); + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0); + + return NOOP; +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -17248,6 +17563,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); + case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG); + case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG); + case ISD::GC_TRANSITION_START: + return LowerGC_TRANSITION_START(Op, DAG); + case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION_END(Op, DAG); } } @@ -17295,6 +17615,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::FP_TO_SINT: + // FP_TO_INT*_IN_MEM is not legal for f16 inputs. Do not convert + // (FP_TO_SINT (load f16)) to FP_TO_INT*. + if (N->getOperand(0).getValueType() == MVT::f16) + break; + // fallthrough case ISD::FP_TO_UINT: { bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; @@ -17323,7 +17648,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N->getOperand(0)); - SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), + SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); SDValue VBias = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2f64, Bias, Bias); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn, @@ -17340,6 +17665,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(V); return; } + case ISD::FP_EXTEND: { + // Right now, only MVT::v2f32 has OperationAction for FP_EXTEND. + // No other ValueType for FP_EXTEND should reach this point. + assert(N->getValueType(0) == MVT::v2f32 && + "Do not know how to legalize this Node"); + return; + } case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); switch (IntNo) { @@ -17366,9 +17698,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32; SDValue cpInL, cpInH; cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), - DAG.getConstant(0, HalfT)); + DAG.getConstant(0, dl, HalfT)); cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), - DAG.getConstant(1, HalfT)); + DAG.getConstant(1, dl, HalfT)); cpInL = DAG.getCopyToReg(N->getOperand(0), dl, Regs64bit ? X86::RAX : X86::EAX, cpInL, SDValue()); @@ -17377,9 +17709,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, cpInH, cpInL.getValue(1)); SDValue swapInL, swapInH; swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), - DAG.getConstant(0, HalfT)); + DAG.getConstant(0, dl, HalfT)); swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), - DAG.getConstant(1, HalfT)); + DAG.getConstant(1, dl, HalfT)); swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RBX : X86::EBX, swapInL, cpInH.getValue(1)); @@ -17406,7 +17738,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, MVT::i32, cpOutH.getValue(2)); SDValue Success = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS); + DAG.getConstant(X86::COND_E, dl, MVT::i8), EFLAGS); Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1)); Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF)); @@ -17456,7 +17788,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SmallVector Elts; for (unsigned i = 0, e = NumElts; i != e; ++i) Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, - ToVecInt, DAG.getIntPtrConstant(i))); + ToVecInt, DAG.getIntPtrConstant(i, dl))); Results.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, DstVT, Elts)); } @@ -17464,8 +17796,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return nullptr; + switch ((X86ISD::NodeType)Opcode) { + case X86ISD::FIRST_NUMBER: break; case X86ISD::BSF: return "X86ISD::BSF"; case X86ISD::BSR: return "X86ISD::BSR"; case X86ISD::SHLD: return "X86ISD::SHLD"; @@ -17492,9 +17824,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UCOMI: return "X86ISD::UCOMI"; case X86ISD::CMPM: return "X86ISD::CMPM"; case X86ISD::CMPMU: return "X86ISD::CMPMU"; + case X86ISD::CMPM_RND: return "X86ISD::CMPM_RND"; case X86ISD::SETCC: return "X86ISD::SETCC"; case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; case X86ISD::FSETCC: return "X86ISD::FSETCC"; + case X86ISD::FGETSIGNx86: return "X86ISD::FGETSIGNx86"; case X86ISD::CMOV: return "X86ISD::CMOV"; case X86ISD::BRCOND: return "X86ISD::BRCOND"; case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; @@ -17503,16 +17837,21 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; case X86ISD::Wrapper: return "X86ISD::Wrapper"; case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP"; + case X86ISD::MOVDQ2Q: return "X86ISD::MOVDQ2Q"; + case X86ISD::MMX_MOVD2W: return "X86ISD::MMX_MOVD2W"; + case X86ISD::MMX_MOVW2D: return "X86ISD::MMX_MOVW2D"; case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; + case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDI: return "X86ISD::BLENDI"; case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND"; + case X86ISD::ADDUS: return "X86ISD::ADDUS"; case X86ISD::SUBUS: return "X86ISD::SUBUS"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; @@ -17523,7 +17862,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::SMAX: return "X86ISD::SMAX"; case X86ISD::SMIN: return "X86ISD::SMIN"; case X86ISD::FMAX: return "X86ISD::FMAX"; + case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; case X86ISD::FMIN: return "X86ISD::FMIN"; + case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND"; case X86ISD::FMAXC: return "X86ISD::FMAXC"; case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; @@ -17605,8 +17946,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; - case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; + case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST"; case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT"; + case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV"; case X86ISD::VPERMILPI: return "X86ISD::VPERMILPI"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; case X86ISD::VPERMV: return "X86ISD::VPERMV"; @@ -17619,6 +17961,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER"; + case X86ISD::MFENCE: return "X86ISD::MFENCE"; + case X86ISD::SFENCE: return "X86ISD::SFENCE"; + case X86ISD::LFENCE: return "X86ISD::LFENCE"; case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA"; case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL"; case X86ISD::SAHF: return "X86ISD::SAHF"; @@ -17630,6 +17975,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; + case X86ISD::FMADD_RND: return "X86ISD::FMADD_RND"; + case X86ISD::FNMADD_RND: return "X86ISD::FNMADD_RND"; + case X86ISD::FMSUB_RND: return "X86ISD::FMSUB_RND"; + case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND"; + case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND"; + case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND"; + case X86ISD::RNDSCALE: return "X86ISD::RNDSCALE"; case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI"; case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI"; case X86ISD::XTEST: return "X86ISD::XTEST"; @@ -17638,12 +17990,16 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::SELECT: return "X86ISD::SELECT"; case X86ISD::ADDSUB: return "X86ISD::ADDSUB"; case X86ISD::RCP28: return "X86ISD::RCP28"; + case X86ISD::EXP2: return "X86ISD::EXP2"; case X86ISD::RSQRT28: return "X86ISD::RSQRT28"; case X86ISD::FADD_RND: return "X86ISD::FADD_RND"; case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND"; case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND"; case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND"; + case X86ISD::ADDS: return "X86ISD::ADDS"; + case X86ISD::SUBS: return "X86ISD::SUBS"; } + return nullptr; } // isLegalAddressingMode - Return true if the addressing mode represented @@ -17829,6 +18185,10 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, if (!VT.isSimple()) return false; + // Not for i1 vectors + if (VT.getScalarType() == MVT::i1) + return false; + // Very little shuffling can be done for 64-bit vectors right now. if (VT.getSizeInBits() == 64) return false; @@ -19105,6 +19465,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::CMOV_RFP32: case X86::CMOV_RFP64: case X86::CMOV_RFP80: + case X86::CMOV_V8I1: + case X86::CMOV_V16I1: + case X86::CMOV_V32I1: + case X86::CMOV_V64I1: return EmitLoweredSelect(MI, BB); case X86::FP32_TO_INT16_IN_MEM: @@ -19667,7 +20031,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, int M = Mask[i / Ratio] != SM_SentinelZero ? Ratio * Mask[i / Ratio] + i % Ratio : 255; - PSHUFBMask.push_back(DAG.getConstant(M, MVT::i8)); + PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8)); } MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes); Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Input); @@ -19973,7 +20337,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, for (int &M : Mask) M = VMask[M]; V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0), - getV4X86ShuffleImm8ForMask(Mask, DAG)); + getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); // Rebuild the chain around this new shuffle. while (!Chain.empty()) { @@ -20060,7 +20424,7 @@ static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef Mask, for (int &M : Mask) M = VMask[M]; V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0), - getV4X86ShuffleImm8ForMask(Mask, DAG)); + getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); // Check that the shuffles didn't cancel each other out. If not, we need to // combine to the new one. @@ -20119,7 +20483,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, V = DAG.getNode(ISD::BITCAST, DL, DVT, V); DCI.AddToWorklist(V.getNode()); V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V, - getV4X86ShuffleImm8ForMask(DMask, DAG)); + getV4X86ShuffleImm8ForMask(DMask, DL, DAG)); DCI.AddToWorklist(V.getNode()); return DAG.getNode(ISD::BITCAST, DL, VT, V); } @@ -20337,15 +20701,6 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// PerformTruncateCombine - Converts truncate operation to -/// a sequence of vector shuffle operations. -/// It is possible when we truncate 256-bit vector to 128-bit vector -static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget *Subtarget) { - return SDValue(); -} - /// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target /// specific shuffle of a load can be folded into a single element load. /// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but @@ -20549,11 +20904,11 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector); EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(); SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, - DAG.getConstant(0, VecIdxTy)); + DAG.getConstant(0, dl, VecIdxTy)); SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, - DAG.getConstant(1, VecIdxTy)); + DAG.getConstant(1, dl, VecIdxTy)); - SDValue ShAmt = DAG.getConstant(32, + SDValue ShAmt = DAG.getConstant(32, dl, DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64)); Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf); Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, @@ -20573,7 +20928,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, // Replace each use (extract) with a load of the appropriate element. for (unsigned i = 0; i < 4; ++i) { uint64_t Offset = EltSize * i; - SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy()); + SDValue OffsetVal = DAG.getConstant(Offset, dl, TLI.getPointerTy()); SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), StackPtr, OffsetVal); @@ -20656,16 +21011,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, default: break; case ISD::SETULT: case ISD::SETULE: - Opc = hasUnsigned ? X86ISD::UMIN : 0; break; + Opc = hasUnsigned ? X86ISD::UMIN : 0u; break; case ISD::SETUGT: case ISD::SETUGE: - Opc = hasUnsigned ? X86ISD::UMAX : 0; break; + Opc = hasUnsigned ? X86ISD::UMAX : 0u; break; case ISD::SETLT: case ISD::SETLE: - Opc = hasSigned ? X86ISD::SMIN : 0; break; + Opc = hasSigned ? X86ISD::SMIN : 0u; break; case ISD::SETGT: case ISD::SETGE: - Opc = hasSigned ? X86ISD::SMAX : 0; break; + Opc = hasSigned ? X86ISD::SMAX : 0u; break; } // Check for x CC y ? y : x -- a min/max with reversed arms. } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && @@ -20674,16 +21029,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, default: break; case ISD::SETULT: case ISD::SETULE: - Opc = hasUnsigned ? X86ISD::UMAX : 0; break; + Opc = hasUnsigned ? X86ISD::UMAX : 0u; break; case ISD::SETUGT: case ISD::SETUGE: - Opc = hasUnsigned ? X86ISD::UMIN : 0; break; + Opc = hasUnsigned ? X86ISD::UMIN : 0u; break; case ISD::SETLT: case ISD::SETLE: - Opc = hasSigned ? X86ISD::SMAX : 0; break; + Opc = hasSigned ? X86ISD::SMAX : 0u; break; case ISD::SETGT: case ISD::SETGE: - Opc = hasSigned ? X86ISD::SMIN : 0; break; + Opc = hasSigned ? X86ISD::SMIN : 0u; break; } } @@ -20934,21 +21289,21 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, TrueC->getAPIntValue().isPowerOf2()) { if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, - DAG.getConstant(1, Cond.getValueType())); + DAG.getConstant(1, DL, Cond.getValueType())); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond); unsigned ShAmt = TrueC->getAPIntValue().logBase2(); return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond, - DAG.getConstant(ShAmt, MVT::i8)); + DAG.getConstant(ShAmt, DL, MVT::i8)); } // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst. if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, - DAG.getConstant(1, Cond.getValueType())); + DAG.getConstant(1, DL, Cond.getValueType())); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, @@ -20983,7 +21338,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); if (NeedsCondInvert) // Invert the condition if needed. Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, - DAG.getConstant(1, Cond.getValueType())); + DAG.getConstant(1, DL, Cond.getValueType())); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), @@ -20991,7 +21346,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Scale the condition by the difference. if (Diff != 1) Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, - DAG.getConstant(Diff, Cond.getValueType())); + DAG.getConstant(Diff, DL, + Cond.getValueType())); // Add the base if non-zero. if (FalseC->getAPIntValue() != 0) @@ -21079,7 +21435,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, (-OpRHSConst->getAPIntValue() - 1)) return DAG.getNode( X86ISD::SUBUS, DL, VT, OpLHS, - DAG.getConstant(-OpRHSConst->getAPIntValue(), VT)); + DAG.getConstant(-OpRHSConst->getAPIntValue(), DL, VT)); // Another special case: If C was a sign bit, the sub has been // canonicalized into a xor. @@ -21093,7 +21449,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // don't rely on particular values of undef lanes. return DAG.getNode( X86ISD::SUBUS, DL, VT, OpLHS, - DAG.getConstant(OpRHSConst->getAPIntValue(), VT)); + DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT)); } } } @@ -21487,7 +21843,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, // Extra check as FCMOV only supports a subset of X86 cond. (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) { SDValue Ops[] = { FalseOp, TrueOp, - DAG.getConstant(CC, MVT::i8), Flags }; + DAG.getConstant(CC, DL, MVT::i8), Flags }; return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops); } @@ -21509,14 +21865,14 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, // shift amount. if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, - DAG.getConstant(CC, MVT::i8), Cond); + DAG.getConstant(CC, DL, MVT::i8), Cond); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond); unsigned ShAmt = TrueC->getAPIntValue().logBase2(); Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond, - DAG.getConstant(ShAmt, MVT::i8)); + DAG.getConstant(ShAmt, DL, MVT::i8)); if (N->getNumValues() == 2) // Dead flag value? return DCI.CombineTo(N, Cond, SDValue()); return Cond; @@ -21526,7 +21882,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, // for any integer data type, including i8/i16. if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, - DAG.getConstant(CC, MVT::i8), Cond); + DAG.getConstant(CC, DL, MVT::i8), Cond); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, @@ -21564,14 +21920,14 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, if (isFastMultiplier) { APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue(); Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, - DAG.getConstant(CC, MVT::i8), Cond); + DAG.getConstant(CC, DL, MVT::i8), Cond); // Zero extend the condition if needed. Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); // Scale the condition by the difference. if (Diff != 1) Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, - DAG.getConstant(Diff, Cond.getValueType())); + DAG.getConstant(Diff, DL, Cond.getValueType())); // Add the base if non-zero. if (FalseC->getAPIntValue() != 0) @@ -21617,7 +21973,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, if (CC == X86::COND_E && CmpAgainst == dyn_cast(TrueOp)) { SDValue Ops[] = { FalseOp, Cond.getOperand(0), - DAG.getConstant(CC, MVT::i8), Cond }; + DAG.getConstant(CC, DL, MVT::i8), Cond }; return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops); } } @@ -21651,10 +22007,10 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, CC1 = X86::GetOppositeBranchCondition(CC1); } - SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, MVT::i8), + SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8), Flags}; SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), LOps); - SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, MVT::i8), Flags}; + SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags}; SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(CMOV.getNode(), 1)); return CMOV; @@ -21752,8 +22108,9 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, // Replace this packed shift intrinsic with a target independent // shift dag node. - SDValue Splat = DAG.getConstant(C, VT); - return DAG.getNode(ISD::SRA, SDLoc(N), VT, Op0, Splat); + SDLoc DL(N); + SDValue Splat = DAG.getConstant(C, DL, VT); + return DAG.getNode(ISD::SRA, DL, VT, Op0, Splat); } } } @@ -21803,17 +22160,17 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, SDValue NewMul; if (isPowerOf2_64(MulAmt1)) NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), - DAG.getConstant(Log2_64(MulAmt1), MVT::i8)); + DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8)); else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), - DAG.getConstant(MulAmt1, VT)); + DAG.getConstant(MulAmt1, DL, VT)); if (isPowerOf2_64(MulAmt2)) NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul, - DAG.getConstant(Log2_64(MulAmt2), MVT::i8)); + DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8)); else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, - DAG.getConstant(MulAmt2, VT)); + DAG.getConstant(MulAmt2, DL, VT)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, NewMul, false); @@ -21840,9 +22197,11 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); APInt ShAmt = N1C->getAPIntValue(); Mask = Mask.shl(ShAmt); - if (Mask != 0) - return DAG.getNode(ISD::AND, SDLoc(N), VT, - N00, DAG.getConstant(Mask, VT)); + if (Mask != 0) { + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + N00, DAG.getConstant(Mask, DL, VT)); + } } } @@ -21972,7 +22331,8 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; if (Subtarget->hasAVX512()) { SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00, - CMP01, DAG.getConstant(x86cc, MVT::i8)); + CMP01, + DAG.getConstant(x86cc, DL, MVT::i8)); if (N->getValueType(0) != MVT::i1) return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), FSetCC); @@ -21980,7 +22340,8 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, } SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01, - DAG.getConstant(x86cc, MVT::i8)); + DAG.getConstant(x86cc, DL, + MVT::i8)); bool is64BitFP = (CMP00.getValueType() == MVT::f64); MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; @@ -21996,14 +22357,16 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG, SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, Vector64); OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, - Vector32, DAG.getIntPtrConstant(0)); + Vector32, DAG.getIntPtrConstant(0, DL)); IntVT = MVT::i32; } - SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF); + SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, + OnesOrZeroesF); SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI, - DAG.getConstant(1, IntVT)); - SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed); + DAG.getConstant(1, DL, IntVT)); + SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, + ANDed); return OneBitOfTruth; } } @@ -22115,7 +22478,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, APInt Mask = APInt::getAllOnesValue(InBits); Mask = Mask.zext(VT.getScalarType().getSizeInBits()); return DAG.getNode(ISD::AND, DL, VT, - Op, DAG.getConstant(Mask, VT)); + Op, DAG.getConstant(Mask, DL, VT)); } case ISD::SIGN_EXTEND: return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, @@ -22211,8 +22574,8 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG, Mask.push_back(i / ZextRatio); SDValue NewShuffle = DAG.getVectorShuffle(Shuffle->getValueType(0), DL, - Shuffle->getOperand(0), DAG.getConstant(0, SrcType), Mask); - return DAG.getNode(ISD::BITCAST, DL, N0.getValueType(), NewShuffle); + Shuffle->getOperand(0), DAG.getConstant(0, DL, SrcType), Mask); + return DAG.getNode(ISD::BITCAST, DL, N0.getValueType(), NewShuffle); } static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, @@ -22247,7 +22610,8 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, uint64_t MaskSize = countPopulation(Mask); if (Shift + MaskSize <= VT.getSizeInBits()) return DAG.getNode(X86ISD::BEXTR, DL, VT, N0.getOperand(0), - DAG.getConstant(Shift | (MaskSize << 8), VT)); + DAG.getConstant(Shift | (MaskSize << 8), DL, + VT)); } } } // BEXTR @@ -22462,10 +22826,10 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { if (Y1C->getAPIntValue() == VT.getSizeInBits()-1) { // Generate SUB & CMOV. SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), - DAG.getConstant(0, VT), N0.getOperand(0)); + DAG.getConstant(0, DL, VT), N0.getOperand(0)); SDValue Ops[] = { N0.getOperand(0), Neg, - DAG.getConstant(X86::COND_GE, MVT::i8), + DAG.getConstant(X86::COND_GE, DL, MVT::i8), SDValue(Neg.getNode(), 1) }; return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops); } @@ -22510,7 +22874,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue Ptr = Ld->getBasePtr(); - SDValue Increment = DAG.getConstant(16, TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(16, dl, TLI.getPointerTy()); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), NumElems/2); @@ -22589,7 +22953,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) ShuffleVec[i] = NumElems*SizeRatio; NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, - DAG.getConstant(0, WideVecVT), + DAG.getConstant(0, dl, WideVecVT), &ShuffleVec[0]); } else { @@ -22601,7 +22965,7 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType()); + SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -22673,7 +23037,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) ShuffleVec[i] = NumElems*SizeRatio; NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, - DAG.getConstant(0, WideVecVT), + DAG.getConstant(0, dl, WideVecVT), &ShuffleVec[0]); } else { @@ -22685,7 +23049,7 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, unsigned NumConcat = WidenNumElts / MaskNumElts; SmallVector Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType()); + SDValue ZeroVal = DAG.getConstant(0, dl, Mask.getValueType()); Ops[0] = Mask; for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = ZeroVal; @@ -22719,7 +23083,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl); SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl); - SDValue Stride = DAG.getConstant(16, TLI.getPointerTy()); + SDValue Stride = DAG.getConstant(16, dl, TLI.getPointerTy()); SDValue Ptr0 = St->getBasePtr(); SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride); @@ -22792,7 +23156,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff); SmallVector Chains; - SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, dl, TLI.getPointerTy()); SDValue Ptr = St->getBasePtr(); @@ -22800,7 +23164,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, for (unsigned i=0, e=(ToSz*NumElems)/StoreType.getSizeInBits(); i!=e; ++i) { SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StoreType, ShuffWide, - DAG.getIntPtrConstant(i)); + DAG.getIntPtrConstant(i, dl)); SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); @@ -22822,8 +23186,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, const Function *F = DAG.getMachineFunction().getFunction(); bool NoImplicitFloatOps = F->hasFnAttribute(Attribute::NoImplicitFloat); - bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps - && Subtarget->hasSSE2(); + bool F64IsLegal = + !Subtarget->useSoftFloat() && !NoImplicitFloatOps && Subtarget->hasSSE2(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && isa(St->getValue()) && @@ -22884,7 +23248,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // Otherwise, lower to two pairs of 32-bit loads / stores. SDValue LoAddr = Ld->getBasePtr(); SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr, - DAG.getConstant(4, MVT::i32)); + DAG.getConstant(4, LdDL, MVT::i32)); SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getPointerInfo(), @@ -22905,7 +23269,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, LoAddr = St->getBasePtr(); HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr, - DAG.getConstant(4, MVT::i32)); + DAG.getConstant(4, StDL, MVT::i32)); SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getPointerInfo(), @@ -22918,6 +23282,27 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, MinAlign(St->getAlignment(), 4)); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } + + // This is similar to the above case, but here we handle a scalar 64-bit + // integer store that is extracted from a vector on a 32-bit target. + // If we have SSE2, then we can treat it like a floating-point double + // to get past legalization. The execution dependencies fixup pass will + // choose the optimal machine instruction for the store if this really is + // an integer or v2f32 rather than an f64. + if (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit() && + St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue OldExtract = St->getOperand(1); + SDValue ExtOp0 = OldExtract.getOperand(0); + unsigned VecSize = ExtOp0.getValueSizeInBits(); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64); + SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtOp0); + SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, + BitCast, OldExtract.getOperand(1)); + return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(), + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + } + return SDValue(); } @@ -23308,7 +23693,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::AND, dl, VT, DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N00.getOperand(0), N00.getOperand(1)), - DAG.getConstant(1, VT)); + DAG.getConstant(1, dl, VT)); } } @@ -23320,7 +23705,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::AND, dl, VT, DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N00.getOperand(0), N00.getOperand(1)), - DAG.getConstant(1, VT)); + DAG.getConstant(1, dl, VT)); } } if (VT.is256BitVector()) { @@ -23359,18 +23744,18 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG, if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB) if (ConstantSDNode *C = dyn_cast(LHS.getOperand(0))) if (C->getAPIntValue() == 0 && LHS.hasOneUse()) { - SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), LHS.getValueType(), RHS, + SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS, LHS.getOperand(1)); - return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV, - DAG.getConstant(0, addV.getValueType()), CC); + return DAG.getSetCC(DL, N->getValueType(0), addV, + DAG.getConstant(0, DL, addV.getValueType()), CC); } if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB) if (ConstantSDNode *C = dyn_cast(RHS.getOperand(0))) if (C->getAPIntValue() == 0 && RHS.hasOneUse()) { - SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), RHS.getValueType(), LHS, + SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS, RHS.getOperand(1)); - return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV, - DAG.getConstant(0, addV.getValueType()), CC); + return DAG.getSetCC(DL, N->getValueType(0), addV, + DAG.getConstant(0, DL, addV.getValueType()), CC); } if (VT.getScalarType() == MVT::i1 && @@ -23394,9 +23779,9 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG, assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type"); if (CC == ISD::SETGT) - return DAG.getConstant(0, VT); + return DAG.getConstant(0, DL, VT); if (CC == ISD::SETLE) - return DAG.getConstant(1, VT); + return DAG.getConstant(1, DL, VT); if (CC == ISD::SETEQ || CC == ISD::SETGE) return DAG.getNOT(DL, LHS.getOperand(0), VT); @@ -23417,7 +23802,8 @@ static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index, SDValue Addr = Load->getOperand(1); SDValue NewAddr = DAG.getNode( ISD::ADD, dl, Addr.getSimpleValueType(), Addr, - DAG.getConstant(Index * EVT.getStoreSize(), Addr.getSimpleValueType())); + DAG.getConstant(Index * EVT.getStoreSize(), dl, + Addr.getSimpleValueType())); SDValue NewLoad = DAG.getLoad(EVT, dl, Load->getChain(), NewAddr, @@ -23473,7 +23859,7 @@ static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) { if (VT == MVT::v2f64) if (auto *Mask = dyn_cast(N->getOperand(2))) if (Mask->getZExtValue() == 2 && !isShuffleFoldableLoad(V0)) { - SDValue NewMask = DAG.getConstant(1, MVT::i8); + SDValue NewMask = DAG.getConstant(1, DL, MVT::i8); return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask); } @@ -23488,12 +23874,14 @@ static SDValue MaterializeSETB(SDLoc DL, SDValue EFLAGS, SelectionDAG &DAG, if (VT == MVT::i8) return DAG.getNode(ISD::AND, DL, VT, DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS), - DAG.getConstant(1, VT)); + DAG.getConstant(X86::COND_B, DL, MVT::i8), + EFLAGS), + DAG.getConstant(1, DL, VT)); assert (VT == MVT::i1 && "Unexpected type for SECCC node"); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(X86::COND_B, MVT::i8), EFLAGS)); + DAG.getConstant(X86::COND_B, DL, MVT::i8), + EFLAGS)); } // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT @@ -23532,7 +23920,7 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG, Flags = checkBoolTestSetCCCombine(EFLAGS, CC); if (Flags.getNode()) { - SDValue Cond = DAG.getConstant(CC, MVT::i8); + SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); } @@ -23554,7 +23942,7 @@ static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, Flags = checkBoolTestSetCCCombine(EFLAGS, CC); if (Flags.getNode()) { - SDValue Cond = DAG.getConstant(CC, MVT::i8); + SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, Flags); } @@ -23633,6 +24021,11 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, if (Op0.getOpcode() == ISD::LOAD) { LoadSDNode *Ld = cast(Op0.getNode()); EVT VT = Ld->getValueType(0); + + // This transformation is not supported if the result type is f16 + if (N->getValueType(0) == MVT::f16) + return SDValue(); + if (!Ld->isVolatile() && !N->getValueType(0).isVector() && ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() && !Subtarget->is64Bit() && VT == MVT::i64) { @@ -23658,12 +24051,13 @@ static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG, SDValue(N, 1).use_empty()) { SDLoc DL(N); EVT VT = N->getValueType(0); - SDValue CarryOut = DAG.getConstant(0, N->getValueType(1)); + SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1)); SDValue Res1 = DAG.getNode(ISD::AND, DL, VT, DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, - DAG.getConstant(X86::COND_B,MVT::i8), + DAG.getConstant(X86::COND_B, DL, + MVT::i8), N->getOperand(2)), - DAG.getConstant(1, VT)); + DAG.getConstant(1, DL, VT)); return DCI.CombineTo(N, Res1, CarryOut); } @@ -23698,16 +24092,17 @@ static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) { SDValue CmpOp0 = Cmp.getOperand(0); SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0, - DAG.getConstant(1, CmpOp0.getValueType())); + DAG.getConstant(1, DL, CmpOp0.getValueType())); SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1); if (CC == X86::COND_NE) return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB, DL, OtherVal.getValueType(), OtherVal, - DAG.getConstant(-1ULL, OtherVal.getValueType()), NewCmp); + DAG.getConstant(-1ULL, DL, OtherVal.getValueType()), + NewCmp); return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC, DL, OtherVal.getValueType(), OtherVal, - DAG.getConstant(0, OtherVal.getValueType()), NewCmp); + DAG.getConstant(0, DL, OtherVal.getValueType()), NewCmp); } /// PerformADDCombine - Do target-specific dag combines on integer adds. @@ -23743,9 +24138,9 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG, EVT VT = Op0.getValueType(); SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0), - DAG.getConstant(~XorC, VT)); + DAG.getConstant(~XorC, SDLoc(Op1), VT)); return DAG.getNode(ISD::ADD, SDLoc(N), VT, NewXor, - DAG.getConstant(C->getAPIntValue()+1, VT)); + DAG.getConstant(C->getAPIntValue() + 1, SDLoc(N), VT)); } } @@ -23815,7 +24210,7 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, OrigVT = MVT::getVectorVT(OrigVT.getVectorElementType(), OrigVT.getVectorNumElements() / Ratio); OrigV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OrigVT, OrigV, - DAG.getIntPtrConstant(0)); + DAG.getIntPtrConstant(0, DL)); } Op = DAG.getNode(ISD::BITCAST, DL, OpVT, OrigV); return DAG.getNode(X86ISD::VZEXT, DL, VT, Op); @@ -23868,7 +24263,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget); case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget); - case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget); case ISD::SETCC: return PerformISDSETCCCombine(N, DAG, Subtarget); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget); case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); @@ -23897,7 +24291,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, break; } case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG); - case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DAG, Subtarget); } return SDValue(); @@ -24242,7 +24635,7 @@ TargetLowering::ConstraintWeight break; case 'G': case 'C': - if (dyn_cast(CallOperandVal)) { + if (isa(CallOperandVal)) { weight = CW_Constant; } break; @@ -24297,7 +24690,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'I': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 31) { - Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType()); break; } } @@ -24305,7 +24699,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'J': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 63) { - Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType()); break; } } @@ -24313,7 +24708,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'K': if (ConstantSDNode *C = dyn_cast(Op)) { if (isInt<8>(C->getSExtValue())) { - Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType()); break; } } @@ -24322,7 +24718,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff || (Subtarget->is64Bit() && C->getZExtValue() == 0xffffffff)) { - Result = DAG.getTargetConstant(C->getSExtValue(), Op.getValueType()); + Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), + Op.getValueType()); break; } } @@ -24330,7 +24727,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'M': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 3) { - Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType()); break; } } @@ -24338,7 +24736,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'N': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 255) { - Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType()); break; } } @@ -24346,7 +24745,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'O': if (ConstantSDNode *C = dyn_cast(Op)) { if (C->getZExtValue() <= 127) { - Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType()); break; } } @@ -24357,7 +24757,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), C->getSExtValue())) { // Widen to 64 bits here to get it sign extended. - Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64); + Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64); break; } // FIXME gcc accepts some relocatable values here too, but only in certain @@ -24370,7 +24770,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (ConstantSDNode *C = dyn_cast(Op)) { if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), C->getZExtValue())) { - Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); + Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), + Op.getValueType()); break; } } @@ -24382,7 +24783,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // Literal immediates are always ok. if (ConstantSDNode *CST = dyn_cast(Op)) { // Widen to 64 bits here to get it sign extended. - Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64); + Result = DAG.getTargetConstant(CST->getSExtValue(), SDLoc(Op), MVT::i64); break; }