diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7d195fcdae9..2f14d790742 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/VariadicFunction.h"
 #include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -85,7 +86,8 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
   // If the input is a buildvector just emit a smaller one.
   if (Vec.getOpcode() == ISD::BUILD_VECTOR)
     return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
-                       Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk);
+                       makeArrayRef(Vec->op_begin()+NormalizedIdxVal,
+                                    ElemsPerChunk));
 
   SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
   SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
@@ -826,7 +828,9 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::FRINT, VT, Expand);
     setOperationAction(ISD::FNEARBYINT, VT, Expand);
     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::MULHS, VT, Expand);
     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::MULHU, VT, Expand);
     setOperationAction(ISD::SDIVREM, VT, Expand);
     setOperationAction(ISD::UDIVREM, VT, Expand);
     setOperationAction(ISD::FPOW, VT, Expand);
@@ -938,6 +942,10 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::ADD, MVT::v2i64, Legal);
     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+    setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
+    setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
+    setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
+    setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
     setOperationAction(ISD::SUB, MVT::v16i8, Legal);
     setOperationAction(ISD::SUB, MVT::v8i16, Legal);
     setOperationAction(ISD::SUB, MVT::v4i32, Legal);
@@ -1105,9 +1113,6 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::SHL, MVT::v4i32, Custom);
 
     setOperationAction(ISD::SRA, MVT::v4i32, Custom);
-
-    setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
-    setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
   }
 
   if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
@@ -1172,8 +1177,6 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::SRA, MVT::v16i16, Custom);
     setOperationAction(ISD::SRA, MVT::v32i8, Custom);
 
-    setOperationAction(ISD::SDIV, MVT::v16i16, Custom);
-
     setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
     setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
     setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
@@ -1226,9 +1229,12 @@ void X86TargetLowering::resetOperationActions() {
       setOperationAction(ISD::MUL, MVT::v16i16, Legal);
       // Don't lower v32i8 because there is no 128-bit byte mul
 
-      setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
+      setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
+      setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
+      setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
+      setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
 
-      setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
+      setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
     } else {
       setOperationAction(ISD::ADD, MVT::v4i64, Custom);
       setOperationAction(ISD::ADD, MVT::v8i32, Custom);
@@ -1337,7 +1343,6 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
     setOperationAction(ISD::FMA, MVT::v8f64, Legal);
     setOperationAction(ISD::FMA, MVT::v16f32, Legal);
-    setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
 
     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
@@ -1516,6 +1521,15 @@ void X86TargetLowering::resetOperationActions() {
     }
   }
 
+  if (Subtarget->isTargetWin64()) {
+    setOperationAction(ISD::SDIV, MVT::i128, Custom);
+    setOperationAction(ISD::UDIV, MVT::i128, Custom);
+    setOperationAction(ISD::SREM, MVT::i128, Custom);
+    setOperationAction(ISD::UREM, MVT::i128, Custom);
+    setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
+    setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
+  }
+
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
@@ -1930,8 +1944,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
   if (Flag.getNode())
     RetOps.push_back(Flag);
 
-  return DAG.getNode(X86ISD::RET_FLAG, dl,
-                     MVT::Other, &RetOps[0], RetOps.size());
+  return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
 }
 
 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2424,13 +2437,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
         SaveXMMOps.push_back(Val);
       }
       MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
-                                   MVT::Other,
-                                   &SaveXMMOps[0], SaveXMMOps.size()));
+                                   MVT::Other, SaveXMMOps));
     }
 
     if (!MemOps.empty())
-      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                          &MemOps[0], MemOps.size());
+      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
   }
 }
 
@@ -2497,10 +2508,10 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
 
 /// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
 /// optimization is performed and it is required (FPDiff!=0).
-static SDValue
-EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
-                         SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT,
-                         unsigned SlotSize, int FPDiff, SDLoc dl) {
+static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
+                                        SDValue Chain, SDValue RetAddrFrIdx,
+                                        EVT PtrVT, unsigned SlotSize,
+                                        int FPDiff, SDLoc dl) {
   // Store the return address to the appropriate stack slot.
   if (!FPDiff) return Chain;
   // Calculate the new stack slot for the return address.
@@ -2537,17 +2548,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   if (MF.getTarget().Options.DisableTailCalls)
     isTailCall = false;
 
-  if (isTailCall) {
+  bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
+  if (IsMustTail) {
+    // Force this to be a tail call.  The verifier rules are enough to ensure
+    // that we can lower this successfully without moving the return address
+    // around.
+    isTailCall = true;
+  } else if (isTailCall) {
     // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                    isVarArg, SR != NotStructReturn,
                    MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
                    Outs, OutVals, Ins, DAG);
 
-    if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
-      report_fatal_error("failed to perform tail call elimination on a call "
-                         "site marked musttail");
-
     // Sibcalls are automatically detected tailcalls which do not require
     // ABI changes.
     if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
@@ -2582,7 +2595,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
 
   int FPDiff = 0;
-  if (isTailCall && !IsSibcall) {
+  if (isTailCall && !IsSibcall && !IsMustTail) {
     // Lower arguments at fp - stackoffset + fpdiff.
     X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
    unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
@@ -2696,8 +2709,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   }
 
   if (!MemOpChains.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                        &MemOpChains[0], MemOpChains.size());
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
 
   if (Subtarget->isPICStyleGOT()) {
     // ELF / PIC requires GOT in the EBX register before function calls via PLT
@@ -2746,8 +2758,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                         DAG.getConstant(NumXMMRegs, MVT::i8)));
   }
 
-  // For tail calls lower the arguments to the 'real' stack slot.
-  if (isTailCall) {
+  // For tail calls lower the arguments to the 'real' stack slots.  Sibcalls
+  // don't need this because the eligibility check rejects calls that require
+  // shuffling arguments passed in memory.
+  if (!IsSibcall && isTailCall) {
     // Force all the incoming stack arguments to be loaded from the stack
     // before any new outgoing arguments are stored to the stack, because the
     // outgoing stack slots may alias the incoming argument stack slots, and
@@ -2759,45 +2773,45 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     SmallVector<SDValue, 8> MemOpChains2;
     SDValue FIN;
     int FI = 0;
-    if (getTargetMachine().Options.GuaranteedTailCallOpt) {
-      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-        CCValAssign &VA = ArgLocs[i];
-        if (VA.isRegLoc())
-          continue;
-        assert(VA.isMemLoc());
-        SDValue Arg = OutVals[i];
-        ISD::ArgFlagsTy Flags = Outs[i].Flags;
-        // Create frame index.
-        int32_t Offset = VA.getLocMemOffset()+FPDiff;
-        uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
-        FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
-        FIN = DAG.getFrameIndex(FI, getPointerTy());
-
-        if (Flags.isByVal()) {
-          // Copy relative to framepointer.
-          SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
-          if (!StackPtr.getNode())
-            StackPtr = DAG.getCopyFromReg(Chain, dl,
-                                          RegInfo->getStackRegister(),
-                                          getPointerTy());
-          Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
-
-          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
-                                                           ArgChain,
-                                                           Flags, DAG, dl));
-        } else {
-          // Store relative to framepointer.
-          MemOpChains2.push_back(
-            DAG.getStore(ArgChain, dl, Arg, FIN,
-                         MachinePointerInfo::getFixedStack(FI),
-                         false, false, 0));
-        }
+    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+      CCValAssign &VA = ArgLocs[i];
+      if (VA.isRegLoc())
+        continue;
+      assert(VA.isMemLoc());
+      SDValue Arg = OutVals[i];
+      ISD::ArgFlagsTy Flags = Outs[i].Flags;
+      // Skip inalloca arguments.  They don't require any work.
+      if (Flags.isInAlloca())
+        continue;
+      // Create frame index.
+      int32_t Offset = VA.getLocMemOffset()+FPDiff;
+      uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
+      FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+      FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+      if (Flags.isByVal()) {
+        // Copy relative to framepointer.
+        SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+        if (!StackPtr.getNode())
+          StackPtr = DAG.getCopyFromReg(Chain, dl,
+                                        RegInfo->getStackRegister(),
+                                        getPointerTy());
+        Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+
+        MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+                                                         ArgChain,
+                                                         Flags, DAG, dl));
+      } else {
+        // Store relative to framepointer.
+        MemOpChains2.push_back(
+            DAG.getStore(ArgChain, dl, Arg, FIN,
+                         MachinePointerInfo::getFixedStack(FI),
+                         false, false, 0));
       }
     }
 
     if (!MemOpChains2.empty())
-      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                          &MemOpChains2[0], MemOpChains2.size());
+      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
 
     // Store the return address to the appropriate stack slot.
     Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
@@ -2934,10 +2948,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // This isn't right, although it's probably harmless on x86; liveouts
     // should be computed from returns not tail calls.  Consider a void
     // function making a tail call to a function returning int.
-    return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+    return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
  }
 
-  Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+  Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
   InFlag = Chain.getValue(1);
 
   // Create the CALLSEQ_END node.
@@ -4173,6 +4187,29 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
   return true;
 }
 
+// Match INSERTI64x4/INSERTF64x4 instructions: (src0[0], src1[0]) or
+// (src1[0], src0[1]); these manipulate 256-bit sub-vectors.
+static bool isINSERT64x4Mask(ArrayRef<int> Mask, MVT VT, unsigned int *Imm) {
+  if (!VT.is512BitVector())
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned HalfSize = NumElts/2;
+  if (isSequentialOrUndefInRange(Mask, 0, HalfSize, 0)) {
+    if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, NumElts)) {
+      *Imm = 1;
+      return true;
+    }
+  }
+  if (isSequentialOrUndefInRange(Mask, 0, HalfSize, NumElts)) {
+    if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, HalfSize)) {
+      *Imm = 0;
+      return true;
+    }
+  }
+  return false;
+}
+
 /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to MOVSS,
 /// MOVSD, and MOVD, i.e. setting the lowest element.
@@ -4830,28 +4867,24 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
     if (Subtarget->hasInt256()) { // AVX2
       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
      SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
-                        array_lengthof(Ops));
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
    } else {
      // 256-bit logic and arithmetic instructions in AVX are all
      // floating-point, no support for integer ops. Emit fp zeroed vectors.
      SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
      SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops,
-                        array_lengthof(Ops));
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
    }
  } else if (VT.is512BitVector()) { // AVX-512
    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
                      Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16);
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
  } else if (VT.getScalarType() == MVT::i1) {
    assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
    SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
-    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
-                      Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
-                       Ops, VT.getVectorNumElements());
+    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  } else
    llvm_unreachable("Unexpected vector type");
 
@@ -4871,8 +4904,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
  if (VT.is256BitVector()) {
    if (HasInt256) { // AVX2
      SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
-                        array_lengthof(Ops));
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
    } else { // AVX
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
      Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
@@ -5572,8 +5604,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
    SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
    SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
    SDValue ResNode =
-        DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops,
-                                array_lengthof(Ops), MVT::i64,
+        DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64,
                                LDBase->getPointerInfo(),
                                LDBase->getAlignment(),
                                false/*isVolatile*/, true/*ReadMem*/,
@@ -5851,18 +5882,14 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
    SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
-    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
-                      Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
-                       Ops, VT.getVectorNumElements());
+    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }
 
  if (ISD::isBuildVectorAllOnes(Op.getNode())) {
    SDValue Cst = DAG.getTargetConstant(1, MVT::i1);
-    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
-                      Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
-                       Ops, VT.getVectorNumElements());
+    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
  }
 
  bool AllContants = true;
@@ -6122,9 +6149,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
    EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
 
    // Build both the lower and upper subvector.
-    SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
-    SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
-                                NumElems/2);
+    SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
+                                makeArrayRef(&V[0], NumElems/2));
+    SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
+                                makeArrayRef(&V[NumElems / 2], NumElems/2));
 
    // Recreate the wider vector with the lower and upper part.
    if (VT.is256BitVector())
@@ -6411,8 +6439,7 @@ static SDValue getPSHUFB(ArrayRef<int> MaskVals, SDValue V1, SDLoc &dl,
  if (ShufVT != VT)
    V1 = DAG.getNode(ISD::BITCAST, dl, ShufVT, V1);
  return DAG.getNode(X86ISD::PSHUFB, dl, ShufVT, V1,
-                     DAG.getNode(ISD::BUILD_VECTOR, dl, ShufVT,
-                                 PshufbMask.data(), PshufbMask.size()));
+                     DAG.getNode(ISD::BUILD_VECTOR, dl, ShufVT, PshufbMask));
 }
 
 // v8i16 shuffles - Prefer shuffles in the following order:
@@ -6714,7 +6741,7 @@ static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
    }
    V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
                     DAG.getNode(ISD::BUILD_VECTOR, dl,
-                                 MVT::v16i8, &pshufbMask[0], 16));
+                                 MVT::v16i8, pshufbMask));
 
    // As PSHUFB will zero elements with negative indices, it's safe to ignore
    // the 2nd operand if it's undefined or zero.
@@ -6732,7 +6759,7 @@ static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
    }
    V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, dl,
-                                 MVT::v16i8, &pshufbMask[0], 16));
+                                 MVT::v16i8, pshufbMask));
    return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
  }
 
@@ -7003,8 +7030,7 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
      }
 
      // Construct the output using a BUILD_VECTOR.
-      Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &SVOps[0],
-                              SVOps.size());
+      Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, SVOps);
    } else if (InputUsed[0] < 0) {
      // No input vectors were used! The result is undefined.
      Output[l] = DAG.getUNDEF(NVT);
@@ -7766,6 +7792,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
                                getShuffleSHUFImmediate(SVOp), DAG);
  }
 
+  unsigned Idx;
+  if (VT.is512BitVector() && isINSERT64x4Mask(M, VT, &Idx))
+    return Insert256BitVector(V1, Extract256BitVector(V2, 0, DAG, dl),
+                              Idx*(NumElems/2), DAG, dl);
+
  // Handle VPERM2F128/VPERM2I128 permutations
  if (isVPERM2X128Mask(M, VT, HasFp256))
    return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
@@ -7791,8 +7822,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
      permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT));
    }
 
-    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT,
-                               &permclMask[0], NumElems);
+    SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT, permclMask);
    if (V2IsUndef)
      // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
      return DAG.getNode(X86ISD::VPERMV, dl, VT,
@@ -8491,10 +8521,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
 
  if (InFlag) {
    SDValue Ops[] = { Chain,  TGA, *InFlag };
-    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
+    Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
  } else {
    SDValue Ops[]  = { Chain, TGA };
-    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
+    Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
  }
 
  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
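A note on the mechanical change that dominates the hunks above and below: the SelectionDAG builders (DAG.getNode, DAG.getMergeValues, DAG.getMemIntrinsicNode, and friends) used to take a raw operand pointer plus an explicit count (&Ops[0] with Ops.size(), array_lengthof(Ops), or a hard-coded literal), and this patch migrates the call sites to overloads taking a single ArrayRef<SDValue>. The sketch below uses a simplified stand-in for ArrayRef (not LLVM's actual class) to show why one such parameter accepts C arrays, vectors/SmallVectors, and explicit (pointer, length) pairs, and why the separate count argument, which could silently disagree with the data, disappears:

    #include <cstddef>
    #include <vector>

    // Simplified stand-in for llvm::ArrayRef: a non-owning (pointer, length)
    // view over contiguous elements.
    template <typename T> class ArrayRefSketch {
      const T *Data;
      size_t Length;
    public:
      template <size_t N>
      ArrayRefSketch(const T (&Arr)[N]) : Data(Arr), Length(N) {} // C array
      ArrayRefSketch(const std::vector<T> &V)                     // vector
          : Data(V.data()), Length(V.size()) {}
      ArrayRefSketch(const T *Ptr, size_t Len)                    // ptr + len
          : Data(Ptr), Length(Len) {}
      size_t size() const { return Length; }
      const T &operator[](size_t I) const { return Data[I]; }
    };

    // One overload replaces the old (const T *, unsigned) pair; the view
    // carries its own length, so it can no longer disagree with the data.
    int sum(ArrayRefSketch<int> Vals) {
      int S = 0;
      for (size_t I = 0; I < Vals.size(); ++I)
        S += Vals[I];
      return S;
    }

    int main() {
      int Arr[] = {1, 2, 3};
      std::vector<int> Vec = {4, 5};
      return sum(Arr) + sum(Vec) - 15; // returns 0
    }

With this shape, every deleted array_lengthof(Ops) and Ops.size() argument in the diff becomes redundant, which is the entire point of the migration.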
@@ -8678,7 +8708,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
    SDValue Chain = DAG.getEntryNode();
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Args[] = { Chain, Offset };
-    Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
+    Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
 
    // TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
@@ -8806,15 +8836,15 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
  SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
 
  if (Op.getOpcode() == ISD::SHL_PARTS) {
-    Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
-    Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+    Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0);
+    Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1);
  } else {
-    Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
-    Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+    Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0);
+    Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1);
  }
 
  SDValue Ops[2] = { Lo, Hi };
-  return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
+  return DAG.getMergeValues(Ops, dl);
 }
 
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
@@ -8877,8 +8907,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
  SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
  SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
                                           X86ISD::FILD, DL,
-                                           Tys, Ops, array_lengthof(Ops),
-                                           SrcVT, MMO);
+                                           Tys, Ops, SrcVT, MMO);
 
  if (useSSE) {
    Chain = Result.getValue(1);
@@ -8901,8 +8930,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
                                MachineMemOperand::MOStore, SSFISize, SSFISize);
 
    Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
-                                    Ops, array_lengthof(Ops),
-                                    Op.getValueType(), MMO);
+                                    Ops, Op.getValueType(), MMO);
    Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
                         MachinePointerInfo::getFixedStack(SSFI),
                         false, false, false, 0);
@@ -9097,7 +9125,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
  SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
  SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
  SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
-                                         array_lengthof(Ops), MVT::i64, MMO);
+                                         MVT::i64, MMO);
 
  APInt FF(32, 0x5F800000ULL);
 
@@ -9190,8 +9218,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
                              MachineMemOperand::MOLoad, MemSize, MemSize);
-    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops,
-                                    array_lengthof(Ops), DstTy, MMO);
+    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
    Chain = Value.getValue(1);
    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
@@ -9205,8 +9232,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
    // Build the FP_TO_INT*_IN_MEM
    SDValue Ops[] = { Chain, Value, StackSlot };
    SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
-                                           Ops, array_lengthof(Ops), DstTy,
-                                           MMO);
+                                           Ops, DstTy, MMO);
    return std::make_pair(FIST, StackSlot);
  } else {
    SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
@@ -9218,8 +9244,8 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                               MVT::i32, eax.getValue(2));
    SDValue Ops[] = { eax, edx };
    SDValue pair = IsReplace
-      ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, array_lengthof(Ops))
-      : DAG.getMergeValues(Ops, array_lengthof(Ops), DL);
+      ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops)
+      : DAG.getMergeValues(Ops, DL);
    return std::make_pair(pair, SDValue());
  }
 }
@@ -9414,8 +9440,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
      for (unsigned j = 0; j < 8; ++j)
        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
    }
-    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8,
-                             &pshufbMask[0], 32);
+    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask);
    In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
    In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In);
 
@@ -10012,7 +10037,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
    for (unsigned i = 0; i != NumOperands; ++i)
      Ops.push_back(Op.getOperand(i));
 
-    SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+    SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
    DAG.ReplaceAllUsesWith(Op, New);
    return SDValue(New.getNode(), 1);
  }
@@ -10295,7 +10320,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG)
    ULTOp1.push_back(DAG.getConstant(Val - 1, EVT));
  }
 
-  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1.data(), ULTOp1.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1);
 }
 
 static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
@@ -10862,7 +10887,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  // condition is true.
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = { Op2, Op1, CC, Cond };
-  return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
+  return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
 }
 
 static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
@@ -11299,7 +11324,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                  SDLoc(Node));
 
    SDValue Ops[2] = { Tmp1, Tmp2 };
-    return DAG.getMergeValues(Ops, 2, dl);
+    return DAG.getMergeValues(Ops, dl);
  }
 
  // Get the inputs.
@@ -11333,7 +11358,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
                                DAG.getRegister(Vreg, SPTy));
    SDValue Ops1[2] = { Value, Chain };
-    return DAG.getMergeValues(Ops1, 2, dl);
+    return DAG.getMergeValues(Ops1, dl);
  } else {
    SDValue Flag;
    unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
@@ -11357,7 +11382,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    }
 
    SDValue Ops1[2] = { SP, Chain };
-    return DAG.getMergeValues(Ops1, 2, dl);
+    return DAG.getMergeValues(Ops1, dl);
  }
 }
 
@@ -11418,8 +11443,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
                       MachinePointerInfo(SV, 16), false, false, 0);
  MemOps.push_back(Store);
-  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
-                     &MemOps[0], MemOps.size());
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
 }
 
 SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
@@ -11473,8 +11497,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  InstOps.push_back(DAG.getConstant(Align, MVT::i32));
  SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
  SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
-                                          VTs, &InstOps[0], InstOps.size(),
-                                          MVT::i64,
+                                          VTs, InstOps, MVT::i64,
                                          MachinePointerInfo(SV),
                                          /*Align=*/0,
                                          /*Volatile=*/false,
@@ -11534,7 +11557,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
    ConstantSDNode *ND;
 
    switch(Opc) {
-    default: llvm_unreachable(0);
+    default: llvm_unreachable(nullptr);
    case X86ISD::VSHLI:
      for (unsigned i=0; i!=NumElts; ++i) {
        SDValue CurrentOp = SrcOp->getOperand(i);
@@ -11573,7 +11596,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
      break;
    }
 
-    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElts);
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
  }
 
  return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));
@@ -11605,7 +11628,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
    ShOps[0] = ShAmt;
    ShOps[1] = DAG.getConstant(0, MVT::i32);
    ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
-    ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
+    ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, ShOps);
 
    // The return type has to be a 128-bit type with the same element
    // type as the input type.
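For the intrinsic hunk that follows: pmulhu_w and pmulh_w return, for every 16-bit lane, the high half of the full 32-bit product, which is precisely what the generic ISD::MULHU and ISD::MULHS nodes model, so mapping the intrinsics onto those nodes lets target-independent combines see through them. A scalar reference model of the per-lane semantics (hypothetical helper names; standard pmulhuw/pmulhw behavior):

    #include <cassert>
    #include <cstdint>

    // High 16 bits of the unsigned 32-bit product (ISD::MULHU per lane).
    uint16_t mulhu16(uint16_t A, uint16_t B) {
      return static_cast<uint16_t>((static_cast<uint32_t>(A) * B) >> 16);
    }

    // High 16 bits of the signed 32-bit product (ISD::MULHS per lane).
    int16_t mulhs16(int16_t A, int16_t B) {
      return static_cast<int16_t>((static_cast<int32_t>(A) * B) >> 16);
    }

    int main() {
      assert(mulhu16(0xFFFF, 0xFFFF) == 0xFFFE); // 65535*65535 = 0xFFFE0001
      assert(mulhs16(-1, -1) == 0);              // 1, high half is 0
      assert(mulhs16(-32768, 2) == -1);          // -65536, high half is -1
      return 0;
    }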
@@ -11728,6 +11751,21 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
    return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
 
+  case Intrinsic::x86_sse41_pmuldq:
+  case Intrinsic::x86_avx2_pmul_dq:
+    return DAG.getNode(X86ISD::PMULDQ, dl, Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2));
+
+  case Intrinsic::x86_sse2_pmulhu_w:
+  case Intrinsic::x86_avx2_pmulhu_w:
+    return DAG.getNode(ISD::MULHU, dl, Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2));
+
+  case Intrinsic::x86_sse2_pmulh_w:
+  case Intrinsic::x86_avx2_pmulh_w:
+    return DAG.getNode(ISD::MULHS, dl, Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2));
+
  // SSE2/AVX2 sub with unsigned saturation intrinsics
  case Intrinsic::x86_sse2_psubus_b:
  case Intrinsic::x86_sse2_psubus_w:
@@ -12179,7 +12217,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
    }
    SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
-    SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+    SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps);
    SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                                DAG.getConstant(X86CC, MVT::i8),
                                SDValue(PCMP.getNode(), 1));
@@ -12196,7 +12234,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
 
    SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
-    return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+    return DAG.getNode(Opcode, dl, VTs, NewOps);
  }
  case Intrinsic::x86_fma_vfmadd_ps:
  case Intrinsic::x86_fma_vfmadd_pd:
@@ -12311,7 +12349,7 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
  SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
  SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
-  return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
+  return DAG.getMergeValues(RetOps, dl);
 }
 
 static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
@@ -12333,7 +12371,7 @@ static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
  SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
  SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
-  return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
+  return DAG.getMergeValues(RetOps, dl);
 }
 
 static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
@@ -12379,8 +12417,7 @@ static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode,
                              SelectionDAG &DAG, const X86Subtarget *Subtarget,
                              SmallVectorImpl<SDValue> &Results) {
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue TheChain = N->getOperand(0);
-  SDValue rd = DAG.getNode(Opcode, DL, Tys, &TheChain, 1);
+  SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
  SDValue LO, HI;
 
  // The processor's time-stamp counter (a 64-bit MSR) is stored into the
@@ -12422,8 +12459,7 @@ static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode,
 
  // Use a buildpair to merge the two 32-bit values into a 64-bit one.
  SDValue Ops[] = { LO, HI };
-  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops,
-                             array_lengthof(Ops));
+  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
  Results.push_back(Pair);
  Results.push_back(Chain);
 }
@@ -12434,7 +12470,7 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
  SDLoc DL(Op);
  getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
                          Results);
-  return DAG.getMergeValues(&Results[0], Results.size(), DL);
+  return DAG.getMergeValues(Results, DL);
 }
 
 static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
@@ -12467,7 +12503,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
                      SDValue(Result.getNode(), 1) };
    SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
                                  DAG.getVTList(Op->getValueType(1), MVT::Glue),
-                                  Ops, array_lengthof(Ops));
+                                  Ops);
 
    // Return { result, isValid, chain }.
    return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
@@ -12625,7 +12661,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
    }
    SmallVector<SDValue, 2> Results;
    getReadTimeStampCounter(Op.getNode(), dl, Opc, DAG, Subtarget, Results);
-    return DAG.getMergeValues(&Results[0], Results.size(), dl);
+    return DAG.getMergeValues(Results, dl);
  }
  // XTEST intrinsics.
  case Intrinsic::x86_xtest: {
@@ -12691,6 +12727,18 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  return FrameAddr;
 }
 
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned X86TargetLowering::getRegisterByName(const char* RegName) const {
+  unsigned Reg = StringSwitch<unsigned>(RegName)
+                   .Case("esp", X86::ESP)
+                   .Case("rsp", X86::RSP)
+                   .Default(0);
+  if (Reg)
+    return Reg;
+  report_fatal_error("Invalid register name global variable");
+}
+
 SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
                                                     SelectionDAG &DAG) const {
  const X86RegisterInfo *RegInfo =
@@ -12810,7 +12858,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                MachinePointerInfo(TrmpAddr, 22),
                                false, false, 0);
 
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6);
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
  } else {
    const Function *Func =
      cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
@@ -12890,7 +12938,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                MachinePointerInfo(TrmpAddr, 6),
                                false, false, 1);
 
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4);
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
  }
 }
 
@@ -12933,8 +12981,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
  SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
  SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
                                          DAG.getVTList(MVT::Other),
-                                          Ops, array_lengthof(Ops), MVT::i16,
-                                          MMO);
+                                          Ops, MVT::i16, MMO);
 
  // Load FP Control Word from stack slot
  SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
@@ -12987,7 +13034,7 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
    DAG.getConstant(X86::COND_E, MVT::i8),
    Op.getValue(1)
  };
-  Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
+  Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops);
 
  // Finally xor with NumBits-1.
  Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
 
@@ -13039,7 +13086,7 @@ static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
    DAG.getConstant(X86::COND_E, MVT::i8),
    Op.getValue(1)
  };
-  return DAG.getNode(X86ISD::CMOV, dl, VT, Ops, array_lengthof(Ops));
+  return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);
 }
 
 // Lower256IntArith - Break a 256-bit integer operation into two new 128-bit
@@ -13157,59 +13204,104 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
  return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
 }
 
-static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
-  MVT VT = Op.getSimpleValueType();
-  MVT EltTy = VT.getVectorElementType();
-  unsigned NumElts = VT.getVectorNumElements();
-  SDValue N0 = Op.getOperand(0);
+SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetWin64() && "Unexpected target");
+  EVT VT = Op.getValueType();
+  assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
+         "Unexpected return type for lowering");
+
+  RTLIB::Libcall LC;
+  bool isSigned;
+  switch (Op->getOpcode()) {
+  default: llvm_unreachable("Unexpected request for libcall!");
+  case ISD::SDIV:    isSigned = true;  LC = RTLIB::SDIV_I128;    break;
+  case ISD::UDIV:    isSigned = false; LC = RTLIB::UDIV_I128;    break;
+  case ISD::SREM:    isSigned = true;  LC = RTLIB::SREM_I128;    break;
+  case ISD::UREM:    isSigned = false; LC = RTLIB::UREM_I128;    break;
+  case ISD::SDIVREM: isSigned = true;  LC = RTLIB::SDIVREM_I128; break;
+  case ISD::UDIVREM: isSigned = false; LC = RTLIB::UDIVREM_I128; break;
+  }
+
+  SDLoc dl(Op);
+  SDValue InChain = DAG.getEntryNode();
 
-  // Lower sdiv X, pow2-const.
-  BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
-  if (!C)
-    return SDValue();
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = Op->getOperand(i).getValueType();
+    assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
+           "Unexpected argument type for lowering");
+    SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
+    Entry.Node = StackPtr;
+    InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr,
+                           MachinePointerInfo(), false, false, 16);
+    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Ty = PointerType::get(ArgTy,0);
+    Entry.isSExt = false;
+    Entry.isZExt = false;
+    Args.push_back(Entry);
+  }
+
+  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+                                         getPointerTy());
 
-  APInt SplatValue, SplatUndef;
-  unsigned SplatBitSize;
-  bool HasAnyUndefs;
-  if (!C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
-                          HasAnyUndefs) ||
-      EltTy.getSizeInBits() < SplatBitSize)
-    return SDValue();
+  TargetLowering::CallLoweringInfo CLI(
+      InChain, static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()),
+      isSigned, !isSigned, false, true, 0, getLibcallCallingConv(LC),
+      /*isTailCall=*/false,
+      /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG,
+      dl);
+  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+
+  return DAG.getNode(ISD::BITCAST, dl, VT, CallInfo.first);
+}
+
+static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
+                             SelectionDAG &DAG) {
+  SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+  EVT VT = Op0.getValueType();
+  SDLoc dl(Op);
+
+  assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
+         (VT == MVT::v8i32 && Subtarget->hasInt256()));
+
+  // Get the high parts.
+  const int Mask[] = {1, 2, 3, 4, 5, 6, 7, 8};
+  SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
+  SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
 
-  if ((SplatValue != 0) &&
-      (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
-    unsigned Lg2 = SplatValue.countTrailingZeros();
-    // Splat the sign bit.
-    SmallVector<SDValue, 16> Sz(NumElts,
-                                DAG.getConstant(EltTy.getSizeInBits() - 1,
-                                                EltTy));
-    SDValue SGN = DAG.getNode(ISD::SRA, dl, VT, N0,
-                              DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Sz[0],
-                                          NumElts));
-    // Add (N0 < 0) ? abs2 - 1 : 0;
-    SmallVector<SDValue, 16> Amt(NumElts,
-                                 DAG.getConstant(EltTy.getSizeInBits() - Lg2,
-                                                 EltTy));
-    SDValue SRL = DAG.getNode(ISD::SRL, dl, VT, SGN,
-                              DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Amt[0],
-                                          NumElts));
-    SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
-    SmallVector<SDValue, 16> Lg2Amt(NumElts, DAG.getConstant(Lg2, EltTy));
-    SDValue SRA = DAG.getNode(ISD::SRA, dl, VT, ADD,
-                              DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Lg2Amt[0],
-                                          NumElts));
-
-    // If we're dividing by a positive value, we're done.  Otherwise, we must
-    // negate the result.
-    if (SplatValue.isNonNegative())
-      return SRA;
-
-    SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy));
-    SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts);
-    return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA);
+  // Emit two multiplies, one for the lower 2 ints and one for the higher 2
+  // ints.
+  MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
+  bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
+  unsigned Opcode =
+      (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
+  SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT,
+                             DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
+  SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT,
+                             DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1));
+
+  // Shuffle it back into the right order.
+  const int HighMask[] = {1, 5, 3, 7, 9, 13, 11, 15};
+  SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+  const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14};
+  SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+
+  // If we have a signed multiply but no PMULDQ, fix up the high parts of an
+  // unsigned multiply.
+  if (IsSigned && !Subtarget->hasSSE41()) {
+    SDValue ShAmt =
+        DAG.getConstant(31, DAG.getTargetLoweringInfo().getShiftAmountTy(VT));
+    SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
+                             DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
+    SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
+                             DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
+
+    SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
+    Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
  }
-  return SDValue();
+
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Highs, Lows);
 }
 
@@ -13253,7 +13345,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
                                     DAG.getConstant(uint8_t(-1U << ShiftAmt),
                                                     MVT::i8));
        return DAG.getNode(ISD::AND, dl, VT, SHL,
-                           DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+                           DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
      }
      if (Op.getOpcode() == ISD::SRL) {
        // Make a large shift.
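One detail of LowerMUL_LOHI above is worth spelling out: on plain SSE2 there is no signed PMULDQ, so a signed high-part multiply is emitted as the unsigned PMULUDQ and then corrected with Highs -= ((Op0 >> 31) & Op1) + ((Op1 >> 31) & Op0). The correction works because reinterpreting a negative 32-bit lane as unsigned adds 2^32, which after multiplication shows up in the high half as exactly the other operand. A scalar demonstration of that identity (hypothetical function names; a sketch of the arithmetic, not the vector lowering itself):

    #include <cassert>
    #include <cstdint>

    // Signed high 32 bits, derived from the unsigned high product the way
    // the SSE2 fallback in LowerMUL_LOHI does it.
    int32_t signed_mulhi_via_unsigned(int32_t A, int32_t B) {
      uint32_t UA = static_cast<uint32_t>(A), UB = static_cast<uint32_t>(B);
      uint32_t HiU =
          static_cast<uint32_t>((static_cast<uint64_t>(UA) * UB) >> 32);
      // (A >> 31) is all-ones when A is negative, so each AND contributes
      // the other operand exactly when the corresponding input is negative.
      uint32_t T1 = static_cast<uint32_t>(A >> 31) & UB;
      uint32_t T2 = static_cast<uint32_t>(B >> 31) & UA;
      return static_cast<int32_t>(HiU - T1 - T2);
    }

    // Reference: high 32 bits of the true signed 64-bit product.
    int32_t signed_mulhi_reference(int32_t A, int32_t B) {
      return static_cast<int32_t>((static_cast<int64_t>(A) * B) >> 32);
    }

    int main() {
      const int32_t Tests[] = {0, 1, -1, 7, -42, 0x7FFFFFFF, INT32_MIN};
      for (int32_t A : Tests)
        for (int32_t B : Tests)
          assert(signed_mulhi_via_unsigned(A, B) ==
                 signed_mulhi_reference(A, B));
      return 0;
    }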
@@ -13266,7 +13358,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
                                     DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
                                                     MVT::i8));
        return DAG.getNode(ISD::AND, dl, VT, SRL,
-                           DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+                           DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
      }
      if (Op.getOpcode() == ISD::SRA) {
        if (ShiftAmt == 7) {
@@ -13279,7 +13371,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
        SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
        SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
                                                       MVT::i8));
-        SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
+        SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
        Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
        Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
        return Res;
@@ -13299,7 +13391,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
                                     DAG.getConstant(uint8_t(-1U << ShiftAmt),
                                                     MVT::i8));
        return DAG.getNode(ISD::AND, dl, VT, SHL,
-                           DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+                           DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
      }
      if (Op.getOpcode() == ISD::SRL) {
        // Make a large shift.
@@ -13312,7 +13404,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
                                     DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
                                                     MVT::i8));
        return DAG.getNode(ISD::AND, dl, VT, SRL,
-                           DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+                           DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
      }
      if (Op.getOpcode() == ISD::SRA) {
        if (ShiftAmt == 7) {
@@ -13325,7 +13417,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
        SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
        SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
                                                       MVT::i8));
-        SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
+        SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
        Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
        Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
        return Res;
@@ -13593,7 +13685,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
      }
      Elts.push_back(DAG.getConstant(One.shl(ShAmt), SVT));
    }
-    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElems);
+    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
    return DAG.getNode(ISD::MUL, dl, VT, R, BV);
  }
 
@@ -13757,10 +13849,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
      for (unsigned i = NumElems/2; i != NumElems; ++i)
        Amt2Csts.push_back(Amt->getOperand(i));
 
-      Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
-                         &Amt1Csts[0], NumElems/2);
-      Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
-                         &Amt2Csts[0], NumElems/2);
+      Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt1Csts);
+      Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt2Csts);
    } else {
      // Variable shift amount
      Amt1 = Extract128BitVector(Amt, 0, DAG, dl);
@@ -13991,7 +14081,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
  SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
-                                           Ops, array_lengthof(Ops), T, MMO);
+                                           Ops, T, MMO);
  SDValue cpOut =
    DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
  return cpOut;
@@ -14201,6 +14291,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  case ISD::CTLZ_ZERO_UNDEF:    return LowerCTLZ_ZERO_UNDEF(Op, DAG);
  case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, Subtarget, DAG);
+  case ISD::UMUL_LOHI:
+  case ISD::SMUL_LOHI:          return LowerMUL_LOHI(Op, Subtarget, DAG);
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SHL:                return LowerShift(Op, Subtarget, DAG);
@@ -14218,7 +14310,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  case ISD::SUBE:               return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
  case ISD::ADD:                return LowerADD(Op, DAG);
  case ISD::SUB:                return LowerSUB(Op, DAG);
-  case ISD::SDIV:               return LowerSDIV(Op, DAG);
  case ISD::FSINCOS:            return LowerFSINCOS(Op, Subtarget, DAG);
  }
 }
@@ -14261,10 +14352,10 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue> &Results,
  SDValue Ops[] = { Chain, In1, In2L, In2H };
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
  SDValue Result =
-    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, array_lengthof(Ops), MVT::i64,
+    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, MVT::i64,
                            cast<MemSDNode>(Node)->getMemOperand());
  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
-  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF));
  Results.push_back(Result.getValue(2));
 }
 
@@ -14285,6 +14376,16 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
  case ISD::SUBE:
    // We don't want to expand or promote these.
    return;
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM:
+  case ISD::SDIVREM:
+  case ISD::UDIVREM: {
+    SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
+    Results.push_back(V);
+    return;
+  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
@@ -14382,8 +14483,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
    MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
    unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
                                  X86ISD::LCMPXCHG8_DAG;
-    SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys,
-                                             Ops, array_lengthof(Ops), T, MMO);
+    SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
    SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
                                        Regs64bit ? X86::RAX : X86::EAX,
                                        HalfT, Result.getValue(1));
    SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl,
                                        Regs64bit ? X86::RDX : X86::EDX,
                                        HalfT, cpOutL.getValue(2));
    SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
-    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF, 2));
+    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
    Results.push_back(cpOutH.getValue(1));
    return;
  }
@@ -14598,6 +14698,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  case X86ISD::VPERMIV3:           return "X86ISD::VPERMIV3";
  case X86ISD::VPERMI:             return "X86ISD::VPERMI";
  case X86ISD::PMULUDQ:            return "X86ISD::PMULUDQ";
+  case X86ISD::PMULDQ:             return "X86ISD::PMULDQ";
  case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
  case X86ISD::VAARG_64:           return "X86ISD::VAARG_64";
  case X86ISD::WIN_ALLOCA:         return "X86ISD::WIN_ALLOCA";
@@ -17069,7 +17170,6 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
      SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
      SDValue ResNode =
        DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
-                                array_lengthof(Ops),
                                Ld->getMemoryVT(),
                                Ld->getPointerInfo(),
                                Ld->getAlignment(),
@@ -18130,8 +18230,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
      (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
    SDValue Ops[] = { FalseOp, TrueOp,
                      DAG.getConstant(CC, MVT::i8), Flags };
-    return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
-                       Ops, array_lengthof(Ops));
+    return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
  }
 
  // If this is a select between two integer constants, try to do some
@@ -18261,8 +18360,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
        CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
      SDValue Ops[] = { FalseOp, Cond.getOperand(0),
                        DAG.getConstant(CC, MVT::i8), Cond };
-      return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops,
-                         array_lengthof(Ops));
+      return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops);
    }
  }
 }
@@ -18613,7 +18711,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
                       N1->getOperand(0));
      SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
-      N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size());
+      N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C);
    } else if (RHSTrunc) {
      N1 = N1->getOperand(0);
    }
@@ -18896,8 +18994,7 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
    SDValue Ops[] = { N0.getOperand(0), Neg,
                      DAG.getConstant(X86::COND_GE, MVT::i8),
                      SDValue(Neg.getNode(), 1) };
-    return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue),
-                       Ops, array_lengthof(Ops));
+    return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops);
  }
  return SDValue();
 }
@@ -19054,8 +19151,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
  }
 
-  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
-                           Chains.size());
+  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
 
  // Bitcast the loaded value to a vector of the original element type, in
  // the size of the target vector type.
@@ -19230,8 +19326,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
      Chains.push_back(Ch);
    }
 
-    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
-                       Chains.size());
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
  }
 
  // Turn load->store of MMX types into GPR load/stores.  This avoids clobbering
@@ -19297,8 +19392,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
    SDValue NewChain = NewLd.getValue(1);
    if (TokenFactorIndex != -1) {
      Ops.push_back(NewChain);
-      NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
-                             Ops.size());
+      NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
    }
    return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
                        St->getPointerInfo(),
@@ -19325,8 +19419,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
    if (TokenFactorIndex != -1) {
      Ops.push_back(LoLd);
      Ops.push_back(HiLd);
-      NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
-                             Ops.size());
+      NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
    }
 
    LoAddr = St->getBasePtr();
@@ -20856,8 +20949,22 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
                                            Type *Ty) const {
  // Scaling factors are not free at all.
  // An indexed folded instruction, i.e., inst (reg1, reg2, scale),
-  // will take 2 allocations instead of 1 for plain addressing mode,
-  // i.e. inst (reg1).
+  // will take 2 allocations in the out of order engine instead of 1
+  // for plain addressing mode, i.e. inst (reg1).
+  // E.g.,
+  // vaddps (%rsi,%rdx), %ymm0, %ymm1
+  // Requires two allocations (one for the load, one for the computation),
+  // whereas:
+  // vaddps (%rsi), %ymm0, %ymm1
+  // Requires just 1 allocation, i.e., freeing allocations for other
+  // operations and having fewer micro operations to execute.
+  //
+  // For some X86 architectures, this is even worse because, for instance for
+  // stores, the complex addressing mode forces the instruction to use the
+  // "load" ports instead of the dedicated "store" port.
+  // E.g., on Haswell:
+  // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
+  // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
  if (isLegalAddressingMode(AM, Ty))
    // Scale represents reg2 * scale, thus account for 1
    // as soon as we use a second register.
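The final hunk, inside which the page is cut off, only expands the comment in getScalingFactorCost; the decision logic is untouched. As a rough model of the behavior that comment justifies (a hedged sketch with simplified stand-in types, not the verbatim LLVM code, since the function body is truncated here):

    #include <cstdint>

    // Simplified model of the addressing-mode cost query discussed above;
    // the real interface is X86TargetLowering::getScalingFactorCost with
    // llvm::TargetLowering::AddrMode.
    struct AddrModeSketch {
      int64_t BaseOffs; // displacement
      bool HasBaseReg;  // reg1 present
      int64_t Scale;    // reg2 * Scale, 0 when there is no index register
    };

    bool isLegalAddressingModeSketch(const AddrModeSketch &AM) {
      // x86 accepts base + index*scale + disp for scale in {1, 2, 4, 8}.
      return AM.Scale == 0 || AM.Scale == 1 || AM.Scale == 2 ||
             AM.Scale == 4 || AM.Scale == 8;
    }

    // Negative means "not legal at all"; otherwise the extra cost. A scaled
    // index costs 1 because it occupies a second register and a second
    // allocation in the out-of-order engine, per the comment in the hunk.
    int getScalingFactorCostSketch(const AddrModeSketch &AM) {
      if (isLegalAddressingModeSketch(AM))
        return AM.Scale != 0 ? 1 : 0;
      return -1;
    }

    int main() {
      AddrModeSketch Plain = {0, true, 0};  // inst (reg1)
      AddrModeSketch Scaled = {0, true, 4}; // inst (reg1, reg2, 4)
      return getScalingFactorCostSketch(Plain) == 0 &&
                     getScalingFactorCostSketch(Scaled) == 1
                 ? 0
                 : 1;
    }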