X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FX86%2FX86ISelLowering.cpp;h=38e6342ae6544a93babddb6eddd8fc0e08ae7b89;hb=29e4bdbf27c5f03b12dd2bc41d9ccb0d5f3dfdf4;hp=f4ec6afd5406ff02cb92642b215a472bc5932daa;hpb=68d599df37218452acd5a680d5360d3caaa1623c;p=oota-llvm.git diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f4ec6afd540..38e6342ae65 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -206,7 +206,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::BRCOND , MVT::Other, Custom); setOperationAction(ISD::BR_CC , MVT::Other, Expand); setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); @@ -281,9 +280,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom); setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); } - // X86 wants to expand memset / memcpy itself. - setOperationAction(ISD::MEMSET , MVT::Other, Custom); - setOperationAction(ISD::MEMCPY , MVT::Other, Custom); if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); @@ -983,8 +979,8 @@ static bool ArgsAreStructReturn(SDOperand Op) { return cast(Op.getOperand(3))->getArgFlags().isSRet(); } -/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires the -/// callee to pop its own arguments. Callee pop is necessary to support tail +/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires +/// the callee to pop its own arguments. Callee pop is necessary to support tail /// calls. bool X86TargetLowering::IsCalleePop(SDOperand Op) { bool IsVarArg = cast(Op.getOperand(2))->getValue() != 0; @@ -1108,15 +1104,15 @@ CopyTailCallClobberedArgumentsToVRegs(SDOperand Chain, /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by -/// the specific parameter attribute. The copy will be passed as a byval function -/// parameter. +/// the specific parameter attribute. The copy will be passed as a byval +/// function parameter. static SDOperand CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG) { - SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32); SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); - SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32); - return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline); + return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), + /*AlwaysInline=*/true, + NULL, 0, NULL, 0); } SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG, @@ -1351,6 +1347,99 @@ X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG, PseudoSourceValue::getStack(), LocMemOffset); } +/// EmitTailCallLoadRetAddr - Emit a load of return adress if tail call +/// optimization is performed and it is required. +SDOperand +X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, + SDOperand &OutRetAddr, + SDOperand Chain, + bool IsTailCall, + bool Is64Bit, + int FPDiff) { + if (!IsTailCall || FPDiff==0) return Chain; + + // Adjust the Return address stack slot. 
+  MVT::ValueType VT = getPointerTy();
+  OutRetAddr = getReturnAddressFrameIndex(DAG);
+  // Load the "old" Return address.
+  OutRetAddr = DAG.getLoad(VT, Chain, OutRetAddr, NULL, 0);
+  return SDOperand(OutRetAddr.Val, 1);
+}
+
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDOperand
+EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
+                         SDOperand Chain, SDOperand RetAddrFrIdx,
+                         bool Is64Bit, int FPDiff) {
+  // Store the return address to the appropriate stack slot.
+  if (!FPDiff) return Chain;
+  // Calculate the new stack slot for the return address.
+  int SlotSize = Is64Bit ? 8 : 4;
+  int NewReturnAddrFI =
+    MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+  MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
+  SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+  Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx,
+                       PseudoSourceValue::getFixedStack(), NewReturnAddrFI);
+  return Chain;
+}
+
+/// CopyTailCallByValClobberedRegToVirtReg - Copy arguments with a register
+/// target that might be overwritten by later byval tail call lowering to a
+/// virtual register.
+bool
+X86TargetLowering::CopyTailCallByValClobberedRegToVirtReg(bool containsByValArg,
+    SmallVector<std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
+    SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes,
+    std::pair<unsigned, SDOperand> &RegToPass,
+    SDOperand &OutChain,
+    SDOperand &OutFlag,
+    MachineFunction &MF,
+    SelectionDAG &DAG) {
+  if (!containsByValArg) return false;
+
+  std::pair<unsigned, unsigned> ArgRegVReg;
+  MVT::ValueType VT = RegToPass.second.getValueType();
+
+  ArgRegVReg.first = RegToPass.first;
+  ArgRegVReg.second = MF.getRegInfo().createVirtualRegister(getRegClassFor(VT));
+
+  // Copy the argument to a virtual register.
+  OutChain = DAG.getCopyToReg(OutChain, ArgRegVReg.second,
+                              RegToPass.second, OutFlag);
+  OutFlag = OutChain.getValue(1);
+  // Remember the virtual register and type.
+  TailCallByValClobberedVRegs.push_back(ArgRegVReg);
+  TailCallByValClobberedVRegTypes.push_back(VT);
+  return true;
+}
+
+/// RestoreTailCallByValClobberedRegs - Restore registers which were saved to
+/// virtual registers to prevent tail call byval lowering from overwriting
+/// parameter registers.
+static SDOperand
+RestoreTailCallByValClobberedRegs(SelectionDAG &DAG, SDOperand Chain,
+    SmallVector<std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
+    SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes) {
+  if (TailCallByValClobberedVRegs.size()==0) return Chain;
+
+  SmallVector<SDOperand, 8> RegOpChains;
+  for (unsigned i = 0, e=TailCallByValClobberedVRegs.size(); i != e; i++) {
+    SDOperand InFlag;
+    unsigned DestReg = TailCallByValClobberedVRegs[i].first;
+    unsigned VirtReg = TailCallByValClobberedVRegs[i].second;
+    MVT::ValueType VT = TailCallByValClobberedVRegTypes[i];
+    SDOperand Tmp = DAG.getCopyFromReg(Chain, VirtReg, VT, InFlag);
+    Chain = DAG.getCopyToReg(Chain, DestReg, Tmp, InFlag);
+    RegOpChains.push_back(Chain);
+  }
+  if (!RegOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+                        &RegOpChains[0], RegOpChains.size());
+  return Chain;
+}
 
 SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
   MachineFunction &MF = DAG.getMachineFunction();
@@ -1400,30 +1489,29 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));
 
   SDOperand RetAddrFrIdx;
-  if (IsTailCall) {
-    // Adjust the Return address stack slot.
-    if (FPDiff) {
-      MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
-      RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
-      // Load the "old" Return address.
-      RetAddrFrIdx =
-        DAG.getLoad(VT, Chain,RetAddrFrIdx, NULL, 0);
-      Chain = SDOperand(RetAddrFrIdx.Val, 1);
-    }
-  }
+  // Load return address for tail calls.
+  Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
+                                  FPDiff);
 
   SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
   SmallVector<std::pair<unsigned, SDOperand>, 8> TailCallClobberedVRegs;
+
   SmallVector<SDOperand, 8> MemOpChains;
 
   SDOperand StackPtr;
 
+  bool containsTailCallByValArg = false;
+  SmallVector<std::pair<unsigned, unsigned>, 8> TailCallByValClobberedVRegs;
+  SmallVector<MVT::ValueType, 8> TailCallByValClobberedVRegTypes;
+
   // Walk the register/memloc assignments, inserting copies/loads.  For tail
   // calls, remember all arguments for later special lowering.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
+    bool isByVal = cast<ARG_FLAGSSDNode>(Op.getOperand(6+2*VA.getValNo()))->
+      getArgFlags().isByVal();
+
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
     default: assert(0 && "Unknown loc info!");
@@ -1442,13 +1530,15 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
     if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
     } else {
-      if (!IsTailCall) {
+      if (!IsTailCall || (IsTailCall && isByVal)) {
        assert(VA.isMemLoc());
        if (StackPtr.Val == 0)
          StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
 
        MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
                                               Arg));
+        // Remember that this call contains byval arguments.
+        containsTailCallByValArg |= IsTailCall && isByVal;
      } else if (IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
        TailCallClobberedVRegs.push_back(std::make_pair(i,Arg));
      }
@@ -1463,6 +1553,16 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
   // and flag operands which copy the outgoing args into registers.
   SDOperand InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    // Tail call byval lowering might overwrite argument registers, so
+    // arguments passed in registers need to be copied to virtual registers
+    // for later processing.
+    if (CopyTailCallByValClobberedRegToVirtReg(containsTailCallByValArg,
+                                               TailCallByValClobberedVRegs,
+                                               TailCallByValClobberedVRegTypes,
+                                               RegsToPass[i], Chain, InFlag, MF,
+                                               DAG))
+      continue;
+
     Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                              InFlag);
     InFlag = Chain.getValue(1);
@@ -1537,7 +1637,7 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
       int32_t Offset = VA.getLocMemOffset()+FPDiff;
       uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
       FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
-      FIN = DAG.getFrameIndex(FI, MVT::i32);
+      FIN = DAG.getFrameIndex(FI, getPointerTy());
 
       // Find virtual register for this argument.
       bool Found=false;
@@ -1552,7 +1652,12 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
 
       if (Flags.isByVal()) {
         // Copy relative to framepointer.
-        MemOpChains2.push_back(CreateCopyOfByValArgument(Arg, FIN, Chain,
+        SDOperand Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+        if (StackPtr.Val == 0)
+          StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+        Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
+
+        MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
                                                          Flags, DAG));
      } else {
        // Store relative to framepointer.
@@ -1567,17 +1672,14 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOpChains2[0], MemOpChains2.size()); + // Restore byval lowering clobbered registers. + Chain = RestoreTailCallByValClobberedRegs(DAG, Chain, + TailCallByValClobberedVRegs, + TailCallByValClobberedVRegTypes); + // Store the return address to the appropriate stack slot. - if (FPDiff) { - // Calculate the new stack slot for the return address. - int SlotSize = Is64Bit ? 8 : 4; - int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); - MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32; - SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); - Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(), NewReturnAddrFI); - } + Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit, + FPDiff); } // If the callee is a GlobalAddress node (quite common, every direct call is) @@ -2782,23 +2884,28 @@ static SDOperand getSwapEltZeroMask(unsigned NumElems, unsigned DestElt, return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size()); } -/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32. -/// -static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) { +/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32. +static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG, bool HasSSE2) { + MVT::ValueType PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32; + MVT::ValueType VT = Op.getValueType(); + if (PVT == VT) + return Op; SDOperand V1 = Op.getOperand(0); SDOperand Mask = Op.getOperand(2); - MVT::ValueType VT = Op.getValueType(); unsigned NumElems = Mask.getNumOperands(); - Mask = getUnpacklMask(NumElems, DAG); - while (NumElems != 4) { - V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); - NumElems >>= 1; + // Special handling of v4f32 -> v4i32. + if (VT != MVT::v4f32) { + Mask = getUnpacklMask(NumElems, DAG); + while (NumElems > 4) { + V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask); + NumElems >>= 1; + } + Mask = getZeroVector(MVT::v4i32, DAG); } - V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1); - Mask = getZeroVector(MVT::v4i32, DAG); - SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1, - DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask); + V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1); + SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1, + DAG.getNode(ISD::UNDEF, PVT), Mask); return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle); } @@ -3426,6 +3533,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { SDOperand PermMask = Op.getOperand(2); MVT::ValueType VT = Op.getValueType(); unsigned NumElems = PermMask.getNumOperands(); + bool isMMX = MVT::getSizeInBits(VT) == 64; bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; @@ -3443,9 +3551,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { return V2; if (isSplatMask(PermMask.Val)) { - if (NumElems <= 4) return Op; - // Promote it to a v4i32 splat. - return PromoteSplat(Op, DAG); + if (isMMX || NumElems < 4) return Op; + // Promote it to a v4{if}32 splat. 
+ return PromoteSplat(Op, DAG, Subtarget->hasSSE2()); } // If the shuffle can be profitably rewritten as a narrower shuffle, then @@ -3556,35 +3664,39 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { return Op; } - // If VT is integer, try PSHUF* first, then SHUFP*. - if (MVT::isInteger(VT)) { - // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically - // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. - if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) && - X86::isPSHUFDMask(PermMask.Val)) || - X86::isPSHUFHWMask(PermMask.Val) || - X86::isPSHUFLWMask(PermMask.Val)) { - if (V2.getOpcode() != ISD::UNDEF) - return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, - DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); + // Try PSHUF* first, then SHUFP*. + // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically + // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented. + if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.Val)) { + if (V2.getOpcode() != ISD::UNDEF) + return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, + DAG.getNode(ISD::UNDEF, VT), PermMask); + return Op; + } + + if (!isMMX) { + if (Subtarget->hasSSE2() && + (X86::isPSHUFDMask(PermMask.Val) || + X86::isPSHUFHWMask(PermMask.Val) || + X86::isPSHUFLWMask(PermMask.Val))) { + MVT::ValueType RVT = VT; + if (VT == MVT::v4f32) { + RVT = MVT::v4i32; + Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, + DAG.getNode(ISD::BIT_CONVERT, RVT, V1), + DAG.getNode(ISD::UNDEF, RVT), PermMask); + } else if (V2.getOpcode() != ISD::UNDEF) + Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, V1, + DAG.getNode(ISD::UNDEF, RVT), PermMask); + if (RVT != VT) + Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op); return Op; } - if (X86::isSHUFPMask(PermMask.Val) && - MVT::getSizeInBits(VT) != 64) // Don't do this for MMX. - return Op; - } else { - // Floating point cases in the other order. - if (X86::isSHUFPMask(PermMask.Val)) + // Binary or unary shufps. + if (X86::isSHUFPMask(PermMask.Val) || + (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.Val))) return Op; - if (X86::isPSHUFDMask(PermMask.Val) || - X86::isPSHUFHWMask(PermMask.Val) || - X86::isPSHUFLWMask(PermMask.Val)) { - if (V2.getOpcode() != ISD::UNDEF) - return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, - DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask); - return Op; - } } // Handle v8i16 specifically since SSE can do byte extraction and insertion. @@ -3595,7 +3707,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { } // Handle all 4 wide cases with a number of shuffles. - if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) { + if (NumElems == 4 && !isMMX) { // Don't do this for MMX. MVT::ValueType MaskVT = PermMask.getValueType(); MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT); @@ -4547,51 +4659,51 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2); } -SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { - SDOperand InFlag(0, 0); - SDOperand Chain = Op.getOperand(0); - unsigned Align = - (unsigned)cast(Op.getOperand(4))->getValue(); - if (Align == 0) Align = 1; - - ConstantSDNode *I = dyn_cast(Op.getOperand(3)); - // If not DWORD aligned or size is more than the threshold, call memset. - // The libc version is likely to be faster for these cases. It can use the - // address value and run time information about the CPU. 
- if ((Align & 3) != 0 || - (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) { +SDOperand +X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + const Value *DstSV, uint64_t DstOff) { + ConstantSDNode *ConstantSize = dyn_cast(Size); + + /// If not DWORD aligned or size is more than the threshold, call the library. + /// The libc version is likely to be faster for these cases. It can use the + /// address value and run time information about the CPU. + if ((Align & 3) == 0 || + !ConstantSize || + ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) { + SDOperand InFlag(0, 0); // Check to see if there is a specialized entry-point for memory zeroing. - const char *bzeroEntry = Subtarget->getBZeroEntry(); - ConstantSDNode *V = dyn_cast(Op.getOperand(2)); - - MVT::ValueType IntPtr = getPointerTy(); - const Type *IntPtrTy = getTargetData()->getIntPtrType(); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Op.getOperand(1); - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - - if (!bzeroEntry) { - // Extend the unsigned i8 argument to be an int value for the call. - Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2)); + ConstantSDNode *V = dyn_cast(Src); + if (const char *bzeroEntry = + V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { + MVT::ValueType IntPtr = getPointerTy(); + const Type *IntPtrTy = getTargetData()->getIntPtrType(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + std::pair CallResult = + LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, + false, DAG.getExternalSymbol(bzeroEntry, IntPtr), + Args, DAG); + return CallResult.second; } - Entry.Node = Op.getOperand(3); - Args.push_back(Entry); - const char *Name = bzeroEntry ? bzeroEntry : "memset"; - std::pair CallResult = - LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C, - false, DAG.getExternalSymbol(Name, IntPtr), Args, DAG); - return CallResult.second; + // Otherwise have the target-independent code call memset. 
+ return SDOperand(); } + uint64_t SizeVal = ConstantSize->getValue(); + SDOperand InFlag(0, 0); MVT::ValueType AVT; SDOperand Count; - ConstantSDNode *ValC = dyn_cast(Op.getOperand(2)); + ConstantSDNode *ValC = dyn_cast(Src); unsigned BytesLeft = 0; bool TwoRepStos = false; if (ValC) { @@ -4610,7 +4722,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { ValReg = X86::EAX; Val = (Val << 8) | Val; Val = (Val << 16) | Val; - if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned + if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned AVT = MVT::i64; ValReg = X86::RAX; Val = (Val << 32) | Val; @@ -4619,22 +4731,14 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { default: // Byte aligned AVT = MVT::i8; ValReg = X86::AL; - Count = Op.getOperand(3); + Count = Size; break; } if (AVT > MVT::i8) { - if (I) { - unsigned UBytes = MVT::getSizeInBits(AVT) / 8; - Count = DAG.getIntPtrConstant(I->getValue() / UBytes); - BytesLeft = I->getValue() % UBytes; - } else { - assert(AVT >= MVT::i32 && - "Do not use rep;stos if not at least DWORD aligned"); - Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(), - Op.getOperand(3), DAG.getConstant(2, MVT::i8)); - TwoRepStos = true; - } + unsigned UBytes = MVT::getSizeInBits(AVT) / 8; + Count = DAG.getIntPtrConstant(SizeVal / UBytes); + BytesLeft = SizeVal % UBytes; } Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT), @@ -4642,8 +4746,8 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { InFlag = Chain.getValue(1); } else { AVT = MVT::i8; - Count = Op.getOperand(3); - Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag); + Count = Size; + Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag); InFlag = Chain.getValue(1); } @@ -4651,7 +4755,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, - Op.getOperand(1), InFlag); + Dst, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); @@ -4663,7 +4767,7 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { if (TwoRepStos) { InFlag = Chain.getValue(1); - Count = Op.getOperand(3); + Count = Size; MVT::ValueType CVT = Count.getValueType(); SDOperand Left = DAG.getNode(ISD::AND, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); @@ -4677,79 +4781,68 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { Ops.push_back(InFlag); Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size()); } else if (BytesLeft) { - // Issue stores for the last 1 - 7 bytes. 
- SDOperand Value; - unsigned Val = ValC->getValue() & 255; - unsigned Offset = I->getValue() - BytesLeft; - SDOperand DstAddr = Op.getOperand(1); - MVT::ValueType AddrVT = DstAddr.getValueType(); - if (BytesLeft >= 4) { - Val = (Val << 8) | Val; - Val = (Val << 16) | Val; - Value = DAG.getConstant(Val, MVT::i32); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - BytesLeft -= 4; - Offset += 4; - } - if (BytesLeft >= 2) { - Value = DAG.getConstant((Val << 8) | Val, MVT::i16); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - BytesLeft -= 2; - Offset += 2; - } - if (BytesLeft == 1) { - Value = DAG.getConstant(Val, MVT::i8); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, AddrVT, DstAddr, - DAG.getConstant(Offset, AddrVT)), - NULL, 0); - } + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + MVT::ValueType AddrVT = Dst.getValueType(); + MVT::ValueType SizeVT = Size.getValueType(); + + Chain = DAG.getMemset(Chain, + DAG.getNode(ISD::ADD, AddrVT, Dst, + DAG.getConstant(Offset, AddrVT)), + Src, + DAG.getConstant(BytesLeft, SizeVT), + Align, DstSV, Offset); } + // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. return Chain; } -SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, - SDOperand Dest, - SDOperand Source, - unsigned Size, - unsigned Align, - SelectionDAG &DAG) { +SDOperand +X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, + SDOperand Chain, + SDOperand Dst, SDOperand Src, + SDOperand Size, unsigned Align, + bool AlwaysInline, + const Value *DstSV, uint64_t DstOff, + const Value *SrcSV, uint64_t SrcOff){ + + // This requires the copy size to be a constant, preferrably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (!ConstantSize) + return SDOperand(); + uint64_t SizeVal = ConstantSize->getValue(); + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) + return SDOperand(); + + SmallVector Results; + MVT::ValueType AVT; unsigned BytesLeft = 0; - switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned - AVT = MVT::i64; - break; - default: // Byte aligned - AVT = MVT::i8; - break; - } + if (Align >= 8 && Subtarget->is64Bit()) + AVT = MVT::i64; + else if (Align >= 4) + AVT = MVT::i32; + else if (Align >= 2) + AVT = MVT::i16; + else + AVT = MVT::i8; unsigned UBytes = MVT::getSizeInBits(AVT) / 8; - SDOperand Count = DAG.getIntPtrConstant(Size / UBytes); - BytesLeft = Size % UBytes; + unsigned CountVal = SizeVal / UBytes; + SDOperand Count = DAG.getIntPtrConstant(CountVal); + BytesLeft = SizeVal % UBytes; SDOperand InFlag(0, 0); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI, - Dest, InFlag); + Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? 
X86::RSI : X86::ESI, - Source, InFlag); + Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); @@ -4757,57 +4850,28 @@ SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain, Ops.push_back(Chain); Ops.push_back(DAG.getValueType(AVT)); Ops.push_back(InFlag); - Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()); + Results.push_back(DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size())); if (BytesLeft) { - // Issue loads and stores for the last 1 - 7 bytes. - unsigned Offset = Size - BytesLeft; - SDOperand DstAddr = Dest; - MVT::ValueType DstVT = DstAddr.getValueType(); - SDOperand SrcAddr = Source; - MVT::ValueType SrcVT = SrcAddr.getValueType(); - SDOperand Value; - if (BytesLeft >= 4) { - Value = DAG.getLoad(MVT::i32, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - BytesLeft -= 4; - Offset += 4; - } - if (BytesLeft >= 2) { - Value = DAG.getLoad(MVT::i16, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - BytesLeft -= 2; - Offset += 2; - } + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + MVT::ValueType DstVT = Dst.getValueType(); + MVT::ValueType SrcVT = Src.getValueType(); + MVT::ValueType SizeVT = Size.getValueType(); - if (BytesLeft == 1) { - Value = DAG.getLoad(MVT::i8, Chain, - DAG.getNode(ISD::ADD, SrcVT, SrcAddr, - DAG.getConstant(Offset, SrcVT)), - NULL, 0); - Chain = Value.getValue(1); - Chain = DAG.getStore(Chain, Value, - DAG.getNode(ISD::ADD, DstVT, DstAddr, - DAG.getConstant(Offset, DstVT)), - NULL, 0); - } + Results.push_back(DAG.getMemcpy(Chain, + DAG.getNode(ISD::ADD, DstVT, Dst, + DAG.getConstant(Offset, + DstVT)), + DAG.getNode(ISD::ADD, SrcVT, Src, + DAG.getConstant(Offset, + SrcVT)), + DAG.getConstant(BytesLeft, SizeVT), + Align, AlwaysInline, + DstSV, Offset, SrcSV, Offset)); } - return Chain; + return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size()); } /// Expand the result of: i64,outchain = READCYCLECOUNTER inchain @@ -5419,8 +5483,6 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::CALL: return LowerCALL(Op, DAG); case ISD::RET: return LowerRET(Op, DAG); case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); - case ISD::MEMSET: return LowerMEMSET(Op, DAG); - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); @@ -5591,9 +5653,10 @@ X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { X86::isUNPCKH_v_undef_Mask(Mask.Val)); } -bool X86TargetLowering::isVectorClearMaskLegal(std::vector &BVOps, - MVT::ValueType EVT, - SelectionDAG &DAG) const { +bool +X86TargetLowering::isVectorClearMaskLegal(const std::vector &BVOps, + MVT::ValueType EVT, + SelectionDAG &DAG) const { unsigned NumElts = BVOps.size(); // Only do shuffles on 128-bit vector types for now. if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
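
Note on the rep;movs expansion in EmitTargetCodeForMemcpy above: the lowering
picks the widest element the alignment allows, copies SizeVal/ElementBytes
elements with rep;movs, and hands the SizeVal%ElementBytes trailing bytes to a
second, narrower getMemcpy. The following standalone sketch reproduces that
size-splitting arithmetic; it is illustrative only, and RepMovsPlan/planRepMovs
are invented names for this example, not LLVM API.

// Illustrative sketch of the count/remainder split used for a constant-size
// memcpy; not LLVM code.
#include <cstdint>
#include <cstdio>

struct RepMovsPlan {
  unsigned ElementBytes;  // 8, 4, 2 or 1, chosen from the alignment
  uint64_t CountVal;      // iteration count loaded into RCX/ECX for rep;movs
  uint64_t BytesLeft;     // 0..ElementBytes-1 trailing bytes, copied separately
};

static RepMovsPlan planRepMovs(uint64_t SizeVal, unsigned Align, bool Is64Bit) {
  unsigned ElementBytes;
  if (Align >= 8 && Is64Bit)
    ElementBytes = 8;     // QWORD aligned on a 64-bit target: rep;movsq
  else if (Align >= 4)
    ElementBytes = 4;     // DWORD aligned: rep;movsd
  else if (Align >= 2)
    ElementBytes = 2;     // WORD aligned: rep;movsw
  else
    ElementBytes = 1;     // byte copy: rep;movsb
  return RepMovsPlan{ElementBytes, SizeVal / ElementBytes,
                     SizeVal % ElementBytes};
}

int main() {
  // A 100-byte copy with 8-byte alignment on x86-64: 12 quadword iterations,
  // with the remaining 4 bytes handled by a recursive, narrower getMemcpy.
  RepMovsPlan P = planRepMovs(100, 8, true);
  std::printf("width=%u count=%llu left=%llu\n", P.ElementBytes,
              (unsigned long long)P.CountVal, (unsigned long long)P.BytesLeft);
  return 0;
}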