setOperationAction(ISD::BRCOND , MVT::Other, Custom);
setOperationAction(ISD::BR_CC , MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
- setOperationAction(ISD::MEMMOVE , MVT::Other, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
setOperationAction(ISD::SRA_PARTS , MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
}
- // X86 wants to expand memset / memcpy itself.
- setOperationAction(ISD::MEMSET , MVT::Other, Custom);
- setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
}
-/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires the
-/// callee to pop its own arguments. Callee pop is necessary to support tail
+/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
+/// the callee to pop its own arguments. Callee pop is necessary to support tail
/// calls.
bool X86TargetLowering::IsCalleePop(SDOperand Op) {
bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
-/// the specific parameter attribute. The copy will be passed as a byval function
-/// parameter.
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
static SDOperand
CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
- SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
SDOperand SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
- SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
- return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
+ return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*AlwaysInline=*/true,
+ NULL, 0, NULL, 0);
}
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
PseudoSourceValue::getStack(), LocMemOffset);
}
+/// EmitTailCallLoadRetAddr - Emit a load of the return address if tail call
+/// optimization is performed and it is required.
+SDOperand
+X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
+ SDOperand &OutRetAddr,
+ SDOperand Chain,
+ bool IsTailCall,
+ bool Is64Bit,
+ int FPDiff) {
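+  // Only needed for tail calls that require adjusting the return address
+  // slot (FPDiff != 0).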
+ if (!IsTailCall || FPDiff==0) return Chain;
+
+ // Adjust the Return address stack slot.
+ MVT::ValueType VT = getPointerTy();
+ OutRetAddr = getReturnAddressFrameIndex(DAG);
+ // Load the "old" Return address.
+  OutRetAddr = DAG.getLoad(VT, Chain, OutRetAddr, NULL, 0);
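+  // The loaded address is handed back through OutRetAddr; return the load's
+  // chain result.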
+ return SDOperand(OutRetAddr.Val, 1);
+}
+
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
+/// optimization is performed and it is required (FPDiff!=0).
+static SDOperand
+EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
+ SDOperand Chain, SDOperand RetAddrFrIdx,
+ bool Is64Bit, int FPDiff) {
+ // Store the return address to the appropriate stack slot.
+ if (!FPDiff) return Chain;
+ // Calculate the new stack slot for the return address.
+ int SlotSize = Is64Bit ? 8 : 4;
+ int NewReturnAddrFI =
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+ MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
+ SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
+ Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx,
+ PseudoSourceValue::getFixedStack(), NewReturnAddrFI);
+ return Chain;
+}
+
+/// CopyTailCallByValClobberedRegToVirtReg - Copy an argument bound for a
+/// register into a virtual register if later byval tail call lowering might
+/// overwrite it.
+bool
+X86TargetLowering::CopyTailCallByValClobberedRegToVirtReg(bool containsByValArg,
+ SmallVector< std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
+ SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes,
+ std::pair<unsigned, SDOperand> &RegToPass,
+ SDOperand &OutChain,
+ SDOperand &OutFlag,
+ MachineFunction &MF,
+ SelectionDAG & DAG) {
+ if (!containsByValArg) return false;
+
+ std::pair<unsigned, unsigned> ArgRegVReg;
+ MVT::ValueType VT = RegToPass.second.getValueType();
+
+ ArgRegVReg.first = RegToPass.first;
+ ArgRegVReg.second = MF.getRegInfo().createVirtualRegister(getRegClassFor(VT));
+
+ // Copy Argument to virtual register.
+ OutChain = DAG.getCopyToReg(OutChain, ArgRegVReg.second,
+ RegToPass.second, OutFlag);
+ OutFlag = OutChain.getValue(1);
+ // Remember virtual register and type.
+ TailCallByValClobberedVRegs.push_back(ArgRegVReg);
+ TailCallByValClobberedVRegTypes.push_back(VT);
+ return true;
+}
+
+
+/// RestoreTailCallByValClobberedRegs - Restore registers which were saved to
+/// virtual registers to prevent tail call byval lowering from overwriting
+/// parameter registers.
+static SDOperand
+RestoreTailCallByValClobberedRegs(SelectionDAG & DAG, SDOperand Chain,
+ SmallVector< std::pair<unsigned, unsigned>, 8> &TailCallByValClobberedVRegs,
+ SmallVector<MVT::ValueType, 8> &TailCallByValClobberedVRegTypes) {
+ if (TailCallByValClobberedVRegs.size()==0) return Chain;
+
+ SmallVector<SDOperand, 8> RegOpChains;
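+  // Copy each saved value back from its virtual register into the physical
+  // argument register it was originally destined for.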
+ for (unsigned i = 0, e=TailCallByValClobberedVRegs.size(); i != e; i++) {
+ SDOperand InFlag;
+ unsigned DestReg = TailCallByValClobberedVRegs[i].first;
+ unsigned VirtReg = TailCallByValClobberedVRegs[i].second;
+ MVT::ValueType VT = TailCallByValClobberedVRegTypes[i];
+ SDOperand Tmp = DAG.getCopyFromReg(Chain, VirtReg, VT, InFlag);
+ Chain = DAG.getCopyToReg(Chain, DestReg, Tmp, InFlag);
+ RegOpChains.push_back(Chain);
+ }
+ if (!RegOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
+ &RegOpChains[0], RegOpChains.size());
+ return Chain;
+}
SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes));
SDOperand RetAddrFrIdx;
- if (IsTailCall) {
- // Adjust the Return address stack slot.
- if (FPDiff) {
- MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
- RetAddrFrIdx = getReturnAddressFrameIndex(DAG);
- // Load the "old" Return address.
- RetAddrFrIdx =
- DAG.getLoad(VT, Chain,RetAddrFrIdx, NULL, 0);
- Chain = SDOperand(RetAddrFrIdx.Val, 1);
- }
- }
+  // Load the return address for tail calls.
+ Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
+ FPDiff);
SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
SmallVector<std::pair<unsigned, SDOperand>, 8> TailCallClobberedVRegs;
+
SmallVector<SDOperand, 8> MemOpChains;
SDOperand StackPtr;
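+  // Track argument registers that byval tail call lowering may clobber,
+  // together with the virtual registers their values are saved in.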
+ bool containsTailCallByValArg = false;
+ SmallVector<std::pair<unsigned, unsigned>, 8> TailCallByValClobberedVRegs;
+ SmallVector<MVT::ValueType, 8> TailCallByValClobberedVRegTypes;
+
// Walk the register/memloc assignments, inserting copies/loads. For tail
// calls, remember all arguments for later special lowering.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-
+ bool isByVal = cast<ARG_FLAGSSDNode>(Op.getOperand(6+2*VA.getValNo()))->
+ getArgFlags().isByVal();
+
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: assert(0 && "Unknown loc info!");
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
- if (!IsTailCall) {
+ if (!IsTailCall || (IsTailCall && isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.Val == 0)
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
Arg));
+      // Remember that this call contains byval arguments.
+ containsTailCallByValArg |= IsTailCall && isByVal;
} else if (IsPossiblyOverwrittenArgumentOfTailCall(Arg, MFI)) {
TailCallClobberedVRegs.push_back(std::make_pair(i,Arg));
}
// and flag operands which copy the outgoing args into registers.
SDOperand InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    // Tail call byval lowering might overwrite argument registers, so
+    // arguments passed in registers need to be copied to virtual registers
+    // for later processing.
+ if (CopyTailCallByValClobberedRegToVirtReg(containsTailCallByValArg,
+ TailCallByValClobberedVRegs,
+ TailCallByValClobberedVRegTypes,
+ RegsToPass[i], Chain, InFlag, MF,
+ DAG))
+ continue;
+
Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
InFlag);
InFlag = Chain.getValue(1);
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
- FIN = DAG.getFrameIndex(FI, MVT::i32);
+ FIN = DAG.getFrameIndex(FI, getPointerTy());
// Find virtual register for this argument.
bool Found=false;
if (Flags.isByVal()) {
// Copy relative to framepointer.
- MemOpChains2.push_back(CreateCopyOfByValArgument(Arg, FIN, Chain,
+ SDOperand Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (StackPtr.Val == 0)
+ StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+ Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
+
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
Flags, DAG));
} else {
// Store relative to framepointer.
Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
&MemOpChains2[0], MemOpChains2.size());
+ // Restore byval lowering clobbered registers.
+ Chain = RestoreTailCallByValClobberedRegs(DAG, Chain,
+ TailCallByValClobberedVRegs,
+ TailCallByValClobberedVRegTypes);
+
// Store the return address to the appropriate stack slot.
- if (FPDiff) {
- // Calculate the new stack slot for the return address.
- int SlotSize = Is64Bit ? 8 : 4;
- int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
- MVT::ValueType VT = Is64Bit ? MVT::i64 : MVT::i32;
- SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
- Chain = DAG.getStore(Chain, RetAddrFrIdx, NewRetAddrFrIdx,
- PseudoSourceValue::getFixedStack(), NewReturnAddrFI);
- }
+ Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
+ FPDiff);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
}
-/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
-///
-static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
+/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32, or to
+/// v4f32 when SSE2 is not available.
+static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG, bool HasSSE2) {
+ MVT::ValueType PVT = HasSSE2 ? MVT::v4i32 : MVT::v4f32;
+ MVT::ValueType VT = Op.getValueType();
+ if (PVT == VT)
+ return Op;
SDOperand V1 = Op.getOperand(0);
SDOperand Mask = Op.getOperand(2);
- MVT::ValueType VT = Op.getValueType();
unsigned NumElems = Mask.getNumOperands();
- Mask = getUnpacklMask(NumElems, DAG);
- while (NumElems != 4) {
- V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
- NumElems >>= 1;
+ // Special handling of v4f32 -> v4i32.
+ if (VT != MVT::v4f32) {
+ Mask = getUnpacklMask(NumElems, DAG);
+ while (NumElems > 4) {
+ V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
+ NumElems >>= 1;
+ }
+ Mask = getZeroVector(MVT::v4i32, DAG);
}
- V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
- Mask = getZeroVector(MVT::v4i32, DAG);
- SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
- DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
+ V1 = DAG.getNode(ISD::BIT_CONVERT, PVT, V1);
+ SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, PVT, V1,
+ DAG.getNode(ISD::UNDEF, PVT), Mask);
return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
SDOperand PermMask = Op.getOperand(2);
MVT::ValueType VT = Op.getValueType();
unsigned NumElems = PermMask.getNumOperands();
+ bool isMMX = MVT::getSizeInBits(VT) == 64;
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
return V2;
if (isSplatMask(PermMask.Val)) {
- if (NumElems <= 4) return Op;
- // Promote it to a v4i32 splat.
- return PromoteSplat(Op, DAG);
+ if (isMMX || NumElems < 4) return Op;
+ // Promote it to a v4{if}32 splat.
+ return PromoteSplat(Op, DAG, Subtarget->hasSSE2());
}
// If the shuffle can be profitably rewritten as a narrower shuffle, then
return Op;
}
- // If VT is integer, try PSHUF* first, then SHUFP*.
- if (MVT::isInteger(VT)) {
- // MMX doesn't have PSHUFD; it does have PSHUFW. While it's theoretically
- // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
- if (((MVT::getSizeInBits(VT) != 64 || NumElems == 4) &&
- X86::isPSHUFDMask(PermMask.Val)) ||
- X86::isPSHUFHWMask(PermMask.Val) ||
- X86::isPSHUFLWMask(PermMask.Val)) {
- if (V2.getOpcode() != ISD::UNDEF)
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
- DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
+ // Try PSHUF* first, then SHUFP*.
+ // MMX doesn't have PSHUFD but it does have PSHUFW. While it's theoretically
+ // possible to shuffle a v2i32 using PSHUFW, that's not yet implemented.
+ if (isMMX && NumElems == 4 && X86::isPSHUFDMask(PermMask.Val)) {
+ if (V2.getOpcode() != ISD::UNDEF)
+ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
+ DAG.getNode(ISD::UNDEF, VT), PermMask);
+ return Op;
+ }
+
+ if (!isMMX) {
+ if (Subtarget->hasSSE2() &&
+ (X86::isPSHUFDMask(PermMask.Val) ||
+ X86::isPSHUFHWMask(PermMask.Val) ||
+ X86::isPSHUFLWMask(PermMask.Val))) {
+ MVT::ValueType RVT = VT;
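+      // PSHUF* operate on integer vectors, so a v4f32 input is bitcast to
+      // v4i32 first and the result is converted back to the original type.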
+ if (VT == MVT::v4f32) {
+ RVT = MVT::v4i32;
+ Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT,
+ DAG.getNode(ISD::BIT_CONVERT, RVT, V1),
+ DAG.getNode(ISD::UNDEF, RVT), PermMask);
+ } else if (V2.getOpcode() != ISD::UNDEF)
+ Op = DAG.getNode(ISD::VECTOR_SHUFFLE, RVT, V1,
+ DAG.getNode(ISD::UNDEF, RVT), PermMask);
+ if (RVT != VT)
+ Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op);
return Op;
}
- if (X86::isSHUFPMask(PermMask.Val) &&
- MVT::getSizeInBits(VT) != 64) // Don't do this for MMX.
- return Op;
- } else {
- // Floating point cases in the other order.
- if (X86::isSHUFPMask(PermMask.Val))
+ // Binary or unary shufps.
+ if (X86::isSHUFPMask(PermMask.Val) ||
+ (V2.getOpcode() == ISD::UNDEF && X86::isPSHUFDMask(PermMask.Val)))
return Op;
- if (X86::isPSHUFDMask(PermMask.Val) ||
- X86::isPSHUFHWMask(PermMask.Val) ||
- X86::isPSHUFLWMask(PermMask.Val)) {
- if (V2.getOpcode() != ISD::UNDEF)
- return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
- DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
- return Op;
- }
}
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
}
// Handle all 4 wide cases with a number of shuffles.
- if (NumElems == 4 && MVT::getSizeInBits(VT) != 64) {
+ if (NumElems == 4 && !isMMX) {
// Don't do this for MMX.
MVT::ValueType MaskVT = PermMask.getValueType();
MVT::ValueType MaskEVT = MVT::getVectorElementType(MaskVT);
return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops1, 2);
}
-SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
- SDOperand InFlag(0, 0);
- SDOperand Chain = Op.getOperand(0);
- unsigned Align =
- (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
- if (Align == 0) Align = 1;
-
- ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- // If not DWORD aligned or size is more than the threshold, call memset.
- // The libc version is likely to be faster for these cases. It can use the
- // address value and run time information about the CPU.
- if ((Align & 3) != 0 ||
- (I && I->getValue() > Subtarget->getMaxInlineSizeThreshold())) {
- MVT::ValueType IntPtr = getPointerTy();
- const Type *IntPtrTy = getTargetData()->getIntPtrType();
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Op.getOperand(1);
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- // Extend the unsigned i8 argument to be an int value for the call.
- Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- Entry.Node = Op.getOperand(3);
- Args.push_back(Entry);
- std::pair<SDOperand,SDOperand> CallResult =
- LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
- false, DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
- return CallResult.second;
+SDOperand
+X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ const Value *DstSV, uint64_t DstOff) {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+  // If not DWORD aligned or size is more than the threshold, call the library.
+  // The libc version is likely to be faster for these cases. It can use the
+  // address value and run time information about the CPU.
+  if ((Align & 3) != 0 ||
+ !ConstantSize ||
+ ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
+ SDOperand InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+ if (const char *bzeroEntry =
+ V && V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ MVT::ValueType IntPtr = getPointerTy();
+ const Type *IntPtrTy = getTargetData()->getIntPtrType();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, false, false, CallingConv::C,
+ false, DAG.getExternalSymbol(bzeroEntry, IntPtr),
+ Args, DAG);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDOperand();
}
+ uint64_t SizeVal = ConstantSize->getValue();
+ SDOperand InFlag(0, 0);
MVT::ValueType AVT;
SDOperand Count;
- ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
unsigned BytesLeft = 0;
bool TwoRepStos = false;
if (ValC) {
ValReg = X86::EAX;
Val = (Val << 8) | Val;
Val = (Val << 16) | Val;
- if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) { // QWORD aligned
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
AVT = MVT::i64;
ValReg = X86::RAX;
Val = (Val << 32) | Val;
default: // Byte aligned
AVT = MVT::i8;
ValReg = X86::AL;
- Count = Op.getOperand(3);
+ Count = Size;
break;
}
if (AVT > MVT::i8) {
- if (I) {
- unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- Count = DAG.getIntPtrConstant(I->getValue() / UBytes);
- BytesLeft = I->getValue() % UBytes;
- } else {
- assert(AVT >= MVT::i32 &&
- "Do not use rep;stos if not at least DWORD aligned");
- Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
- Op.getOperand(3), DAG.getConstant(2, MVT::i8));
- TwoRepStos = true;
- }
+ unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
}
Chain = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
InFlag = Chain.getValue(1);
} else {
AVT = MVT::i8;
- Count = Op.getOperand(3);
- Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
+ Count = Size;
+ Chain = DAG.getCopyToReg(Chain, X86::AL, Src, InFlag);
InFlag = Chain.getValue(1);
}
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
- Op.getOperand(1), InFlag);
+ Dst, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
if (TwoRepStos) {
InFlag = Chain.getValue(1);
- Count = Op.getOperand(3);
+ Count = Size;
MVT::ValueType CVT = Count.getValueType();
SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
Ops.push_back(InFlag);
Chain = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
} else if (BytesLeft) {
- // Issue stores for the last 1 - 7 bytes.
- SDOperand Value;
- unsigned Val = ValC->getValue() & 255;
- unsigned Offset = I->getValue() - BytesLeft;
- SDOperand DstAddr = Op.getOperand(1);
- MVT::ValueType AddrVT = DstAddr.getValueType();
- if (BytesLeft >= 4) {
- Val = (Val << 8) | Val;
- Val = (Val << 16) | Val;
- Value = DAG.getConstant(Val, MVT::i32);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, AddrVT, DstAddr,
- DAG.getConstant(Offset, AddrVT)),
- NULL, 0);
- BytesLeft -= 4;
- Offset += 4;
- }
- if (BytesLeft >= 2) {
- Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, AddrVT, DstAddr,
- DAG.getConstant(Offset, AddrVT)),
- NULL, 0);
- BytesLeft -= 2;
- Offset += 2;
- }
- if (BytesLeft == 1) {
- Value = DAG.getConstant(Val, MVT::i8);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, AddrVT, DstAddr,
- DAG.getConstant(Offset, AddrVT)),
- NULL, 0);
- }
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ MVT::ValueType AddrVT = Dst.getValueType();
+ MVT::ValueType SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain,
+ DAG.getNode(ISD::ADD, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, DstSV, Offset);
}
+  // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
return Chain;
}
-SDOperand X86TargetLowering::LowerMEMCPYInline(SDOperand Chain,
- SDOperand Dest,
- SDOperand Source,
- unsigned Size,
- unsigned Align,
- SelectionDAG &DAG) {
+SDOperand
+X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
+ SDOperand Chain,
+ SDOperand Dst, SDOperand Src,
+ SDOperand Size, unsigned Align,
+ bool AlwaysInline,
+ const Value *DstSV, uint64_t DstOff,
+ const Value *SrcSV, uint64_t SrcOff){
+
+  // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDOperand();
+ uint64_t SizeVal = ConstantSize->getValue();
+ if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
+ return SDOperand();
+
+ SmallVector<SDOperand, 4> Results;
+
MVT::ValueType AVT;
unsigned BytesLeft = 0;
- switch (Align & 3) {
- case 2: // WORD aligned
- AVT = MVT::i16;
- break;
- case 0: // DWORD aligned
- AVT = MVT::i32;
- if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) // QWORD aligned
- AVT = MVT::i64;
- break;
- default: // Byte aligned
- AVT = MVT::i8;
- break;
- }
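+  // Pick the widest element size for rep;movs that the known alignment
+  // allows.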
+ if (Align >= 8 && Subtarget->is64Bit())
+ AVT = MVT::i64;
+ else if (Align >= 4)
+ AVT = MVT::i32;
+ else if (Align >= 2)
+ AVT = MVT::i16;
+ else
+ AVT = MVT::i8;
unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
- SDOperand Count = DAG.getIntPtrConstant(Size / UBytes);
- BytesLeft = Size % UBytes;
+ unsigned CountVal = SizeVal / UBytes;
+ SDOperand Count = DAG.getIntPtrConstant(CountVal);
+ BytesLeft = SizeVal % UBytes;
SDOperand InFlag(0, 0);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
- Dest, InFlag);
+ Dst, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
- Source, InFlag);
+ Src, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
Ops.push_back(Chain);
Ops.push_back(DAG.getValueType(AVT));
Ops.push_back(InFlag);
- Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
+ Results.push_back(DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size()));
if (BytesLeft) {
- // Issue loads and stores for the last 1 - 7 bytes.
- unsigned Offset = Size - BytesLeft;
- SDOperand DstAddr = Dest;
- MVT::ValueType DstVT = DstAddr.getValueType();
- SDOperand SrcAddr = Source;
- MVT::ValueType SrcVT = SrcAddr.getValueType();
- SDOperand Value;
- if (BytesLeft >= 4) {
- Value = DAG.getLoad(MVT::i32, Chain,
- DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
- DAG.getConstant(Offset, SrcVT)),
- NULL, 0);
- Chain = Value.getValue(1);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, DstVT, DstAddr,
- DAG.getConstant(Offset, DstVT)),
- NULL, 0);
- BytesLeft -= 4;
- Offset += 4;
- }
- if (BytesLeft >= 2) {
- Value = DAG.getLoad(MVT::i16, Chain,
- DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
- DAG.getConstant(Offset, SrcVT)),
- NULL, 0);
- Chain = Value.getValue(1);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, DstVT, DstAddr,
- DAG.getConstant(Offset, DstVT)),
- NULL, 0);
- BytesLeft -= 2;
- Offset += 2;
- }
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ MVT::ValueType DstVT = Dst.getValueType();
+ MVT::ValueType SrcVT = Src.getValueType();
+ MVT::ValueType SizeVT = Size.getValueType();
- if (BytesLeft == 1) {
- Value = DAG.getLoad(MVT::i8, Chain,
- DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
- DAG.getConstant(Offset, SrcVT)),
- NULL, 0);
- Chain = Value.getValue(1);
- Chain = DAG.getStore(Chain, Value,
- DAG.getNode(ISD::ADD, DstVT, DstAddr,
- DAG.getConstant(Offset, DstVT)),
- NULL, 0);
- }
+ Results.push_back(DAG.getMemcpy(Chain,
+ DAG.getNode(ISD::ADD, DstVT, Dst,
+ DAG.getConstant(Offset,
+ DstVT)),
+ DAG.getNode(ISD::ADD, SrcVT, Src,
+ DAG.getConstant(Offset,
+ SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, AlwaysInline,
+ DstSV, Offset, SrcSV, Offset));
}
- return Chain;
+ return DAG.getNode(ISD::TokenFactor, MVT::Other, &Results[0], Results.size());
}
/// Expand the result of: i64,outchain = READCYCLECOUNTER inchain
case ISD::CALL: return LowerCALL(Op, DAG);
case ISD::RET: return LowerRET(Op, DAG);
case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG);
- case ISD::MEMSET: return LowerMEMSET(Op, DAG);
- case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VACOPY: return LowerVACOPY(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
X86::isUNPCKH_v_undef_Mask(Mask.Val));
}
-bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
- MVT::ValueType EVT,
- SelectionDAG &DAG) const {
+bool
+X86TargetLowering::isVectorClearMaskLegal(const std::vector<SDOperand> &BVOps,
+ MVT::ValueType EVT,
+ SelectionDAG &DAG) const {
unsigned NumElts = BVOps.size();
// Only do shuffles on 128-bit vector types for now.
if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;