if (!VT.is128BitVector()) {
continue;
}
+
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
setOperationAction(ISD::OR, SVT, Promote);
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are optimizing for size.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
-/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
-/// determining it.
+/// lowering. If DstAlign is zero, that means the destination
+/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+/// means there isn't a need to check it against alignment requirement,
+/// probably because the source does not need to be loaded. If
+/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// non-scalar-integer type, e.g. empty string source, constant, or loaded
+/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
+/// constant so it does not need to be loaded.
+/// It returns EVT::Other if SelectionDAG should be responsible for
+/// determining the type.
EVT
-X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
SelectionDAG &DAG) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
- return MVT::v4i32;
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
- return MVT::v4f32;
+ if (NonScalarIntSafe &&
+ !F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ if (Size >= 16 &&
+ (Subtarget->isUnalignedMemAccessFast() ||
+ ((DstAlign == 0 || DstAlign >= 16) &&
+ (SrcAlign == 0 || SrcAlign >= 16))) &&
+ Subtarget->getStackAlignment() >= 16) {
+ if (Subtarget->hasSSE2())
+ return MVT::v4i32;
+ if (Subtarget->hasSSE1())
+ return MVT::v4f32;
+ } else if (!MemcpyStrSrc && Size >= 8 &&
+ !Subtarget->is64Bit() &&
+ Subtarget->getStackAlignment() >= 8 &&
+ Subtarget->hasSSE2()) {
+ // Do not use f64 to lower memcpy if source is string constant. It's
+ // better to use i32 to avoid the loads.
+ return MVT::f64;
+ }
}
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF,
MCContext &Ctx) const {
const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo();
- return Ctx.GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix())+
- Twine(MF->getFunctionNumber())+"$pb");
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(MF->getFunctionNumber())+"$pb");
}
if (!Subtarget->is64Bit())
// This doesn't have DebugLoc associated with it, but is not really the
// same as a Register.
- return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
- getPointerTy());
+ return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy());
return Table;
}
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
- llvm_report_error("SSE register return with SSE disabled");
+ report_fatal_error("SSE register return with SSE disabled");
}
// If this is a call to a function that returns an fp value on the floating
DebugLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- /*AlwaysInline=*/true, NULL, 0, NULL, 0);
+ /*isVolatile*/false, /*AlwaysInline=*/true,
+ NULL, 0, NULL, 0);
}
/// IsTailCallConvention - Return true if the calling convention is one that
if (!isTailCall) {
Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
- DebugLoc::getUnknownLoc(),
- getPointerTy()),
+ DebugLoc(), getPointerTy()),
InFlag);
InFlag = Chain.getValue(1);
} else {
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
+ const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = DAG.getMachineFunction().getFunction();
if (GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) &&
// Look for obvious safe cases to perform tail call optimization that does not
// require ABI changes. This is what gcc calls sibcall.
+ // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
+ // emit a special epilogue.
+ if (RegInfo->needsStackRealignment(MF))
+ return false;
+
// Do not sibcall optimize vararg calls unless the call site is not passing any
// arguments.
if (isVarArg && !Outs.empty())
}
FastISel *
-X86TargetLowering::createFastISel(MachineFunction &mf, MachineModuleInfo *mmo,
- DwarfWriter *dw,
+X86TargetLowering::createFastISel(MachineFunction &mf,
DenseMap<const Value *, unsigned> &vm,
DenseMap<const BasicBlock*, MachineBasicBlock*> &bm,
DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
- , SmallSet<Instruction*, 8> &cil
+ , SmallSet<const Instruction *, 8> &cil
#endif
) {
- return X86::createFastISel(mf, mmo, dw, vm, bm, am
+ return X86::createFastISel(mf, vm, bm, am
#ifndef NDEBUG
, cil
#endif
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
- if (!isInt32(Offset))
+ if (!isInt<32>(Offset))
return false;
// If we don't have a symbolic displacement - we don't have any extra
/// FIXME: split into pslldqi, psrldqi, palignr variants.
static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- int NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
isLeft = true;
unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
}
bool SeenV1 = false;
bool SeenV2 = false;
- for (int i = NumZeros; i < NumElems; ++i) {
- int Val = isLeft ? (i - NumZeros) : i;
- int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
- if (Idx < 0)
+ for (unsigned i = NumZeros; i < NumElems; ++i) {
+ unsigned Val = isLeft ? (i - NumZeros) : i;
+ int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
+ if (Idx_ < 0)
continue;
+ unsigned Idx = (unsigned) Idx_;
if (Idx < NumElems)
SeenV1 = true;
else {
if (OpFlag) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
- DebugLoc::getUnknownLoc(), getPointerTy()),
+ DebugLoc(), getPointerTy()),
Result);
}
if (OpFlag) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
- DebugLoc::getUnknownLoc(), getPointerTy()),
+ DebugLoc(), getPointerTy()),
Result);
}
!Subtarget->is64Bit()) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
- DebugLoc::getUnknownLoc(),
- getPointerTy()),
+ DebugLoc(), getPointerTy()),
Result);
}
DebugLoc dl = GA->getDebugLoc(); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
- DebugLoc::getUnknownLoc(),
- PtrVT), InFlag);
+ DebugLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
DebugLoc dl = GA->getDebugLoc();
// Get the Thread Pointer
SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
- DebugLoc::getUnknownLoc(), PtrVT,
+ DebugLoc(), PtrVT,
DAG.getRegister(is64Bit? X86::FS : X86::GS,
MVT::i32));
SDValue Chain,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
+ bool isVolatile,
const Value *DstSV,
uint64_t DstSVOff) {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
DAG.getConstant(Offset, AddrVT)),
Src,
DAG.getConstant(BytesLeft, SizeVT),
- Align, DstSV, DstSVOff + Offset);
+ Align, isVolatile, DstSV, DstSVOff + Offset);
}
// TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
- bool AlwaysInline,
+ bool isVolatile, bool AlwaysInline,
const Value *DstSV, uint64_t DstSVOff,
const Value *SrcSV, uint64_t SrcSVOff) {
// This requires the copy size to be a constant, preferably
Count, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
- X86::EDI,
+ X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
DAG.getNode(ISD::ADD, dl, SrcVT, Src,
DAG.getConstant(Offset, SrcVT)),
DAG.getConstant(BytesLeft, SizeVT),
- Align, AlwaysInline,
+ Align, isVolatile, AlwaysInline,
DstSV, DstSVOff + Offset,
SrcSV, SrcSVOff + Offset));
}
SDValue SrcPtr = Op.getOperand(1);
SDValue SrcSV = Op.getOperand(2);
- llvm_report_error("VAArgInst is not yet implemented for x86-64!");
+ report_fatal_error("VAArgInst is not yet implemented for x86-64!");
return SDValue();
}
DebugLoc dl = Op.getDebugLoc();
return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
- DAG.getIntPtrConstant(24), 8, false,
- DstSV, 0, SrcSV, 0);
+ DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
+ false, DstSV, 0, SrcSV, 0);
}
SDValue
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
if (InRegCount > 2) {
- llvm_report_error("Nest register in use - reduce number of inreg parameters!");
+ report_fatal_error("Nest register in use - reduce number of inreg parameters!");
}
}
break;