/// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the
/// same number of types as the Ins/Outs arrays in LowerFormalArguments,
/// LowerCall, and LowerReturn.
-static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
- SmallVectorImpl<EVT> &ValueVTs,
+static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<uint64_t> *Offsets = nullptr,
uint64_t StartingOffset = 0) {
SmallVector<EVT, 16> TempVTs;
SmallVector<uint64_t, 16> TempOffsets;
- ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset);
+ ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
EVT VT = TempVTs[i];
uint64_t Off = TempOffsets[i];
// condition branches.
setJumpIsExpensive(true);
+ // Wide divides are _very_ slow. Try to reduce the width of the divide if
+ // possible.
+ addBypassSlowDiv(64, 32);
+
// By default, use the Source scheduling
if (sched4reg)
setSchedulingPreference(Sched::RegPressure);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
// Turn FP truncstore into trunc + store.
+ // FIXME: vector types should also be expanded
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SELECT);
// Now deduce the information based on the above mentioned
// actions
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
- return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}
-std::string
-NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- unsigned retAlignment,
- const ImmutableCallSite *CS) const {
+std::string NVPTXTargetLowering::getPrototype(
+ const DataLayout &DL, Type *retTy, const ArgListTy &Args,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment,
+ const ImmutableCallSite *CS) const {
+ auto PtrVT = getPointerTy(DL);
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
O << "(";
if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
unsigned size = 0;
- if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
+ if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
if (size < 32)
size = 32;
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
- O << ".param .b" << getPointerTy().getSizeInBits() << " _";
+ O << ".param .b" << PtrVT.getSizeInBits() << " _";
} else if ((retTy->getTypeID() == Type::StructTyID) ||
isa<VectorType>(retTy)) {
- O << ".param .align "
- << retAlignment
- << " .b8 _["
- << getDataLayout()->getTypeAllocSize(retTy) << "]";
+ auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
+ O << ".param .align " << retAlignment << " .b8 _["
+ << DL.getTypeAllocSize(retTy) << "]";
} else {
llvm_unreachable("Unknown return type");
}
O << "_ (";
bool first = true;
- MVT thePointerTy = getPointerTy();
unsigned OIdx = 0;
for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
unsigned align = 0;
const CallInst *CallI = cast<CallInst>(CS->getInstruction());
- const DataLayout *TD = getDataLayout();
// +1 because index 0 is reserved for return type alignment
if (!llvm::getAlign(*CallI, i + 1, align))
- align = TD->getABITypeAlignment(Ty);
- unsigned sz = TD->getTypeAllocSize(Ty);
+ align = DL.getABITypeAlignment(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
O << ".param .align " << align << " .b8 ";
O << "_";
O << "[" << sz << "]";
// update the index for Outs
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*this, Ty, vtparts);
+ ComputeValueVTs(*this, DL, Ty, vtparts);
if (unsigned len = vtparts.size())
OIdx += len - 1;
continue;
}
// i8 types in IR will be i16 types in SDAG
- assert((getValueType(Ty) == Outs[OIdx].VT ||
- (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
+ assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
+ (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
"type mismatch between callee prototype and arguments");
// scalar type
unsigned sz = 0;
if (sz < 32)
sz = 32;
} else if (isa<PointerType>(Ty))
- sz = thePointerTy.getSizeInBits();
+ sz = PtrVT.getSizeInBits();
else
sz = Ty->getPrimitiveSizeInBits();
O << ".param .b" << sz << " ";
O << "_";
continue;
}
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ auto *PTy = dyn_cast<PointerType>(Ty);
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
unsigned align = Outs[OIdx].Flags.getByValAlign();
- unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
+ unsigned sz = DL.getTypeAllocSize(ETy);
O << ".param .align " << align << " .b8 ";
O << "_";
O << "[" << sz << "]";
const ImmutableCallSite *CS,
Type *Ty,
unsigned Idx) const {
- const DataLayout *TD = getDataLayout();
unsigned Align = 0;
const Value *DirectCallee = CS->getCalledFunction();
// Call is indirect or alignment information is not available, fall back to
// the ABI type alignment
- return TD->getABITypeAlignment(Ty);
+ auto &DL = CS->getCaller()->getParent()->getDataLayout();
+ return DL.getABITypeAlignment(Ty);
}
SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
- const DataLayout *TD = getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
+ auto &DL = MF.getDataLayout();
SDValue tempChain = Chain;
Chain = DAG.getCALLSEQ_START(Chain,
// aggregate
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
+ 0);
unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
// declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = TD->getTypeAllocSize(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
MVT::i32),
continue;
}
if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
// declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = TD->getTypeAllocSize(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareParamOps[] = { Chain,
DAG.getConstant(align, dl, MVT::i32),
// struct or vector
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> Offsets;
- const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
+ auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
- ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
+ vtparts, &Offsets, 0);
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
EVT elemtype = vtparts[j];
int curOffset = Offsets[j];
unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
- SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
- DAG.getConstant(curOffset, dl, getPointerTy()));
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
+ DAG.getConstant(curOffset, dl, PtrVT));
SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
MachinePointerInfo(), false, false, false,
PartAlign);
// Handle Result
if (Ins.size() > 0) {
SmallVector<EVT, 16> resvtparts;
- ComputeValueVTs(*this, retTy, resvtparts);
+ ComputeValueVTs(*this, DL, retTy, resvtparts);
// Declare
// .param .align 16 .b8 retval0[<size-in-bytes>], or
// .param .b<size-in-bits> retval0
- unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
+ unsigned resultsz = DL.getTypeAllocSizeInBits(retTy);
// Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
// these three types to match the logic in
// NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS);
+ std::string Proto =
+ getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS);
const char *ProtoStr =
nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
SDValue ProtoOps[] = {
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
if (retTy && retTy->isVectorTy()) {
- EVT ObjectVT = getValueType(retTy);
+ EVT ObjectVT = getValueType(DL, retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
InVals.push_back(Elt);
}
- Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
}
}
} else {
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0);
assert(VTs.size() == Ins.size() && "Bad value decomposition");
unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
SmallVector<EVT, 4> LoadRetVTs;
EVT TheLoadType = VTs[i];
- if (retTy->isIntegerTy() &&
- TD->getTypeAllocSizeInBits(retTy) < 32) {
+ if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) {
// This is for integer types only, and specifically not for
// aggregates.
LoadRetVTs.push_back(MVT::i32);
}
MemSDNode *MemSD = cast<MemSDNode>(N);
- const DataLayout *TD = getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
unsigned Align = MemSD->getAlignment();
unsigned PrefAlign =
- TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
+ TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext()));
if (Align < PrefAlign) {
// This store is not sufficiently aligned, so bail out and let this vector
// store be scalarized. Note that we may still be able to emit smaller
"struct._image3d_t",
"struct._sampler_t" };
- const Type *Ty = arg->getType();
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ Type *Ty = arg->getType();
+ auto *PTy = dyn_cast<PointerType>(Ty);
if (!PTy)
return false;
if (!context)
return false;
- const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
+ auto *STy = dyn_cast<StructType>(PTy->getElementType());
const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- const DataLayout *TD = getDataLayout();
+ const DataLayout &DL = DAG.getDataLayout();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
if (Ty->isAggregateType()) {
SmallVector<EVT, 16> vtparts;
- ComputePTXValueVTs(*this, Ty, vtparts);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
assert(vtparts.size() > 0 && "empty aggregate type not expected");
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
continue;
}
if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
for (unsigned parti = 0; parti < NumRegs; ++parti) {
InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
// NOTE: Here, we lose the ability to issue vector loads for vectors
// that are a part of a struct. This should be investigated in the
// future.
- ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets,
+ 0);
assert(vtparts.size() > 0 && "empty aggregate type not expected");
bool aggregateIsPacked = false;
if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
aggregateIsPacked = STy->isPacked();
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
EVT partVT = vtparts[parti];
PointerType::get(partVT.getTypeForEVT(F->getContext()),
llvm::ADDRESS_SPACE_PARAM));
SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(offsets[parti], dl, getPointerTy()));
- unsigned partAlign =
- aggregateIsPacked ? 1
- : TD->getABITypeAlignment(
- partVT.getTypeForEVT(F->getContext()));
+ DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
+ DAG.getConstant(offsets[parti], dl, PtrVT));
+ unsigned partAlign = aggregateIsPacked
+ ? 1
+ : DL.getABITypeAlignment(
+ partVT.getTypeForEVT(F->getContext()));
SDValue p;
if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
continue;
}
if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(Ty);
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ EVT ObjectVT = getValueType(DL, Ty);
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
unsigned NumElts = ObjectVT.getVectorNumElements();
assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
"Vector was not scalarized");
Value *SrcValue = Constant::getNullValue(PointerType::get(
EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
SDValue P = DAG.getLoad(
- EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
- false, true,
- TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
+ EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false,
+ true,
+ DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
Value *SrcValue = Constant::getNullValue(PointerType::get(
VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
SDValue P = DAG.getLoad(
- VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false,
- false, true,
- TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
+ VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false,
+ true,
+ DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
Value *SrcValue = Constant::getNullValue(
PointerType::get(VecVT.getTypeForEVT(F->getContext()),
llvm::ADDRESS_SPACE_PARAM));
- SDValue SrcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
- DAG.getConstant(Ofst, dl, getPointerTy()));
+ SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
+ DAG.getConstant(Ofst, dl, PtrVT));
SDValue P = DAG.getLoad(
VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
false, true,
- TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
+ DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
InVals.push_back(Elt);
}
- Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
}
InsIdx += NumElts;
}
continue;
}
// A plain scalar.
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
// If ABI, load from the param symbol
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
Value *srcValue = Constant::getNullValue(PointerType::get(
ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
SDValue p;
if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
ISD::SEXTLOAD : ISD::ZEXTLOAD;
- p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg,
- MachinePointerInfo(srcValue), ObjectVT, false, false,
- false,
- TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ p = DAG.getExtLoad(
+ ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue),
+ ObjectVT, false, false, false,
+ DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
} else {
- p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg,
- MachinePointerInfo(srcValue), false, false, false,
- TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ p = DAG.getLoad(
+ Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false,
+ false, false,
+ DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
}
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
// machine instruction fails because TargetExternalSymbol
// (not lowered) is target dependent, and CopyToReg assumes
// the source is lowered.
- EVT ObjectVT = getValueType(Ty);
+ EVT ObjectVT = getValueType(DL, Ty);
assert(ObjectVT == Ins[InsIdx].VT &&
"Ins type did not match function type");
- SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
MachineFunction &MF = DAG.getMachineFunction();
const Function *F = MF.getFunction();
Type *RetTy = F->getReturnType();
- const DataLayout *TD = getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
assert(NumElts == Outs.size() && "Bad scalarization of return value");
// const_cast can be removed in later LLVM versions
- EVT EltVT = getValueType(RetTy).getVectorElementType();
+ EVT EltVT = getValueType(TD, RetTy).getVectorElementType();
bool NeedExtend = false;
if (EltVT.getSizeInBits() < 16)
NeedExtend = true;
EVT VecVT =
EVT::getVectorVT(F->getContext(), EltVT, VecSize);
unsigned PerStoreOffset =
- TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
for (unsigned i = 0; i < NumElts; i += VecSize) {
// Get values
} else {
SmallVector<EVT, 16> ValVTs;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
+ ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0);
assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
TheValType.getVectorElementType(), TmpVal,
DAG.getIntPtrConstant(j, dl));
EVT TheStoreType = ValVTs[i];
- if (RetTy->isIntegerTy() &&
- TD->getTypeAllocSizeInBits(RetTy) < 32) {
+ if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) {
// The following zero-extension is for integer types only, and
// specifically not for aggregates.
TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
case Intrinsic::nvvm_ldu_global_i:
case Intrinsic::nvvm_ldu_global_f:
case Intrinsic::nvvm_ldu_global_p: {
-
+ auto &DL = I.getModule()->getDataLayout();
Info.opc = ISD::INTRINSIC_W_CHAIN;
if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
else if(Intrinsic == Intrinsic::nvvm_ldu_global_p)
- Info.memVT = getPointerTy();
+ Info.memVT = getPointerTy(DL);
else
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.vol = 0;
case Intrinsic::nvvm_ldg_global_i:
case Intrinsic::nvvm_ldg_global_f:
case Intrinsic::nvvm_ldg_global_p: {
+ auto &DL = I.getModule()->getDataLayout();
Info.opc = ISD::INTRINSIC_W_CHAIN;
if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
- Info.memVT = getPointerTy();
+ Info.memVT = getPointerTy(DL);
else
- Info.memVT = getValueType(I.getType());
+ Info.memVT = getValueType(DL, I.getType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.vol = 0;
/// Used to guide target specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
-bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
// AddrMode - This represents an addressing mode of:
// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
// - [immAddr]
if (AM.BaseGV) {
- if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
- return false;
- return true;
+ return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
}
switch (AM.Scale) {
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
-NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
+NVPTXTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
std::pair<unsigned, const TargetRegisterClass *>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
return SDValue();
}
+static SDValue PerformSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Currently this detects patterns for integer min and max and
+ // lowers them to PTX-specific intrinsics that enable hardware
+ // support.
+
+ const SDValue Cond = N->getOperand(0);
+ if (Cond.getOpcode() != ISD::SETCC) return SDValue();
+
+ const SDValue LHS = Cond.getOperand(0);
+ const SDValue RHS = Cond.getOperand(1);
+ const SDValue True = N->getOperand(1);
+ const SDValue False = N->getOperand(2);
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ const EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
+
+ const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ SDValue Larger; // The larger of LHS and RHS when condition is true.
+ switch (CC) {
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Larger = RHS;
+ break;
+
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ Larger = LHS;
+ break;
+
+ default:
+ return SDValue();
+ }
+ const bool IsMax = (Larger == True);
+ const bool IsSigned = ISD::isSignedIntSetCC(CC);
+
+ unsigned IntrinsicId;
+ if (VT == MVT::i32) {
+ if (IsSigned)
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i;
+ else
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui;
+ } else {
+ assert(VT == MVT::i64);
+ if (IsSigned)
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll;
+ else
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull;
+ }
+
+ SDLoc DL(N);
+ return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
+}
+
enum OperandSignedness {
Signed = 0,
Unsigned,
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
APInt Val = CI->getAPIntValue();
if (LHSSign == Unsigned) {
- if (Val.isIntN(OptSize)) {
- return true;
- }
- return false;
+ return Val.isIntN(OptSize);
} else {
- if (Val.isSignedIntN(OptSize)) {
- return true;
- }
- return false;
+ return Val.isSignedIntN(OptSize);
}
} else {
OperandSignedness RHSSign;
if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
return false;
- if (LHSSign != RHSSign)
- return false;
-
- return true;
+ return LHSSign == RHSSign;
}
}
return PerformSHLCombine(N, DCI, OptLevel);
case ISD::AND:
return PerformANDCombine(N, DCI);
+ case ISD::SELECT:
+ return PerformSELECTCombine(N, DCI);
}
return SDValue();
}
/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
- const DataLayout *TD,
SmallVectorImpl<SDValue> &Results) {
EVT ResVT = N->getValueType(0);
SDLoc DL(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
unsigned Align = LD->getAlignment();
+ auto &TD = DAG.getDataLayout();
unsigned PrefAlign =
- TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
+ TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext()));
if (Align < PrefAlign) {
// This load is not sufficiently aligned, so bail out and let this vector
// load be scalarized. Note that we may still be able to emit smaller
default:
report_fatal_error("Unhandled custom legalization");
case ISD::LOAD:
- ReplaceLoadVector(N, DAG, getDataLayout(), Results);
+ ReplaceLoadVector(N, DAG, Results);
return;
case ISD::INTRINSIC_W_CHAIN:
ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);