//===----------------------------------------------------------------------===//
#include "NVPTXISelDAGToDAG.h"
+#include "NVPTXUtilities.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
return UsePrecSqrtF32;
} else {
// Otherwise, use sqrt.approx if fast math is enabled
- if (TM.Options.UnsafeFPMath)
- return false;
- else
- return true;
+ return !TM.Options.UnsafeFPMath;
}
}
if (!Src)
return NVPTX::PTXLdStInstCode::GENERIC;
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
+ if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
return NVPTX::PTXLdStInstCode::GENERIC;
}
+static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
+ unsigned CodeAddrSpace, MachineFunction *F) {
+ // To use non-coherent caching, the load has to be from global
+ // memory and we have to prove that the memory area is not written
+ // to anywhere for the duration of the kernel call, not even after
+ // the load.
+ //
+ // To ensure that there are no writes to the memory, we require the
+ // underlying pointer to be a noalias (__restrict) kernel parameter
+ // that is never used for a write. We can only do this for kernel
+ // functions since from within a device function, we cannot know if
+ // there were or will be writes to the memory from the caller - or we
+ // could, but then we would have to do inter-procedural analysis.
+ if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
+ !isKernelFunction(*F->getFunction())) {
+ return false;
+ }
+
+ // We use GetUnderlyingObjects() here instead of
+ // GetUnderlyingObject() mainly because the former looks through phi
+ // nodes while the latter does not. We need to look through phi
+ // nodes to handle pointer induction variables.
+ SmallVector<Value *, 8> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
+ Objs, F->getDataLayout());
+ for (Value *Obj : Objs) {
+ auto *A = dyn_cast<const Argument>(Obj);
+ if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
+ }
+
+ return true;
+}
+
SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IID) {
Opc =
TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
break;
+ case ADDRESS_SPACE_PARAM:
+ Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
+ : NVPTX::nvvm_ptr_gen_to_param;
+ break;
}
return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
}
// Address Space Setting
unsigned int codeAddrSpace = getCodeAddrSpace(LD);
+ if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
+ return SelectLDGLDU(N);
+ }
+
// Volatile Setting
// - .volatile is only availalble for .global and .shared
bool isVolatile = LD->isVolatile();
default:
return nullptr;
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), Addr, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Addr, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
default:
return nullptr;
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
return nullptr;
}
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
if (TM.is64Bit()) {
return nullptr;
}
}
- SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(fromType),
- getI32Imm(fromTypeWidth), N1, Chain };
+ SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
+ getI32Imm(vecType, dl), getI32Imm(fromType, dl),
+ getI32Imm(fromTypeWidth, dl), N1, Chain };
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
}
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
+ if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
+ return SelectLDGLDU(N);
+ }
+
// Volatile Setting
// - .volatile is only availalble for .global and .shared
bool IsVolatile = MemSD->isVolatile();
break;
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Addr, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Addr, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
break;
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
}
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else {
}
}
- SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
- getI32Imm(VecType), getI32Imm(FromType),
- getI32Imm(FromTypeWidth), Op1, Chain };
+ SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
+ getI32Imm(VecType, DL), getI32Imm(FromType, DL),
+ getI32Imm(FromTypeWidth, DL), Op1, Chain };
LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
}
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
default:
return nullptr;
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), Addr, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
+ Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
: SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
default:
return nullptr;
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
+ Offset, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
: SelectADDRri(N2.getNode(), N2, Base, Offset)) {
return nullptr;
}
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), Base, Offset, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
+ Offset, Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
} else {
if (TM.is64Bit()) {
return nullptr;
}
}
- SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
- getI32Imm(vecType), getI32Imm(toType),
- getI32Imm(toTypeWidth), N2, Chain };
+ SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
+ getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
+ getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
+ Chain };
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
}
return nullptr;
}
- StOps.push_back(getI32Imm(IsVolatile));
- StOps.push_back(getI32Imm(CodeAddrSpace));
- StOps.push_back(getI32Imm(VecType));
- StOps.push_back(getI32Imm(ToType));
- StOps.push_back(getI32Imm(ToTypeWidth));
+ StOps.push_back(getI32Imm(IsVolatile, DL));
+ StOps.push_back(getI32Imm(CodeAddrSpace, DL));
+ StOps.push_back(getI32Imm(VecType, DL));
+ StOps.push_back(getI32Imm(ToType, DL));
+ StOps.push_back(getI32Imm(ToTypeWidth, DL));
if (SelectDirectAddr(N2, Addr)) {
switch (N->getOpcode()) {
unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
SmallVector<SDValue, 2> Ops;
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
Ops.push_back(Flag);
- SDNode *Ret =
- CurDAG->getMachineNode(Opc, DL, VTs, Ops);
- return Ret;
+ return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
}
SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
SmallVector<SDValue, 6> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(N->getOperand(i + 2));
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
// Determine target opcode
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(N->getOperand(i + 3));
- Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
- Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
+ Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
Ops.push_back(Flag);
// the selected StoreParam node.
case NVPTXISD::StoreParamU32: {
Opcode = NVPTX::StoreParamI32;
- SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
+ SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
MVT::i32);
SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
MVT::i32, Ops[0], CvtNone);
}
case NVPTXISD::StoreParamS32: {
Opcode = NVPTX::StoreParamI32;
- SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
+ SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
MVT::i32);
SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
MVT::i32, Ops[0], CvtNone);
/// SelectBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction
SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
+ SDLoc DL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Len;
// How many bits are in our mask?
uint64_t NumBits = countTrailingOnes(MaskVal);
- Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
+ Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
// We have a 'srl/and' pair, extract the effective start bit and length
// emitting the srl/and pair.
return NULL;
}
- Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
+ Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
} else {
// Do not handle the case where the shift amount (can be zero if no srl
// was found) is not constant. We could handle this case, but it would
}
Val = AndLHS;
- Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
- Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
+ Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
+ Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
} else if (LHS->getOpcode() == ISD::SHL) {
// Here, we have a pattern like:
//
}
Start =
- CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
+ CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
Len =
CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
- OuterShiftAmt, MVT::i32);
+ OuterShiftAmt, DL, MVT::i32);
if (N->getOpcode() == ISD::SRA) {
// If we have a arithmetic right shift, we need to use the signed bfe
Val, Start, Len
};
- SDNode *Ret =
- CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
-
- return Ret;
+ return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
}
// SelectDirectAddr - Match a direct address for DAG.
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
SDValue base = Addr.getOperand(0);
if (SelectDirectAddr(base, Base)) {
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
+ mvt);
return true;
}
}
SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
- Offset = CurDAG->getTargetConstant(0, mvt);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
return true;
}
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
else
Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
+ mvt);
return true;
}
}
}
if (!Src)
return false;
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (auto *PT = dyn_cast<PointerType>(Src->getType()))
return (PT->getAddressSpace() == spN);
return false;
}
case InlineAsm::Constraint_m: // memory
if (SelectDirectAddr(Op, Op0)) {
OutOps.push_back(Op0);
- OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
return false;
}
if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {