#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUDiagnosticInfoUnsupported.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/DiagnosticPrinter.h"
using namespace llvm;
-namespace {
-
-/// Diagnostic information for unimplemented or unsupported feature reporting.
-class DiagnosticInfoUnsupported : public DiagnosticInfo {
-private:
- const Twine &Description;
- const Function &Fn;
-
- static int KindID;
-
- static int getKindID() {
- if (KindID == 0)
- KindID = llvm::getNextAvailablePluginDiagnosticKind();
- return KindID;
- }
-
-public:
- DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
- DiagnosticSeverity Severity = DS_Error)
- : DiagnosticInfo(getKindID(), Severity),
- Description(Desc),
- Fn(Fn) { }
-
- const Function &getFunction() const { return Fn; }
- const Twine &getDescription() const { return Description; }
-
- void print(DiagnosticPrinter &DP) const override {
- DP << "unsupported " << getDescription() << " in " << Fn.getName();
- }
-
- static bool classof(const DiagnosticInfo *DI) {
- return DI->getKind() == getKindID();
- }
-};
-
-int DiagnosticInfoUnsupported::KindID = 0;
-}
-
-
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ // This is totally unsupported, just custom lower to produce an error.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+
// We need to custom lower some of the intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Custom);
- setOperationAction(ISD::UDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::ADDC, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
setOperationAction(ISD::ADDE, VT, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
+ setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SELECT_CC);
setSelectIsExpensive(false);
PredictableSelectIsExpensive = false;
- // There are no integer divide instructions, and these expand to a pretty
- // large sequence of instructions.
- setIntDivIsCheap(false);
- setPow2SDivIsCheap(false);
setFsqrtIsCheap(true);
+ // We want to find all load dependencies for long chains of stores to enable
+ // merging into very wide vectors. The problem is with vectors with > 4
+ // elements. MergeConsecutiveStores will attempt to merge these because x8/x16
+ // vectors are a legal type, even though we have to split the loads
+ // usually. When we can more precisely specify load legality per address
+ // space, we should be able to make FindBetterChain/MergeConsecutiveStores
+ // smarter so that they can figure out what to do in 2 iterations without all
+ // N > 4 stores on the same chain.
+ GatherAllAliasesMaxDepth = 16;
+
// FIXME: Need to really handle these.
MaxStoresPerMemcpy = 4096;
MaxStoresPerMemmove = 4096;
// Target Information
//===----------------------------------------------------------------------===//
-MVT AMDGPUTargetLowering::getVectorIdxTy() const {
+MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
return MVT::i32;
}
return true;
}
+bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const {
+ // There are few operations which truly have vector input operands. Any vector
+ // operation is going to involve operations on each component, and a
+ // build_vector will be a copy per element, so it always makes sense to use a
+ // build_vector input in place of the extracted element to avoid a copy into a
+ // super register.
+ //
+ // We should probably only do this if all users are extracts only, but this
+ // should be the common case.
+ return true;
+}
+
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
// Truncate is just accessing a subregister.
return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
}
bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
- const DataLayout *DL = getDataLayout();
- unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType());
- unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType());
+ unsigned SrcSize = Src->getScalarSizeInBits();
+ unsigned DestSize = Dest->getScalarSizeInBits();
return SrcSize == 32 && DestSize == 64;
}
return SDValue();
}
+SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ const Function &Fn = *DAG.getMachineFunction().getFunction();
+
+ DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "dynamic alloca");
+ DAG.getContext()->diagnose(NoDynamicAlloca);
+ return SDValue();
+}
+
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
return Op;
}
const SDValue &InitPtr,
SDValue Chain,
SelectionDAG &DAG) const {
- const DataLayout *TD = getDataLayout();
+ const DataLayout &TD = DAG.getDataLayout();
SDLoc DL(InitPtr);
Type *InitTy = Init->getType();
EVT VT = EVT::getEVT(InitTy);
PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
return DAG.getStore(Chain, DL, DAG.getConstant(*CI, DL, VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(InitTy));
+ MachinePointerInfo(UndefValue::get(PtrTy)), false,
+ false, TD.getPrefTypeAlignment(InitTy));
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
EVT VT = EVT::getEVT(CFP->getType());
PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, DL, VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(CFP->getType()));
+ MachinePointerInfo(UndefValue::get(PtrTy)), false,
+ false, TD.getPrefTypeAlignment(CFP->getType()));
}
if (StructType *ST = dyn_cast<StructType>(InitTy)) {
- const StructLayout *SL = TD->getStructLayout(ST);
+ const StructLayout *SL = TD.getStructLayout(ST);
EVT PtrVT = InitPtr.getValueType();
SmallVector<SDValue, 8> Chains;
else
llvm_unreachable("Unexpected type");
- unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType());
+ unsigned EltSize = TD.getTypeAllocSize(SeqTy->getElementType());
SmallVector<SDValue, 8> Chains;
for (unsigned i = 0; i < NumElements; ++i) {
SDValue Offset = DAG.getConstant(i * EltSize, DL, PtrVT);
EVT VT = EVT::getEVT(InitTy);
PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(InitTy));
+ MachinePointerInfo(UndefValue::get(PtrTy)), false,
+ false, TD.getPrefTypeAlignment(InitTy));
}
Init->dump();
SDValue Op,
SelectionDAG &DAG) const {
- const DataLayout *TD = getDataLayout();
+ const DataLayout &DL = DAG.getDataLayout();
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
unsigned Offset;
if (MFI->LocalMemoryObjects.count(GV) == 0) {
- uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
Offset = MFI->LDSSize;
MFI->LocalMemoryObjects[GV] = Offset;
// XXX: Account for alignment?
}
return DAG.getConstant(Offset, SDLoc(Op),
- getPointerTy(AMDGPUAS::LOCAL_ADDRESS));
+ getPointerTy(DL, AMDGPUAS::LOCAL_ADDRESS));
}
case AMDGPUAS::CONSTANT_ADDRESS: {
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
Type *EltType = GV->getType()->getElementType();
- unsigned Size = TD->getTypeAllocSize(EltType);
- unsigned Alignment = TD->getPrefTypeAlignment(EltType);
+ unsigned Size = DL.getTypeAllocSize(EltType);
+ unsigned Alignment = DL.getPrefTypeAlignment(EltType);
- MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS);
- MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
+ MVT PrivPtrVT = getPointerTy(DL, AMDGPUAS::PRIVATE_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT);
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FIN->getIndex();
- unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ unsigned IgnoredFrameReg;
+ unsigned Offset =
+ TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
Op.getValueType());
}
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
+ // TODO: Should this propagate fast-math-flags?
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
DAG.getConstantFP(1.0f, DL, MVT::f32),
Op.getOperand(1));
return SDValue();
}
-// FIXME: Remove this when combines added to DAGCombiner.
-SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- SelectionDAG &DAG) const {
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
-
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- switch (CCOpcode) {
- case ISD::SETULE:
- case ISD::SETULT: {
- unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETLE:
- case ISD::SETLT: {
- unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETGT:
- case ISD::SETGE: {
- unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETUGE:
- case ISD::SETUGT: {
- unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- default:
- return SDValue();
- }
-}
-
SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
Store->getValue().getValueType().isVector()) {
- return ScalarizeVectorStore(Op, DAG);
+ return SplitVectorStore(Op, DAG);
}
EVT MemVT = Store->getMemoryVT();
// float fb = (float)ib;
SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
+ // TODO: Should this propagate fast-math-flags?
// float fq = native_divide(fa, fb);
SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
// fb = fabs(fb);
fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), VT);
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// int cv = fr >= fb;
SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
const SDValue One = DAG.getConstant(1, SL, MVT::i32);
const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32);
const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
-
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
- EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
DAG.getConstant(32, SL, MVT::i32));
-
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
}
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
DAG.getConstant(1, DL, MVT::i32));
SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+ // TODO: Should this propagate fast-math-flags?
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
MVT::f64);
SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
MVT::f64);
-
+ // TODO: Should this propagate fast-math-flags?
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
SN->getBasePtr(), SN->getMemOperand());
}
+SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ // i64 (shl x, 32) -> (build_pair 0, x)
+
+ // Doing this with moves theoretically helps MI optimizations that understand
+ // copies. 2 v_mov_b32_e32 will have the same code size / cycle count as
+ // v_lshl_b64. In the SALU case, I think this is slightly worse since it
+ // doubles the code size and I'm unsure about cycle count.
+ const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!RHS || RHS->getZExtValue() != 32)
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+
+ SDLoc SL(N);
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Extract low 32-bits.
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
+
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Zero, Lo);
+}
+
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
switch(N->getOpcode()) {
default:
break;
+ case ISD::SHL: {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ break;
+
+ return performShlCombine(N, DCI);
+ }
case ISD::MUL:
return performMulCombine(N, DCI);
case AMDGPUISD::MUL_I24:
if (VT == MVT::f32)
return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
-
- // TODO: Implement min / max Evergreen instructions.
- if (VT == MVT::i32 &&
- Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
- }
}
break;
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->isExactlyValue(1.0);
}
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- return C->isAllOnesValue();
- }
- return false;
+ return isAllOnesConstant(Op);
}
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->getValueAPF().isZero();
}
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- return C->isNullValue();
- }
- return false;
+ return isNullConstant(Op);
}
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
return DAG.getRegister(VirtualRegister, VT);
}
+uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
+ const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
+ uint64_t ArgOffset = MFI->ABIArgOffset;
+ switch (Param) {
+ case GRID_DIM:
+ return ArgOffset;
+ case GRID_OFFSET:
+ return ArgOffset + 4;
+ }
+ llvm_unreachable("unexpected implicit parameter type");
+}
+
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
return 1;
unsigned SignBits = 32 - Width->getZExtValue() + 1;
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!Offset || !Offset->isNullValue())
+ if (!isNullConstant(Op.getOperand(1)))
return SignBits;
// TODO: Could probably figure something out with non-0 offsets.