true, // isNonTemporal
true, // isInvariant
Align); // Alignment
- return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load);
+ SDValue Ops[] = {
+ DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load),
+ Load.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, SL);
}
ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
AnalyzeFormalArguments(CCInfo, Splits);
+ SmallVector<SDValue, 16> Chains;
+
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
- SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
+ SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
Offset, Ins[i].Flags.isSExt());
+ Chains.push_back(Arg.getValue(1));
const PointerType *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
&AMDGPU::SReg_64RegClass);
Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
- InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+ SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ InVals.push_back(Copy);
continue;
}
for (unsigned j = 1; j != NumElements; ++j) {
Reg = ArgLocs[ArgIdx++].getLocReg();
Reg = MF.addLiveIn(Reg, RC);
- Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
+
+ SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ Regs.push_back(Copy);
}
// Fill up the missing vector elements
AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
}
- return Chain;
+
+ if (Chains.empty())
+ return Chain;
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FINode->getIndex();
- return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
+ // A FrameIndex node represents a 32-bit offset into scratch memory. If
+ // the high bit of a frame index offset were to be set, this would mean
+ // that it represented an offset of ~2GB * 64 = ~128GB from the start of the
+ // scratch buffer, with 64 being the number of threads per wave.
+ //
+ // If we know the machine uses less than 128GB of scratch, then we can
+ // amrk the high bit of the FrameIndex node as known zero,
+ // which is important, because it means in most situations we can
+ // prove that values derived from FrameIndex nodes are non-negative.
+ // This enables us to take advantage of more addressing modes when
+ // accessing scratch buffers, since for scratch reads/writes, the register
+ // offset must always be positive.
+
+ SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
+ if (Subtarget->enableHugeScratchBuffer())
+ return TFI;
+
+ return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 31)));
}
/// This transforms the control flow intrinsics to get the branch destination as
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ auto MFI = MF.getInfo<SIMachineFunctionInfo>();
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
case Intrinsic::AMDGPU_read_workdim:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
- false);
+ getImplicitParameterOffset(MFI, GRID_DIM), false);
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
}
}
+static bool isFrameIndexOp(SDValue Op) {
+ if (Op.getOpcode() == ISD::AssertZext)
+ Op = Op.getOperand(0);
+
+ return isa<FrameIndexSDNode>(Op);
+}
+
/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
/// with frame index operands.
/// LLVM assumes that inputs are to these instructions are registers.
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
- if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
+ if (!isFrameIndexOp(Node->getOperand(i))) {
Ops.push_back(Node->getOperand(i));
continue;
}