X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FAMDGPU%2FSIISelLowering.cpp;h=dd818a9ba746687b7c9c0c8c0be65d795f045cce;hb=0219a272ec3c1a3784bd85bd0dc2fa457743ffd6;hp=7bcf3f7bf24841b8bb8d2f6e176f94968e5dbc74;hpb=691b2ff11e8a60ec9200a68c227d0d6358120551;p=oota-llvm.git diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 7bcf3f7bf24..dd818a9ba74 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -452,7 +452,12 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, true, // isNonTemporal true, // isInvariant Align); // Alignment - return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load); + SDValue Ops[] = { + DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load), + Load.getValue(1) + }; + + return DAG.getMergeValues(Ops, SL); } ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD; @@ -570,6 +575,8 @@ SDValue SITargetLowering::LowerFormalArguments( AnalyzeFormalArguments(CCInfo, Splits); + SmallVector Chains; + for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) { const ISD::InputArg &Arg = Ins[i]; @@ -588,8 +595,9 @@ SDValue SITargetLowering::LowerFormalArguments( VA.getLocMemOffset(); // The first 36 bytes of the input buffer contains information about // thread group and global sizes. - SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), + SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain, Offset, Ins[i].Flags.isSExt()); + Chains.push_back(Arg.getValue(1)); const PointerType *ParamTy = dyn_cast(FType->getParamType(Ins[i].getOrigArgIndex())); @@ -615,7 +623,8 @@ SDValue SITargetLowering::LowerFormalArguments( Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, &AMDGPU::SReg_64RegClass); Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass); - InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); + SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT); + InVals.push_back(Copy); continue; } @@ -635,7 +644,9 @@ SDValue SITargetLowering::LowerFormalArguments( for (unsigned j = 1; j != NumElements; ++j) { Reg = ArgLocs[ArgIdx++].getLocReg(); Reg = MF.addLiveIn(Reg, RC); - Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); + + SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT); + Regs.push_back(Copy); } // Fill up the missing vector elements @@ -654,7 +665,11 @@ SDValue SITargetLowering::LowerFormalArguments( AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs())); Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx); } - return Chain; + + if (Chains.empty()) + return Chain; + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( @@ -704,7 +719,7 @@ EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); } -MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &) const { +MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT) const { return MVT::i32; } @@ -928,6 +943,7 @@ SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); + auto MFI = MF.getInfo(); const SIRegisterInfo *TRI = static_cast(Subtarget->getRegisterInfo()); @@ -966,8 +982,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::AMDGPU_read_workdim: return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - MF.getInfo()->ABIArgOffset, - false); + getImplicitParameterOffset(MFI, GRID_DIM), false); case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,