From 7f288b455eebcb61c5ecbade9323e4f610068d2a Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 1 Aug 2014 21:50:47 +0000 Subject: [PATCH] R600: Move code for generating REGISTER_LOAD into R600ISelLowering.cpp SI doesn't use REGISTER_LOAD anymore, but it was still hitting this code path for 8-bit and 16-bit private loads. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214566 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 38 +----------------------- lib/Target/R600/R600ISelLowering.cpp | 41 ++++++++++++++++++++++++++ test/CodeGen/R600/private-memory.ll | 3 +- 3 files changed, 44 insertions(+), 38 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index ea6071bc245..881775eaa31 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1285,43 +1285,7 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(Ops, DL); } - if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS || - ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32)) - return SDValue(); - - - SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(), - DAG.getConstant(2, MVT::i32)); - SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(), - Load->getChain(), Ptr, - DAG.getTargetConstant(0, MVT::i32), - Op.getOperand(2)); - SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, - Load->getBasePtr(), - DAG.getConstant(0x3, MVT::i32)); - SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, - DAG.getConstant(3, MVT::i32)); - - Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt); - - EVT MemEltVT = MemVT.getScalarType(); - if (ExtType == ISD::SEXTLOAD) { - SDValue MemEltVTNode = DAG.getValueType(MemEltVT); - - SDValue Ops[] = { - DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode), - Load->getChain() - }; - - return DAG.getMergeValues(Ops, DL); - } - - SDValue Ops[] = { - DAG.getZeroExtendInReg(Ret, DL, MemEltVT), - Load->getChain() - }; - - return DAG.getMergeValues(Ops, DL); + return SDValue(); } SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index b16d53fd015..eb16fcc5458 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1514,6 +1514,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const EVT VT = Op.getValueType(); SDLoc DL(Op); LoadSDNode *LoadNode = cast(Op); + ISD::LoadExtType ExtType = LoadNode->getExtensionType(); + EVT MemVT = LoadNode->getMemoryVT(); SDValue Chain = Op.getOperand(0); SDValue Ptr = Op.getOperand(1); SDValue LoweredLoad; @@ -1527,6 +1529,45 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(Ops, DL); } + // Handle ext private loads + if (LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS && + ExtType != ISD::NON_EXTLOAD && LoadNode->getMemoryVT().bitsLT(MVT::i32)) { + + + SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, LoadNode->getBasePtr(), + DAG.getConstant(2, MVT::i32)); + SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(), + LoadNode->getChain(), Ptr, + DAG.getTargetConstant(0, MVT::i32), + Op.getOperand(2)); + SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, + LoadNode->getBasePtr(), + DAG.getConstant(0x3, MVT::i32)); + SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, + DAG.getConstant(3, MVT::i32)); + + Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt); + + EVT MemEltVT = MemVT.getScalarType(); + if (ExtType == ISD::SEXTLOAD) { + SDValue MemEltVTNode = DAG.getValueType(MemEltVT); + + SDValue Ops[] = { + DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode), + LoadNode->getChain() + }; + + return DAG.getMergeValues(Ops, DL); + } + + SDValue Ops[] = { + DAG.getZeroExtendInReg(Ret, DL, MemEltVT), + LoadNode->getChain() + }; + + return DAG.getMergeValues(Ops, DL); + } + // Lower loads constant address space global variable loads if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && isa( diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index 3ce8c2cb03d..124d9fa6450 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -118,7 +118,8 @@ for.end: ; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}} ; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}} -; SI_PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v{{[0-9]+}}, s{{[0-9]+}} +; SI-PROMOTE-NOT: MOVREL +; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v{{[0-9]+}} + s{{[0-9]+}} define void @short_array(i32 addrspace(1)* %out, i32 %index) { entry: %0 = alloca [2 x i16] -- 2.34.1