From a89c1839c73017079ab8d79b8e248cb86164f794 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 12 May 2015 15:00:46 +0000 Subject: [PATCH] R600/SI: Remove explicit m0 operand from v_interp instructions Instead add m0 as an implicit operand. This helps avoid spills of the m0 register in some cases. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237140 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 3 ++ lib/Target/R600/AMDGPUISelLowering.h | 3 ++ lib/Target/R600/AMDGPUInstrInfo.td | 12 +++++++ lib/Target/R600/SIISelLowering.cpp | 23 ++++++++++++- lib/Target/R600/SIInstrInfo.td | 4 +-- lib/Target/R600/SIInstructions.td | 47 ++++++++++---------------- 6 files changed, 59 insertions(+), 33 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index ceb7e151959..b304488142a 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -2678,6 +2678,9 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(CONST_DATA_PTR) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) + NODE_NAME_CASE(INTERP_MOV) + NODE_NAME_CASE(INTERP_P1) + NODE_NAME_CASE(INTERP_P2) NODE_NAME_CASE(STORE_MSKOR) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break; diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index a6bfbbd1572..8507cb3c745 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -289,6 +289,9 @@ enum NodeType : unsigned { /// Pointer to the start of the shader's constant data. CONST_DATA_PTR, SENDMSG, + INTERP_MOV, + INTERP_P1, + INTERP_P2, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index e251cb48e0d..790f34cea8c 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -223,6 +223,18 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG", SDTypeProfile<0, 1, [SDTCisInt<0>]>, [SDNPHasChain, SDNPInGlue]>; +def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV", + SDTypeProfile<1, 3, [SDTCisFP<0>]>, + [SDNPInGlue]>; + +def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1", + SDTypeProfile<1, 3, [SDTCisFP<0>]>, + [SDNPInGlue, SDNPOutGlue]>; + +def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2", + SDTypeProfile<1, 4, [SDTCisFP<0>]>, + [SDNPInGlue]>; + //===----------------------------------------------------------------------===// // Flow Control Profile Types //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 56214cd07dc..52bf2aeb87d 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -948,7 +948,28 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name. return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1), DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1))); - + case AMDGPUIntrinsic::SI_fs_constant: { + SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3)); + SDValue Glue = M0.getValue(1); + return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, + DAG.getConstant(2, DL, MVT::i32), // P0 + Op.getOperand(1), Op.getOperand(2), Glue); + } + case AMDGPUIntrinsic::SI_fs_interp: { + SDValue IJ = Op.getOperand(4); + SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ, + DAG.getConstant(0, DL, MVT::i32)); + SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ, + DAG.getConstant(1, DL, MVT::i32)); + SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3)); + SDValue Glue = M0.getValue(1); + SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL, + DAG.getVTList(MVT::f32, MVT::Glue), + I, Op.getOperand(1), Op.getOperand(2), Glue); + Glue = SDValue(P1.getNode(), 1); + return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J, + Op.getOperand(1), Op.getOperand(2), Glue); + } default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 608240413ac..4e482b75380 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -1670,8 +1670,8 @@ class VINTRP_Real_vi op, string opName, dag outs, dag ins, SIMCInstr; multiclass VINTRP_m op, string opName, dag outs, dag ins, string asm, - string disableEncoding = "", string constraints = "", - list pattern = []> { + list pattern = [], + string disableEncoding = "", string constraints = ""> { let DisableEncoding = disableEncoding, Constraints = constraints in { def "" : VINTRP_Pseudo ; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 0449ba8cd18..7137baabc39 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1433,28 +1433,36 @@ defm V_RSQ_CLAMP_F64 : VOP1InstSI , "v_rsq_clamp_f64", // VINTRP Instructions //===----------------------------------------------------------------------===// +let Uses = [M0] in { + // FIXME: Specify SchedRW for VINTRP insturctions. defm V_INTERP_P1_F32 : VINTRP_m < 0x00000000, "v_interp_p1_f32", (outs VGPR_32:$dst), - (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]", - "$m0">; + (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr), + "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]", + [(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan), + (i32 imm:$attr)))]>; defm V_INTERP_P2_F32 : VINTRP_m < 0x00000001, "v_interp_p2_f32", (outs VGPR_32:$dst), - (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]", - "$src0,$m0", + (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr), + "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]", + [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan), + (i32 imm:$attr)))], + "$src0", "$src0 = $dst">; defm V_INTERP_MOV_F32 : VINTRP_m < 0x00000002, "v_interp_mov_f32", (outs VGPR_32:$dst), - (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [$m0]", - "$m0">; + (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr), + "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [m0]", + [(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan), + (i32 imm:$attr)))]>; + +} // End Uses = [M0] //===----------------------------------------------------------------------===// // VOP2 Instructions @@ -2724,27 +2732,6 @@ def : Pat < (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm:$imm))) >; -/********** ===================== **********/ -/********** Interpolation Paterns **********/ -/********** ===================== **********/ - -// The value of $params is constant through out the entire kernel. -// We need to use S_MOV_B32 $params, because CSE ignores copies, so -// without it we end up with a lot of redundant moves. - -def : Pat < - (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params), - (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)) ->; - -def : Pat < - (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij), - (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0), - imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)), - (EXTRACT_SUBREG $ij, sub1), - imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)) ->; - /********** ================== **********/ /********** Intrinsic Patterns **********/ /********** ================== **********/ -- 2.34.1