From bad4e7b748f615d19ea0b3b5beebd69decd24be3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 21 Nov 2014 22:31:46 +0000 Subject: [PATCH] R600/SI: Add an s_mov_b32 to patterns which use the M0RegClass We need to use an s_mov_b32 rather than a copy, so that CSE will eliminate redundant moves to the m0 register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222584 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIInstrInfo.cpp | 20 -------------------- lib/Target/R600/SIInstructions.td | 12 ++++++++---- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 8343362d116..1a0010c03dc 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -326,26 +326,6 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned Opcode; const int16_t *SubIndices; - if (AMDGPU::M0 == DestReg) { - // Check if M0 isn't already set to this value - for (MachineBasicBlock::reverse_iterator E = MBB.rend(), - I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) { - - if (!I->definesRegister(AMDGPU::M0)) - continue; - - unsigned Opc = I->getOpcode(); - if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32) - break; - - if (!I->readsRegister(SrcReg)) - break; - - // The copy isn't necessary - return; - } - } - if (AMDGPU::SReg_32RegClass.contains(DestReg)) { assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index e1eb95580ac..00ce9bfcc26 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -2499,17 +2499,21 @@ def : Pat < /********** Interpolation Paterns **********/ /********** ===================== **********/ +// The value of $params is constant throughout the entire kernel. 
+// We need to use S_MOV_B32 $params, because CSE ignores copies, so +// without it we end up with a lot of redundant moves. + def : Pat < (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params), - (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params) + (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)) >; def : Pat < - (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij), + (int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij), (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0), - imm:$attr_chan, imm:$attr, i32:$params), + imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)), (EXTRACT_SUBREG $ij, sub1), - imm:$attr_chan, imm:$attr, $params) + imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)) >; /********** ================== **********/ -- 2.34.1