From: Christian Konig Date: Wed, 10 Apr 2013 08:39:16 +0000 (+0000) Subject: R600/SI: dynamical figure out the reg class of MIMG X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=4d0e8a8a3e2e5b98f598acad4d57452b99d52e74;p=oota-llvm.git R600/SI: dynamical figure out the reg class of MIMG Depending on the number of bits set in the writemask. Signed-off-by: Christian König Reviewed-by: Michel Dänzer git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179166 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index f6001445f4b..dacb03325bc 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -107,6 +107,9 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { } else if (AMDGPU::VReg_64RegClass.contains(reg)) { isSGPR = false; width = 2; + } else if (AMDGPU::VReg_96RegClass.contains(reg)) { + isSGPR = false; + width = 3; } else if (AMDGPU::SReg_128RegClass.contains(reg)) { isSGPR = true; width = 4; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index d214135eae7..0147464ab34 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -720,7 +720,7 @@ unsigned SubIdx2Lane(unsigned Idx) { void SITargetLowering::adjustWritemask(MachineSDNode *&Node, SelectionDAG &DAG) const { SDNode *Users[4] = { }; - unsigned Writemask = 0; + unsigned Writemask = 0, Lane = 0; // Try to figure out the used register components for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end(); @@ -731,7 +731,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) return; - unsigned Lane = SubIdx2Lane(I->getConstantOperandVal(1)); + Lane = SubIdx2Lane(I->getConstantOperandVal(1)); // Abort if we have more than one user per component if (Users[Lane]) @@ -752,6 +752,16 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, Ops.push_back(Node->getOperand(i)); Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + // If we only got one lane, replace it with a copy + if (Writemask == (1U << Lane)) { + SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32); + SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + DebugLoc(), MVT::f32, + SDValue(Node, 0), RC); + DAG.ReplaceAllUsesWith(Users[Lane], Copy); + return; + } + // Update the users of the node with the new indices for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) { @@ -780,3 +790,28 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, return foldOperands(Node, DAG); } + +/// \brief Assign the register class depending on the number of +/// bits set in the writemask +void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const { + if (AMDGPU::isMIMG(MI->getOpcode()) == -1) + return; + + unsigned VReg = MI->getOperand(0).getReg(); + unsigned Writemask = MI->getOperand(1).getImm(); + unsigned BitsSet = 0; + for (unsigned i = 0; i < 4; ++i) + BitsSet += Writemask & (1 << i) ? 1 : 0; + + const TargetRegisterClass *RC; + switch (BitsSet) { + default: return; + case 1: RC = &AMDGPU::VReg_32RegClass; break; + case 2: RC = &AMDGPU::VReg_64RegClass; break; + case 3: RC = &AMDGPU::VReg_96RegClass; break; + } + + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MRI.setRegClass(VReg, RC); +} diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index e8a8ee337af..de637bea37b 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -53,6 +53,8 @@ public: virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; + virtual void AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const; int32_t analyzeImmediate(const SDNode *N) const; }; diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 0bfcef562f0..9a04c609b6c 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -58,6 +58,10 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0 }; + const int16_t Sub0_2[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0 + }; + const int16_t Sub0_1[] = { AMDGPU::sub0, AMDGPU::sub1, 0 }; @@ -125,6 +129,11 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opcode = AMDGPU::V_MOV_B32_e32; SubIndices = Sub0_1; + } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) { + assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); + Opcode = AMDGPU::V_MOV_B32_e32; + SubIndices = Sub0_2; + } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || AMDGPU::SReg_128RegClass.contains(SrcReg)); diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 7e2e4dccabd..a97dbaae778 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -346,6 +346,7 @@ class MIMG_Load_Helper op, string asm> : MIMG < []> { let mayLoad = 1; let mayStore = 0; + let hasPostISelHook = 1; } //===----------------------------------------------------------------------===// @@ -379,6 +380,7 @@ def getCommuteOrig : InstrMapping { let ValueCols = [["1"]]; } +// Test if the supplied opcode is an MIMG instruction def isMIMG : InstrMapping { let FilterClass = "MIMG_Load_Helper"; let RowFields = ["Inst"]; diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 2c101073c3d..244d4c00348 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -94,6 +94,12 @@ def VGPR_64 : RegisterTuples<[sub0, sub1], [(add (trunc VGPR_32, 255)), (add (shl VGPR_32, 1))]>; +// VGPR 96-bit registers +def VGPR_96 : RegisterTuples<[sub0, sub1, sub2], + [(add (trunc VGPR_32, 254)), + (add (shl VGPR_32, 1)), + (add (shl VGPR_32, 2))]>; + // VGPR 128-bit registers def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3], [(add (trunc VGPR_32, 253)), @@ -162,6 +168,10 @@ def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>; def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>; +def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> { + let Size = 96; +} + def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>; diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll index d8031c38a6f..de06354a564 100644 --- a/test/CodeGen/R600/llvm.SI.sample.ll +++ b/test/CodeGen/R600/llvm.SI.sample.ll @@ -1,21 +1,21 @@ ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 3 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 2 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 1 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 4 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 8 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 5 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 9 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 6 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 10 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 12 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11 -;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14 -;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 8 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11 +;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14 +;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0