R600/SI: dynamically figure out the reg class of MIMG

author Christian Konig <christian.koenig@amd.com>

Wed, 10 Apr 2013 08:39:16 +0000 (08:39 +0000)

committer Christian Konig <christian.koenig@amd.com>

Wed, 10 Apr 2013 08:39:16 +0000 (08:39 +0000)
author Christian Konig <christian.koenig@amd.com>
Wed, 10 Apr 2013 08:39:16 +0000 (08:39 +0000)
committer Christian Konig <christian.koenig@amd.com>
Wed, 10 Apr 2013 08:39:16 +0000 (08:39 +0000)
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp

index f6001445f4b354d08918ff34c66e13168eaf87dc..dacb03325bc71a094c29506cbd787576d6b5ac59 100644 (file)
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -107,6 +107,9 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
          } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
            isSGPR = false;
            width = 2;
+        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 3;
          } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
            isSGPR = true;
            width = 4;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp

index d214135eae7b5611a41e2e0ed9a5395b19e4e810..0147464ab34e80f5c41009f953f2eac85c2fd1fa 100644 (file)
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -720,7 +720,7 @@ unsigned SubIdx2Lane(unsigned Idx) {
  void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
                                         SelectionDAG &DAG) const {
    SDNode *Users[4] = { };
-  unsigned Writemask = 0;
+  unsigned Writemask = 0, Lane = 0;
  
    // Try to figure out the used register components
    for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
@@ -731,7 +731,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
          I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
        return;
  
-    unsigned Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+    Lane = SubIdx2Lane(I->getConstantOperandVal(1));
  
      // Abort if we have more than one user per component
      if (Users[Lane])
@@ -752,6 +752,16 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
      Ops.push_back(Node->getOperand(i));
    Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
  
+  // If we only got one lane, replace it with a copy
+  if (Writemask == (1U << Lane)) {
+    SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
+    SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                      DebugLoc(), MVT::f32,
+                                      SDValue(Node, 0), RC);
+    DAG.ReplaceAllUsesWith(Users[Lane], Copy);
+    return;
+  }
+
    // Update the users of the node with the new indices
    for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
  
@@ -780,3 +790,28 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
  
    return foldOperands(Node, DAG);
  }
+
+/// \brief For MIMG instructions, set the register class of operand 0 to
+/// VReg_32/64/96 according to how many of the low four writemask bits are set.
+void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+                                                     SDNode *Node) const { // Node is not used here
+  if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
+    return; // opcode is not in the isMIMG mapping: nothing to adjust
+
+  unsigned VReg = MI->getOperand(0).getReg(); // register whose class is narrowed below
+  unsigned Writemask = MI->getOperand(1).getImm(); // immediate operand 1, used as the writemask
+  unsigned BitsSet = 0;
+  for (unsigned i = 0; i < 4; ++i) // count the set bits among mask bits 0..3
+    BitsSet += Writemask & (1 << i) ? 1 : 0;
+
+  const TargetRegisterClass *RC;
+  switch (BitsSet) {
+  default: return; // 0 or 4 bits set: leave the register class untouched
+  case 1:  RC = &AMDGPU::VReg_32RegClass; break;
+  case 2:  RC = &AMDGPU::VReg_64RegClass; break;
+  case 3:  RC = &AMDGPU::VReg_96RegClass; break;
+  }
+
+  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); // block -> function -> reg info
+  MRI.setRegClass(VReg, RC);
+}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h

index e8a8ee337af1b2f663e3d263d6b638f5fcf2fd08..de637bea37b9350a9e4baef57688d01adeffc48f 100644 (file)
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -53,6 +53,8 @@ public:
    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
    virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+  virtual void AdjustInstrPostInstrSelection(MachineInstr *MI,
+                                             SDNode *Node) const;
  
    int32_t analyzeImmediate(const SDNode *N) const;
  };
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp

index 0bfcef562f045d67d2877b997da47bed4e06efaf..9a04c609b6c91c51c92e7ec08bd5fb5af381f345 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -58,6 +58,10 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
      AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
    };
  
+  const int16_t Sub0_2[] = {
+    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
+  };
+
    const int16_t Sub0_1[] = {
      AMDGPU::sub0, AMDGPU::sub1, 0
    };
@@ -125,6 +129,11 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
      Opcode = AMDGPU::V_MOV_B32_e32;
      SubIndices = Sub0_1;
  
+  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
+    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
+    Opcode = AMDGPU::V_MOV_B32_e32;
+    SubIndices = Sub0_2;
+
    } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
      assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
            AMDGPU::SReg_128RegClass.contains(SrcReg));
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td

index 7e2e4dccabd9ed258f18421e2f62effca31e419f..a97dbaae77800ccc0a67a7920bf4158f27fb4002 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -346,6 +346,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
    []> {
    let mayLoad = 1;
    let mayStore = 0;
+  let hasPostISelHook = 1;
  }
  
  //===----------------------------------------------------------------------===//
@@ -379,6 +380,7 @@ def getCommuteOrig : InstrMapping {
    let ValueCols = [["1"]];
  }
  
+// Test if the supplied opcode is an MIMG instruction
  def isMIMG : InstrMapping {
    let FilterClass = "MIMG_Load_Helper";
    let RowFields = ["Inst"];
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td

index 2c101073c3daacc1341c63a14de187c6d06004ed..244d4c00348d3b5a45532811194ab1e99f2247dd 100644 (file)
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -94,6 +94,12 @@ def VGPR_64 : RegisterTuples<[sub0, sub1],
                               [(add (trunc VGPR_32, 255)),
                                (add (shl VGPR_32, 1))]>;
  
+// VGPR 96-bit registers: tuples of three VGPR_32 registers (sub0, sub1, sub2)
+def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
+                             [(add (trunc VGPR_32, 254)),
+                              (add (shl VGPR_32, 1)),
+                              (add (shl VGPR_32, 2))]>;
+
  // VGPR 128-bit registers
  def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
                                [(add (trunc VGPR_32, 253)),
@@ -162,6 +168,10 @@ def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
  
  def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
  
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
+  let Size = 96; // NOTE(review): presumably explicit because 'untyped' carries no size - confirm
+}
+
  def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
  
  def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll

index d8031c38a6f6debe77e1a3c4267b5139e69b752b..de06354a5646774782d002f23b04698a6ea10469 100644 (file)
--- a/test/CodeGen/R600/llvm.SI.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -1,21 +1,21 @@
  ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
  
  ;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 3
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 2
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 1
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 4
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 8
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 5
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 9
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 6
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 10
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 12
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
-;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
-;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 8
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
  
  define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
     %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
author	Christian Konig <christian.koenig@amd.com>
	Wed, 10 Apr 2013 08:39:16 +0000 (08:39 +0000)
committer	Christian Konig <christian.koenig@amd.com>
	Wed, 10 Apr 2013 08:39:16 +0000 (08:39 +0000)
lib/Target/R600/AMDGPUAsmPrinter.cpp		patch \| blob \| history
lib/Target/R600/SIISelLowering.cpp		patch \| blob \| history
lib/Target/R600/SIISelLowering.h		patch \| blob \| history
lib/Target/R600/SIInstrInfo.cpp		patch \| blob \| history
lib/Target/R600/SIInstrInfo.td		patch \| blob \| history
lib/Target/R600/SIRegisterInfo.td		patch \| blob \| history
test/CodeGen/R600/llvm.SI.sample.ll		patch \| blob \| history