R600/SI: Partially move operand legalization to post-isel hook.

author Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 26 Sep 2014 17:54:59 +0000 (17:54 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Fri, 26 Sep 2014 17:54:59 +0000 (17:54 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 26 Sep 2014 17:54:59 +0000 (17:54 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Fri, 26 Sep 2014 17:54:59 +0000 (17:54 +0000)
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp

index 8a3ab46ad22eff34933f7d191c5390e6442f049a..417356d800f9cfbcd8bdf5cc46a25687b7cd0171 100644 (file)
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -1648,57 +1648,6 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
    return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
  }
  
-/// \brief Make sure that we don't exeed the number of allowed scalars
-void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
-                                       unsigned RegClass,
-                                       bool &ScalarSlotUsed) const {
-
-  if (!isVSrc(RegClass))
-    return;
-
-  // First map the operands register class to a destination class
-  switch (RegClass) {
-    case AMDGPU::VSrc_32RegClassID:
-    case AMDGPU::VCSrc_32RegClassID:
-      RegClass = AMDGPU::VReg_32RegClassID;
-      break;
-    case AMDGPU::VSrc_64RegClassID:
-    case AMDGPU::VCSrc_64RegClassID:
-      RegClass = AMDGPU::VReg_64RegClassID;
-      break;
-   default:
-    llvm_unreachable("Unknown vsrc reg class");
-  }
-
-  // Nothing to do if they fit naturally
-  if (fitsRegClass(DAG, Operand, RegClass))
-    return;
-
-  // If the scalar slot isn't used yet use it now
-  if (!ScalarSlotUsed) {
-    ScalarSlotUsed = true;
-    return;
-  }
-
-  // This is a conservative aproach. It is possible that we can't determine the
-  // correct register class and copy too often, but better safe than sorry.
-
-  SDNode *Node;
-  // We can't use COPY_TO_REGCLASS with FrameIndex arguments.
-  if (isa<FrameIndexSDNode>(Operand) ||
-      isa<GlobalAddressSDNode>(Operand)) {
-    unsigned Opcode = Operand.getValueType() == MVT::i32 ?
-                      AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
-    Node = DAG.getMachineNode(Opcode, SDLoc(), Operand.getValueType(),
-                              Operand);
-  } else {
-    SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
-    Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
-                              Operand.getValueType(), Operand, RC);
-  }
-  Operand = SDValue(Node, 0);
-}
-
  /// \returns true if \p Node's operands are different from the SDValue list
  /// \p Ops
  static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
@@ -1710,8 +1659,9 @@ static bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
    return false;
  }
  
-/// \brief Try to commute instructions and insert copies in order to satisfy the
-/// operand constraints.
+/// TODO: This needs to be removed. Its current primary purpose is to fold
+/// immediates into operands when legal. The legalization parts are redundant
+/// with SIInstrInfo::legalizeOperands which is called in a post-isel hook.
  SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
                                             SelectionDAG &DAG) const {
    // Original encoding (either e32 or e64)
@@ -1784,11 +1734,9 @@ SDNode *SITargetLowering::legalizeOperands(MachineSDNode *Node,
      // Is this a VSrc or SSrc operand?
      unsigned RegClass = Desc->OpInfo[Op].RegClass;
      if (isVSrc(RegClass) || isSSrc(RegClass)) {
-      // Try to fold the immediates
-      if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
-        // Folding didn't work, make sure we don't hit the SReg limit.
-        ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
-      }
+      // Try to fold the immediates. If this ends up with multiple constant bus
+      // uses, it will be legalized later.
+      foldImm(Ops[i], Immediate, ScalarSlotUsed);
        continue;
      }
  
@@ -1938,6 +1886,8 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
    const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
        getTargetMachine().getSubtargetImpl()->getInstrInfo());
  
+  TII->legalizeOperands(MI);
+
    if (TII->isMIMG(MI->getOpcode())) {
      unsigned VReg = MI->getOperand(0).getReg();
      unsigned Writemask = MI->getOperand(1).getImm();
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h

index f953b482fdd6c3a883e839ced6f861974ae7f635..9cf4dbcb2fd27cdc71fd17ea849ef61d5eb6a99c 100644 (file)
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -47,8 +47,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
                                                  const SDValue &Op) const;
    bool fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
                      unsigned RegClass) const;
-  void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
-                       unsigned RegClass, bool &ScalarSlotUsed) const;
  
    SDNode *legalizeOperands(MachineSDNode *N, SelectionDAG &DAG) const;
    void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td

index 8369a0c86870784529cb698eeef5f8810b5d61d1..c1fc4b3a9741ae8d03ff1e99490c82bda1167f8b 100644 (file)
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -42,6 +42,10 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
    let TSFlags{10} = MUBUF;
    let TSFlags{11} = MTBUF;
    let TSFlags{12} = FLAT;
+
+  // Most instructions require adjustments after selection to satisfy
+  // operand requirements.
+  let hasPostISelHook = 1;
  }
  
  class Enc32 {
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp

index fb5dad138456eb4b087f72302f996f0f9f457792..ed8d9793af77a64c3632b51d8b27ec5348d81c06 100644 (file)
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -1394,22 +1394,41 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
  
      int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
  
-    // First we need to consider the instruction's operand requirements before
-    // legalizing. Some operands are required to be SGPRs, but we are still
-    // bound by the constant bus requirement to only use one.
-    //
-    // If the operand's class is an SGPR, we can never move it.
-    for (unsigned i = 0; i < 3; ++i) {
-      int Idx = VOP3Idx[i];
-      if (Idx == -1)
+    for (const MachineOperand &MO : MI->implicit_operands()) {
+      // We only care about reads.
+      if (MO.isDef())
+        continue;
+
+      if (MO.getReg() == AMDGPU::VCC) {
+        SGPRReg = AMDGPU::VCC;
          break;
+      }
  
-      if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
-        SGPRReg = MI->getOperand(Idx).getReg();
+      if (MO.getReg() == AMDGPU::FLAT_SCR) {
+        SGPRReg = AMDGPU::FLAT_SCR;
          break;
        }
      }
  
+
+    if (SGPRReg == AMDGPU::NoRegister) {
+      // First we need to consider the instruction's operand requirements before
+      // legalizing. Some operands are required to be SGPRs, but we are still
+      // bound by the constant bus requirement to only use one.
+      //
+      // If the operand's class is an SGPR, we can never move it.
+      for (unsigned i = 0; i < 3; ++i) {
+        int Idx = VOP3Idx[i];
+        if (Idx == -1)
+          break;
+
+        if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
+          SGPRReg = MI->getOperand(Idx).getReg();
+          break;
+        }
+      }
+    }
+
      for (unsigned i = 0; i < 3; ++i) {
        int Idx = VOP3Idx[i];
        if (Idx == -1)
diff --git a/test/CodeGen/R600/fneg.f64.ll b/test/CodeGen/R600/fneg.f64.ll

index 61d95135a4ac7b62e19b72d68f4cc3cb364938d5..f0b341b7a6b7679b05d71a460a5714734ba99eba 100644 (file)
--- a/test/CodeGen/R600/fneg.f64.ll
+++ b/test/CodeGen/R600/fneg.f64.ll
@@ -50,7 +50,7 @@ define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
  ; SI-LABEL: @fneg_fold
  ; SI: S_LOAD_DWORDX2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
  ; SI-NOT: XOR
-; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], {{v\[[0-9]+:[0-9]+\]}}
+; SI: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
  define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
    %fsub = fsub double -0.0, %in
    %fmul = fmul double %fsub, %in
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll

index 72cd15c7d70521d11b9315cb9691d42c970f0ff8..8631301b49c0067e9ff08f5c0e557c104cdeeb8f 100644 (file)
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -59,7 +59,7 @@ define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
  ; FUNC-LABEL: @fneg_fold
  ; SI: S_LOAD_DWORD [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
  ; SI-NOT: XOR
-; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], v{{[0-9]+}}
+; SI: V_MUL_F32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
  define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
    %fsub = fsub float -0.0, %in
    %fmul = fmul float %fsub, %in
diff --git a/test/CodeGen/R600/rotl.ll b/test/CodeGen/R600/rotl.ll

index 8c86fb5aeabcd6236147c927e60c545da7dd4d4c..a9dee8ca78c91e5d0428941fa171fdecfd455d9a 100644 (file)
--- a/test/CodeGen/R600/rotl.ll
+++ b/test/CodeGen/R600/rotl.ll
@@ -8,7 +8,7 @@
  
  ; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
  ; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
-; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]]
+; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
  define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
  entry:
    %0 = shl i32 %x, %y
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll

index cc942c10a91e1424fccad0ae578f869e91ca953d..eb1176f58550d0e050fc74b9e1dd6bb6e6176521 100644 (file)
--- a/test/CodeGen/R600/seto.ll
+++ b/test/CodeGen/R600/seto.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: @main
-;CHECK: V_CMP_O_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
+; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
  
+; CHECK-LABEL: @main
+; CHECK: V_CMP_O_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
  define void @main(float %p) {
  main_body:
    %c = fcmp oeq float %p, %p
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll

index 33007fc754b89330601962bfee03973801b1cb3e..a78e8e6b45ba9b984830256532c3010a50846ff2 100644 (file)
--- a/test/CodeGen/R600/setuo.ll
+++ b/test/CodeGen/R600/setuo.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: @main
-;CHECK: V_CMP_U_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
+; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
  
+; CHECK-LABEL: @main
+; CHECK: V_CMP_U_F32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: V_CNDMASK_B32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
  define void @main(float %p) {
  main_body:
    %c = fcmp une float %p, %p
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 26 Sep 2014 17:54:59 +0000 (17:54 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Fri, 26 Sep 2014 17:54:59 +0000 (17:54 +0000)
lib/Target/R600/SIISelLowering.cpp		patch \| blob \| history
lib/Target/R600/SIISelLowering.h		patch \| blob \| history
lib/Target/R600/SIInstrFormats.td		patch \| blob \| history
lib/Target/R600/SIInstrInfo.cpp		patch \| blob \| history
test/CodeGen/R600/fneg.f64.ll		patch \| blob \| history
test/CodeGen/R600/fneg.ll		patch \| blob \| history
test/CodeGen/R600/rotl.ll		patch \| blob \| history
test/CodeGen/R600/seto.ll		patch \| blob \| history
test/CodeGen/R600/setuo.ll		patch \| blob \| history