R600/SI: Simplify debug printing

[oota-llvm.git] / lib / Target / R600 / SIFixSGPRCopies.cpp
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp

index 7f07b01f087abebd993d88851ea99648dd64e284..086b44433c2ebf04096e3d4a50a8fe4d0d48cbb8 100644 (file)
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -23,9 +23,9 @@
  ///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
  ///  BB2:
  ///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vrsc>, <BB#1>
-///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc> 
+///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
+///
  ///
-/// 
  /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
  /// code will look like this:
  ///
@@ -43,7 +43,7 @@
  /// Now that the result of the PHI instruction is an SGPR, the register
  /// allocator is now forced to constrain the register class of %vreg3 to
  /// <sgpr> so we end up with final code like this:
-/// 
+///
  /// BB0:
  ///   %vreg0 <sgpr> = SCALAR_INST
  ///    ...
@@ -55,7 +55,7 @@
  ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
  ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
  ///
-/// Now this code contains an illegal copy from a VGPR to an SGPR. 
+/// Now this code contains an illegal copy from a VGPR to an SGPR.
  ///
  /// In order to avoid this problem, this pass searches for PHI instructions
  /// which define a <vsrc> register and constrains its definition class to
@@ -66,29 +66,42 @@
  //===----------------------------------------------------------------------===//
  
  #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
  #include "SIInstrInfo.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
  #include "llvm/Target/TargetMachine.h"
  
  using namespace llvm;
  
+#define DEBUG_TYPE "sgpr-copies"
+
  namespace {
  
  class SIFixSGPRCopies : public MachineFunctionPass {
  
  private:
    static char ID;
-  const TargetRegisterClass *inferRegClass(const TargetRegisterInfo *TRI,
+  const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
                                             const MachineRegisterInfo &MRI,
-                                           unsigned Reg) const;
+                                           unsigned Reg,
+                                           unsigned SubReg) const;
+  const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
+                                                 const MachineRegisterInfo &MRI,
+                                                 unsigned Reg,
+                                                 unsigned SubReg) const;
+  bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
+                        const MachineRegisterInfo &MRI) const;
  
  public:
    SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
  
-  virtual bool runOnMachineFunction(MachineFunction &MF);
+  bool runOnMachineFunction(MachineFunction &MF) override;
  
-  const char *getPassName() const {
+  const char *getPassName() const override {
      return "SI Fix SGPR copies";
    }
  
@@ -102,25 +115,41 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
    return new SIFixSGPRCopies(tm);
  }
  
-/// This functions walks the use/def chains starting with the definition of
-/// \p Reg until it finds an Instruction that isn't a COPY returns
-/// the register class of that instruction.
-const TargetRegisterClass *SIFixSGPRCopies::inferRegClass(
-                                                 const TargetRegisterInfo *TRI,
+static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
+  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    if (!MI.getOperand(i).isReg() ||
+        !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
+      continue;
+
+    if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
+      return true;
+  }
+  return false;
+}
+
+/// Walks the uses of \p Reg and, for each COPY, recursively narrows the
+/// register class to the common subclass inferred from the COPY destination.
+/// \return The resulting register class for the \p SubReg part of \p Reg.
+const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
+                                                 const SIRegisterInfo *TRI,
                                                   const MachineRegisterInfo &MRI,
-                                                 unsigned Reg) const {
+                                                 unsigned Reg,
+                                                 unsigned SubReg) const {
    // The Reg parameter to the function must always be defined by either a PHI
    // or a COPY, therefore it cannot be a physical register.
    assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
           "Reg cannot be a physical register");
  
    const TargetRegisterClass *RC = MRI.getRegClass(Reg);
-  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
-                                         E = MRI.use_end(); I != E; ++I) {
+  RC = TRI->getSubRegClass(RC, SubReg);
+  for (MachineRegisterInfo::use_instr_iterator
+       I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) {
      switch (I->getOpcode()) {
      case AMDGPU::COPY:
-      RC = TRI->getCommonSubClass(RC, inferRegClass(TRI, MRI,
-                                                    I->getOperand(0).getReg()));
+      RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
+                                  I->getOperand(0).getReg(),
+                                  I->getOperand(0).getSubReg()));
        break;
      }
    }
@@ -128,9 +157,49 @@ const TargetRegisterClass *SIFixSGPRCopies::inferRegClass(
    return RC;
  }
  
+const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
+                                                 const SIRegisterInfo *TRI,
+                                                 const MachineRegisterInfo &MRI,
+                                                 unsigned Reg,
+                                                 unsigned SubReg) const {
+  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+    const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
+    return TRI->getSubRegClass(RC, SubReg);
+  }
+  MachineInstr *Def = MRI.getVRegDef(Reg);
+  if (Def->getOpcode() != AMDGPU::COPY) {
+    return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
+  }
+
+  return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
+                                   Def->getOperand(1).getSubReg());
+}
+
+bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
+                                      const SIRegisterInfo *TRI,
+                                      const MachineRegisterInfo &MRI) const {
+
+  unsigned DstReg = Copy.getOperand(0).getReg();
+  unsigned SrcReg = Copy.getOperand(1).getReg();
+  unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
+  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+  const TargetRegisterClass *SrcRC;
+
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+      DstRC == &AMDGPU::M0RegRegClass ||
+      MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
+    return false;
+
+  SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
+  return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
+}
+
  bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
    MachineRegisterInfo &MRI = MF.getRegInfo();
-  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+  const SIRegisterInfo *TRI =
+      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
    for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                    BI != BE; ++BI) {
  
@@ -138,13 +207,121 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
                                                        I != E; ++I) {
        MachineInstr &MI = *I;
-      if (MI.getOpcode() != AMDGPU::PHI) {
-        continue;
+      if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
+        DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
+        DEBUG(MI.print(dbgs()));
+        TII->moveToVALU(MI);
+
+      }
+
+      switch (MI.getOpcode()) {
+      default: continue;
+      case AMDGPU::PHI: {
+        DEBUG(dbgs() << " Fixing PHI:\n");
+        DEBUG(MI.print(dbgs()));
+
+        for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
+          unsigned Reg = MI.getOperand(i).getReg();
+          const TargetRegisterClass *RC = inferRegClassFromDef(TRI, MRI, Reg,
+                                                  MI.getOperand(0).getSubReg());
+          MRI.constrainRegClass(Reg, RC);
+        }
+        unsigned Reg = MI.getOperand(0).getReg();
+        const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
+                                                  MI.getOperand(0).getSubReg());
+        if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
+          MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
+        }
+
+        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
+          break;
+
+        // If a PHI node defines an SGPR and any of its operands are VGPRs,
+        // then we need to move it to the VALU.
+        //
+        // Also, if a PHI node defines an SGPR and has all SGPR operands
+        // we must move it to the VALU, because the SGPR operands will
+        // all end up being assigned the same register, which means
+        // there is a potential for a conflict if different threads take
+        // different control flow paths.
+        //
+        // For Example:
+        //
+        // sgpr0 = def;
+        // ...
+        // sgpr1 = def;
+        // ...
+        // sgpr2 = PHI sgpr0, sgpr1
+        // use sgpr2;
+        //
+        // Will Become:
+        //
+        // sgpr2 = def;
+        // ...
+        // sgpr2 = def;
+        // ...
+        // use sgpr2
+        //
+        // FIXME: This is OK if the branching decision is made based on an
+        // SGPR value.
+        bool SGPRBranch = false;
+
+        // The one exception to this rule is when one of the operands
+        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
+        // instruction.  In this case, we know the program will
+        // never enter the second block (the loop) without entering
+        // the first block (where the condition is computed), so there
+        // is no chance for values to be over-written.
+
+        bool HasBreakDef = false;
+        for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
+          unsigned Reg = MI.getOperand(i).getReg();
+          if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
+            TII->moveToVALU(MI);
+            break;
+          }
+          MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
+          assert(DefInstr);
+          switch(DefInstr->getOpcode()) {
+
+          case AMDGPU::SI_BREAK:
+          case AMDGPU::SI_IF_BREAK:
+          case AMDGPU::SI_ELSE_BREAK:
+          // If we see a PHI instruction that defines an SGPR, then that PHI
+          // instruction has already been considered and should have
+          // a *_BREAK as an operand.
+          case AMDGPU::PHI:
+            HasBreakDef = true;
+            break;
+          }
+        }
+
+        if (!SGPRBranch && !HasBreakDef)
+          TII->moveToVALU(MI);
+        break;
+      }
+      case AMDGPU::REG_SEQUENCE: {
+        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
+            !hasVGPROperands(MI, TRI))
+          continue;
+
+        DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
+
+        TII->moveToVALU(MI);
+        break;
+      }
+      case AMDGPU::INSERT_SUBREG: {
+        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
+        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
+        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
+        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
+        if (TRI->isSGPRClass(DstRC) &&
+            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
+          DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
+          TII->moveToVALU(MI);
+        }
+        break;
        }
-      unsigned Reg = MI.getOperand(0).getReg();
-      const TargetRegisterClass *RC = inferRegClass(TRI, MRI, Reg);
-      if (TRI->getCommonSubClass(RC, &AMDGPU::VReg_32RegClass)) {
-        MRI.constrainRegClass(Reg, &AMDGPU::VReg_32RegClass);
        }
      }
    }