lib/Target/AMDGPU/SIFixSGPRCopies.cpp

   1 //===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// Copies from VGPR to SGPR registers are illegal and the register coalescer
  12 /// will sometimes generate these illegal copies in situations like this:
  13 ///
  14 ///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
  15 ///
  16 /// BB0:
  17 ///   %vreg0 <sgpr> = SCALAR_INST
  18 ///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
  19 ///    ...
  20 ///    BRANCH %cond BB1, BB2
  21 ///  BB1:
  22 ///    %vreg2 <vgpr> = VECTOR_INST
  23 ///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
  24 ///  BB2:
   25 ///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
  26 ///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
  27 ///
  28 ///
  29 /// The coalescer will begin at BB0 and eliminate its copy, then the resulting
  30 /// code will look like this:
  31 ///
  32 /// BB0:
  33 ///   %vreg0 <sgpr> = SCALAR_INST
  34 ///    ...
  35 ///    BRANCH %cond BB1, BB2
  36 /// BB1:
  37 ///   %vreg2 <vgpr> = VECTOR_INST
  38 ///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
  39 /// BB2:
  40 ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
  41 ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
  42 ///
  43 /// Now that the result of the PHI instruction is an SGPR, the register
  44 /// allocator is now forced to constrain the register class of %vreg3 to
  45 /// <sgpr> so we end up with final code like this:
  46 ///
  47 /// BB0:
  48 ///   %vreg0 <sgpr> = SCALAR_INST
  49 ///    ...
  50 ///    BRANCH %cond BB1, BB2
  51 /// BB1:
  52 ///   %vreg2 <vgpr> = VECTOR_INST
  53 ///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
  54 /// BB2:
  55 ///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
  56 ///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
  57 ///
  58 /// Now this code contains an illegal copy from a VGPR to an SGPR.
  59 ///
  60 /// In order to avoid this problem, this pass searches for PHI instructions
  61 /// which define a <vsrc> register and constrains its definition class to
  62 /// <vgpr> if the user of the PHI's definition register is a vector instruction.
  63 /// If the PHI's definition class is constrained to <vgpr> then the coalescer
   64 /// will be unable to perform the COPY removal from the above example which
  65 /// ultimately led to the creation of an illegal COPY.
  66 //===----------------------------------------------------------------------===//
  67
  68 #include "AMDGPU.h"
  69 #include "AMDGPUSubtarget.h"
  70 #include "SIInstrInfo.h"
  71 #include "llvm/CodeGen/MachineFunctionPass.h"
  72 #include "llvm/CodeGen/MachineInstrBuilder.h"
  73 #include "llvm/CodeGen/MachineRegisterInfo.h"
  74 #include "llvm/Support/Debug.h"
  75 #include "llvm/Support/raw_ostream.h"
  76 #include "llvm/Target/TargetMachine.h"
  77
  78 using namespace llvm;
  79
  80 #define DEBUG_TYPE "sgpr-copies"
  81
  82 namespace {
  83
  84 class SIFixSGPRCopies : public MachineFunctionPass {
  85
  86 private:
  87   static char ID;
  88
  89 public:
  90   SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
  91
  92   bool runOnMachineFunction(MachineFunction &MF) override;
  93
  94   const char *getPassName() const override {
  95     return "SI Fix SGPR copies";
  96   }
  97
  98   void getAnalysisUsage(AnalysisUsage &AU) const override {
  99     AU.setPreservesCFG();
 100     MachineFunctionPass::getAnalysisUsage(AU);
 101   }
 102 };
 103
 104 } // End anonymous namespace
 105
 106 char SIFixSGPRCopies::ID = 0;
 107
 108 FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
 109   return new SIFixSGPRCopies(tm);
 110 }
 111
 112 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
 113   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
 114   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
 115     if (!MI.getOperand(i).isReg() ||
 116         !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
 117       continue;
 118
 119     if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
 120       return true;
 121   }
 122   return false;
 123 }
 124
 125 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
 126 getCopyRegClasses(const MachineInstr &Copy,
 127                   const SIRegisterInfo &TRI,
 128                   const MachineRegisterInfo &MRI) {
 129   unsigned DstReg = Copy.getOperand(0).getReg();
 130   unsigned SrcReg = Copy.getOperand(1).getReg();
 131
 132   const TargetRegisterClass *SrcRC =
 133     TargetRegisterInfo::isVirtualRegister(SrcReg) ?
 134     MRI.getRegClass(SrcReg) :
 135     TRI.getPhysRegClass(SrcReg);
 136
 137   // We don't really care about the subregister here.
 138   // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
 139
 140   const TargetRegisterClass *DstRC =
 141     TargetRegisterInfo::isVirtualRegister(DstReg) ?
 142     MRI.getRegClass(DstReg) :
 143     TRI.getPhysRegClass(DstReg);
 144
 145   return std::make_pair(SrcRC, DstRC);
 146 }
 147
 148 static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
 149                              const TargetRegisterClass *DstRC,
 150                              const SIRegisterInfo &TRI) {
 151   return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
 152 }
 153
 154 static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
 155                              const TargetRegisterClass *DstRC,
 156                              const SIRegisterInfo &TRI) {
 157   return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
 158 }
 159
 160 // Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
 161 //
 162 // SGPRx = ...
 163 // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
 164 // VGPRz = COPY SGPRy
 165 //
 166 // ==>
 167 //
 168 // VGPRx = COPY SGPRx
 169 // VGPRz = REG_SEQUENCE VGPRx, sub0
 170 //
 171 // This exposes immediate folding opportunities when materializing 64-bit
 172 // immediates.
 173 static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
 174                                         const SIRegisterInfo *TRI,
 175                                         const SIInstrInfo *TII,
 176                                         MachineRegisterInfo &MRI) {
 177   assert(MI.isRegSequence());
 178
 179   unsigned DstReg = MI.getOperand(0).getReg();
 180   if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
 181     return false;
 182
 183   if (!MRI.hasOneUse(DstReg))
 184     return false;
 185
 186   MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
 187   if (!CopyUse.isCopy())
 188     return false;
 189
 190   const TargetRegisterClass *SrcRC, *DstRC;
 191   std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
 192
 193   if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
 194     return false;
 195
 196   // TODO: Could have multiple extracts?
 197   unsigned SubReg = CopyUse.getOperand(1).getSubReg();
 198   if (SubReg != AMDGPU::NoSubRegister)
 199     return false;
 200
 201   MRI.setRegClass(DstReg, DstRC);
 202
 203   // SGPRx = ...
 204   // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
 205   // VGPRz = COPY SGPRy
 206
 207   // =>
 208   // VGPRx = COPY SGPRx
 209   // VGPRz = REG_SEQUENCE VGPRx, sub0
 210
 211   MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
 212
 213   for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
 214     unsigned SrcReg = MI.getOperand(I).getReg();
 215     unsigned SrcSubReg = MI.getOperand(I).getReg();
 216
 217     const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
 218     assert(TRI->isSGPRClass(SrcRC) &&
 219            "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
 220
 221     SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
 222     const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
 223
 224     unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
 225
 226     BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
 227       .addOperand(MI.getOperand(I));
 228
 229     MI.getOperand(I).setReg(TmpReg);
 230   }
 231
 232   CopyUse.eraseFromParent();
 233   return true;
 234 }
 235
 236 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
 237   MachineRegisterInfo &MRI = MF.getRegInfo();
 238   const SIRegisterInfo *TRI =
 239       static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
 240   const SIInstrInfo *TII =
 241       static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
 242   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
 243                                                   BI != BE; ++BI) {
 244
 245     MachineBasicBlock &MBB = *BI;
 246     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
 247                                                       I != E; ++I) {
 248       MachineInstr &MI = *I;
 249
 250       switch (MI.getOpcode()) {
 251       default:
 252         continue;
 253       case AMDGPU::COPY: {
 254         // If the destination register is a physical register there isn't really
 255         // much we can do to fix this.
 256         if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
 257           continue;
 258
 259         const TargetRegisterClass *SrcRC, *DstRC;
 260         std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
 261         if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
 262           DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
 263           TII->moveToVALU(MI);
 264         }
 265
 266         break;
 267       }
 268       case AMDGPU::PHI: {
 269         DEBUG(dbgs() << "Fixing PHI: " << MI);
 270         unsigned Reg = MI.getOperand(0).getReg();
 271         if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
 272           break;
 273
 274         // If a PHI node defines an SGPR and any of its operands are VGPRs,
 275         // then we need to move it to the VALU.
 276         //
 277         // Also, if a PHI node defines an SGPR and has all SGPR operands
 278         // we must move it to the VALU, because the SGPR operands will
 279         // all end up being assigned the same register, which means
 280         // there is a potential for a conflict if different threads take
 281         // different control flow paths.
 282         //
 283         // For Example:
 284         //
 285         // sgpr0 = def;
 286         // ...
 287         // sgpr1 = def;
 288         // ...
 289         // sgpr2 = PHI sgpr0, sgpr1
 290         // use sgpr2;
 291         //
 292         // Will Become:
 293         //
 294         // sgpr2 = def;
 295         // ...
 296         // sgpr2 = def;
 297         // ...
 298         // use sgpr2
 299         //
 300         // FIXME: This is OK if the branching decision is made based on an
 301         // SGPR value.
 302         bool SGPRBranch = false;
 303
 304         // The one exception to this rule is when one of the operands
 305         // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
 306         // instruction.  In this case, there we know the program will
 307         // never enter the second block (the loop) without entering
 308         // the first block (where the condition is computed), so there
 309         // is no chance for values to be over-written.
 310
 311         bool HasBreakDef = false;
 312         for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
 313           unsigned Reg = MI.getOperand(i).getReg();
 314           if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
 315             TII->moveToVALU(MI);
 316             break;
 317           }
 318           MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
 319           assert(DefInstr);
 320           switch(DefInstr->getOpcode()) {
 321
 322           case AMDGPU::SI_BREAK:
 323           case AMDGPU::SI_IF_BREAK:
 324           case AMDGPU::SI_ELSE_BREAK:
 325           // If we see a PHI instruction that defines an SGPR, then that PHI
 326           // instruction has already been considered and should have
 327           // a *_BREAK as an operand.
 328           case AMDGPU::PHI:
 329             HasBreakDef = true;
 330             break;
 331           }
 332         }
 333
 334         if (!SGPRBranch && !HasBreakDef)
 335           TII->moveToVALU(MI);
 336         break;
 337       }
 338       case AMDGPU::REG_SEQUENCE: {
 339         if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
 340             !hasVGPROperands(MI, TRI)) {
 341           foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
 342           continue;
 343         }
 344
 345         DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
 346
 347         TII->moveToVALU(MI);
 348         break;
 349       }
 350       case AMDGPU::INSERT_SUBREG: {
 351         const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
 352         DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
 353         Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
 354         Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
 355         if (TRI->isSGPRClass(DstRC) &&
 356             (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
 357           DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
 358           TII->moveToVALU(MI);
 359         }
 360         break;
 361       }
 362       }
 363     }
 364   }
 365
 366   return true;
 367 }