1 //===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
9 //===----------------------------------------------------------------------===//
13 #include "AMDGPUSubtarget.h"
14 #include "SIInstrInfo.h"
15 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
16 #include "llvm/CodeGen/MachineDominators.h"
17 #include "llvm/CodeGen/MachineFunctionPass.h"
18 #include "llvm/CodeGen/MachineInstrBuilder.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Target/TargetMachine.h"
25 #define DEBUG_TYPE "si-fold-operands"
// Machine-function pass that folds the source operands of simple mov/copy
// instructions (immediates or virtual registers) directly into their uses.
// NOTE(review): this view is a sampled excerpt of the file; braces and
// access specifiers between the numbered lines are not visible here.
30 class SIFoldOperands : public MachineFunctionPass {
// Registers the pass with the global PassRegistry on construction.
35 SIFoldOperands() : MachineFunctionPass(ID) {
36 initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
// Entry point: performs operand folding across all blocks of MF.
39 bool runOnMachineFunction(MachineFunction &MF) override;
41 const char *getPassName() const override {
42 return "SI Fold Operands";
// Declares the MachineDominatorTree requirement (matches the
// INITIALIZE_PASS_DEPENDENCY registration below).
45 void getAnalysisUsage(AnalysisUsage &AU) const override {
46 AU.addRequired<MachineDominatorTree>();
48 MachineFunctionPass::getAnalysisUsage(AU);
// A pending fold: replace operand UseOpNo of UseMI with either a register
// operand (OpToFold != nullptr) or an immediate (ImmToFold).
52 struct FoldCandidate {
55 MachineOperand *OpToFold;
// Register-operand fold: OpToFold is set; ImmToFold is unused (0).
58 FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
59 UseMI(MI), UseOpNo(OpNo), OpToFold(FoldOp), ImmToFold(0) { }
// Immediate fold: OpToFold is null; ImmToFold holds the constant bits.
61 FoldCandidate(MachineInstr *MI, unsigned OpNo, uint64_t Imm) :
62 UseMI(MI), UseOpNo(OpNo), OpToFold(nullptr), ImmToFold(Imm) { }
69 } // End anonymous namespace.
// Standard LLVM pass registration boilerplate; the MachineDominatorTree
// dependency mirrors the addRequired<> call in getAnalysisUsage.
71 INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
72 "SI Fold Operands", false, false)
73 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
74 INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
75 "SI Fold Operands", false, false)
// The address of ID (not its value) uniquely identifies the pass.
77 char SIFoldOperands::ID = 0;
// Exported handle so other code can refer to this pass by ID.
79 char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
// Factory used by the AMDGPU target to add this pass to the pipeline.
81 FunctionPass *llvm::createSIFoldOperandsPass() {
82 return new SIFoldOperands();
// Whitelist of opcodes whose single source operand may be folded into
// uses: plain scalar/vector moves only. Presumably the (not visible here)
// switch returns true for these cases and false otherwise — the switch
// header and return statements fall outside this sampled excerpt.
85 static bool isSafeToFold(unsigned Opcode) {
87 case AMDGPU::V_MOV_B32_e32:
88 case AMDGPU::V_MOV_B32_e64:
89 case AMDGPU::V_MOV_B64_PSEUDO:
90 case AMDGPU::S_MOV_B32:
91 case AMDGPU::S_MOV_B64:
// Applies a single FoldCandidate in place: rewrites operand UseOpNo of
// Fold.UseMI to the folded immediate or register. (Branch structure is
// partially elided in this sampled excerpt; returns appear to signal
// whether the rewrite happened — TODO confirm against the full file.)
99 static bool updateOperand(FoldCandidate &Fold,
100 const TargetRegisterInfo &TRI) {
101 MachineInstr *MI = Fold.UseMI;
102 MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
// Immediate fold: turn the register operand into an immediate in place.
106 Old.ChangeToImmediate(Fold.ImmToFold);
// Register fold: substitute the new virtual register (with subreg
// composition handled by substVirtReg). Virtual-to-virtual only.
110 MachineOperand *New = Fold.OpToFold;
111 if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
112 TargetRegisterInfo::isVirtualRegister(New->getReg())) {
113 Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
117 // FIXME: Handle physical registers.
// Pass body: for each safe mov-like instruction, try to fold its source
// operand (immediate or virtual register) into every use of its
// destination, then apply the collected folds.
// NOTE(review): this excerpt is sampled — continue statements, closing
// braces, and some declarations between the numbered lines are elided.
122 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
123 MachineRegisterInfo &MRI = MF.getRegInfo();
124 const SIInstrInfo *TII =
125 static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
126 const SIRegisterInfo &TRI = TII->getRegisterInfo();
128 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
131 MachineBasicBlock &MBB = *BI;
// Next caches the following iterator so the current instruction may be
// rewritten/erased without invalidating the walk.
132 MachineBasicBlock::iterator I, Next;
133 for (I = MBB.begin(); I != MBB.end(); I = Next) {
135 MachineInstr &MI = *I;
// Only simple mov opcodes are fold candidates (see isSafeToFold).
137 if (!isSafeToFold(MI.getOpcode()))
// Operand 1 is the mov's source; operand 0 is its destination (its uses
// are walked below).
140 MachineOperand &OpToFold = MI.getOperand(1);
142 // FIXME: Fold operands with subregs.
// Skip physical-register and subregister sources.
143 if (OpToFold.isReg() &&
144 (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
145 OpToFold.getSubReg()))
// Collect all legal folds first and apply them afterwards, so the use
// list is not mutated while it is being iterated.
148 std::vector<FoldCandidate> FoldList;
149 for (MachineRegisterInfo::use_iterator
150 Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
153 MachineInstr *UseMI = Use->getParent();
154 const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
156 // FIXME: Fold operands with subregs.
157 if (UseOp.isReg() && UseOp.getSubReg() && OpToFold.isReg()) {
// The source is an immediate if it is an integer or FP constant.
161 bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();
165 const TargetRegisterClass *UseRC = MRI.getRegClass(UseOp.getReg());
// Canonicalize the constant into a 64-bit APInt (FP via raw bits).
167 if (OpToFold.isFPImm()) {
168 Imm = OpToFold.getFPImm()->getValueAPF().bitcastToAPInt();
170 Imm = APInt(64, OpToFold.getImm());
173 // Split 64-bit constants into 32-bits for folding.
174 if (UseOp.getSubReg()) {
// Only 8-byte (64-bit) register classes are handled for subreg uses.
175 if (UseRC->getSize() != 8)
178 if (UseOp.getSubReg() == AMDGPU::sub0) {
179 Imm = Imm.getLoBits(32);
181 assert(UseOp.getSubReg() == AMDGPU::sub1);
182 Imm = Imm.getHiBits(32);
186 // In order to fold immediates into copies, we need to change the
// COPY into a real mov of the destination's register class first.
188 if (UseMI->getOpcode() == AMDGPU::COPY) {
189 unsigned MovOp = TII->getMovOpcode(
190 MRI.getRegClass(UseMI->getOperand(0).getReg()));
// No suitable mov opcode for this register class — give up.
191 if (MovOp == AMDGPU::COPY)
194 UseMI->setDesc(TII->get(MovOp));
198 const MCInstrDesc &UseDesc = UseMI->getDesc();
200 // Don't fold into target independent nodes. Target independent opcodes
201 // don't have defined register classes.
202 if (UseDesc.isVariadic() ||
203 UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
// Build a throwaway immediate operand purely to query legality.
208 const MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
209 if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &ImmOp)) {
210 FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(),
211 Imm.getSExtValue()));
216 // Normal substitution with registers
217 if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) {
218 FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(), &OpToFold));
222 // FIXME: We could commute the instruction to create more opportunities
223 // for folding. This will only be useful if we have 32-bit instructions.
225 // FIXME: We could try to change the instruction from 64-bit to 32-bit
226 // to enable more folding opportunities. The shrink operands pass
227 // already does this.
// Second phase: apply the folds collected above.
230 for (FoldCandidate &Fold : FoldList) {
231 if (updateOperand(Fold, TRI)) {
// A register fold can extend the source's live range past the prior
// kill point, so clear the kill flag on the folded operand.
234 assert(Fold.OpToFold && Fold.OpToFold->isReg());
235 Fold.OpToFold->setIsKill(false);
237 DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
238 Fold.UseOpNo << " of " << *Fold.UseMI << '\n');