R600: Packetize instructions
authorVincent Lejeune <vljn@ovi.com>
Tue, 30 Apr 2013 00:14:27 +0000 (00:14 +0000)
committerVincent Lejeune <vljn@ovi.com>
Tue, 30 Apr 2013 00:14:27 +0000 (00:14 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180760 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/R600/AMDGPU.h
lib/Target/R600/AMDGPUTargetMachine.cpp
lib/Target/R600/CMakeLists.txt
lib/Target/R600/R600InstrInfo.cpp
lib/Target/R600/R600Packetizer.cpp [new file with mode: 0644]
lib/Target/R600/R600RegisterInfo.td

index 0b01433cc926a1ad719f83c00bc2ff7e4812eb9b..9792bd86f298c30c4a7d7a668d98043f14cc796a 100644 (file)
@@ -24,6 +24,7 @@ class AMDGPUTargetMachine;
 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
 FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600Packetizer(TargetMachine &tm);
 FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
 
 // SI Passes
index e7ea876e2abbfd8ce2042a2e850892bfc27aed9f..0ec67ce13b3ae10abd3f6c5f26c5b6352b9bce7b 100644 (file)
@@ -153,8 +153,9 @@ bool AMDGPUPassConfig::addPreEmitPass() {
     addPass(createAMDGPUCFGStructurizerPass(*TM));
     addPass(createR600EmitClauseMarkers(*TM));
     addPass(createR600ExpandSpecialInstrsPass(*TM));
-    addPass(createR600ControlFlowFinalizer(*TM));
     addPass(&FinalizeMachineBundlesID);
+    addPass(createR600Packetizer(*TM));
+    addPass(createR600ControlFlowFinalizer(*TM));
   } else {
     addPass(createSILowerControlFlowPass(*TM));
   }
index 8efba5846befbf94b5d75720dae8bd5780f0bb5f..2ad2047278ca410c1b256fa02484c10f822912af 100644 (file)
@@ -42,6 +42,7 @@ add_llvm_target(R600CodeGen
   R600ISelLowering.cpp
   R600MachineFunctionInfo.cpp
   R600MachineScheduler.cpp
+  R600Packetizer.cpp
   R600RegisterInfo.cpp
   SIAnnotateControlFlow.cpp
   SIInsertWaits.cpp
index b1edb137da58ed390bee8a4bbddfe866731c4bca..371ffc12f1e427f5c90c2ceb269dedbb536243ac 100644 (file)
@@ -209,10 +209,19 @@ R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
       int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
       if (SrcIdx < 0)
         break;
-      if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+      unsigned Reg = MI->getOperand(SrcIdx).getReg();
+      if (Reg == AMDGPU::ALU_CONST) {
         unsigned Const = MI->getOperand(
             getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
         Consts.push_back(Const);
+        continue;
+      }
+      if (AMDGPU::R600_KC0RegClass.contains(Reg) ||
+          AMDGPU::R600_KC1RegClass.contains(Reg)) {
+        unsigned Index = RI.getEncodingValue(Reg) & 0xff;
+        unsigned Chan = RI.getHWRegChan(Reg);
+        Consts.push_back((Index << 2) | Chan);
+        continue;
       }
     }
   }
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
new file mode 100644 (file)
index 0000000..05e96f1
--- /dev/null
@@ -0,0 +1,446 @@
+//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass implements instructions packetization for R600. It unsets isLast
+/// bit of instructions inside a bundle and substitutes src register with
+/// PreviousVector when applicable.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600PACKETIZER_CPP
+#define R600PACKETIZER_CPP
+
+#define DEBUG_TYPE "packets"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+
+namespace llvm {
+
+class R600Packetizer : public MachineFunctionPass {
+
+public:
+  static char ID;
+  R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+    AU.addRequired<MachineLoopInfo>();
+    AU.addPreserved<MachineLoopInfo>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  const char *getPassName() const {
+    return "R600 Packetizer";
+  }
+
+  bool runOnMachineFunction(MachineFunction &Fn);
+};
+char R600Packetizer::ID = 0;
+
+class R600PacketizerList : public VLIWPacketizerList {
+
+private:
+  const R600InstrInfo *TII;
+  const R600RegisterInfo &TRI;
+
+  enum BankSwizzle {
+    ALU_VEC_012 = 0,
+    ALU_VEC_021,
+    ALU_VEC_120,
+    ALU_VEC_102,
+    ALU_VEC_201,
+    ALU_VEC_210
+  };
+
+  unsigned getSlot(const MachineInstr *MI) const {
+    return TRI.getHWRegChan(MI->getOperand(0).getReg());
+  }
+
+  std::vector<unsigned> getPreviousVector(MachineBasicBlock::iterator I) const {
+    std::vector<unsigned> Result;
+    I--;
+    if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
+      return Result;
+    MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
+    if (I->isBundle())
+      BI++;
+    while (BI->isBundledWithPred() && !TII->isPredicated(BI)) {
+      int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600Operands::WRITE);
+      if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm())
+        Result.push_back(BI->getOperand(0).getReg());
+      BI++;
+    }
+    return Result;
+  }
+
+  void substitutePV(MachineInstr *MI, const std::vector<unsigned> &PV) const {
+    R600Operands::Ops Ops[] = {
+      R600Operands::SRC0,
+      R600Operands::SRC1,
+      R600Operands::SRC2
+    };
+    for (unsigned i = 0; i < 3; i++) {
+      int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
+      if (OperandIdx < 0)
+        continue;
+      unsigned Src = MI->getOperand(OperandIdx).getReg();
+      for (unsigned j = 0, e = PV.size(); j < e; j++) {
+        if (Src == PV[j]) {
+          unsigned Chan = TRI.getHWRegChan(Src);
+          unsigned PVReg;
+          switch (Chan) {
+          case 0:
+            PVReg = AMDGPU::PV_X;
+            break;
+          case 1:
+            PVReg = AMDGPU::PV_Y;
+            break;
+          case 2:
+            PVReg = AMDGPU::PV_Z;
+            break;
+          case 3:
+            PVReg = AMDGPU::PV_W;
+            break;
+          default:
+            llvm_unreachable("Invalid Chan");
+          }
+          MI->getOperand(OperandIdx).setReg(PVReg);
+          break;
+        }
+      }
+    }
+  }
+public:
+  // Ctor.
+  R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+                        MachineDominatorTree &MDT)
+  : VLIWPacketizerList(MF, MLI, MDT, true),
+    TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
+    TRI(TII->getRegisterInfo()) { }
+
+  // initPacketizerState - initialize some internal flags.
+  void initPacketizerState() { }
+
+  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+  bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
+    return false;
+  }
+
+  // isSoloInstruction - return true if instruction MI can not be packetized
+  // with any other instruction, which means that MI itself is a packet.
+  bool isSoloInstruction(MachineInstr *MI) {
+    if (TII->isVector(*MI))
+      return true;
+    if (!TII->isALUInstr(MI->getOpcode()))
+      return true;
+    if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)
+      return true;
+    if (TII->isTransOnly(MI))
+      return true;
+    return false;
+  }
+
+  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+  // together.
+  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+    MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
+    if (getSlot(MII) <= getSlot(MIJ))
+      return false;
+    // Does MII and MIJ share the same pred_sel ?
+    int OpI = TII->getOperandIdx(MII->getOpcode(), R600Operands::PRED_SEL),
+        OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600Operands::PRED_SEL);
+    unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
+        PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
+    if (PredI != PredJ)
+      return false;
+    if (SUJ->isSucc(SUI)) {
+      for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
+        const SDep &Dep = SUJ->Succs[i];
+        if (Dep.getSUnit() != SUI)
+          continue;
+        if (Dep.getKind() == SDep::Anti)
+          continue;
+        if (Dep.getKind() == SDep::Output)
+          if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
+            continue;
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
+  // and SUJ.
+  bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;}
+
+  void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
+    unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600Operands::LAST);
+    MI->getOperand(LastOp).setImm(Bit);
+  }
+
+  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
+    CurrentPacketMIs.push_back(MI);
+    bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);
+    DEBUG(
+      if (!FitsConstLimits) {
+        dbgs() << "Couldn't pack :\n";
+        MI->dump();
+        dbgs() << "with the following packets :\n";
+        for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
+          CurrentPacketMIs[i]->dump();
+          dbgs() << "\n";
+        }
+        dbgs() << "because of Consts read limitations\n";
+      });
+    const std::vector<unsigned> &PV = getPreviousVector(MI);
+    bool FitsReadPortLimits = fitsReadPortLimitation(CurrentPacketMIs, PV);
+    DEBUG(
+      if (!FitsReadPortLimits) {
+        dbgs() << "Couldn't pack :\n";
+        MI->dump();
+        dbgs() << "with the following packets :\n";
+        for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
+          CurrentPacketMIs[i]->dump();
+          dbgs() << "\n";
+        }
+        dbgs() << "because of Read port limitations\n";
+      });
+    bool isBundlable = FitsConstLimits && FitsReadPortLimits;
+    CurrentPacketMIs.pop_back();
+    if (!isBundlable) {
+      endPacket(MI->getParent(), MI);
+      substitutePV(MI, getPreviousVector(MI));
+      return VLIWPacketizerList::addToPacket(MI);
+    }
+    if (!CurrentPacketMIs.empty())
+      setIsLastBit(CurrentPacketMIs.back(), 0);
+    substitutePV(MI, PV);
+    return VLIWPacketizerList::addToPacket(MI);
+  }
+private:
+  std::vector<std::pair<int, unsigned> >
+  ExtractSrcs(const MachineInstr *MI, const std::vector<unsigned> &PV) const {
+    R600Operands::Ops Ops[] = {
+      R600Operands::SRC0,
+      R600Operands::SRC1,
+      R600Operands::SRC2
+    };
+    std::vector<std::pair<int, unsigned> > Result;
+    for (unsigned i = 0; i < 3; i++) {
+      int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
+      if (OperandIdx < 0){
+        Result.push_back(std::pair<int, unsigned>(-1,0));
+        continue;
+      }
+      unsigned Src = MI->getOperand(OperandIdx).getReg();
+      if (std::find(PV.begin(), PV.end(), Src) != PV.end()) {
+        Result.push_back(std::pair<int, unsigned>(-1,0));
+        continue;
+      }
+      unsigned Reg = TRI.getEncodingValue(Src) & 0xff;
+      if (Reg > 127) {
+        Result.push_back(std::pair<int, unsigned>(-1,0));
+        continue;
+      }
+      unsigned Chan = TRI.getHWRegChan(Src);
+      Result.push_back(std::pair<int, unsigned>(Reg, Chan));
+    }
+    return Result;
+  }
+
+  std::vector<std::pair<int, unsigned> >
+  Swizzle(std::vector<std::pair<int, unsigned> > Src,
+  BankSwizzle Swz) const {
+    switch (Swz) {
+    case ALU_VEC_012:
+      break;
+    case ALU_VEC_021:
+      std::swap(Src[1], Src[2]);
+      break;
+    case ALU_VEC_102:
+      std::swap(Src[0], Src[1]);
+      break;
+    case ALU_VEC_120:
+      std::swap(Src[0], Src[1]);
+      std::swap(Src[0], Src[2]);
+      break;
+    case ALU_VEC_201:
+      std::swap(Src[0], Src[2]);
+      std::swap(Src[0], Src[1]);
+      break;
+    case ALU_VEC_210:
+      std::swap(Src[0], Src[2]);
+      break;
+    }
+    return Src;
+  }
+
+  bool isLegal(const std::vector<MachineInstr *> &IG,
+      const std::vector<BankSwizzle> &Swz,
+      const std::vector<unsigned> &PV) const {
+    assert (Swz.size() == IG.size());
+    int Vector[4][3];
+    memset(Vector, -1, sizeof(Vector));
+    for (unsigned i = 0, e = IG.size(); i < e; i++) {
+      const std::vector<std::pair<int, unsigned> > &Srcs =
+          Swizzle(ExtractSrcs(IG[i], PV), Swz[i]);
+      for (unsigned j = 0; j < 3; j++) {
+        const std::pair<int, unsigned> &Src = Srcs[j];
+        if (Src.first < 0)
+          continue;
+        if (Vector[Src.second][j] < 0)
+          Vector[Src.second][j] = Src.first;
+        if (Vector[Src.second][j] != Src.first)
+          return false;
+      }
+    }
+    return true;
+  }
+
+  bool recursiveFitsFPLimitation(
+  std::vector<MachineInstr *> IG,
+  const std::vector<unsigned> &PV,
+  std::vector<BankSwizzle> &SwzCandidate,
+  std::vector<MachineInstr *> CurrentlyChecked)
+      const {
+    if (!isLegal(CurrentlyChecked, SwzCandidate, PV))
+      return false;
+    if (IG.size() == CurrentlyChecked.size()) {
+      return true;
+    }
+    BankSwizzle AvailableSwizzle[] = {
+      ALU_VEC_012,
+      ALU_VEC_021,
+      ALU_VEC_120,
+      ALU_VEC_102,
+      ALU_VEC_201,
+      ALU_VEC_210
+    };
+    CurrentlyChecked.push_back(IG[CurrentlyChecked.size()]);
+    for (unsigned i = 0; i < 6; i++) {
+      SwzCandidate.push_back(AvailableSwizzle[i]);
+      if (recursiveFitsFPLimitation(IG, PV, SwzCandidate, CurrentlyChecked))
+        return true;
+      SwzCandidate.pop_back();
+    }
+    return false;
+  }
+
+  bool fitsReadPortLimitation(
+  std::vector<MachineInstr *> IG,
+  const std::vector<unsigned> &PV)
+      const {
+    //Todo : support shared src0 - src1 operand
+    std::vector<BankSwizzle> SwzCandidate;
+    bool Result = recursiveFitsFPLimitation(IG, PV, SwzCandidate,
+        std::vector<MachineInstr *>());
+    if (!Result)
+      return false;
+    for (unsigned i = 0, e = IG.size(); i < e; i++) {
+      MachineInstr *MI = IG[i];
+      unsigned Op = TII->getOperandIdx(MI->getOpcode(),
+          R600Operands::BANK_SWIZZLE);
+      MI->getOperand(Op).setImm(SwzCandidate[i]);
+    }
+    return true;
+  }
+};
+
+bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
+  const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
+  // Instantiate the packetizer.
+  R600PacketizerList Packetizer(Fn, MLI, MDT);
+
+  // DFA state table should not be empty.
+  assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+
+  //
+  // Loop over all basic blocks and remove KILL pseudo-instructions
+  // These instructions confuse the dependence analysis. Consider:
+  // D0 = ...   (Insn 0)
+  // R0 = KILL R0, D0 (Insn 1)
+  // R0 = ... (Insn 2)
+  // Here, Insn 1 will result in the dependence graph not emitting an output
+  // dependence between Insn 0 and Insn 2. This can lead to incorrect
+  // packetization
+  //
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+    MachineBasicBlock::iterator End = MBB->end();
+    MachineBasicBlock::iterator MI = MBB->begin();
+    while (MI != End) {
+      if (MI->isKill()) {
+        MachineBasicBlock::iterator DeleteMI = MI;
+        ++MI;
+        MBB->erase(DeleteMI);
+        End = MBB->end();
+        continue;
+      }
+      ++MI;
+    }
+  }
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+    // Find scheduling regions and schedule / packetize each region.
+    unsigned RemainingCount = MBB->size();
+    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+        RegionEnd != MBB->begin();) {
+      // The next region starts above the previous region. Look backward in the
+      // instruction stream until we find the nearest boundary.
+      MachineBasicBlock::iterator I = RegionEnd;
+      for(;I != MBB->begin(); --I, --RemainingCount) {
+        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
+          break;
+      }
+      I = MBB->begin();
+
+      // Skip empty scheduling regions.
+      if (I == RegionEnd) {
+        RegionEnd = llvm::prior(RegionEnd);
+        --RemainingCount;
+        continue;
+      }
+      // Skip regions with one instruction.
+      if (I == llvm::prior(RegionEnd)) {
+        RegionEnd = llvm::prior(RegionEnd);
+        continue;
+      }
+
+      Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+      RegionEnd = I;
+    }
+  }
+
+  return true;
+
+}
+
+}
+
+llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) {
+  return new R600Packetizer(tm);
+}
+
+#endif // R600PACKETIZER_CPP
index 03f49761ea401db42966d32807873e7514ccb085..694431921b6409b1de07c08494068d1904429c66 100644 (file)
@@ -89,7 +89,10 @@ def ONE_INT : R600Reg<"1", 250>;
 def HALF : R600Reg<"0.5", 252>;
 def NEG_HALF : R600Reg<"-0.5", 252>;
 def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
-def PV_X : R600Reg<"pv.x", 254>;
+def PV_X : R600RegWithChan<"PV.x", 254, "X">;
+def PV_Y : R600RegWithChan<"PV.y", 254, "Y">;
+def PV_Z : R600RegWithChan<"PV.z", 254, "Z">;
+def PV_W : R600RegWithChan<"PV.w", 254, "W">;
 def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
 def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
 def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;