From 6afb65c2b709cfa078d0f6f6c5feceb2abab8036 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 5 Jun 2013 01:48:30 +0000 Subject: [PATCH] Revert "R600: Add a pass that merge Vector Register" This reverts commit r183279. CodeGen/R600/texture-input-merge.ll was failing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@183286 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPU.h | 1 - lib/Target/R600/AMDGPUTargetMachine.cpp | 5 - lib/Target/R600/CMakeLists.txt | 1 - .../R600/R600OptimizeVectorRegisters.cpp | 363 ------------------ test/CodeGen/R600/texture-input-merge.ll | 30 -- 5 files changed, 400 deletions(-) delete mode 100644 lib/Target/R600/R600OptimizeVectorRegisters.cpp delete mode 100644 test/CodeGen/R600/texture-input-merge.ll diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index d3ef4260d65..f9d70c9c5a3 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -23,7 +23,6 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600TextureIntrinsicsReplacer(); FunctionPass* createR600KernelParametersPass(const DataLayout *TD); -FunctionPass *createR600VectorRegMerger(TargetMachine &tm); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); FunctionPass *createR600Packetizer(TargetMachine &tm); diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index c52af56175e..88dc5832905 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -130,11 +130,6 @@ bool AMDGPUPassConfig::addInstSelector() { bool AMDGPUPassConfig::addPreRegAlloc() { addPass(createAMDGPUConvertToISAPass(*TM)); - const AMDGPUSubtarget &ST = TM->getSubtarget(); - - if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { - addPass(createR600VectorRegMerger(*TM)); - } return false; } diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 558d001af2e..c5ce9dc789e 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -41,7 +41,6 @@ add_llvm_target(R600CodeGen R600ISelLowering.cpp R600MachineFunctionInfo.cpp R600MachineScheduler.cpp - R600OptimizeVectorRegisters.cpp R600Packetizer.cpp R600RegisterInfo.cpp R600TextureIntrinsicsReplacer.cpp diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp deleted file mode 100644 index 6811dbc8790..00000000000 --- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp +++ /dev/null @@ -1,363 +0,0 @@ -//===--------------------- R600MergeVectorRegisters.cpp -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This pass merges inputs of swizzeable instructions into vector sharing -/// common data and/or have enough undef subreg using swizzle abilities. -/// -/// For instance let's consider the following pseudo code : -/// vreg5 = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3 -/// ... -/// vreg7 = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3 -/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3 -/// -/// is turned into : -/// vreg5 = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3 -/// ... -/// vreg7 = INSERT_SUBREG vreg4, sub3 -/// (swizzable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3 -/// -/// This allow regalloc to reduce register pressure for vector registers and -/// to reduce MOV count. -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "vec-merger" -#include "llvm/Support/Debug.h" -#include "AMDGPU.h" -#include "R600InstrInfo.h" -#include "llvm/CodeGen/DFAPacketizer.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - -static bool -isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { - for (MachineRegisterInfo::def_iterator It = MRI.def_begin(Reg), - E = MRI.def_end(); It != E; ++It) { - return (*It).isImplicitDef(); - } - llvm_unreachable("Reg without a def"); - return false; -} - -class RegSeqInfo { -public: - MachineInstr *Instr; - DenseMap RegToChan; - std::vector UndefReg; - RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { - assert (MI->getOpcode() == AMDGPU::REG_SEQUENCE); - for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { - MachineOperand &MO = Instr->getOperand(i); - unsigned Chan = Instr->getOperand(i + 1).getImm(); - if (isImplicitlyDef(MRI, MO.getReg())) - UndefReg.push_back(Chan); - else - RegToChan[MO.getReg()] = Chan; - } - } - RegSeqInfo() {} - - bool operator==(const RegSeqInfo &RSI) const { - return RSI.Instr == Instr; - } -}; - -class R600VectorRegMerger : public MachineFunctionPass { -private: - MachineRegisterInfo *MRI; - const R600InstrInfo *TII; - bool canSwizzle(const MachineInstr &) const; - bool areAllUsesSwizzeable(unsigned Reg) const; - void SwizzleInput(MachineInstr &, - const std::vector > &) const; - bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *, - std::vector > &Remap) const; - bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, - std::vector > &RemapChan); - bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, - std::vector > &RemapChan); - MachineInstr *RebuildVector(RegSeqInfo *MI, - const RegSeqInfo *BaseVec, - const std::vector > &RemapChan) const; - void RemoveMI(MachineInstr *); - void trackRSI(const RegSeqInfo &RSI); - - typedef DenseMap > InstructionSetMap; - DenseMap PreviousRegSeq; - InstructionSetMap PreviousRegSeqByReg; - InstructionSetMap PreviousRegSeqByUndefCount; -public: - static char ID; - R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), - TII (static_cast(tm.getInstrInfo())) { } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - const char *getPassName() const { - return "R600 Vector Registers Merge Pass"; - } - - bool runOnMachineFunction(MachineFunction &Fn); -}; - -char R600VectorRegMerger::ID = 0; - -bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) - const { - if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) - return true; - switch (MI.getOpcode()) { - case AMDGPU::R600_ExportSwz: - case AMDGPU::EG_ExportSwz: - return true; - default: - return false; - } -} - -bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, - RegSeqInfo *ToMerge, std::vector< std::pair > &Remap) - const { - unsigned CurrentUndexIdx = 0; - for (DenseMap::iterator It = ToMerge->RegToChan.begin(), - E = ToMerge->RegToChan.end(); It != E; ++It) { - DenseMap::const_iterator PosInUntouched = - Untouched->RegToChan.find((*It).first); - if (PosInUntouched != Untouched->RegToChan.end()) { - Remap.push_back(std::pair - ((*It).second, (*PosInUntouched).second)); - continue; - } - if (CurrentUndexIdx >= Untouched->UndefReg.size()) - return false; - Remap.push_back(std::pair - ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); - } - - return true; -} - -MachineInstr *R600VectorRegMerger::RebuildVector( - RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, - const std::vector > &RemapChan) const { - unsigned Reg = RSI->Instr->getOperand(0).getReg(); - MachineBasicBlock::iterator Pos = RSI->Instr; - MachineBasicBlock &MBB = *Pos->getParent(); - DebugLoc DL = Pos->getDebugLoc(); - - unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); - DenseMap UpdatedRegToChan = BaseRSI->RegToChan; - std::vector UpdatedUndef = BaseRSI->UndefReg; - for (DenseMap::iterator It = RSI->RegToChan.begin(), - E = RSI->RegToChan.end(); It != E; ++It) { - if (BaseRSI->RegToChan.find((*It).first) != BaseRSI->RegToChan.end()) { - UpdatedRegToChan[(*It).first] = (*It).second; - continue; - } - unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); - unsigned SubReg = (*It).first; - unsigned Swizzle = (*It).second; - unsigned Chan; - for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { - if (RemapChan[j].first == Swizzle) { - Chan = RemapChan[j].second; - break; - } - } - MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), - DstReg) - .addReg(SrcVec) - .addReg(SubReg) - .addImm(Chan); - UpdatedRegToChan[SubReg] = Chan; - for (std::vector::iterator RemoveIt = UpdatedUndef.begin(), - RemoveE = UpdatedUndef.end(); RemoveIt != RemoveE; ++ RemoveIt) { - if (*RemoveIt == Chan) - UpdatedUndef.erase(RemoveIt); - } - DEBUG(dbgs() << " ->"; Tmp->dump();); - SrcVec = DstReg; - } - Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg) - .addReg(SrcVec); - DEBUG(dbgs() << " ->"; Pos->dump();); - - DEBUG(dbgs() << " Updating Swizzle:\n"); - for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg), - E = MRI->use_end(); It != E; ++It) { - DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); - SwizzleInput(*It, RemapChan); - DEBUG((*It).dump()); - } - RSI->Instr->eraseFromParent(); - - // Update RSI - RSI->Instr = Pos; - RSI->RegToChan = UpdatedRegToChan; - RSI->UndefReg = UpdatedUndef; - - return Pos; -} - -void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { - for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), - E = PreviousRegSeqByReg.end(); It != E; ++It) { - std::vector &MIs = (*It).second; - MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); - } - for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), - E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { - std::vector &MIs = (*It).second; - MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); - } -} - -void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, - const std::vector > &RemapChan) const { - unsigned Offset; - if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) - Offset = 2; - else - Offset = 3; - for (unsigned i = 0; i < 4; i++) { - unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1; - for (unsigned j = 0, e = RemapChan.size(); j < e; j++) { - if (RemapChan[j].first == Swizzle) { - MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); - break; - } - } - } -} - -bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { - for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg), - E = MRI->use_end(); It != E; ++It) { - if (!canSwizzle(*It)) - return false; - } - return true; -} - -bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, - RegSeqInfo &CompatibleRSI, - std::vector > &RemapChan) { - for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), - MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { - if (!MOp->isReg()) - continue; - if (PreviousRegSeqByReg[MOp->getReg()].empty()) - continue; - std::vector MIs = PreviousRegSeqByReg[MOp->getReg()]; - for (unsigned i = 0, e = MIs.size(); i < e; i++) { - CompatibleRSI = PreviousRegSeq[MIs[i]]; - if (RSI == CompatibleRSI) - continue; - if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) - return true; - } - } - return false; -} - -bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, - RegSeqInfo &CompatibleRSI, - std::vector > &RemapChan) { - unsigned NeededUndefs = 4 - RSI.UndefReg.size(); - if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) - return false; - std::vector &MIs = - PreviousRegSeqByUndefCount[NeededUndefs]; - CompatibleRSI = PreviousRegSeq[MIs.back()]; - tryMergeVector(&CompatibleRSI, &RSI, RemapChan); - return true; -} - -void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { - for (DenseMap::const_iterator - It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { - PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); - } - PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); - PreviousRegSeq[RSI.Instr] = RSI; -} - -bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { - MRI = &(Fn.getRegInfo()); - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { - MachineBasicBlock *MB = MBB; - PreviousRegSeq.clear(); - PreviousRegSeqByReg.clear(); - PreviousRegSeqByUndefCount.clear(); - - for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); - MII != MIIE; ++MII) { - MachineInstr *MI = MII; - if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) - continue; - - RegSeqInfo RSI(*MRI, MI); - - // All uses of MI are swizzeable ? - unsigned Reg = MI->getOperand(0).getReg(); - if (!areAllUsesSwizzeable(Reg)) - continue; - - DEBUG (dbgs() << "Trying to optimize "; - MI->dump(); - ); - - RegSeqInfo CandidateRSI; - std::vector > RemapChan; - DEBUG(dbgs() << "Using common slots...\n";); - if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { - // Remove CandidateRSI mapping - RemoveMI(CandidateRSI.Instr); - MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); - trackRSI(RSI); - continue; - } - DEBUG(dbgs() << "Using free slots...\n";); - RemapChan.clear(); - if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { - RemoveMI(CandidateRSI.Instr); - MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); - trackRSI(RSI); - continue; - } - //Failed to merge - trackRSI(RSI); - } - } - return false; -} - -} - -llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { - return new R600VectorRegMerger(tm); -} - diff --git a/test/CodeGen/R600/texture-input-merge.ll b/test/CodeGen/R600/texture-input-merge.ll deleted file mode 100644 index 5d0ecef3069..00000000000 --- a/test/CodeGen/R600/texture-input-merge.ll +++ /dev/null @@ -1,30 +0,0 @@ -;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s - -;CHECK-NOT: MOV - -define void @test() { - %1 = call float @llvm.R600.load.input(i32 0) - %2 = call float @llvm.R600.load.input(i32 1) - %3 = call float @llvm.R600.load.input(i32 2) - %4 = call float @llvm.R600.load.input(i32 3) - %5 = fmul float %1, 3.0 - %6 = fmul float %2, 3.0 - %7 = fmul float %3, 3.0 - %8 = fmul float %4, 3.0 - %9 = insertelement <4 x float> undef, float %5, i32 0 - %10 = insertelement <4 x float> %9, float %6, i32 1 - %11 = insertelement <4 x float> undef, float %7, i32 0 - %12 = insertelement <4 x float> %11, float %5, i32 1 - %13 = insertelement <4 x float> undef, float %8, i32 0 - %14 = call <4 x float> @llvm.R600.tex(<4 x float> %10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) - %15 = call <4 x float> @llvm.R600.tex(<4 x float> %12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) - %16 = call <4 x float> @llvm.R600.tex(<4 x float> %13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) - %17 = fadd <4 x float> %14, %15 - %18 = fadd <4 x float> %17, %16 - call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 0) - ret void -} - -declare float @llvm.R600.load.input(i32) readnone -declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone -declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) -- 2.34.1