R600: Turn TEX/VTX into native instructions
[oota-llvm.git] / lib / Target / R600 / R600ControlFlowFinalizer.cpp
1 //===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets the STACK_SIZE info.
13 //===----------------------------------------------------------------------===//
14
#define DEBUG_TYPE "r600cf"

#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <set>
#include <utility>
#include <vector>
27
28 namespace llvm {
29
30 class R600ControlFlowFinalizer : public MachineFunctionPass {
31
32 private:
33   typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
34
35   enum ControlFlowInstruction {
36     CF_TC,
37     CF_VC,
38     CF_CALL_FS,
39     CF_WHILE_LOOP,
40     CF_END_LOOP,
41     CF_LOOP_BREAK,
42     CF_LOOP_CONTINUE,
43     CF_JUMP,
44     CF_ELSE,
45     CF_POP,
46     CF_END
47   };
48
49   static char ID;
50   const R600InstrInfo *TII;
51   unsigned MaxFetchInst;
52   const AMDGPUSubtarget &ST;
53
54   bool IsTrivialInst(MachineInstr *MI) const {
55     switch (MI->getOpcode()) {
56     case AMDGPU::KILL:
57     case AMDGPU::RETURN:
58       return true;
59     default:
60       return false;
61     }
62   }
63
64   const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
65     unsigned Opcode = 0;
66     bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX);
67     switch (CFI) {
68     case CF_TC:
69       Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
70       break;
71     case CF_VC:
72       Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
73       break;
74     case CF_CALL_FS:
75       Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
76       break;
77     case CF_WHILE_LOOP:
78       Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
79       break;
80     case CF_END_LOOP:
81       Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
82       break;
83     case CF_LOOP_BREAK:
84       Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
85       break;
86     case CF_LOOP_CONTINUE:
87       Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
88       break;
89     case CF_JUMP:
90       Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
91       break;
92     case CF_ELSE:
93       Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
94       break;
95     case CF_POP:
96       Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
97       break;
98     case CF_END:
99       if (ST.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN) {
100         Opcode = AMDGPU::CF_END_CM;
101         break;
102       }
103       Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
104       break;
105     }
106     assert (Opcode && "No opcode selected");
107     return TII->get(Opcode);
108   }
109
110   ClauseFile
111   MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
112       const {
113     MachineBasicBlock::iterator ClauseHead = I;
114     std::vector<MachineInstr *> ClauseContent;
115     unsigned AluInstCount = 0;
116     bool IsTex = TII->usesTextureCache(ClauseHead);
117     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
118       if (IsTrivialInst(I))
119         continue;
120       if (AluInstCount > MaxFetchInst)
121         break;
122       if ((IsTex && !TII->usesTextureCache(I)) ||
123           (!IsTex && !TII->usesVertexCache(I)))
124         break;
125       AluInstCount ++;
126       ClauseContent.push_back(I);
127     }
128     MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
129         getHWInstrDesc(IsTex?CF_TC:CF_VC))
130         .addImm(0) // ADDR
131         .addImm(AluInstCount - 1); // COUNT
132     return ClauseFile(MIb, ClauseContent);
133   }
134
135   void
136   EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
137       unsigned &CfCount) {
138     CounterPropagateAddr(Clause.first, CfCount);
139     MachineBasicBlock *BB = Clause.first->getParent();
140     BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
141         .addImm(CfCount);
142     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
143       BB->splice(InsertPos, BB, Clause.second[i]);
144     }
145     CfCount += 2 * Clause.second.size();
146   }
147
148   void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
149     MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
150   }
151   void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
152       const {
153     for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
154         It != E; ++It) {
155       MachineInstr *MI = *It;
156       CounterPropagateAddr(MI, Addr);
157     }
158   }
159
160   unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
161     switch (ST.device()->getGeneration()) {
162     case AMDGPUDeviceInfo::HD4XXX:
163       if (hasPush)
164         StackSubEntry += 2;
165       break;
166     case AMDGPUDeviceInfo::HD5XXX:
167       if (hasPush)
168         StackSubEntry ++;
169     case AMDGPUDeviceInfo::HD6XXX:
170       StackSubEntry += 2;
171       break;
172     }
173     return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
174   }
175
176 public:
177   R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
178     TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
179     ST(tm.getSubtarget<AMDGPUSubtarget>()) {
180       const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
181       if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
182         MaxFetchInst = 8;
183       else
184         MaxFetchInst = 16;
185   }
186
187   virtual bool runOnMachineFunction(MachineFunction &MF) {
188     unsigned MaxStack = 0;
189     unsigned CurrentStack = 0;
190     bool hasPush;
191     for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
192         ++MB) {
193       MachineBasicBlock &MBB = *MB;
194       unsigned CfCount = 0;
195       std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
196       std::vector<MachineInstr * > IfThenElseStack;
197       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
198       if (MFI->ShaderType == 1) {
199         BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
200             getHWInstrDesc(CF_CALL_FS));
201         CfCount++;
202       }
203       std::vector<ClauseFile> FetchClauses;
204       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
205           I != E;) {
206         if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
207           DEBUG(dbgs() << CfCount << ":"; I->dump(););
208           FetchClauses.push_back(MakeFetchClause(MBB, I));
209           CfCount++;
210           continue;
211         }
212
213         MachineBasicBlock::iterator MI = I;
214         I++;
215         switch (MI->getOpcode()) {
216         case AMDGPU::CF_ALU_PUSH_BEFORE:
217           CurrentStack++;
218           MaxStack = std::max(MaxStack, CurrentStack);
219           hasPush = true;
220         case AMDGPU::CF_ALU:
221         case AMDGPU::EG_ExportBuf:
222         case AMDGPU::EG_ExportSwz:
223         case AMDGPU::R600_ExportBuf:
224         case AMDGPU::R600_ExportSwz:
225         case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
226         case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
227           DEBUG(dbgs() << CfCount << ":"; MI->dump(););
228           CfCount++;
229           break;
230         case AMDGPU::WHILELOOP: {
231           CurrentStack+=4;
232           MaxStack = std::max(MaxStack, CurrentStack);
233           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
234               getHWInstrDesc(CF_WHILE_LOOP))
235               .addImm(1);
236           std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
237               std::set<MachineInstr *>());
238           Pair.second.insert(MIb);
239           LoopStack.push_back(Pair);
240           MI->eraseFromParent();
241           CfCount++;
242           break;
243         }
244         case AMDGPU::ENDLOOP: {
245           CurrentStack-=4;
246           std::pair<unsigned, std::set<MachineInstr *> > Pair =
247               LoopStack.back();
248           LoopStack.pop_back();
249           CounterPropagateAddr(Pair.second, CfCount);
250           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
251               .addImm(Pair.first + 1);
252           MI->eraseFromParent();
253           CfCount++;
254           break;
255         }
256         case AMDGPU::IF_PREDICATE_SET: {
257           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
258               getHWInstrDesc(CF_JUMP))
259               .addImm(0)
260               .addImm(0);
261           IfThenElseStack.push_back(MIb);
262           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
263           MI->eraseFromParent();
264           CfCount++;
265           break;
266         }
267         case AMDGPU::ELSE: {
268           MachineInstr * JumpInst = IfThenElseStack.back();
269           IfThenElseStack.pop_back();
270           CounterPropagateAddr(JumpInst, CfCount);
271           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
272               getHWInstrDesc(CF_ELSE))
273               .addImm(0)
274               .addImm(1);
275           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
276           IfThenElseStack.push_back(MIb);
277           MI->eraseFromParent();
278           CfCount++;
279           break;
280         }
281         case AMDGPU::ENDIF: {
282           CurrentStack--;
283           MachineInstr *IfOrElseInst = IfThenElseStack.back();
284           IfThenElseStack.pop_back();
285           CounterPropagateAddr(IfOrElseInst, CfCount + 1);
286           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
287               getHWInstrDesc(CF_POP))
288               .addImm(CfCount + 1)
289               .addImm(1);
290           (void)MIb;
291           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
292           MI->eraseFromParent();
293           CfCount++;
294           break;
295         }
296         case AMDGPU::PREDICATED_BREAK: {
297           CurrentStack--;
298           CfCount += 3;
299           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
300               .addImm(CfCount)
301               .addImm(1);
302           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
303               getHWInstrDesc(CF_LOOP_BREAK))
304               .addImm(0);
305           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
306               .addImm(CfCount)
307               .addImm(1);
308           LoopStack.back().second.insert(MIb);
309           MI->eraseFromParent();
310           break;
311         }
312         case AMDGPU::CONTINUE: {
313           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
314               getHWInstrDesc(CF_LOOP_CONTINUE))
315               .addImm(0);
316           LoopStack.back().second.insert(MIb);
317           MI->eraseFromParent();
318           CfCount++;
319           break;
320         }
321         case AMDGPU::RETURN: {
322           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
323           CfCount++;
324           MI->eraseFromParent();
325           if (CfCount % 2) {
326             BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
327             CfCount++;
328           }
329           for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
330             EmitFetchClause(I, FetchClauses[i], CfCount);
331         }
332         default:
333           break;
334         }
335       }
336       MFI->StackSize = getHWStackSize(MaxStack, hasPush);
337     }
338
339     return false;
340   }
341
342   const char *getPassName() const {
343     return "R600 Control Flow Finalizer Pass";
344   }
345 };
346
// Pass identification, replacement for typeid.
char R600ControlFlowFinalizer::ID = 0;
348
349 }
350
351
352 llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
353   return new R600ControlFlowFinalizer(TM);
354 }