// R600: Add FetchInst bit to instruction defs to denote vertex/tex instructions
// [oota-llvm.git] / lib / Target / R600 / R600ControlFlowFinalizer.cpp
1 //===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their address on the fly; it also sets STACK_SIZE info.
13 //===----------------------------------------------------------------------===//
14
15 #define DEBUG_TYPE "r600cf"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include <algorithm>
#include <set>
#include <utility>
#include <vector>
27
namespace llvm {

/// Machine pass that lowers control-flow pseudo instructions (if/else/endif,
/// loops, breaks, ...) into the native CF instructions of the R600/Evergreen
/// GPU families, resolving clause addresses and recording the hardware stack
/// size for the function.
class R600ControlFlowFinalizer : public MachineFunctionPass {

private:
  // Abstract control-flow opcodes; getHWInstrDesc() maps each one to the
  // generation-specific machine opcode.
  enum ControlFlowInstruction {
    CF_TC,            // Texture-cache fetch clause header
    CF_VC,            // Vertex-cache fetch clause header
    CF_CALL_FS,       // Call to the fetch shader (vertex shaders)
    CF_WHILE_LOOP,
    CF_END_LOOP,
    CF_LOOP_BREAK,
    CF_LOOP_CONTINUE,
    CF_JUMP,
    CF_ELSE,
    CF_POP,
    CF_END
  };

  static char ID;
  const R600InstrInfo *TII;
  unsigned MaxFetchInst;      // Max fetch instructions allowed per clause.
  const AMDGPUSubtarget &ST;
51
52   bool IsTrivialInst(MachineInstr *MI) const {
53     switch (MI->getOpcode()) {
54     case AMDGPU::KILL:
55     case AMDGPU::RETURN:
56       return true;
57     default:
58       return false;
59     }
60   }
61
  /// Maps the abstract control-flow opcode \p CFI onto the MCInstrDesc of
  /// the native instruction for the current GPU generation (R600 vs.
  /// Evergreen, with a Cayman-specific encoding for CF_END).
  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    unsigned Opcode = 0;
    // Evergreen (HD5XXX) and newer use a different CF opcode encoding.
    bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX);
    switch (CFI) {
    case CF_TC:
      Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
      break;
    case CF_VC:
      Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
      break;
    case CF_CALL_FS:
      Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
      break;
    case CF_WHILE_LOOP:
      Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
      break;
    case CF_END_LOOP:
      Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
      break;
    case CF_LOOP_BREAK:
      Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
      break;
    case CF_LOOP_CONTINUE:
      Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
      break;
    case CF_JUMP:
      Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
      break;
    case CF_ELSE:
      Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
      break;
    case CF_POP:
      Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
      break;
    case CF_END:
      // Cayman has its own CF_END encoding, distinct from the other
      // Evergreen parts.
      if (ST.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN) {
        Opcode = AMDGPU::CF_END_CM;
        break;
      }
      Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
      break;
    }
    assert (Opcode && "No opcode selected");
    return TII->get(Opcode);
  }
107
108   MachineBasicBlock::iterator
109   MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
110       unsigned CfAddress) const {
111     MachineBasicBlock::iterator ClauseHead = I;
112     unsigned AluInstCount = 0;
113     bool IsTex = TII->usesTextureCache(ClauseHead);
114     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
115       if (IsTrivialInst(I))
116         continue;
117       if ((IsTex && !TII->usesTextureCache(I)) ||
118           (!IsTex && !TII->usesVertexCache(I)))
119         break;
120       AluInstCount ++;
121       if (AluInstCount > MaxFetchInst)
122         break;
123     }
124     BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
125         getHWInstrDesc(IsTex?CF_TC:CF_VC))
126         .addImm(CfAddress) // ADDR
127         .addImm(AluInstCount); // COUNT
128     return I;
129   }
130   void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
131     MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
132   }
133   void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr)
134       const {
135     for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end();
136         It != E; ++It) {
137       MachineInstr *MI = *It;
138       CounterPropagateAddr(MI, Addr);
139     }
140   }
141
  /// Converts \p StackSubEntry (stack sub-entries used by the function,
  /// adjusted per generation when \p hasPush is set) into the value to
  /// program into the hardware STACK_SIZE field (units of 4 sub-entries).
  unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
    switch (ST.device()->getGeneration()) {
    case AMDGPUDeviceInfo::HD4XXX:
      if (hasPush)
        StackSubEntry += 2;
      break;
    case AMDGPUDeviceInfo::HD5XXX:
      if (hasPush)
        StackSubEntry ++;
      // FALLTHROUGH: HD5XXX also gets the HD6XXX "+2" adjustment below.
      // NOTE(review): the missing break looks intentional — confirm against
      // the hardware stack-sizing rules.
    case AMDGPUDeviceInfo::HD6XXX:
      StackSubEntry += 2;
      break;
    }
    return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
  }
157
158 public:
159   R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
160     TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
161     ST(tm.getSubtarget<AMDGPUSubtarget>()) {
162       const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
163       if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
164         MaxFetchInst = 8;
165       else
166         MaxFetchInst = 16;
167   }
168
169   virtual bool runOnMachineFunction(MachineFunction &MF) {
170     unsigned MaxStack = 0;
171     unsigned CurrentStack = 0;
172     bool hasPush;
173     for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
174         ++MB) {
175       MachineBasicBlock &MBB = *MB;
176       unsigned CfCount = 0;
177       std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
178       std::vector<MachineInstr * > IfThenElseStack;
179       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
180       if (MFI->ShaderType == 1) {
181         BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
182             getHWInstrDesc(CF_CALL_FS));
183         CfCount++;
184       }
185       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
186           I != E;) {
187         if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
188           DEBUG(dbgs() << CfCount << ":"; I->dump(););
189           I = MakeFetchClause(MBB, I, 0);
190           CfCount++;
191           continue;
192         }
193
194         MachineBasicBlock::iterator MI = I;
195         I++;
196         switch (MI->getOpcode()) {
197         case AMDGPU::CF_ALU_PUSH_BEFORE:
198           CurrentStack++;
199           MaxStack = std::max(MaxStack, CurrentStack);
200           hasPush = true;
201         case AMDGPU::CF_ALU:
202         case AMDGPU::EG_ExportBuf:
203         case AMDGPU::EG_ExportSwz:
204         case AMDGPU::R600_ExportBuf:
205         case AMDGPU::R600_ExportSwz:
206         case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
207         case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
208           DEBUG(dbgs() << CfCount << ":"; MI->dump(););
209           CfCount++;
210           break;
211         case AMDGPU::WHILELOOP: {
212           CurrentStack+=4;
213           MaxStack = std::max(MaxStack, CurrentStack);
214           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
215               getHWInstrDesc(CF_WHILE_LOOP))
216               .addImm(1);
217           std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
218               std::set<MachineInstr *>());
219           Pair.second.insert(MIb);
220           LoopStack.push_back(Pair);
221           MI->eraseFromParent();
222           CfCount++;
223           break;
224         }
225         case AMDGPU::ENDLOOP: {
226           CurrentStack-=4;
227           std::pair<unsigned, std::set<MachineInstr *> > Pair =
228               LoopStack.back();
229           LoopStack.pop_back();
230           CounterPropagateAddr(Pair.second, CfCount);
231           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
232               .addImm(Pair.first + 1);
233           MI->eraseFromParent();
234           CfCount++;
235           break;
236         }
237         case AMDGPU::IF_PREDICATE_SET: {
238           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
239               getHWInstrDesc(CF_JUMP))
240               .addImm(0)
241               .addImm(0);
242           IfThenElseStack.push_back(MIb);
243           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
244           MI->eraseFromParent();
245           CfCount++;
246           break;
247         }
248         case AMDGPU::ELSE: {
249           MachineInstr * JumpInst = IfThenElseStack.back();
250           IfThenElseStack.pop_back();
251           CounterPropagateAddr(JumpInst, CfCount);
252           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
253               getHWInstrDesc(CF_ELSE))
254               .addImm(0)
255               .addImm(1);
256           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
257           IfThenElseStack.push_back(MIb);
258           MI->eraseFromParent();
259           CfCount++;
260           break;
261         }
262         case AMDGPU::ENDIF: {
263           CurrentStack--;
264           MachineInstr *IfOrElseInst = IfThenElseStack.back();
265           IfThenElseStack.pop_back();
266           CounterPropagateAddr(IfOrElseInst, CfCount + 1);
267           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
268               getHWInstrDesc(CF_POP))
269               .addImm(CfCount + 1)
270               .addImm(1);
271           (void)MIb;
272           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
273           MI->eraseFromParent();
274           CfCount++;
275           break;
276         }
277         case AMDGPU::PREDICATED_BREAK: {
278           CurrentStack--;
279           CfCount += 3;
280           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
281               .addImm(CfCount)
282               .addImm(1);
283           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
284               getHWInstrDesc(CF_LOOP_BREAK))
285               .addImm(0);
286           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
287               .addImm(CfCount)
288               .addImm(1);
289           LoopStack.back().second.insert(MIb);
290           MI->eraseFromParent();
291           break;
292         }
293         case AMDGPU::CONTINUE: {
294           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
295               getHWInstrDesc(CF_LOOP_CONTINUE))
296               .addImm(0);
297           LoopStack.back().second.insert(MIb);
298           MI->eraseFromParent();
299           CfCount++;
300           break;
301         }
302         case AMDGPU::RETURN: {
303           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
304           CfCount++;
305           MI->eraseFromParent();
306           if (CfCount % 2) {
307             BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
308             CfCount++;
309           }
310         }
311         default:
312           break;
313         }
314       }
315       MFI->StackSize = getHWStackSize(MaxStack, hasPush);
316     }
317
318     return false;
319   }
320
  /// \returns the human-readable pass name shown by e.g. -debug-pass.
  const char *getPassName() const {
    return "R600 Control Flow Finalizer Pass";
  }
324 };
325
326 char R600ControlFlowFinalizer::ID = 0;
327
328 }
329
330
/// Factory hook used by the AMDGPU target to add this pass to the codegen
/// pipeline. Caller takes ownership of the returned pass.
llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
  return new R600ControlFlowFinalizer(TM);
}