1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from the pretty vague "Cortex-A9 Technical Reference Manual" document.
// Functional units referenced by the Cortex-A9 itinerary stages below.
19 def A9_Pipe0 : FuncUnit; // integer pipeline 0
20 def A9_Pipe1 : FuncUnit; // integer pipeline 1
21 def A9_LSPipe : FuncUnit; // load/store pipe
22 def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe
23 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial unit used to model VFP/NEON stalls)
24 def A9_DRegsN : FuncUnit; // FP register set, NEON side (artificial unit used to model VFP/NEON stalls)
26 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
28 def CortexA9Itineraries : ProcessorItineraries<
29 [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
30 // Two fully-pipelined integer ALU pipelines
31 // FIXME: There are no operand latencies for these instructions at all!
33 // Move instructions, unconditional
34 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
35 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
36 InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
37 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
38 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
39 InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
42 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
44 // Binary Instructions that produce a result
45 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
46 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
47 InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
48 InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
50 // Unary Instructions that produce a result
51 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
52 InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
53 InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
55 // Zero and sign extension instructions
56 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
57 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [3, 1, 1]>,
59 // Compare instructions
60 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
61 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
62 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
63 InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
65 // Move instructions, conditional
66 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
67 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
68 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
69 InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
71 // Integer multiply pipeline
73 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
74 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
75 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
76 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
77 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
78 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
79 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
80 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
81 InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>,
82 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
83 InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>,
84 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
85 // Integer load pipeline
86 // FIXME: The timings are some rough approximations
89 InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
90 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
93 InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
94 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
96 // Scaled register offset
97 InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
98 InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
100 // Immediate offset with update
101 InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
102 InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
104 // Register offset with update
105 InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
106 InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
108 // Scaled register offset with update
109 InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
110 InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
113 InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
114 InstrStage<1, [A9_LSPipe]>]>,
117 // Load multiple plus branch
118 InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
119 InstrStage<1, [A9_LSPipe]>,
120 InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
123 // iLoadi + iALUr for t2LDRpci_pic.
124 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
125 InstrStage<1, [A9_LSPipe]>,
126 InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [4, 1]>,
128 // Integer store pipeline
131 InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>,
132 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
135 InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>,
136 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
138 // Scaled register offset
139 InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
140 InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
142 // Immediate offset with update
143 InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
144 InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
146 // Register offset with update
147 InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
148 InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
150 // Scaled register offset with update
151 InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
152 InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
155 InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
156 InstrStage<1, [A9_LSPipe]>]>,
159 // no delay slots, so the latency of a branch is unimportant
160 InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
162 // VFP and NEON share the same register file. This means that every VFP
163 // instruction should wait for full completion of the preceding NEON
164 // instruction and vice-versa. We model this behavior with two artificial FUs:
165 // DRegsVFP and DRegsN.
167 // Every VFP instruction:
168 // - Acquires DRegsVFP resource for 1 cycle
169 // - Reserves DRegsN resource for the whole duration (including time to
170 // register file writeback!).
171 // Every NEON instruction does the same but with FUs swapped.
173 // Since the reserved FU cannot be acquired, this models precisely
174 // "cross-domain" stalls.
177 // Issue through integer pipeline, and execute in NEON unit.
179 // FP Special Register to Integer Register File Move
180 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
181 InstrStage<2, [A9_DRegsN], 0, Reserved>,
182 InstrStage<1, [A9_Pipe1]>,
183 InstrStage<1, [A9_NPipe]>]>,
185 // Single-precision FP Unary
186 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
187 // Extra latency cycles since wbck is 2 cycles
188 InstrStage<3, [A9_DRegsN], 0, Reserved>,
189 InstrStage<1, [A9_Pipe1]>,
190 InstrStage<1, [A9_NPipe]>], [1, 1]>,
192 // Double-precision FP Unary
193 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
194 // Extra latency cycles since wbck is 2 cycles
195 InstrStage<3, [A9_DRegsN], 0, Reserved>,
196 InstrStage<1, [A9_Pipe1]>,
197 InstrStage<1, [A9_NPipe]>], [1, 1]>,
200 // Single-precision FP Compare
201 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
202 // Extra latency cycles since wbck is 4 cycles
203 InstrStage<5, [A9_DRegsN], 0, Reserved>,
204 InstrStage<1, [A9_Pipe1]>,
205 InstrStage<1, [A9_NPipe]>], [1, 1]>,
207 // Double-precision FP Compare
208 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
209 // Extra latency cycles since wbck is 4 cycles
210 InstrStage<5, [A9_DRegsN], 0, Reserved>,
211 InstrStage<1, [A9_Pipe1]>,
212 InstrStage<1, [A9_NPipe]>], [1, 1]>,
214 // Single to Double FP Convert
215 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
216 InstrStage<5, [A9_DRegsN], 0, Reserved>,
217 InstrStage<1, [A9_Pipe1]>,
218 InstrStage<1, [A9_NPipe]>], [4, 1]>,
220 // Double to Single FP Convert
221 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
222 InstrStage<5, [A9_DRegsN], 0, Reserved>,
223 InstrStage<1, [A9_Pipe1]>,
224 InstrStage<1, [A9_NPipe]>], [4, 1]>,
227 // Single to Half FP Convert
228 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
229 InstrStage<5, [A9_DRegsN], 0, Reserved>,
230 InstrStage<1, [A9_Pipe1]>,
231 InstrStage<1, [A9_NPipe]>], [4, 1]>,
233 // Half to Single FP Convert
234 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
235 InstrStage<3, [A9_DRegsN], 0, Reserved>,
236 InstrStage<1, [A9_Pipe1]>,
237 InstrStage<1, [A9_NPipe]>], [2, 1]>,
240 // Single-Precision FP to Integer Convert
241 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
242 InstrStage<5, [A9_DRegsN], 0, Reserved>,
243 InstrStage<1, [A9_Pipe1]>,
244 InstrStage<1, [A9_NPipe]>], [4, 1]>,
246 // Double-Precision FP to Integer Convert
247 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
248 InstrStage<5, [A9_DRegsN], 0, Reserved>,
249 InstrStage<1, [A9_Pipe1]>,
250 InstrStage<1, [A9_NPipe]>], [4, 1]>,
252 // Integer to Single-Precision FP Convert
253 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
254 InstrStage<5, [A9_DRegsN], 0, Reserved>,
255 InstrStage<1, [A9_Pipe1]>,
256 InstrStage<1, [A9_NPipe]>], [4, 1]>,
258 // Integer to Double-Precision FP Convert
259 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
260 InstrStage<5, [A9_DRegsN], 0, Reserved>,
261 InstrStage<1, [A9_Pipe1]>,
262 InstrStage<1, [A9_NPipe]>], [4, 1]>,
264 // Single-precision FP ALU
265 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
266 InstrStage<5, [A9_DRegsN], 0, Reserved>,
267 InstrStage<1, [A9_Pipe1]>,
268 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
270 // Double-precision FP ALU
271 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
272 InstrStage<5, [A9_DRegsN], 0, Reserved>,
273 InstrStage<1, [A9_Pipe1]>,
274 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
276 // Single-precision FP Multiply
277 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
278 InstrStage<6, [A9_DRegsN], 0, Reserved>,
279 InstrStage<1, [A9_Pipe1]>,
280 InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
282 // Double-precision FP Multiply
283 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
284 InstrStage<7, [A9_DRegsN], 0, Reserved>,
285 InstrStage<1, [A9_Pipe1]>,
286 InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
288 // Single-precision FP MAC
289 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
290 InstrStage<9, [A9_DRegsN], 0, Reserved>,
291 InstrStage<1, [A9_Pipe1]>,
292 InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
294 // Double-precision FP MAC
295 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
296 InstrStage<10, [A9_DRegsN], 0, Reserved>,
297 InstrStage<1, [A9_Pipe1]>,
298 InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
300 // Single-precision FP DIV
301 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
302 InstrStage<16, [A9_DRegsN], 0, Reserved>,
303 InstrStage<1, [A9_Pipe1]>,
304 InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
306 // Double-precision FP DIV
307 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
308 InstrStage<26, [A9_DRegsN], 0, Reserved>,
309 InstrStage<1, [A9_Pipe1]>,
310 InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
312 // Single-precision FP SQRT
313 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
314 InstrStage<18, [A9_DRegsN], 0, Reserved>,
315 InstrStage<1, [A9_Pipe1]>,
316 InstrStage<13, [A9_NPipe]>], [17, 1]>,
318 // Double-precision FP SQRT
319 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
320 InstrStage<33, [A9_DRegsN], 0, Reserved>,
321 InstrStage<1, [A9_Pipe1]>,
322 InstrStage<28, [A9_NPipe]>], [32, 1]>,
325 // Integer to Single-precision Move
326 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
327 // Extra 1 latency cycle since wbck is 2 cycles
328 InstrStage<3, [A9_DRegsN], 0, Reserved>,
329 InstrStage<1, [A9_Pipe1]>,
330 InstrStage<1, [A9_NPipe]>], [1, 1]>,
332 // Integer to Double-precision Move
333 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
334 // Extra 1 latency cycle since wbck is 2 cycles
335 InstrStage<3, [A9_DRegsN], 0, Reserved>,
336 InstrStage<1, [A9_Pipe1]>,
337 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
339 // Single-precision to Integer Move
340 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
341 InstrStage<2, [A9_DRegsN], 0, Reserved>,
342 InstrStage<1, [A9_Pipe1]>,
343 InstrStage<1, [A9_NPipe]>], [1, 1]>,
345 // Double-precision to Integer Move
346 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
347 InstrStage<2, [A9_DRegsN], 0, Reserved>,
348 InstrStage<1, [A9_Pipe1]>,
349 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
351 // Single-precision FP Load
352 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
353 InstrStage<2, [A9_DRegsN], 0, Reserved>,
354 InstrStage<1, [A9_Pipe1], 0>,
355 InstrStage<1, [A9_LSPipe]>,
356 InstrStage<1, [A9_NPipe]>]>,
358 // Double-precision FP Load
359 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
360 InstrStage<2, [A9_DRegsN], 0, Reserved>,
361 InstrStage<1, [A9_Pipe1], 0>,
362 InstrStage<1, [A9_LSPipe]>,
363 InstrStage<1, [A9_NPipe]>]>,
366 InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
367 InstrStage<2, [A9_DRegsN], 0, Reserved>,
368 InstrStage<1, [A9_Pipe1], 0>,
369 InstrStage<1, [A9_LSPipe]>,
370 InstrStage<1, [A9_NPipe]>]>,
372 // Single-precision FP Store
373 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
374 InstrStage<2, [A9_DRegsN], 0, Reserved>,
375 InstrStage<1, [A9_Pipe1], 0>,
376 InstrStage<1, [A9_LSPipe]>,
377 InstrStage<1, [A9_NPipe]>]>,
379 // Double-precision FP Store
380 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
381 InstrStage<2, [A9_DRegsN], 0, Reserved>,
382 InstrStage<1, [A9_Pipe1], 0>,
383 InstrStage<1, [A9_LSPipe]>,
384 InstrStage<1, [A9_NPipe]>]>,
387 InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
388 InstrStage<2, [A9_DRegsN], 0, Reserved>,
389 InstrStage<1, [A9_Pipe1], 0>,
390 InstrStage<1, [A9_LSPipe]>,
391 InstrStage<1, [A9_NPipe]>]>,
393 // Issue through integer pipeline, and execute in NEON unit.
394 // FIXME: Neon pipeline and LdSt unit are multiplexed.
395 // Add some syntactic sugar to model this!
397 // FIXME: We don't model this instruction properly
398 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
399 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
400 InstrStage<1, [A9_Pipe1], 0>,
401 InstrStage<1, [A9_LSPipe]>,
402 InstrStage<1, [A9_NPipe]>]>,
405 // FIXME: We don't model this instruction properly
406 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
407 // Extra latency cycles since wbck is 6 cycles
408 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
409 InstrStage<1, [A9_Pipe1], 0>,
410 InstrStage<1, [A9_LSPipe]>,
411 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
414 // FIXME: We don't model this instruction properly
415 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
416 // Extra latency cycles since wbck is 6 cycles
417 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
418 InstrStage<1, [A9_Pipe1], 0>,
419 InstrStage<1, [A9_LSPipe]>,
420 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
423 // FIXME: We don't model this instruction properly
424 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
425 // Extra latency cycles since wbck is 6 cycles
426 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
427 InstrStage<1, [A9_Pipe1], 0>,
428 InstrStage<1, [A9_LSPipe]>,
429 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
432 // FIXME: We don't model this instruction properly
433 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
434 // Extra latency cycles since wbck is 6 cycles
435 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
436 InstrStage<1, [A9_Pipe1], 0>,
437 InstrStage<1, [A9_LSPipe]>,
438 InstrStage<1, [A9_NPipe]>]>,
440 // Double-register Integer Unary
441 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
442 // Extra latency cycles since wbck is 6 cycles
443 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
444 InstrStage<1, [A9_Pipe1]>,
445 InstrStage<1, [A9_NPipe]>], [4, 2]>,
447 // Quad-register Integer Unary
448 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
449 // Extra latency cycles since wbck is 6 cycles
450 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
451 InstrStage<1, [A9_Pipe1]>,
452 InstrStage<1, [A9_NPipe]>], [4, 2]>,
454 // Double-register Integer Q-Unary
455 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
456 // Extra latency cycles since wbck is 6 cycles
457 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
458 InstrStage<1, [A9_Pipe1]>,
459 InstrStage<1, [A9_NPipe]>], [4, 1]>,
461 // Quad-register Integer Q-Unary
462 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
463 // Extra latency cycles since wbck is 6 cycles
464 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
465 InstrStage<1, [A9_Pipe1]>,
466 InstrStage<1, [A9_NPipe]>], [4, 1]>,
468 // Double-register Integer Binary
469 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
470 // Extra latency cycles since wbck is 6 cycles
471 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
472 InstrStage<1, [A9_Pipe1]>,
473 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
475 // Quad-register Integer Binary
476 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
477 // Extra latency cycles since wbck is 6 cycles
478 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
479 InstrStage<1, [A9_Pipe1]>,
480 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
482 // Double-register Integer Subtract
483 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
484 // Extra latency cycles since wbck is 6 cycles
485 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
486 InstrStage<1, [A9_Pipe1]>,
487 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
489 // Quad-register Integer Subtract
490 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
491 // Extra latency cycles since wbck is 6 cycles
492 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
493 InstrStage<1, [A9_Pipe1]>,
494 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
496 // Double-register Integer Shift
497 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
498 // Extra latency cycles since wbck is 6 cycles
499 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
500 InstrStage<1, [A9_Pipe1]>,
501 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
503 // Quad-register Integer Shift
504 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
505 // Extra latency cycles since wbck is 6 cycles
506 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
507 InstrStage<1, [A9_Pipe1]>,
508 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
510 // Double-register Integer Shift (4 cycle)
511 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
512 // Extra latency cycles since wbck is 6 cycles
513 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
514 InstrStage<1, [A9_Pipe1]>,
515 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
517 // Quad-register Integer Shift (4 cycle)
518 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
519 // Extra latency cycles since wbck is 6 cycles
520 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
521 InstrStage<1, [A9_Pipe1]>,
522 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
524 // Double-register Integer Binary (4 cycle)
525 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
526 // Extra latency cycles since wbck is 6 cycles
527 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
528 InstrStage<1, [A9_Pipe1]>,
529 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
531 // Quad-register Integer Binary (4 cycle)
532 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
533 // Extra latency cycles since wbck is 6 cycles
534 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
535 InstrStage<1, [A9_Pipe1]>,
536 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
538 // Double-register Integer Subtract (4 cycle)
// Keyed on IIC_VSUBi4D: IIC_VSUBiD already has its own 3-cycle entry above,
// and this entry carries the 4-cycle result latency.
539 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
540 // Extra latency cycles since wbck is 6 cycles
541 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
542 InstrStage<1, [A9_Pipe1]>,
543 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
545 // Quad-register Integer Subtract (4 cycle)
// Keyed on IIC_VSUBi4Q: IIC_VSUBiQ already has its own 3-cycle entry above,
// and this entry carries the 4-cycle result latency.
546 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
547 // Extra latency cycles since wbck is 6 cycles
548 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
549 InstrStage<1, [A9_Pipe1]>,
550 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
553 // Double-register Integer Count
554 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
555 // Extra latency cycles since wbck is 6 cycles
556 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
557 InstrStage<1, [A9_Pipe1]>,
558 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
560 // Quad-register Integer Count
561 // Result written in N3, but that is relative to the last cycle of multicycle,
562 // so we use 4 for those cases
563 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
564 // Extra latency cycles since wbck is 7 cycles
565 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
566 InstrStage<1, [A9_Pipe1]>,
567 InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
569 // Double-register Absolute Difference and Accumulate
570 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
571 // Extra latency cycles since wbck is 6 cycles
572 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
573 InstrStage<1, [A9_Pipe1]>,
574 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
576 // Quad-register Absolute Difference and Accumulate
577 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
578 // Extra latency cycles since wbck is 6 cycles
579 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
580 InstrStage<1, [A9_Pipe1]>,
581 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
583 // Double-register Integer Pair Add Long
584 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
585 // Extra latency cycles since wbck is 6 cycles
586 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
587 InstrStage<1, [A9_Pipe1]>,
588 InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
590 // Quad-register Integer Pair Add Long
591 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
592 // Extra latency cycles since wbck is 6 cycles
593 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
594 InstrStage<1, [A9_Pipe1]>,
595 InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
598 // Double-register Integer Multiply (.8, .16)
599 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
600 // Extra latency cycles since wbck is 6 cycles
601 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
602 InstrStage<1, [A9_Pipe1]>,
603 InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
605 // Quad-register Integer Multiply (.8, .16)
606 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
607 // Extra latency cycles since wbck is 7 cycles
608 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
609 InstrStage<1, [A9_Pipe1]>,
610 InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
613 // Double-register Integer Multiply (.32)
614 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
615 // Extra latency cycles since wbck is 7 cycles
616 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
617 InstrStage<1, [A9_Pipe1]>,
618 InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
620 // Quad-register Integer Multiply (.32)
621 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
622 // Extra latency cycles since wbck is 9 cycles
623 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
624 InstrStage<1, [A9_Pipe1]>,
625 InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
627 // Double-register Integer Multiply-Accumulate (.8, .16)
628 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
629 // Extra latency cycles since wbck is 6 cycles
630 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
631 InstrStage<1, [A9_Pipe1]>,
632 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
634 // Double-register Integer Multiply-Accumulate (.32)
635 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
636 // Extra latency cycles since wbck is 7 cycles
637 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
638 InstrStage<1, [A9_Pipe1]>,
639 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
641 // Quad-register Integer Multiply-Accumulate (.8, .16)
642 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
643 // Extra latency cycles since wbck is 7 cycles
644 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
645 InstrStage<1, [A9_Pipe1]>,
646 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
648 // Quad-register Integer Multiply-Accumulate (.32)
649 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
650 // Extra latency cycles since wbck is 9 cycles
651 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
652 InstrStage<1, [A9_Pipe1]>,
653 InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
656 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
657 // Extra latency cycles since wbck is 6 cycles
658 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
659 InstrStage<1, [A9_Pipe1]>,
660 InstrStage<1, [A9_NPipe]>], [3]>,
662 // Double-register Permute Move
// NOTE(review): the final stage occupies A9_LSPipe, whereas the neighbouring
// NEON move entries finish on A9_NPipe -- confirm this is intended.
663 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
664 // FIXME: all latencies are arbitrary, no information is available
665 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
666 InstrStage<1, [A9_Pipe1]>,
667 InstrStage<1, [A9_LSPipe]>], [2, 1]>,
669 // Quad-register Permute Move
670 // Result written in N2, but that is relative to the last cycle of multicycle,
671 // so we use 3 for those cases
672 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
673 // FIXME: all latencies are arbitrary, no information is available
674 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
675 InstrStage<1, [A9_Pipe1]>,
676 InstrStage<2, [A9_NPipe]>], [3, 1]>,
678 // Integer to Single-precision Move
679 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
680 // FIXME: all latencies are arbitrary, no information is available
681 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
682 InstrStage<1, [A9_Pipe1]>,
683 InstrStage<1, [A9_NPipe]>], [2, 1]>,
685 // Integer to Double-precision Move
686 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
687 // FIXME: all latencies are arbitrary, no information is available
688 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
689 InstrStage<1, [A9_Pipe1]>,
690 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
692 // Single-precision to Integer Move
693 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
694 // FIXME: all latencies are arbitrary, no information is available
695 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
696 InstrStage<1, [A9_Pipe1]>,
697 InstrStage<1, [A9_NPipe]>], [2, 1]>,
699 // Double-precision to Integer Move
700 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
701 // FIXME: all latencies are arbitrary, no information is available
702 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
703 InstrStage<1, [A9_Pipe1]>,
704 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
706 // Integer to Lane Move
707 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
708 // FIXME: all latencies are arbitrary, no information is available
709 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
710 InstrStage<1, [A9_Pipe1]>,
711 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
714 // Double-register FP Unary
715 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
716 // Extra latency cycles since wbck is 6 cycles
717 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
718 InstrStage<1, [A9_Pipe1]>,
719 InstrStage<1, [A9_NPipe]>], [5, 2]>,
721 // Quad-register FP Unary
722 // Result written in N5, but that is relative to the last cycle of multicycle,
723 // so we use 6 for those cases
724 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
725 // Extra latency cycles since wbck is 7 cycles
726 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
727 InstrStage<1, [A9_Pipe1]>,
728 InstrStage<2, [A9_NPipe]>], [6, 2]>,
730 // Double-register FP Binary
731 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
733 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
734 // Extra latency cycles since wbck is 6 cycles
735 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
736 InstrStage<1, [A9_Pipe1]>,
737 InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
739 // Quad-register FP Binary
740 // Result written in N5, but that is relative to the last cycle of multicycle,
741 // so we use 6 for those cases
742 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
744 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
745 // Extra latency cycles since wbck is 7 cycles
746 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
747 InstrStage<1, [A9_Pipe1]>,
748 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
750 // Double-register FP Multiply-Accumulate
751 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
752 // Extra latency cycles since wbck is 7 cycles
753 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
754 InstrStage<1, [A9_Pipe1]>,
755 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
757 // Quad-register FP Multiply-Accumulate
758 // Result written in N9, but that is relative to the last cycle of multicycle,
759 // so we use 10 for those cases
760 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
761 // Extra latency cycles since wbck is 9 cycles
762 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
763 InstrStage<1, [A9_Pipe1]>,
764 InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
766 // Double-register Reciprocal Step
767 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
768 // Extra latency cycles since wbck is 7 cycles
769 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
770 InstrStage<1, [A9_Pipe1]>,
771 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
773 // Quad-register Reciprocal Step
774 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
775 // Extra latency cycles since wbck is 9 cycles
776 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
777 InstrStage<1, [A9_Pipe1]>,
778 InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
780 // Double-register Permute
781 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
782 // Extra latency cycles since wbck is 6 cycles
783 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
784 InstrStage<1, [A9_Pipe1]>,
785 InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
787 // Quad-register Permute
788 // Result written in N2, but that is relative to the last cycle of multicycle,
789 // so we use 3 for those cases
790 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
791 // Extra latency cycles since wbck is 7 cycles
792 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
793 InstrStage<1, [A9_Pipe1]>,
794 InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
796 // Quad-register Permute (3 cycle issue)
797 // Result written in N2, but that is relative to the last cycle of multicycle,
798 // so we use 4 for those cases
// NOTE(review): the final 3-cycle stage occupies A9_LSPipe, whereas the other
// permute entries (IIC_VPERMD, IIC_VPERMQ) finish on A9_NPipe -- confirm this
// is intended.
799 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
800 // Extra latency cycles since wbck is 8 cycles
801 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
802 InstrStage<1, [A9_Pipe1]>,
803 InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
806 // Double-register VEXT
807 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
808 // Extra latency cycles since wbck is 6 cycles
809 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
810 InstrStage<1, [A9_Pipe1]>,
811 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
813 // Quad-register VEXT
814 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
815 // Extra latency cycles since wbck is 7 cycles
816 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
817 InstrStage<1, [A9_Pipe1]>,
818 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
// VTB itineraries (IIC_VTB1 .. IIC_VTB4); each successive entry lists one more
// operand cycle than the previous one.
// NOTE(review): IIC_VTB2 and IIC_VTB3 hold A9_DRegsN for 2 cycles, whereas
// IIC_VTB1, IIC_VTB4 and every other NEON entry in this table use 1 -- confirm
// this is intended.
821 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
822 // Extra latency cycles since wbck is 7 cycles
823 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
824 InstrStage<1, [A9_Pipe1]>,
825 InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
826 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
827 // Extra latency cycles since wbck is 7 cycles
828 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
829 InstrStage<1, [A9_Pipe1]>,
830 InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
831 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
832 // Extra latency cycles since wbck is 8 cycles
833 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
834 InstrStage<1, [A9_Pipe1]>,
835 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
836 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
837 // Extra latency cycles since wbck is 8 cycles
838 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
839 InstrStage<1, [A9_Pipe1]>,
840 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
// VTBX itineraries (IIC_VTBX1 .. IIC_VTBX4); each successive entry lists one
// more operand cycle than the previous one.
// NOTE(review): IIC_VTBX4 occupies A9_NPipe for 2 cycles while IIC_VTBX3 and
// IIC_VTB4 (same 9-cycle A9_DRegsVFP reservation) use 3 -- confirm this is
// intended.
843 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
844 // Extra latency cycles since wbck is 7 cycles
845 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
846 InstrStage<1, [A9_Pipe1]>,
847 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
848 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
849 // Extra latency cycles since wbck is 7 cycles
850 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
851 InstrStage<1, [A9_Pipe1]>,
852 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
853 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
854 // Extra latency cycles since wbck is 8 cycles
855 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
856 InstrStage<1, [A9_Pipe1]>,
857 InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
858 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
859 // Extra latency cycles since wbck is 8 cycles
860 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
861 InstrStage<1, [A9_Pipe1]>,
862 InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>