1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
19 def A9_Pipe0 : FuncUnit; // integer pipeline 0 (dual-issue partner of A9_Pipe1)
20 def A9_Pipe1 : FuncUnit; // integer pipeline 1 (dual-issue partner of A9_Pipe0)
21 def A9_LSPipe : FuncUnit; // load/store pipe
22 def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe
23 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial unit for VFP/NEON cross-domain stalls)
24 def A9_DRegsN : FuncUnit; // FP register set, NEON side (artificial unit for VFP/NEON cross-domain stalls)
26 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
28 def CortexA9Itineraries : ProcessorItineraries<
29 [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
30 // Two fully-pipelined integer ALU pipelines
31 // FIXME: There are no operand latencies for these instructions at all!
33 // Move instructions, unconditional
34 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
35 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
36 InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
37 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
38 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
39 InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
42 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
44 // Binary Instructions that produce a result
45 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
46 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
47 InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
48 InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
50 // Unary Instructions that produce a result
51 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
52 InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
53 InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
55 // Compare instructions
56 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
57 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
58 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
59 InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
61 // Move instructions, conditional
62 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
63 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
64 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
65 InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
67 // Integer multiply pipeline
69 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
70 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
71 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
72 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
73 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
74 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
75 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
76 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
77 InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>,
78 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
79 InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>,
80 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
81 // Integer load pipeline
82 // FIXME: The timings are some rough approximations
85 InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
86 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
89 InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
90 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
92 // Scaled register offset
93 InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
94 InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
96 // Immediate offset with update
97 InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
98 InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
100 // Register offset with update
101 InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
102 InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
104 // Scaled register offset with update
105 InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
106 InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
109 InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
110 InstrStage<1, [A9_LSPipe]>]>,
113 // Load multiple plus branch
114 InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
115 InstrStage<1, [A9_LSPipe]>,
116 InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
118 // Integer store pipeline
121 InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>,
122 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
125 InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>,
126 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
128 // Scaled register offset
129 InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
130 InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
132 // Immediate offset with update
133 InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
134 InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
136 // Register offset with update
137 InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
138 InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
140 // Scaled register offset with update
141 InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
142 InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
145 InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
146 InstrStage<1, [A9_LSPipe]>]>,
149 // no delay slots, so the latency of a branch is unimportant
150 InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
152 // VFP and NEON share the same register file. This means that every VFP
153 // instruction should wait for full completion of the preceding NEON
154 // instruction and vice-versa. We model this behavior with two artificial FUs:
155 // DRegsVFP and DRegsN.
157 // Every VFP instruction:
158 // - Acquires DRegsVFP resource for 1 cycle
159 // - Reserves DRegsN resource for the whole duration (including time to
160 // register file writeback!).
161 // Every NEON instruction does the same but with FUs swapped.
163 // Since the reserved FU cannot be acquired, this models precisely
164 // "cross-domain" stalls.
167 // Issue through integer pipeline, and execute in NEON unit.
169 // FP Special Register to Integer Register File Move
170 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
171 InstrStage<2, [A9_DRegsN], 0, Reserved>,
172 InstrStage<1, [A9_Pipe1]>,
173 InstrStage<1, [A9_NPipe]>]>,
175 // Single-precision FP Unary
176 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
177 // Extra latency cycles since wbck is 2 cycles
178 InstrStage<3, [A9_DRegsN], 0, Reserved>,
179 InstrStage<1, [A9_Pipe1]>,
180 InstrStage<1, [A9_NPipe]>], [1, 1]>,
182 // Double-precision FP Unary
183 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
184 // Extra latency cycles since wbck is 2 cycles
185 InstrStage<3, [A9_DRegsN], 0, Reserved>,
186 InstrStage<1, [A9_Pipe1]>,
187 InstrStage<1, [A9_NPipe]>], [1, 1]>,
190 // Single-precision FP Compare
191 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
192 // Extra latency cycles since wbck is 4 cycles
193 InstrStage<5, [A9_DRegsN], 0, Reserved>,
194 InstrStage<1, [A9_Pipe1]>,
195 InstrStage<1, [A9_NPipe]>], [1, 1]>,
197 // Double-precision FP Compare
198 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
199 // Extra latency cycles since wbck is 4 cycles
200 InstrStage<5, [A9_DRegsN], 0, Reserved>,
201 InstrStage<1, [A9_Pipe1]>,
202 InstrStage<1, [A9_NPipe]>], [1, 1]>,
204 // Single to Double FP Convert
205 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
206 InstrStage<5, [A9_DRegsN], 0, Reserved>,
207 InstrStage<1, [A9_Pipe1]>,
208 InstrStage<1, [A9_NPipe]>], [4, 1]>,
210 // Double to Single FP Convert
211 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
212 InstrStage<5, [A9_DRegsN], 0, Reserved>,
213 InstrStage<1, [A9_Pipe1]>,
214 InstrStage<1, [A9_NPipe]>], [4, 1]>,
217 // Single to Half FP Convert
218 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
219 InstrStage<5, [A9_DRegsN], 0, Reserved>,
220 InstrStage<1, [A9_Pipe1]>,
221 InstrStage<1, [A9_NPipe]>], [4, 1]>,
223 // Half to Single FP Convert
224 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
225 InstrStage<3, [A9_DRegsN], 0, Reserved>,
226 InstrStage<1, [A9_Pipe1]>,
227 InstrStage<1, [A9_NPipe]>], [2, 1]>,
230 // Single-Precision FP to Integer Convert
231 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
232 InstrStage<5, [A9_DRegsN], 0, Reserved>,
233 InstrStage<1, [A9_Pipe1]>,
234 InstrStage<1, [A9_NPipe]>], [4, 1]>,
236 // Double-Precision FP to Integer Convert
237 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
238 InstrStage<5, [A9_DRegsN], 0, Reserved>,
239 InstrStage<1, [A9_Pipe1]>,
240 InstrStage<1, [A9_NPipe]>], [4, 1]>,
242 // Integer to Single-Precision FP Convert
243 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
244 InstrStage<5, [A9_DRegsN], 0, Reserved>,
245 InstrStage<1, [A9_Pipe1]>,
246 InstrStage<1, [A9_NPipe]>], [4, 1]>,
248 // Integer to Double-Precision FP Convert
249 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
250 InstrStage<5, [A9_DRegsN], 0, Reserved>,
251 InstrStage<1, [A9_Pipe1]>,
252 InstrStage<1, [A9_NPipe]>], [4, 1]>,
254 // Single-precision FP ALU
255 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
256 InstrStage<5, [A9_DRegsN], 0, Reserved>,
257 InstrStage<1, [A9_Pipe1]>,
258 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
260 // Double-precision FP ALU
261 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
262 InstrStage<5, [A9_DRegsN], 0, Reserved>,
263 InstrStage<1, [A9_Pipe1]>,
264 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
266 // Single-precision FP Multiply
267 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
268 InstrStage<6, [A9_DRegsN], 0, Reserved>,
269 InstrStage<1, [A9_Pipe1]>,
270 InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
272 // Double-precision FP Multiply
273 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
274 InstrStage<7, [A9_DRegsN], 0, Reserved>,
275 InstrStage<1, [A9_Pipe1]>,
276 InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
278 // Single-precision FP MAC
279 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
280 InstrStage<9, [A9_DRegsN], 0, Reserved>,
281 InstrStage<1, [A9_Pipe1]>,
282 InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
284 // Double-precision FP MAC
285 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
286 InstrStage<10, [A9_DRegsN], 0, Reserved>,
287 InstrStage<1, [A9_Pipe1]>,
288 InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
290 // Single-precision FP DIV
291 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
292 InstrStage<16, [A9_DRegsN], 0, Reserved>,
293 InstrStage<1, [A9_Pipe1]>,
294 InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
296 // Double-precision FP DIV
297 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
298 InstrStage<26, [A9_DRegsN], 0, Reserved>,
299 InstrStage<1, [A9_Pipe1]>,
300 InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
302 // Single-precision FP SQRT
303 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
304 InstrStage<18, [A9_DRegsN], 0, Reserved>,
305 InstrStage<1, [A9_Pipe1]>,
306 InstrStage<13, [A9_NPipe]>], [17, 1]>,
308 // Double-precision FP SQRT
309 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
310 InstrStage<33, [A9_DRegsN], 0, Reserved>,
311 InstrStage<1, [A9_Pipe1]>,
312 InstrStage<28, [A9_NPipe]>], [32, 1]>,
315 // Integer to Single-precision Move
316 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
317 // Extra 1 latency cycle since wbck is 2 cycles
318 InstrStage<3, [A9_DRegsN], 0, Reserved>,
319 InstrStage<1, [A9_Pipe1]>,
320 InstrStage<1, [A9_NPipe]>], [1, 1]>,
322 // Integer to Double-precision Move
323 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
324 // Extra 1 latency cycle since wbck is 2 cycles
325 InstrStage<3, [A9_DRegsN], 0, Reserved>,
326 InstrStage<1, [A9_Pipe1]>,
327 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
329 // Single-precision to Integer Move
330 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
331 InstrStage<2, [A9_DRegsN], 0, Reserved>,
332 InstrStage<1, [A9_Pipe1]>,
333 InstrStage<1, [A9_NPipe]>], [1, 1]>,
335 // Double-precision to Integer Move
336 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
337 InstrStage<2, [A9_DRegsN], 0, Reserved>,
338 InstrStage<1, [A9_Pipe1]>,
339 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
341 // Single-precision FP Load
342 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
343 InstrStage<2, [A9_DRegsN], 0, Reserved>,
344 InstrStage<1, [A9_Pipe1], 0>,
345 InstrStage<1, [A9_LSPipe]>,
346 InstrStage<1, [A9_NPipe]>]>,
348 // Double-precision FP Load
349 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
350 InstrStage<2, [A9_DRegsN], 0, Reserved>,
351 InstrStage<1, [A9_Pipe1], 0>,
352 InstrStage<1, [A9_LSPipe]>,
353 InstrStage<1, [A9_NPipe]>]>,
356 InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
357 InstrStage<2, [A9_DRegsN], 0, Reserved>,
358 InstrStage<1, [A9_Pipe1], 0>,
359 InstrStage<1, [A9_LSPipe]>,
360 InstrStage<1, [A9_NPipe]>]>,
362 // Single-precision FP Store
363 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
364 InstrStage<2, [A9_DRegsN], 0, Reserved>,
365 InstrStage<1, [A9_Pipe1], 0>,
366 InstrStage<1, [A9_LSPipe]>,
367 InstrStage<1, [A9_NPipe]>]>,
369 // Double-precision FP Store
370 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
371 InstrStage<2, [A9_DRegsN], 0, Reserved>,
372 InstrStage<1, [A9_Pipe1], 0>,
373 InstrStage<1, [A9_LSPipe]>,
374 InstrStage<1, [A9_NPipe]>]>,
377 InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
378 InstrStage<2, [A9_DRegsN], 0, Reserved>,
379 InstrStage<1, [A9_Pipe1], 0>,
380 InstrStage<1, [A9_LSPipe]>,
381 InstrStage<1, [A9_NPipe]>]>,
383 // Issue through integer pipeline, and execute in NEON unit.
384 // FIXME: Neon pipeline and LdSt unit are multiplexed.
385 // Add some syntactic sugar to model this!
387 // FIXME: We don't model this instruction properly
388 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
389 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
390 InstrStage<1, [A9_Pipe1], 0>,
391 InstrStage<1, [A9_LSPipe]>,
392 InstrStage<1, [A9_NPipe]>]>,
395 // FIXME: We don't model this instruction properly
396 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
397 // Extra latency cycles since wbck is 6 cycles
398 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
399 InstrStage<1, [A9_Pipe1], 0>,
400 InstrStage<1, [A9_LSPipe]>,
401 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
404 // FIXME: We don't model this instruction properly
405 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
406 // Extra latency cycles since wbck is 6 cycles
407 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
408 InstrStage<1, [A9_Pipe1], 0>,
409 InstrStage<1, [A9_LSPipe]>,
410 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
413 // FIXME: We don't model this instruction properly
414 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
415 // Extra latency cycles since wbck is 6 cycles
416 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
417 InstrStage<1, [A9_Pipe1], 0>,
418 InstrStage<1, [A9_LSPipe]>,
419 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
422 // FIXME: We don't model this instruction properly
423 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
424 // Extra latency cycles since wbck is 6 cycles
425 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
426 InstrStage<1, [A9_Pipe1], 0>,
427 InstrStage<1, [A9_LSPipe]>,
428 InstrStage<1, [A9_NPipe]>]>,
430 // Double-register Integer Unary
431 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
432 // Extra latency cycles since wbck is 6 cycles
433 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
434 InstrStage<1, [A9_Pipe1]>,
435 InstrStage<1, [A9_NPipe]>], [4, 2]>,
437 // Quad-register Integer Unary
438 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
439 // Extra latency cycles since wbck is 6 cycles
440 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
441 InstrStage<1, [A9_Pipe1]>,
442 InstrStage<1, [A9_NPipe]>], [4, 2]>,
444 // Double-register Integer Q-Unary
445 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
446 // Extra latency cycles since wbck is 6 cycles
447 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
448 InstrStage<1, [A9_Pipe1]>,
449 InstrStage<1, [A9_NPipe]>], [4, 1]>,
451 // Quad-register Integer Q-Unary
452 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
453 // Extra latency cycles since wbck is 6 cycles
454 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
455 InstrStage<1, [A9_Pipe1]>,
456 InstrStage<1, [A9_NPipe]>], [4, 1]>,
458 // Double-register Integer Binary
459 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
460 // Extra latency cycles since wbck is 6 cycles
461 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
462 InstrStage<1, [A9_Pipe1]>,
463 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
465 // Quad-register Integer Binary
466 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
467 // Extra latency cycles since wbck is 6 cycles
468 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
469 InstrStage<1, [A9_Pipe1]>,
470 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
472 // Double-register Integer Subtract
473 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
474 // Extra latency cycles since wbck is 6 cycles
475 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
476 InstrStage<1, [A9_Pipe1]>,
477 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
479 // Quad-register Integer Subtract
480 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
481 // Extra latency cycles since wbck is 6 cycles
482 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
483 InstrStage<1, [A9_Pipe1]>,
484 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
486 // Double-register Integer Shift
487 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
488 // Extra latency cycles since wbck is 6 cycles
489 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
490 InstrStage<1, [A9_Pipe1]>,
491 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
493 // Quad-register Integer Shift
494 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
495 // Extra latency cycles since wbck is 6 cycles
496 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
497 InstrStage<1, [A9_Pipe1]>,
498 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
500 // Double-register Integer Shift (4 cycle)
501 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
502 // Extra latency cycles since wbck is 6 cycles
503 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
504 InstrStage<1, [A9_Pipe1]>,
505 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
507 // Quad-register Integer Shift (4 cycle)
508 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
509 // Extra latency cycles since wbck is 6 cycles
510 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
511 InstrStage<1, [A9_Pipe1]>,
512 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
514 // Double-register Integer Binary (4 cycle)
515 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
516 // Extra latency cycles since wbck is 6 cycles
517 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
518 InstrStage<1, [A9_Pipe1]>,
519 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
521 // Quad-register Integer Binary (4 cycle)
522 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
523 // Extra latency cycles since wbck is 6 cycles
524 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
525 InstrStage<1, [A9_Pipe1]>,
526 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
528 // Double-register Integer Subtract (4 cycle); distinct from the 3-cycle IIC_VSUBiD itinerary
529 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
530 // Extra latency cycles since wbck is 6 cycles
531 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
532 InstrStage<1, [A9_Pipe1]>,
533 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
535 // Quad-register Integer Subtract (4 cycle); distinct from the 3-cycle IIC_VSUBiQ itinerary
536 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
537 // Extra latency cycles since wbck is 6 cycles
538 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
539 InstrStage<1, [A9_Pipe1]>,
540 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
543 // Double-register Integer Count
544 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
545 // Extra latency cycles since wbck is 6 cycles
546 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
547 InstrStage<1, [A9_Pipe1]>,
548 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
550 // Quad-register Integer Count
551 // Result written in N3, but that is relative to the last cycle of multicycle,
552 // so we use 4 for those cases
553 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
554 // Extra latency cycles since wbck is 7 cycles
555 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
556 InstrStage<1, [A9_Pipe1]>,
557 InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
559 // Double-register Absolute Difference and Accumulate
560 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
561 // Extra latency cycles since wbck is 6 cycles
562 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
563 InstrStage<1, [A9_Pipe1]>,
564 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
566 // Quad-register Absolute Difference and Accumulate
567 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
568 // Extra latency cycles since wbck is 6 cycles
569 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
570 InstrStage<1, [A9_Pipe1]>,
571 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
573 // Double-register Integer Pair Add Long
574 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
575 // Extra latency cycles since wbck is 6 cycles
576 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
577 InstrStage<1, [A9_Pipe1]>,
578 InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
580 // Quad-register Integer Pair Add Long
581 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
582 // Extra latency cycles since wbck is 6 cycles
583 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
584 InstrStage<1, [A9_Pipe1]>,
585 InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
588 // Double-register Integer Multiply (.8, .16)
589 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
590 // Extra latency cycles since wbck is 6 cycles
591 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
592 InstrStage<1, [A9_Pipe1]>,
593 InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
595 // Quad-register Integer Multiply (.8, .16)
596 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
597 // Extra latency cycles since wbck is 7 cycles
598 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
599 InstrStage<1, [A9_Pipe1]>,
600 InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
603 // Double-register Integer Multiply (.32)
604 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
605 // Extra latency cycles since wbck is 7 cycles
606 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
607 InstrStage<1, [A9_Pipe1]>,
608 InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
610 // Quad-register Integer Multiply (.32)
611 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
612 // Extra latency cycles since wbck is 9 cycles
613 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
614 InstrStage<1, [A9_Pipe1]>,
615 InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
617 // Double-register Integer Multiply-Accumulate (.8, .16)
618 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
619 // Extra latency cycles since wbck is 6 cycles
620 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
621 InstrStage<1, [A9_Pipe1]>,
622 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
624 // Double-register Integer Multiply-Accumulate (.32)
625 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
626 // Extra latency cycles since wbck is 7 cycles
627 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
628 InstrStage<1, [A9_Pipe1]>,
629 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
631 // Quad-register Integer Multiply-Accumulate (.8, .16)
632 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
633 // Extra latency cycles since wbck is 7 cycles
634 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
635 InstrStage<1, [A9_Pipe1]>,
636 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
638 // Quad-register Integer Multiply-Accumulate (.32)
639 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
640 // Extra latency cycles since wbck is 9 cycles
641 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
642 InstrStage<1, [A9_Pipe1]>,
643 InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
646 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
647 // Extra latency cycles since wbck is 6 cycles
648 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
649 InstrStage<1, [A9_Pipe1]>,
650 InstrStage<1, [A9_NPipe]>], [3]>,
652 // Double-register Permute Move
653 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
654 // FIXME: all latencies are arbitrary, no information is available
655 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
656 InstrStage<1, [A9_Pipe1]>,
657 InstrStage<1, [A9_LSPipe]>], [2, 1]>,
659 // Quad-register Permute Move
660 // Result written in N2, but that is relative to the last cycle of multicycle,
661 // so we use 3 for those cases
662 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
663 // FIXME: all latencies are arbitrary, no information is available
664 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
665 InstrStage<1, [A9_Pipe1]>,
666 InstrStage<2, [A9_NPipe]>], [3, 1]>,
668 // Integer to Single-precision Move
669 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
670 // FIXME: all latencies are arbitrary, no information is available
671 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
672 InstrStage<1, [A9_Pipe1]>,
673 InstrStage<1, [A9_NPipe]>], [2, 1]>,
675 // Integer to Double-precision Move
676 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
677 // FIXME: all latencies are arbitrary, no information is available
678 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
679 InstrStage<1, [A9_Pipe1]>,
680 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
682 // Single-precision to Integer Move
683 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
684 // FIXME: all latencies are arbitrary, no information is available
685 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
686 InstrStage<1, [A9_Pipe1]>,
687 InstrStage<1, [A9_NPipe]>], [2, 1]>,
689 // Double-precision to Integer Move
690 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
691 // FIXME: all latencies are arbitrary, no information is available
692 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
693 InstrStage<1, [A9_Pipe1]>,
694 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
696 // Integer to Lane Move
697 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
698 // FIXME: all latencies are arbitrary, no information is available
699 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
700 InstrStage<1, [A9_Pipe1]>,
701 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
704 // Double-register FP Unary
705 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
706 // Extra latency cycles since wbck is 6 cycles
707 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
708 InstrStage<1, [A9_Pipe1]>,
709 InstrStage<1, [A9_NPipe]>], [5, 2]>,
711 // Quad-register FP Unary
712 // Result written in N5, but that is relative to the last cycle of multicycle,
713 // so we use 6 for those cases
714 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
715 // Extra latency cycles since wbck is 7 cycles
716 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
717 InstrStage<1, [A9_Pipe1]>,
718 InstrStage<2, [A9_NPipe]>], [6, 2]>,
720 // Double-register FP Binary
721 // FIXME: We're using this itin for many instructions and [2, 2] here is too
723 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
724 // Extra latency cycles since wbck is 6 cycles
725 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
726 InstrStage<1, [A9_Pipe1]>,
727 InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
729 // Quad-register FP Binary
730 // Result written in N5, but that is relative to the last cycle of multicycle,
731 // so we use 6 for those cases
732 // FIXME: We're using this itin for many instructions and [2, 2] here is too
734 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
735 // Extra latency cycles since wbck is 7 cycles
736 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
737 InstrStage<1, [A9_Pipe1]>,
738 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
740 // Double-register FP Multiply-Accumulate
741 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
742 // Extra latency cycles since wbck is 7 cycles
743 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
744 InstrStage<1, [A9_Pipe1]>,
745 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
747 // Quad-register FP Multiply-Accumulate
748 // Result written in N9, but that is relative to the last cycle of multicycle,
749 // so we use 10 for those cases
750 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
751 // Extra latency cycles since wbck is 9 cycles
752 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
753 InstrStage<1, [A9_Pipe1]>,
754 InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
756 // Double-register Reciprocal Step
757 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
758 // Extra latency cycles since wbck is 7 cycles
759 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
760 InstrStage<1, [A9_Pipe1]>,
761 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
763 // Quad-register Reciprocal Step
764 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
765 // Extra latency cycles since wbck is 9 cycles
766 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
767 InstrStage<1, [A9_Pipe1]>,
768 InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
770 // Double-register Permute
771 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
772 // Extra latency cycles since wbck is 6 cycles
773 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
774 InstrStage<1, [A9_Pipe1]>,
775 InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
777 // Quad-register Permute
778 // Result written in N2, but that is relative to the last cycle of multicycle,
779 // so we use 3 for those cases
780 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
781 // Extra latency cycles since wbck is 7 cycles
782 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
783 InstrStage<1, [A9_Pipe1]>,
784 InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
786 // Quad-register Permute (3 cycle issue)
787 // Result written in N2, but that is relative to the last cycle of multicycle,
788 // so we use 4 for those cases
789 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
790 // Extra latency cycles since wbck is 8 cycles
791 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
792 InstrStage<1, [A9_Pipe1]>,
793 InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
796 // Double-register VEXT
797 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
798 // Extra latency cycles since wbck is 6 cycles
799 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
800 InstrStage<1, [A9_Pipe1]>,
801 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
803 // Quad-register VEXT
804 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
805 // Extra latency cycles since wbck is 7 cycles
806 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
807 InstrStage<1, [A9_Pipe1]>,
808 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
811 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
812 // Extra latency cycles since wbck is 7 cycles
813 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
814 InstrStage<1, [A9_Pipe1]>,
815 InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
816 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
817 // Extra latency cycles since wbck is 7 cycles
818 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
819 InstrStage<1, [A9_Pipe1]>,
820 InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
821 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
822 // Extra latency cycles since wbck is 8 cycles
823 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
824 InstrStage<1, [A9_Pipe1]>,
825 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
826 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
827 // Extra latency cycles since wbck is 8 cycles
828 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
829 InstrStage<1, [A9_Pipe1]>,
830 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
833 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
834 // Extra latency cycles since wbck is 7 cycles
835 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
836 InstrStage<1, [A9_Pipe1]>,
837 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
838 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
839 // Extra latency cycles since wbck is 7 cycles
840 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
841 InstrStage<1, [A9_Pipe1]>,
842 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
843 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
844 // Extra latency cycles since wbck is 8 cycles
845 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
846 InstrStage<1, [A9_Pipe1]>,
847 InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
848 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
849 // Extra latency cycles since wbck is 8 cycles
850 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
851 InstrStage<1, [A9_Pipe1]>,
852 InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>