1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9 Technical Reference Manual".
// Functional units referenced by the Cortex-A9 itinerary stages in this file.
19 def A9_Pipe0 : FuncUnit; // integer pipeline 0 (one of the two ALU pipes)
20 def A9_Pipe1 : FuncUnit; // integer pipeline 1 (also the issue stage of the load/store, VFP and NEON itineraries)
21 def A9_LSPipe : FuncUnit; // load/store pipe
22 def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe (the VFP itineraries execute here as well)
23 def A9_DRegsVFP: FuncUnit; // artificial unit: shared FP register set, VFP side
24 def A9_DRegsN : FuncUnit; // artificial unit: shared FP register set, NEON side
26 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
28 def CortexA9Itineraries : ProcessorItineraries<
29 [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
30 // Two fully-pipelined integer ALU pipelines
31 // FIXME: There are no operand latencies for these instructions at all!
33 // Move instructions, unconditional
34 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
35 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
36 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
37 InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
40 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
42 // Binary Instructions that produce a result
43 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
44 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
45 InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
46 InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
48 // Unary Instructions that produce a result
49 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
50 InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
51 InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
53 // Compare instructions
54 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
55 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
56 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
57 InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
59 // Move instructions, conditional
60 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
61 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
62 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
63 InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
65 // Integer multiply pipeline
67 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
68 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
69 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
70 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
71 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
72 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
73 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
74 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
75 InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>,
76 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
77 InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>,
78 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
79 // Integer load pipeline
80 // FIXME: The timings are some rough approximations
83 InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
84 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
87 InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
88 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
90 // Scaled register offset
91 InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
92 InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
94 // Immediate offset with update
95 InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
96 InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
98 // Register offset with update
99 InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
100 InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
102 // Scaled register offset with update
103 InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
104 InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
107 InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
108 InstrStage<1, [A9_LSPipe]>]>,
111 // Load multiple plus branch
112 InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
113 InstrStage<1, [A9_LSPipe]>,
114 InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
116 // Integer store pipeline
119 InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>,
120 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
123 InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>,
124 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
126 // Scaled register offset
127 InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
128 InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
130 // Immediate offset with update
131 InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
132 InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
134 // Register offset with update
135 InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
136 InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
138 // Scaled register offset with update
139 InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
140 InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
143 InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
144 InstrStage<1, [A9_LSPipe]>]>,
147 // no delay slots, so the latency of a branch is unimportant
148 InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
150 // VFP and NEON share the same register file. This means that every VFP
151 // instruction should wait for full completion of the preceding NEON
152 // instruction and vice-versa. We model this behavior with two artificial FUs:
153 // DRegsVFP and DRegsN.
155 // Every VFP instruction:
156 // - Acquires DRegsVFP resource for 1 cycle
157 // - Reserves DRegsN resource for the whole duration (including time to
158 // register file writeback!).
159 // Every NEON instruction does the same but with FUs swapped.
161 // Since the reserved FU cannot be acquired, this models precisely
162 // "cross-domain" stalls.
165 // Issue through integer pipeline, and execute in NEON unit.
167 // FP Special Register to Integer Register File Move
168 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
169 InstrStage<2, [A9_DRegsN], 0, Reserved>,
170 InstrStage<1, [A9_Pipe1]>,
171 InstrStage<1, [A9_NPipe]>]>,
173 // Single-precision FP Unary
174 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
175 // Extra latency cycles since wbck is 2 cycles
176 InstrStage<3, [A9_DRegsN], 0, Reserved>,
177 InstrStage<1, [A9_Pipe1]>,
178 InstrStage<1, [A9_NPipe]>], [1, 1]>,
180 // Double-precision FP Unary
181 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
182 // Extra latency cycles since wbck is 2 cycles
183 InstrStage<3, [A9_DRegsN], 0, Reserved>,
184 InstrStage<1, [A9_Pipe1]>,
185 InstrStage<1, [A9_NPipe]>], [1, 1]>,
188 // Single-precision FP Compare
189 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
190 // Extra latency cycles since wbck is 4 cycles
191 InstrStage<5, [A9_DRegsN], 0, Reserved>,
192 InstrStage<1, [A9_Pipe1]>,
193 InstrStage<1, [A9_NPipe]>], [1, 1]>,
195 // Double-precision FP Compare
196 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
197 // Extra latency cycles since wbck is 4 cycles
198 InstrStage<5, [A9_DRegsN], 0, Reserved>,
199 InstrStage<1, [A9_Pipe1]>,
200 InstrStage<1, [A9_NPipe]>], [1, 1]>,
202 // Single to Double FP Convert
203 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
204 InstrStage<5, [A9_DRegsN], 0, Reserved>,
205 InstrStage<1, [A9_Pipe1]>,
206 InstrStage<1, [A9_NPipe]>], [4, 1]>,
208 // Double to Single FP Convert
209 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
210 InstrStage<5, [A9_DRegsN], 0, Reserved>,
211 InstrStage<1, [A9_Pipe1]>,
212 InstrStage<1, [A9_NPipe]>], [4, 1]>,
215 // Single to Half FP Convert
216 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
217 InstrStage<5, [A9_DRegsN], 0, Reserved>,
218 InstrStage<1, [A9_Pipe1]>,
219 InstrStage<1, [A9_NPipe]>], [4, 1]>,
221 // Half to Single FP Convert
222 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
223 InstrStage<3, [A9_DRegsN], 0, Reserved>,
224 InstrStage<1, [A9_Pipe1]>,
225 InstrStage<1, [A9_NPipe]>], [2, 1]>,
228 // Single-Precision FP to Integer Convert
229 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
230 InstrStage<5, [A9_DRegsN], 0, Reserved>,
231 InstrStage<1, [A9_Pipe1]>,
232 InstrStage<1, [A9_NPipe]>], [4, 1]>,
234 // Double-Precision FP to Integer Convert
235 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
236 InstrStage<5, [A9_DRegsN], 0, Reserved>,
237 InstrStage<1, [A9_Pipe1]>,
238 InstrStage<1, [A9_NPipe]>], [4, 1]>,
240 // Integer to Single-Precision FP Convert
241 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
242 InstrStage<5, [A9_DRegsN], 0, Reserved>,
243 InstrStage<1, [A9_Pipe1]>,
244 InstrStage<1, [A9_NPipe]>], [4, 1]>,
246 // Integer to Double-Precision FP Convert
247 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
248 InstrStage<5, [A9_DRegsN], 0, Reserved>,
249 InstrStage<1, [A9_Pipe1]>,
250 InstrStage<1, [A9_NPipe]>], [4, 1]>,
252 // Single-precision FP ALU
253 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
254 InstrStage<5, [A9_DRegsN], 0, Reserved>,
255 InstrStage<1, [A9_Pipe1]>,
256 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
258 // Double-precision FP ALU
259 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
260 InstrStage<5, [A9_DRegsN], 0, Reserved>,
261 InstrStage<1, [A9_Pipe1]>,
262 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
264 // Single-precision FP Multiply
265 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
266 InstrStage<6, [A9_DRegsN], 0, Reserved>,
267 InstrStage<1, [A9_Pipe1]>,
268 InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
270 // Double-precision FP Multiply
271 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
272 InstrStage<7, [A9_DRegsN], 0, Reserved>,
273 InstrStage<1, [A9_Pipe1]>,
274 InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
276 // Single-precision FP MAC
277 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
278 InstrStage<9, [A9_DRegsN], 0, Reserved>,
279 InstrStage<1, [A9_Pipe1]>,
280 InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
282 // Double-precision FP MAC
283 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
284 InstrStage<10, [A9_DRegsN], 0, Reserved>,
285 InstrStage<1, [A9_Pipe1]>,
286 InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
288 // Single-precision FP DIV
289 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
290 InstrStage<16, [A9_DRegsN], 0, Reserved>,
291 InstrStage<1, [A9_Pipe1]>,
292 InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
294 // Double-precision FP DIV
295 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
296 InstrStage<26, [A9_DRegsN], 0, Reserved>,
297 InstrStage<1, [A9_Pipe1]>,
298 InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
300 // Single-precision FP SQRT
301 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
302 InstrStage<18, [A9_DRegsN], 0, Reserved>,
303 InstrStage<1, [A9_Pipe1]>,
304 InstrStage<13, [A9_NPipe]>], [17, 1]>,
306 // Double-precision FP SQRT
307 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
308 InstrStage<33, [A9_DRegsN], 0, Reserved>,
309 InstrStage<1, [A9_Pipe1]>,
310 InstrStage<28, [A9_NPipe]>], [32, 1]>,
313 // Integer to Single-precision Move
314 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
315 // Extra 1 latency cycle since wbck is 2 cycles
316 InstrStage<3, [A9_DRegsN], 0, Reserved>,
317 InstrStage<1, [A9_Pipe1]>,
318 InstrStage<1, [A9_NPipe]>], [1, 1]>,
320 // Integer to Double-precision Move
321 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
322 // Extra 1 latency cycle since wbck is 2 cycles
323 InstrStage<3, [A9_DRegsN], 0, Reserved>,
324 InstrStage<1, [A9_Pipe1]>,
325 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
327 // Single-precision to Integer Move
328 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
329 InstrStage<2, [A9_DRegsN], 0, Reserved>,
330 InstrStage<1, [A9_Pipe1]>,
331 InstrStage<1, [A9_NPipe]>], [1, 1]>,
333 // Double-precision to Integer Move
334 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
335 InstrStage<2, [A9_DRegsN], 0, Reserved>,
336 InstrStage<1, [A9_Pipe1]>,
337 InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
339 // Single-precision FP Load
340 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
341 InstrStage<2, [A9_DRegsN], 0, Reserved>,
342 InstrStage<1, [A9_Pipe1], 0>,
343 InstrStage<1, [A9_LSPipe]>,
344 InstrStage<1, [A9_NPipe]>]>,
346 // Double-precision FP Load
347 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
348 InstrStage<2, [A9_DRegsN], 0, Reserved>,
349 InstrStage<1, [A9_Pipe1], 0>,
350 InstrStage<1, [A9_LSPipe]>,
351 InstrStage<1, [A9_NPipe]>]>,
354 InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
355 InstrStage<2, [A9_DRegsN], 0, Reserved>,
356 InstrStage<1, [A9_Pipe1], 0>,
357 InstrStage<1, [A9_LSPipe]>,
358 InstrStage<1, [A9_NPipe]>]>,
360 // Single-precision FP Store
361 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
362 InstrStage<2, [A9_DRegsN], 0, Reserved>,
363 InstrStage<1, [A9_Pipe1], 0>,
364 InstrStage<1, [A9_LSPipe]>,
365 InstrStage<1, [A9_NPipe]>]>,
367 // Double-precision FP Store
368 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
369 InstrStage<2, [A9_DRegsN], 0, Reserved>,
370 InstrStage<1, [A9_Pipe1], 0>,
371 InstrStage<1, [A9_LSPipe]>,
372 InstrStage<1, [A9_NPipe]>]>,
375 InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
376 InstrStage<2, [A9_DRegsN], 0, Reserved>,
377 InstrStage<1, [A9_Pipe1], 0>,
378 InstrStage<1, [A9_LSPipe]>,
379 InstrStage<1, [A9_NPipe]>]>,
381 // Issue through integer pipeline, and execute in NEON unit.
382 // FIXME: Neon pipeline and LdSt unit are multiplexed.
383 // Add some syntactic sugar to model this!
385 // FIXME: We don't model this instruction properly
386 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
387 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
388 InstrStage<1, [A9_Pipe1], 0>,
389 InstrStage<1, [A9_LSPipe]>,
390 InstrStage<1, [A9_NPipe]>]>,
393 // FIXME: We don't model this instruction properly
394 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
395 // Extra latency cycles since wbck is 6 cycles
396 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
397 InstrStage<1, [A9_Pipe1], 0>,
398 InstrStage<1, [A9_LSPipe]>,
399 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
402 // FIXME: We don't model this instruction properly
403 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
404 // Extra latency cycles since wbck is 6 cycles
405 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
406 InstrStage<1, [A9_Pipe1], 0>,
407 InstrStage<1, [A9_LSPipe]>,
408 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
411 // FIXME: We don't model this instruction properly
412 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
413 // Extra latency cycles since wbck is 6 cycles
414 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
415 InstrStage<1, [A9_Pipe1], 0>,
416 InstrStage<1, [A9_LSPipe]>,
417 InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
420 // FIXME: We don't model this instruction properly
421 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
422 // Extra latency cycles since wbck is 6 cycles
423 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
424 InstrStage<1, [A9_Pipe1], 0>,
425 InstrStage<1, [A9_LSPipe]>,
426 InstrStage<1, [A9_NPipe]>]>,
428 // Double-register Integer Unary
429 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
430 // Extra latency cycles since wbck is 6 cycles
431 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
432 InstrStage<1, [A9_Pipe1]>,
433 InstrStage<1, [A9_NPipe]>], [4, 2]>,
435 // Quad-register Integer Unary
436 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
437 // Extra latency cycles since wbck is 6 cycles
438 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
439 InstrStage<1, [A9_Pipe1]>,
440 InstrStage<1, [A9_NPipe]>], [4, 2]>,
442 // Double-register Integer Q-Unary
443 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
444 // Extra latency cycles since wbck is 6 cycles
445 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
446 InstrStage<1, [A9_Pipe1]>,
447 InstrStage<1, [A9_NPipe]>], [4, 1]>,
449 // Quad-register Integer Q-Unary
450 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
451 // Extra latency cycles since wbck is 6 cycles
452 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
453 InstrStage<1, [A9_Pipe1]>,
454 InstrStage<1, [A9_NPipe]>], [4, 1]>,
456 // Double-register Integer Binary
457 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
458 // Extra latency cycles since wbck is 6 cycles
459 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
460 InstrStage<1, [A9_Pipe1]>,
461 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
463 // Quad-register Integer Binary
464 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
465 // Extra latency cycles since wbck is 6 cycles
466 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
467 InstrStage<1, [A9_Pipe1]>,
468 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
470 // Double-register Integer Subtract
471 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
472 // Extra latency cycles since wbck is 6 cycles
473 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
474 InstrStage<1, [A9_Pipe1]>,
475 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
477 // Quad-register Integer Subtract
478 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
479 // Extra latency cycles since wbck is 6 cycles
480 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
481 InstrStage<1, [A9_Pipe1]>,
482 InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
484 // Double-register Integer Shift
485 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
486 // Extra latency cycles since wbck is 6 cycles
487 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
488 InstrStage<1, [A9_Pipe1]>,
489 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
491 // Quad-register Integer Shift
492 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
493 // Extra latency cycles since wbck is 6 cycles
494 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
495 InstrStage<1, [A9_Pipe1]>,
496 InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
498 // Double-register Integer Shift (4 cycle)
499 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
500 // Extra latency cycles since wbck is 6 cycles
501 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
502 InstrStage<1, [A9_Pipe1]>,
503 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
505 // Quad-register Integer Shift (4 cycle)
506 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
507 // Extra latency cycles since wbck is 6 cycles
508 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
509 InstrStage<1, [A9_Pipe1]>,
510 InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
512 // Double-register Integer Binary (4 cycle)
513 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
514 // Extra latency cycles since wbck is 6 cycles
515 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
516 InstrStage<1, [A9_Pipe1]>,
517 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
519 // Quad-register Integer Binary (4 cycle)
520 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
521 // Extra latency cycles since wbck is 6 cycles
522 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
523 InstrStage<1, [A9_Pipe1]>,
524 InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
526 // Double-register Integer Subtract (4 cycle); distinct class from the 3-cycle IIC_VSUBiD entry
527 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
528 // Extra latency cycles since wbck is 6 cycles
529 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
530 InstrStage<1, [A9_Pipe1]>,
531 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
533 // Quad-register Integer Subtract (4 cycle); distinct class from the 3-cycle IIC_VSUBiQ entry
534 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
535 // Extra latency cycles since wbck is 6 cycles
536 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
537 InstrStage<1, [A9_Pipe1]>,
538 InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
541 // Double-register Integer Count
542 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
543 // Extra latency cycles since wbck is 6 cycles
544 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
545 InstrStage<1, [A9_Pipe1]>,
546 InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
548 // Quad-register Integer Count
549 // Result written in N3, but that is relative to the last cycle of multicycle,
550 // so we use 4 for those cases
551 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
552 // Extra latency cycles since wbck is 7 cycles
553 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
554 InstrStage<1, [A9_Pipe1]>,
555 InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
557 // Double-register Absolute Difference and Accumulate
558 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
559 // Extra latency cycles since wbck is 6 cycles
560 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
561 InstrStage<1, [A9_Pipe1]>,
562 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
564 // Quad-register Absolute Difference and Accumulate
565 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
566 // Extra latency cycles since wbck is 6 cycles
567 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
568 InstrStage<1, [A9_Pipe1]>,
569 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
571 // Double-register Integer Pair Add Long
572 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
573 // Extra latency cycles since wbck is 6 cycles
574 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
575 InstrStage<1, [A9_Pipe1]>,
576 InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
578 // Quad-register Integer Pair Add Long
579 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
580 // Extra latency cycles since wbck is 6 cycles
581 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
582 InstrStage<1, [A9_Pipe1]>,
583 InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
586 // Double-register Integer Multiply (.8, .16)
587 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
588 // Extra latency cycles since wbck is 6 cycles
589 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
590 InstrStage<1, [A9_Pipe1]>,
591 InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
593 // Quad-register Integer Multiply (.8, .16)
594 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
595 // Extra latency cycles since wbck is 7 cycles
596 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
597 InstrStage<1, [A9_Pipe1]>,
598 InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
601 // Double-register Integer Multiply (.32)
602 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
603 // Extra latency cycles since wbck is 7 cycles
604 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
605 InstrStage<1, [A9_Pipe1]>,
606 InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
608 // Quad-register Integer Multiply (.32)
609 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
610 // Extra latency cycles since wbck is 9 cycles
611 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
612 InstrStage<1, [A9_Pipe1]>,
613 InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
615 // Double-register Integer Multiply-Accumulate (.8, .16)
616 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
617 // Extra latency cycles since wbck is 6 cycles
618 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
619 InstrStage<1, [A9_Pipe1]>,
620 InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
622 // Double-register Integer Multiply-Accumulate (.32)
623 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
624 // Extra latency cycles since wbck is 7 cycles
625 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
626 InstrStage<1, [A9_Pipe1]>,
627 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
629 // Quad-register Integer Multiply-Accumulate (.8, .16)
630 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
631 // Extra latency cycles since wbck is 7 cycles
632 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
633 InstrStage<1, [A9_Pipe1]>,
634 InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
636 // Quad-register Integer Multiply-Accumulate (.32)
637 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
638 // Extra latency cycles since wbck is 9 cycles
639 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
640 InstrStage<1, [A9_Pipe1]>,
641 InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
644 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
645 // Extra latency cycles since wbck is 6 cycles
646 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
647 InstrStage<1, [A9_Pipe1]>,
648 InstrStage<1, [A9_NPipe]>], [3]>,
650 // Double-register Permute Move
651 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
652 // FIXME: all latencies are arbitrary, no information is available
653 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
654 InstrStage<1, [A9_Pipe1]>,
655 InstrStage<1, [A9_LSPipe]>], [2, 1]>,
657 // Quad-register Permute Move
658 // Result written in N2, but that is relative to the last cycle of multicycle,
659 // so we use 3 for those cases
660 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
661 // FIXME: all latencies are arbitrary, no information is available
662 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
663 InstrStage<1, [A9_Pipe1]>,
664 InstrStage<2, [A9_NPipe]>], [3, 1]>,
666 // Integer to Single-precision Move
667 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
668 // FIXME: all latencies are arbitrary, no information is available
669 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
670 InstrStage<1, [A9_Pipe1]>,
671 InstrStage<1, [A9_NPipe]>], [2, 1]>,
673 // Integer to Double-precision Move
674 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
675 // FIXME: all latencies are arbitrary, no information is available
676 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
677 InstrStage<1, [A9_Pipe1]>,
678 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
680 // Single-precision to Integer Move
681 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
682 // FIXME: all latencies are arbitrary, no information is available
683 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
684 InstrStage<1, [A9_Pipe1]>,
685 InstrStage<1, [A9_NPipe]>], [2, 1]>,
687 // Double-precision to Integer Move
688 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
689 // FIXME: all latencies are arbitrary, no information is available
690 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
691 InstrStage<1, [A9_Pipe1]>,
692 InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
694 // Integer to Lane Move
695 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
696 // FIXME: all latencies are arbitrary, no information is available
697 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
698 InstrStage<1, [A9_Pipe1]>,
699 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
702 // Double-register FP Unary
703 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
704 // Extra latency cycles since wbck is 6 cycles
705 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
706 InstrStage<1, [A9_Pipe1]>,
707 InstrStage<1, [A9_NPipe]>], [5, 2]>,
709 // Quad-register FP Unary
710 // Result written in N5, but that is relative to the last cycle of multicycle,
711 // so we use 6 for those cases
712 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
713 // Extra latency cycles since wbck is 7 cycles
714 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
715 InstrStage<1, [A9_Pipe1]>,
716 InstrStage<2, [A9_NPipe]>], [6, 2]>,
718 // Double-register FP Binary
719 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
721 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
722 // Extra latency cycles since wbck is 6 cycles
723 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
724 InstrStage<1, [A9_Pipe1]>,
725 InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
727 // Quad-register FP Binary
728 // Result written in N5, but that is relative to the last cycle of multicycle,
729 // so we use 6 for those cases
730 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
732 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
733 // Extra latency cycles since wbck is 7 cycles
734 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
735 InstrStage<1, [A9_Pipe1]>,
736 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
738 // Double-register FP Multiply-Accumulate
739 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
740 // Extra latency cycles since wbck is 7 cycles
741 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
742 InstrStage<1, [A9_Pipe1]>,
743 InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
745 // Quad-register FP Multiply-Accumulate
746 // Result written in N9, but that is relative to the last cycle of multicycle,
747 // so we use 10 for those cases
748 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
749 // Extra latency cycles since wbck is 9 cycles
750 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
751 InstrStage<1, [A9_Pipe1]>,
752 InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
754 // Double-register Reciprocal Step
755 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
756 // Extra latency cycles since wbck is 7 cycles
757 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
758 InstrStage<1, [A9_Pipe1]>,
759 InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
761 // Quad-register Reciprocal Step
762 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
763 // Extra latency cycles since wbck is 9 cycles
764 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
765 InstrStage<1, [A9_Pipe1]>,
766 InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
768 // Double-register Permute
769 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
770 // Extra latency cycles since wbck is 6 cycles
771 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
772 InstrStage<1, [A9_Pipe1]>,
773 InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
775 // Quad-register Permute
776 // Result written in N2, but that is relative to the last cycle of multicycle,
777 // so we use 3 for those cases
778 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
779 // Extra latency cycles since wbck is 7 cycles
780 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
781 InstrStage<1, [A9_Pipe1]>,
782 InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
784 // Quad-register Permute (3 cycle issue)
785 // Result written in N2, but that is relative to the last cycle of multicycle,
786 // so we use 4 for those cases
787 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
788 // Extra latency cycles since wbck is 8 cycles
789 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
790 InstrStage<1, [A9_Pipe1]>,
791 InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
794 // Double-register VEXT
795 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
796 // Extra latency cycles since wbck is 6 cycles
797 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
798 InstrStage<1, [A9_Pipe1]>,
799 InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
801 // Quad-register VEXT
802 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
803 // Extra latency cycles since wbck is 7 cycles
804 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
805 InstrStage<1, [A9_Pipe1]>,
806 InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
809 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
810 // Extra latency cycles since wbck is 7 cycles
811 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
812 InstrStage<1, [A9_Pipe1]>,
813 InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
814 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
815 // Extra latency cycles since wbck is 7 cycles
816 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
817 InstrStage<1, [A9_Pipe1]>,
818 InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
819 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
820 // Extra latency cycles since wbck is 8 cycles
821 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
822 InstrStage<1, [A9_Pipe1]>,
823 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
824 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
825 // Extra latency cycles since wbck is 8 cycles
826 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
827 InstrStage<1, [A9_Pipe1]>,
828 InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
831 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
832 // Extra latency cycles since wbck is 7 cycles
833 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
834 InstrStage<1, [A9_Pipe1]>,
835 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
836 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
837 // Extra latency cycles since wbck is 7 cycles
838 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
839 InstrStage<1, [A9_Pipe1]>,
840 InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
841 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
842 // Extra latency cycles since wbck is 8 cycles
843 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
844 InstrStage<1, [A9_Pipe1]>,
845 InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
846 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
847 // Extra latency cycles since wbck is 8 cycles
848 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
849 InstrStage<1, [A9_Pipe1]>,
850 InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>