b37b3948f1711fc900ec5d67fed5804e6c19e094
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units
19 def A9_Pipe0   : FuncUnit; // integer issue pipeline 0
20 def A9_Pipe1   : FuncUnit; // integer issue pipeline 1
21 def A9_LSPipe  : FuncUnit; // load/store pipeline
22 def A9_NPipe   : FuncUnit; // NEON ALU/MUL pipe; VFP itineraries use it too
23 def A9_DRegsVFP: FuncUnit; // artificial FU: shared FP register file, VFP side
24 def A9_DRegsN  : FuncUnit; // artificial FU: shared FP register file, NEON side
25
26 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
27 //
28 def CortexA9Itineraries : ProcessorItineraries<
29   [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
30   // Two fully-pipelined integer ALU pipelines
31   // FIXME: There are no operand latencies for these instructions at all!
32   //
33   // Move instructions, unconditional
34   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
35   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
36                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
37   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
38   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
39   InstrItinData<IIC_iMOVsr  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
40   //
41   // No operand cycles
42   InstrItinData<IIC_iALUx    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
43   //
44   // Binary Instructions that produce a result
45   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
46   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
47   InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
48   InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
49   //
50   // Unary Instructions that produce a result
51   InstrItinData<IIC_iUNAr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
52   InstrItinData<IIC_iUNAsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
53   InstrItinData<IIC_iUNAsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
54   //
55   // Compare instructions
56   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
57   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
58   InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
59   InstrItinData<IIC_iCMPsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
60   //
61   // Move instructions, conditional
62   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
63   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
64   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
65   InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
66
67   // Integer multiply pipeline
68   //
69   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A9_Pipe1], 0>,
70                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
71   InstrItinData<IIC_iMAC16   , [InstrStage<1, [A9_Pipe1], 0>,
72                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
73   InstrItinData<IIC_iMUL32   , [InstrStage<1, [A9_Pipe1], 0>,
74                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
75   InstrItinData<IIC_iMAC32   , [InstrStage<1, [A9_Pipe1], 0>,
76                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
77   InstrItinData<IIC_iMUL64   , [InstrStage<2, [A9_Pipe1], 0>,
78                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
79   InstrItinData<IIC_iMAC64   , [InstrStage<2, [A9_Pipe1], 0>,
80                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
81   // Integer load pipeline
82   // FIXME: The timings are some rough approximations
83   //
84   // Immediate offset
85   InstrItinData<IIC_iLoadi   , [InstrStage<1, [A9_Pipe1]>,
86                                 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
87   //
88   // Register offset
89   InstrItinData<IIC_iLoadr   , [InstrStage<1, [A9_Pipe1]>,
90                                 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
91   //
92   // Scaled register offset
93   InstrItinData<IIC_iLoadsi  , [InstrStage<1, [A9_Pipe1]>,
94                                 InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
95   //
96   // Immediate offset with update
97   InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A9_Pipe1]>,
98                                 InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
99   //
100   // Register offset with update
101   InstrItinData<IIC_iLoadru  , [InstrStage<1, [A9_Pipe1]>,
102                                 InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
103   //
104   // Scaled register offset with update
105   InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
106                                 InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
107   //
108   // Load multiple
109   InstrItinData<IIC_iLoadm   , [InstrStage<1, [A9_Pipe1]>,
110                                 InstrStage<1, [A9_LSPipe]>]>,
111
112   //
113   // Load multiple plus branch
114   InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
115                                 InstrStage<1, [A9_LSPipe]>,
116                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
117
118   // Integer store pipeline
119   //
120   // Immediate offset
121   InstrItinData<IIC_iStorei  , [InstrStage<1, [A9_Pipe1]>,
122                                 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
123   //
124   // Register offset
125   InstrItinData<IIC_iStorer  , [InstrStage<1, [ A9_Pipe1]>,
126                                 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
127   //
128   // Scaled register offset
129   InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
130                                 InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
131   //
132   // Immediate offset with update
133   InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
134                                 InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
135   //
136   // Register offset with update
137   InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
138                                 InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
139   //
140   // Scaled register offset with update
141   InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
142                                 InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
143   //
144   // Store multiple
145   InstrItinData<IIC_iStorem  , [InstrStage<1, [A9_Pipe1]>,
146                                 InstrStage<1, [A9_LSPipe]>]>,
147   // Branch
148   //
149   // no delay slots, so the latency of a branch is unimportant
150   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
151
152   // VFP and NEON share the same register file. This means that every VFP
153   // instruction should wait for full completion of the preceding NEON
154   // instruction and vice-versa. We model this behavior with two artificial FUs:
155   // DRegsVFP and DRegsN.
156   //
157   // Every VFP instruction:
158   //  - Acquires DRegsVFP resource for 1 cycle
159   //  - Reserves DRegsN resource for the whole duration (including time to
160   //    register file writeback!).
161   // Every NEON instruction does the same but with FUs swapped.
162   //
163   // Since the reserved FU cannot be acquired, this models precisely
164   // "cross-domain" stalls.
165
166   // VFP
167   // Issue through integer pipeline, and execute in NEON unit.
168
169   // FP Special Register to Integer Register File Move
170   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
171                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
172                               InstrStage<1, [A9_Pipe1]>,
173                               InstrStage<1, [A9_NPipe]>]>,
174   //
175   // Single-precision FP Unary
176   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
177                                // Extra latency cycles since wbck is 2 cycles
178                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
179                                InstrStage<1, [A9_Pipe1]>,
180                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
181   //
182   // Double-precision FP Unary
183   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
184                                // Extra latency cycles since wbck is 2 cycles
185                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
186                                InstrStage<1, [A9_Pipe1]>,
187                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
188
189   //
190   // Single-precision FP Compare
191   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
192                                // Extra latency cycles since wbck is 4 cycles
193                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
194                                InstrStage<1, [A9_Pipe1]>,
195                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
196   //
197   // Double-precision FP Compare
198   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
199                                // Extra latency cycles since wbck is 4 cycles
200                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
201                                InstrStage<1, [A9_Pipe1]>,
202                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
203   //
204   // Single to Double FP Convert
205   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
206                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
207                                InstrStage<1, [A9_Pipe1]>,
208                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
209   //
210   // Double to Single FP Convert
211   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
212                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
213                                InstrStage<1, [A9_Pipe1]>,
214                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
215
216   //
217   // Single to Half FP Convert
218   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
219                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
220                                InstrStage<1, [A9_Pipe1]>,
221                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
222   //
223   // Half to Single FP Convert
224   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
225                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
226                                InstrStage<1, [A9_Pipe1]>,
227                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
228
229   //
230   // Single-Precision FP to Integer Convert
231   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
232                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
233                                InstrStage<1, [A9_Pipe1]>,
234                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
235   //
236   // Double-Precision FP to Integer Convert
237   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
238                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
239                                InstrStage<1, [A9_Pipe1]>,
240                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
241   //
242   // Integer to Single-Precision FP Convert
243   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
244                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
245                                InstrStage<1, [A9_Pipe1]>,
246                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
247   //
248   // Integer to Double-Precision FP Convert
249   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
250                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
251                                InstrStage<1, [A9_Pipe1]>,
252                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
253   //
254   // Single-precision FP ALU
255   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
256                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
257                                InstrStage<1, [A9_Pipe1]>,
258                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
259   //
260   // Double-precision FP ALU
261   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
262                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
263                                InstrStage<1, [A9_Pipe1]>,
264                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
265   //
266   // Single-precision FP Multiply
267   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
268                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
269                                InstrStage<1, [A9_Pipe1]>,
270                                InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
271   //
272   // Double-precision FP Multiply
273   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
274                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
275                                InstrStage<1, [A9_Pipe1]>,
276                                InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
277   //
278   // Single-precision FP MAC
279   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
280                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
281                                InstrStage<1, [A9_Pipe1]>,
282                                InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
283   //
284   // Double-precision FP MAC
285   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
286                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
287                                InstrStage<1,  [A9_Pipe1]>,
288                                InstrStage<2,  [A9_NPipe]>], [9, 0, 1, 1]>,
289   //
290   // Single-precision FP DIV
291   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
292                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
293                                InstrStage<1,  [A9_Pipe1]>,
294                                InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
295   //
296   // Double-precision FP DIV
297   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
298                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
299                                InstrStage<1,  [A9_Pipe1]>,
300                                InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
301   //
302   // Single-precision FP SQRT
303   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
304                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
305                                InstrStage<1,  [A9_Pipe1]>,
306                                InstrStage<13, [A9_NPipe]>], [17, 1]>,
307   //
308   // Double-precision FP SQRT
309   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
310                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
311                                InstrStage<1,  [A9_Pipe1]>,
312                                InstrStage<28, [A9_NPipe]>], [32, 1]>,
313
314   //
315   // Integer to Single-precision Move
316   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
317                                // Extra 1 latency cycle since wbck is 2 cycles
318                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
319                                InstrStage<1, [A9_Pipe1]>,
320                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
321   //
322   // Integer to Double-precision Move
323   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
324                                // Extra 1 latency cycle since wbck is 2 cycles
325                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
326                                InstrStage<1, [A9_Pipe1]>,
327                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
328   //
329   // Single-precision to Integer Move
330   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
331                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
332                                InstrStage<1, [A9_Pipe1]>,
333                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
334   //
335   // Double-precision to Integer Move
336   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
337                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
338                                InstrStage<1, [A9_Pipe1]>,
339                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
340   //
341   // Single-precision FP Load
342   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
343                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
344                                InstrStage<1, [A9_Pipe1], 0>,
345                                InstrStage<1, [A9_LSPipe]>,
346                                InstrStage<1, [A9_NPipe]>]>,
347   //
348   // Double-precision FP Load
349   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
350                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
351                                InstrStage<1, [A9_Pipe1], 0>,
352                                InstrStage<1, [A9_LSPipe]>,
353                                InstrStage<1, [A9_NPipe]>]>,
354   //
355   // FP Load Multiple
356   InstrItinData<IIC_fpLoadm,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
357                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
358                                InstrStage<1, [A9_Pipe1], 0>,
359                                InstrStage<1, [A9_LSPipe]>,
360                                InstrStage<1, [A9_NPipe]>]>,
361   //
362   // Single-precision FP Store
363   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
364                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
365                                InstrStage<1, [A9_Pipe1], 0>,
366                                InstrStage<1, [A9_LSPipe]>,
367                                InstrStage<1, [A9_NPipe]>]>,
368   //
369   // Double-precision FP Store
370   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
371                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
372                                InstrStage<1, [A9_Pipe1], 0>,
373                                InstrStage<1, [A9_LSPipe]>,
374                                InstrStage<1, [A9_NPipe]>]>,
375   //
376   // FP Store Multiple
377   InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
378                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
379                                InstrStage<1, [A9_Pipe1], 0>,
380                                InstrStage<1, [A9_LSPipe]>,
381                                InstrStage<1, [A9_NPipe]>]>,
382   // NEON
383   // Issue through integer pipeline, and execute in NEON unit.
384   // FIXME: Neon pipeline and LdSt unit are multiplexed.
385   //        Add some syntactic sugar to model this!
386   // VLD1
387   // FIXME: We don't model this instruction properly
388   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
389                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
390                                InstrStage<1, [A9_Pipe1], 0>,
391                                InstrStage<1, [A9_LSPipe]>,
392                                InstrStage<1, [A9_NPipe]>]>,
393   //
394   // VLD2
395   // FIXME: We don't model this instruction properly
396   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
397                                // Extra latency cycles since wbck is 6 cycles
398                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
399                                InstrStage<1, [A9_Pipe1], 0>,
400                                InstrStage<1, [A9_LSPipe]>,
401                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
402   //
403   // VLD3
404   // FIXME: We don't model this instruction properly
405   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
406                                // Extra latency cycles since wbck is 6 cycles
407                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
408                                InstrStage<1, [A9_Pipe1], 0>,
409                                InstrStage<1, [A9_LSPipe]>,
410                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
411   //
412   // VLD4
413   // FIXME: We don't model this instruction properly
414   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
415                                // Extra latency cycles since wbck is 6 cycles
416                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
417                                InstrStage<1, [A9_Pipe1], 0>,
418                                InstrStage<1, [A9_LSPipe]>,
419                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
420   //
421   // VST
422   // FIXME: We don't model this instruction properly
423   InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
424                                // Extra latency cycles since wbck is 6 cycles
425                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
426                                InstrStage<1, [A9_Pipe1], 0>,
427                                InstrStage<1, [A9_LSPipe]>,
428                                InstrStage<1, [A9_NPipe]>]>,
429   //
430   // Double-register Integer Unary
431   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
432                                // Extra latency cycles since wbck is 6 cycles
433                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
434                                InstrStage<1, [A9_Pipe1]>,
435                                InstrStage<1, [A9_NPipe]>], [4, 2]>,
436   //
437   // Quad-register Integer Unary
438   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
439                                // Extra latency cycles since wbck is 6 cycles
440                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
441                                InstrStage<1, [A9_Pipe1]>,
442                                InstrStage<1, [A9_NPipe]>], [4, 2]>,
443   //
444   // Double-register Integer Q-Unary
445   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
446                                // Extra latency cycles since wbck is 6 cycles
447                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
448                                InstrStage<1, [A9_Pipe1]>,
449                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
450   //
451   // Quad-register Integer Q-Unary
452   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
453                                // Extra latency cycles since wbck is 6 cycles
454                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
455                                InstrStage<1, [A9_Pipe1]>,
456                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
457   //
458   // Double-register Integer Binary
459   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
460                                // Extra latency cycles since wbck is 6 cycles
461                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
462                                InstrStage<1, [A9_Pipe1]>,
463                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
464   //
465   // Quad-register Integer Binary
466   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
467                                // Extra latency cycles since wbck is 6 cycles
468                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
469                                InstrStage<1, [A9_Pipe1]>,
470                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
471   //
472   // Double-register Integer Subtract
473   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
474                                // Extra latency cycles since wbck is 6 cycles
475                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
476                                InstrStage<1, [A9_Pipe1]>,
477                                InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
478   //
479   // Quad-register Integer Subtract
480   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
481                                // Extra latency cycles since wbck is 6 cycles
482                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
483                                InstrStage<1, [A9_Pipe1]>,
484                                InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
485   //
486   // Double-register Integer Shift
487   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
488                                // Extra latency cycles since wbck is 6 cycles
489                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
490                                InstrStage<1, [A9_Pipe1]>,
491                                InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
492   //
493   // Quad-register Integer Shift
494   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
495                                // Extra latency cycles since wbck is 6 cycles
496                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
497                                InstrStage<1, [A9_Pipe1]>,
498                                InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
499   //
500   // Double-register Integer Shift (4 cycle)
501   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
502                                // Extra latency cycles since wbck is 6 cycles
503                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
504                                InstrStage<1, [A9_Pipe1]>,
505                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
506   //
507   // Quad-register Integer Shift (4 cycle)
508   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
509                                // Extra latency cycles since wbck is 6 cycles
510                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
511                                InstrStage<1, [A9_Pipe1]>,
512                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
513   //
514   // Double-register Integer Binary (4 cycle)
515   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
516                                // Extra latency cycles since wbck is 6 cycles
517                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
518                                InstrStage<1, [A9_Pipe1]>,
519                                InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
520   //
521   // Quad-register Integer Binary (4 cycle)
522   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
523                                // Extra latency cycles since wbck is 6 cycles
524                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
525                                InstrStage<1, [A9_Pipe1]>,
526                                InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
527   //
528   // Double-register Integer Subtract (4 cycle); 3-cycle form is IIC_VSUBiD
529   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
530                                // Extra latency cycles since wbck is 6 cycles
531                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
532                                InstrStage<1, [A9_Pipe1]>,
533                                InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
534   //
535   // Quad-register Integer Subtract (4 cycle); 3-cycle form is IIC_VSUBiQ
536   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
537                                // Extra latency cycles since wbck is 6 cycles
538                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
539                                InstrStage<1, [A9_Pipe1]>,
540                                InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
541
542   //
543   // Double-register Integer Count
544   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
545                                // Extra latency cycles since wbck is 6 cycles
546                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
547                                InstrStage<1, [A9_Pipe1]>,
548                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
549   //
550   // Quad-register Integer Count
551   // Result written in N3, but that is relative to the last cycle of multicycle,
552   // so we use 4 for those cases
553   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
554                                // Extra latency cycles since wbck is 7 cycles
555                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
556                                InstrStage<1, [A9_Pipe1]>,
557                                InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
558   //
559   // Double-register Absolute Difference and Accumulate
560   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
561                                // Extra latency cycles since wbck is 6 cycles
562                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
563                                InstrStage<1, [A9_Pipe1]>,
564                                InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
565   //
566   // Quad-register Absolute Difference and Accumulate
567   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
568                                // Extra latency cycles since wbck is 6 cycles
569                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
570                                InstrStage<1, [A9_Pipe1]>,
571                                InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
572   //
573   // Double-register Integer Pair Add Long
574   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
575                                // Extra latency cycles since wbck is 6 cycles
576                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
577                                InstrStage<1, [A9_Pipe1]>,
578                                InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
579   //
580   // Quad-register Integer Pair Add Long
581   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
582                                // Extra latency cycles since wbck is 6 cycles
583                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
584                                InstrStage<1, [A9_Pipe1]>,
585                                InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
586
587   //
588   // Double-register Integer Multiply (.8, .16)
589   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
590                                // Extra latency cycles since wbck is 6 cycles
591                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
592                                InstrStage<1, [A9_Pipe1]>,
593                                InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
594   //
595   // Quad-register Integer Multiply (.8, .16)
596   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
597                                // Extra latency cycles since wbck is 7 cycles
598                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
599                                InstrStage<1, [A9_Pipe1]>,
600                                InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
601
602   //
603   // Double-register Integer Multiply (.32)
604   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
605                                // Extra latency cycles since wbck is 7 cycles
606                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
607                                InstrStage<1, [A9_Pipe1]>,
608                                InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
609   //
610   // Quad-register Integer Multiply (.32)
611   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
612                                // Extra latency cycles since wbck is 9 cycles
613                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
614                                InstrStage<1, [A9_Pipe1]>,
615                                InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
616   //
617   // Double-register Integer Multiply-Accumulate (.8, .16)
618   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
619                                // Extra latency cycles since wbck is 6 cycles
620                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
621                                InstrStage<1, [A9_Pipe1]>,
622                                InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
623   //
624   // Double-register Integer Multiply-Accumulate (.32)
625   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
626                                // Extra latency cycles since wbck is 7 cycles
627                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
628                                InstrStage<1, [A9_Pipe1]>,
629                                InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
630   //
631   // Quad-register Integer Multiply-Accumulate (.8, .16)
632   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
633                                // Extra latency cycles since wbck is 7 cycles
634                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
635                                InstrStage<1, [A9_Pipe1]>,
636                                InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
637   //
638   // Quad-register Integer Multiply-Accumulate (.32)
639   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
640                                // Extra latency cycles since wbck is 9 cycles
641                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
642                                InstrStage<1, [A9_Pipe1]>,
643                                InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
644   //
645   // Move Immediate
646   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
647                                // Extra latency cycles since wbck is 6 cycles
648                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
649                                InstrStage<1, [A9_Pipe1]>,
650                                InstrStage<1, [A9_NPipe]>], [3]>,
651   //
652   // Double-register Permute Move
653   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
654   // FIXME: all latencies are arbitrary, no information is available
655                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
656                                InstrStage<1, [A9_Pipe1]>,
657                                InstrStage<1, [A9_LSPipe]>], [2, 1]>,
658   //
659   // Quad-register Permute Move
660   // Result written in N2, but that is relative to the last cycle of multicycle,
661   // so we use 3 for those cases
662   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
663   // FIXME: all latencies are arbitrary, no information is available
664                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
665                                InstrStage<1, [A9_Pipe1]>,
666                                InstrStage<2, [A9_NPipe]>], [3, 1]>,
667   //
668   // Integer to Single-precision Move
669   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
670   // FIXME: all latencies are arbitrary, no information is available
671                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
672                                InstrStage<1, [A9_Pipe1]>,
673                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
674   //
675   // Integer to Double-precision Move
676   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
677   // FIXME: all latencies are arbitrary, no information is available
678                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
679                                InstrStage<1, [A9_Pipe1]>,
680                                InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
681   //
682   // Single-precision to Integer Move
683   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
684   // FIXME: all latencies are arbitrary, no information is available
685                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
686                                InstrStage<1, [A9_Pipe1]>,
687                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
688   //
689   // Double-precision to Integer Move
690   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
691   // FIXME: all latencies are arbitrary, no information is available
692                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
693                                InstrStage<1, [A9_Pipe1]>,
694                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
695   //
696   // Integer to Lane Move
697   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
698   // FIXME: all latencies are arbitrary, no information is available
699                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
700                                InstrStage<1, [A9_Pipe1]>,
701                                InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
702
703   //
704   // Double-register FP Unary
705   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
706                                // Extra latency cycles since wbck is 6 cycles
707                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
708                                InstrStage<1, [A9_Pipe1]>,
709                                InstrStage<1, [A9_NPipe]>], [5, 2]>,
710   //
711   // Quad-register FP Unary
712   // Result written in N5, but that is relative to the last cycle of multicycle,
713   // so we use 6 for those cases
714   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
715                                // Extra latency cycles since wbck is 7 cycles
716                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
717                                InstrStage<1, [A9_Pipe1]>,
718                                InstrStage<2, [A9_NPipe]>], [6, 2]>,
719   //
720   // Double-register FP Binary
721   // FIXME: We're using this itin for many instructions and [2, 2] here is too
722   // optimistic.
723   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
724                                // Extra latency cycles since wbck is 6 cycles
725                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
726                                InstrStage<1, [A9_Pipe1]>,
727                                InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
728   //
729   // Quad-register FP Binary
730   // Result written in N5, but that is relative to the last cycle of multicycle,
731   // so we use 6 for those cases
732   // FIXME: We're using this itin for many instructions and [2, 2] here is too
733   // optimistic.
734   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
735                                // Extra latency cycles since wbck is 7 cycles
736                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
737                                InstrStage<1, [A9_Pipe1]>,
738                                InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
739   //
740   // Double-register FP Multiply-Accumulate
741   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
742                                // Extra latency cycles since wbck is 7 cycles
743                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
744                                InstrStage<1, [A9_Pipe1]>,
745                                InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
746   //
747   // Quad-register FP Multiply-Accumulate
748   // Result written in N9, but that is relative to the last cycle of multicycle,
749   // so we use 10 for those cases
750   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
751                                // Extra latency cycles since wbck is 9 cycles
752                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
753                                InstrStage<1, [A9_Pipe1]>,
754                                InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
755   //
756   // Double-register Reciprocal Step
757   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
758                                // Extra latency cycles since wbck is 7 cycles
759                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
760                                InstrStage<1, [A9_Pipe1]>,
761                                InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
762   //
763   // Quad-register Reciprocal Step
764   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
765                                // Extra latency cycles since wbck is 9 cycles
766                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
767                                InstrStage<1, [A9_Pipe1]>,
768                                InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
769   //
770   // Double-register Permute
771   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
772                                // Extra latency cycles since wbck is 6 cycles
773                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
774                                InstrStage<1, [A9_Pipe1]>,
775                                InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
776   //
777   // Quad-register Permute
778   // Result written in N2, but that is relative to the last cycle of multicycle,
779   // so we use 3 for those cases
780   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
781                                // Extra latency cycles since wbck is 7 cycles
782                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
783                                InstrStage<1, [A9_Pipe1]>,
784                                InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
785   //
786   // Quad-register Permute (3 cycle issue)
787   // Result written in N2, but that is relative to the last cycle of multicycle,
788   // so we use 4 for those cases
789   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
790                                // Extra latency cycles since wbck is 8 cycles
791                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
792                                InstrStage<1, [A9_Pipe1]>,
793                                InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
794
795   //
796   // Double-register VEXT
797   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
798                                // Extra latency cycles since wbck is 6 cycles
799                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
800                                InstrStage<1, [A9_Pipe1]>,
801                                InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
802   //
803   // Quad-register VEXT
804   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
805                                // Extra latency cycles since wbck is 7 cycles
806                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
807                                InstrStage<1, [A9_Pipe1]>,
808                                InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
809   //
810   // VTB
811   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
812                                // Extra latency cycles since wbck is 7 cycles
813                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
814                                InstrStage<1, [A9_Pipe1]>,
815                                InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
816   InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
817                                // Extra latency cycles since wbck is 7 cycles
818                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
819                                InstrStage<1, [A9_Pipe1]>,
820                                InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
821   InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
822                                // Extra latency cycles since wbck is 8 cycles
823                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
824                                InstrStage<1, [A9_Pipe1]>,
825                                InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
826   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
827                                // Extra latency cycles since wbck is 8 cycles
828                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
829                                InstrStage<1, [A9_Pipe1]>,
830                                InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
831   //
832   // VTBX
833   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
834                                // Extra latency cycles since wbck is 7 cycles
835                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
836                                InstrStage<1, [A9_Pipe1]>,
837                                InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
838   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
839                                // Extra latency cycles since wbck is 7 cycles
840                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
841                                InstrStage<1, [A9_Pipe1]>,
842                                InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
843   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
844                                // Extra latency cycles since wbck is 8 cycles
845                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
846                                InstrStage<1, [A9_Pipe1]>,
847                                InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
848   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
849                                // Extra latency cycles since wbck is 8 cycles
850                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
851                                InstrStage<1, [A9_Pipe1]>,
852                               InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
853 ]>;