Re-commit 117518 and 117519 now that ARM MC test failures are out of the way.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units: the pipeline resources named by the InstrStage entries of
// the Cortex-A9 itineraries in this file.
19 def A9_Issue0  : FuncUnit; // Issue unit 0 (most itineraries accept either issue unit; IIC_Br claims both)
20 def A9_Issue1  : FuncUnit; // Issue unit 1
21 def A9_Branch  : FuncUnit; // Branch unit (IIC_Br and the load-multiple / pop + branch itineraries)
22 def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0 (the integer multiply itineraries use only this pipe)
23 def A9_ALU1    : FuncUnit; // ALU pipeline 1
24 def A9_AGU     : FuncUnit; // Address generation unit for loads / stores
25 def A9_NPipe   : FuncUnit; // NEON pipeline (also the execute stage of the VFP itineraries)
26 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer (claimed by both load/store and VFP itineraries)
27 def A9_LSUnit  : FuncUnit; // Load / store unit
28 def A9_DRegsVFP: FuncUnit; // Artificial unit: FP register set, VFP side (models VFP/NEON cross-domain stalls)
29 def A9_DRegsN  : FuncUnit; // Artificial unit: FP register set, NEON side (models VFP/NEON cross-domain stalls)
30
31 // Bypasses
// A9_LdBypass is attached to the result operand of the load itineraries and to
// selected source operands of the ALU / MVN / compare itineraries in this file.
32 def A9_LdBypass : Bypass;
33
34 def CortexA9Itineraries : ProcessorItineraries<
35   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
36    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
37   [A9_LdBypass], [
38   // Two fully-pipelined integer ALU pipelines
39
40   //
41   // Move instructions, unconditional
42   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
43                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
44   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
45                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
46   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
48   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
50   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
52                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
53   //
54   // MVN instructions
55   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
56                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
57                               [1]>,
58   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
59                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
60                               [1, 1], [NoBypass, A9_LdBypass]>,
61   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
62                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
63                               [2, 1]>,
64   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
65                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
66                               [3, 1, 1]>,
67   //
68   // No operand cycles
69   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
70                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
71   //
72   // Binary Instructions that produce a result
73   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
74                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
75                             [1, 1], [NoBypass, A9_LdBypass]>,
76   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
77                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
78                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
79   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
80                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
81                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
82   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
83                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
84                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
85   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
86                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
87                             [3, 1, 1, 1],
88                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
89   //
90   // Bitwise Instructions that produce a result
91   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
92                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
93   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
94                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
95   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
97   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
98                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
99   //
100   // Unary Instructions that produce a result
101
102   // CLZ, RBIT, etc.
103   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
104                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
105
106   // BFC, BFI, UBFX, SBFX
107   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
109
110   //
111   // Zero and sign extension instructions
112   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
113                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
114   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
115                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
116   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
117                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
118   //
119   // Compare instructions
120   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
121                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
122                                [1], [A9_LdBypass]>,
123   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
124                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
125                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
126   InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_ALU0, A9_ALU1]>],
127                                 [1, 1], [A9_LdBypass, NoBypass]>,
128   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
129                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
130                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
131   //
132   // Test instructions
133   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
134                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
135   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
136                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
137   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
138                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
139   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
140                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
141   //
142   // Move instructions, conditional
143   // FIXME: Correctly model the extra input dep on the destination.
144   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
145                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
146   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
147                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
148   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
150   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
152
153   // Integer multiply pipeline
154   //
155   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
156                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
157   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
158                                InstrStage<2, [A9_ALU0]>],
159                               [3, 1, 1, 1]>,
160   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
161                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
162   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
163                                InstrStage<2, [A9_ALU0]>],
164                               [4, 1, 1, 1]>,
165   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
167   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
168                                InstrStage<3, [A9_ALU0]>],
169                               [4, 5, 1, 1]>,
170   // Integer load pipeline
171   // FIXME: The timings are some rough approximations
172   //
173   // Immediate offset
174   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
175                                  InstrStage<1, [A9_MUX0], 0>,
176                                  InstrStage<1, [A9_AGU]>,
177                                  InstrStage<1, [A9_LSUnit]>],
178                                 [3, 1], [A9_LdBypass]>,
179   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
180                                  InstrStage<1, [A9_MUX0], 0>,
181                                  InstrStage<2, [A9_AGU]>,
182                                  InstrStage<1, [A9_LSUnit]>],
183                                 [4, 1], [A9_LdBypass]>,
184   // FIXME: If the address is 64-bit aligned, the AGU needs only 1 cycle.
185   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
186                                  InstrStage<1, [A9_MUX0], 0>,
187                                  InstrStage<2, [A9_AGU]>,
188                                  InstrStage<1, [A9_LSUnit]>],
189                                 [3, 3, 1], [A9_LdBypass]>,
190   //
191   // Register offset
192   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
193                                  InstrStage<1, [A9_MUX0], 0>,
194                                  InstrStage<1, [A9_AGU]>,
195                                  InstrStage<1, [A9_LSUnit]>],
196                                 [3, 1, 1], [A9_LdBypass]>,
197   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
198                                  InstrStage<1, [A9_MUX0], 0>,
199                                  InstrStage<2, [A9_AGU]>,
200                                  InstrStage<1, [A9_LSUnit]>],
201                                 [4, 1, 1], [A9_LdBypass]>,
202   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
203                                  InstrStage<1, [A9_MUX0], 0>,
204                                  InstrStage<2, [A9_AGU]>,
205                                  InstrStage<1, [A9_LSUnit]>],
206                                 [3, 3, 1, 1], [A9_LdBypass]>,
207   //
208   // Scaled register offset
209   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
210                                  InstrStage<1, [A9_MUX0], 0>,
211                                  InstrStage<1, [A9_AGU]>,
212                                  InstrStage<1, [A9_LSUnit]>],
213                                 [4, 1, 1], [A9_LdBypass]>,
214   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
215                                  InstrStage<1, [A9_MUX0], 0>,
216                                  InstrStage<2, [A9_AGU]>,
217                                  InstrStage<1, [A9_LSUnit]>],
218                                 [5, 1, 1], [A9_LdBypass]>,
219   //
220   // Immediate offset with update
221   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
222                                  InstrStage<1, [A9_MUX0], 0>,
223                                  InstrStage<1, [A9_AGU]>,
224                                  InstrStage<1, [A9_LSUnit]>],
225                                 [3, 2, 1], [A9_LdBypass]>,
226   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
227                                  InstrStage<1, [A9_MUX0], 0>,
228                                  InstrStage<2, [A9_AGU]>,
229                                  InstrStage<1, [A9_LSUnit]>],
230                                 [4, 3, 1], [A9_LdBypass]>,
231   //
232   // Register offset with update
233   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
234                                  InstrStage<1, [A9_MUX0], 0>,
235                                  InstrStage<1, [A9_AGU]>,
236                                  InstrStage<1, [A9_LSUnit]>],
237                                 [3, 2, 1, 1], [A9_LdBypass]>,
238   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
239                                  InstrStage<1, [A9_MUX0], 0>,
240                                  InstrStage<2, [A9_AGU]>,
241                                  InstrStage<1, [A9_LSUnit]>],
242                                 [4, 3, 1, 1], [A9_LdBypass]>,
243   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
244                                  InstrStage<1, [A9_MUX0], 0>,
245                                  InstrStage<2, [A9_AGU]>,
246                                  InstrStage<1, [A9_LSUnit]>],
247                                 [3, 3, 1, 1], [A9_LdBypass]>,
248   //
249   // Scaled register offset with update
250   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
251                                  InstrStage<1, [A9_MUX0], 0>,
252                                  InstrStage<1, [A9_AGU]>,
253                                  InstrStage<1, [A9_LSUnit]>],
254                                 [4, 3, 1, 1], [A9_LdBypass]>,
255   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
256                                   InstrStage<1, [A9_MUX0], 0>,
257                                   InstrStage<2, [A9_AGU]>,
258                                   InstrStage<1, [A9_LSUnit]>],
259                                  [5, 4, 1, 1], [A9_LdBypass]>,
260   //
261   // Load multiple, def is the 5th operand.
262   // FIXME: This assumes 3 to 4 registers.
263   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
264                                 InstrStage<1, [A9_MUX0], 0>,
265                                 InstrStage<2, [A9_AGU], 1>,
266                                 InstrStage<2, [A9_LSUnit]>],
267                                [1, 1, 1, 1, 3],
268                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
269   //
270   // Load multiple + update, defs are the 1st and 5th operands.
271   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
272                                 InstrStage<1, [A9_MUX0], 0>,
273                                 InstrStage<2, [A9_AGU], 1>,
274                                 InstrStage<2, [A9_LSUnit]>],
275                                [2, 1, 1, 1, 3],
276                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
277   //
278   // Load multiple plus branch
279   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
280                                 InstrStage<1, [A9_MUX0], 0>,
281                                 InstrStage<1, [A9_AGU], 1>,
282                                 InstrStage<2, [A9_LSUnit]>,
283                                 InstrStage<1, [A9_Branch]>],
284                                [1, 2, 1, 1, 3],
285                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
286   //
287   // Pop, def is the 3rd operand.
288   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
289                                 InstrStage<1, [A9_MUX0], 0>,
290                                 InstrStage<2, [A9_AGU], 1>,
291                                 InstrStage<2, [A9_LSUnit]>],
292                                [1, 1, 3],
293                                [NoBypass, NoBypass, A9_LdBypass]>,
294   //
295   // Pop + branch, def is the 3rd operand.
296   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
297                                 InstrStage<1, [A9_MUX0], 0>,
298                                 InstrStage<2, [A9_AGU], 1>,
299                                 InstrStage<2, [A9_LSUnit]>,
300                                 InstrStage<1, [A9_Branch]>],
301                                [1, 1, 3],
302                                [NoBypass, NoBypass, A9_LdBypass]>,
303
304   //
305   // iLoadi + iALUr for t2LDRpci_pic.
306   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
307                                 InstrStage<1, [A9_MUX0], 0>,
308                                 InstrStage<1, [A9_AGU]>,
309                                 InstrStage<1, [A9_LSUnit]>,
310                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
311                                [2, 1]>,
312
313   // Integer store pipeline
314   ///
315   // Immediate offset
316   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
317                                  InstrStage<1, [A9_MUX0], 0>,
318                                  InstrStage<1, [A9_AGU]>,
319                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
320   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
321                                  InstrStage<1, [A9_MUX0], 0>,
322                                  InstrStage<2, [A9_AGU], 1>,
323                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
324   // FIXME: If the address is 64-bit aligned, the AGU needs only 1 cycle.
325   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
326                                  InstrStage<1, [A9_MUX0], 0>,
327                                  InstrStage<2, [A9_AGU], 1>,
328                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
329   //
330   // Register offset
331   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
332                                  InstrStage<1, [A9_MUX0], 0>,
333                                  InstrStage<1, [A9_AGU]>,
334                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
335   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
336                                  InstrStage<1, [A9_MUX0], 0>,
337                                  InstrStage<2, [A9_AGU], 1>,
338                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
339   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
340                                  InstrStage<1, [A9_MUX0], 0>,
341                                  InstrStage<2, [A9_AGU], 1>,
342                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
343   //
344   // Scaled register offset
345   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
346                                   InstrStage<1, [A9_MUX0], 0>,
347                                   InstrStage<1, [A9_AGU]>,
348                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
349   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
350                                   InstrStage<1, [A9_MUX0], 0>,
351                                   InstrStage<2, [A9_AGU], 1>,
352                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
353   //
354   // Immediate offset with update
355   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
356                                   InstrStage<1, [A9_MUX0], 0>,
357                                   InstrStage<1, [A9_AGU]>,
358                                   InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
359   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
360                                   InstrStage<1, [A9_MUX0], 0>,
361                                   InstrStage<2, [A9_AGU], 1>,
362                                   InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
363   //
364   // Register offset with update
365   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
366                                   InstrStage<1, [A9_MUX0], 0>,
367                                   InstrStage<1, [A9_AGU]>,
368                                   InstrStage<1, [A9_LSUnit]>],
369                                  [2, 1, 1, 1]>,
370   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
371                                   InstrStage<1, [A9_MUX0], 0>,
372                                   InstrStage<2, [A9_AGU], 1>,
373                                   InstrStage<1, [A9_LSUnit]>],
374                                  [3, 1, 1, 1]>,
375   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
376                                   InstrStage<1, [A9_MUX0], 0>,
377                                   InstrStage<2, [A9_AGU], 1>,
378                                   InstrStage<1, [A9_LSUnit]>],
379                                  [3, 1, 1, 1]>,
380   //
381   // Scaled register offset with update
382   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
383                                     InstrStage<1, [A9_MUX0], 0>,
384                                     InstrStage<1, [A9_AGU]>,
385                                     InstrStage<1, [A9_LSUnit]>],
386                                    [2, 1, 1, 1]>,
387   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
388                                     InstrStage<1, [A9_MUX0], 0>,
389                                     InstrStage<2, [A9_AGU], 1>,
390                                     InstrStage<1, [A9_LSUnit]>],
391                                    [3, 1, 1, 1]>,
392   //
393   // Store multiple
394   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
395                                 InstrStage<1, [A9_MUX0], 0>,
396                                 InstrStage<1, [A9_AGU]>,
397                                 InstrStage<2, [A9_LSUnit]>]>,
398   //
399   // Store multiple + update
400   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
401                                 InstrStage<1, [A9_MUX0], 0>,
402                                 InstrStage<1, [A9_AGU]>,
403                                 InstrStage<2, [A9_LSUnit]>], [2]>,
404
405   // Branch
406   //
407   // no delay slots, so the latency of a branch is unimportant
408   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
409                                 InstrStage<1, [A9_Issue1], 0>,
410                                 InstrStage<1, [A9_Branch]>]>,
411
412   // VFP and NEON share the same register file. This means that every VFP
413   // instruction should wait for full completion of the consecutive NEON
414   // instruction and vice-versa. We model this behavior with two artificial FUs:
415   // DRegsVFP and DRegsN.
416   //
417   // Every VFP instruction:
418   //  - Acquires DRegsVFP resource for 1 cycle
419   //  - Reserves DRegsN resource for the whole duration (including time to
420   //    register file writeback!).
421   // Every NEON instruction does the same but with FUs swapped.
422   //
423   // Since the reserved FU cannot be acquired, this models precisely
424   // "cross-domain" stalls.
425
426   // VFP
427   // Issue through integer pipeline, and execute in NEON unit.
428
429   // FP Special Register to Integer Register File Move
430   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
431                               InstrStage<1, [A9_MUX0], 0>,
432                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
433                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
434                               InstrStage<1, [A9_NPipe]>]>,
435   //
436   // Single-precision FP Unary
437   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
438                                InstrStage<1, [A9_MUX0], 0>,
439                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
440                                // Extra latency cycles since wbck is 2 cycles
441                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
442                                InstrStage<1, [A9_NPipe]>],
443                               [1, 1]>,
444   //
445   // Double-precision FP Unary
446   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
447                                InstrStage<1, [A9_MUX0], 0>,
448                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
449                                // Extra latency cycles since wbck is 2 cycles
450                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
451                                InstrStage<1, [A9_NPipe]>],
452                               [1, 1]>,
453
454   //
455   // Single-precision FP Compare
456   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
457                                InstrStage<1, [A9_MUX0], 0>,
458                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
459                                // Extra latency cycles since wbck is 4 cycles
460                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
461                                InstrStage<1, [A9_NPipe]>],
462                               [1, 1]>,
463   //
464   // Double-precision FP Compare
465   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
466                                InstrStage<1, [A9_MUX0], 0>,
467                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
468                                // Extra latency cycles since wbck is 4 cycles
469                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
470                                InstrStage<1, [A9_NPipe]>],
471                               [1, 1]>,
472   //
473   // Single to Double FP Convert
474   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
475                                InstrStage<1, [A9_MUX0], 0>,
476                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
477                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
478                                InstrStage<1, [A9_NPipe]>],
479                               [4, 1]>,
480   //
481   // Double to Single FP Convert
482   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
483                                InstrStage<1, [A9_MUX0], 0>,
484                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
485                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
486                                InstrStage<1, [A9_NPipe]>],
487                               [4, 1]>,
488
489   //
490   // Single to Half FP Convert
491   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
492                                InstrStage<1, [A9_MUX0], 0>,
493                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
494                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
495                                InstrStage<1, [A9_NPipe]>],
496                               [4, 1]>,
497   //
498   // Half to Single FP Convert
499   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
500                                InstrStage<1, [A9_MUX0], 0>,
501                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
502                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
503                                InstrStage<1, [A9_NPipe]>],
504                               [2, 1]>,
505
506   //
507   // Single-Precision FP to Integer Convert
508   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
509                                InstrStage<1, [A9_MUX0], 0>,
510                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
511                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
512                                InstrStage<1, [A9_NPipe]>],
513                               [4, 1]>,
514   //
515   // Double-Precision FP to Integer Convert
516   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
517                                InstrStage<1, [A9_MUX0], 0>,
518                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
519                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
520                                InstrStage<1, [A9_NPipe]>],
521                               [4, 1]>,
522   //
523   // Integer to Single-Precision FP Convert
524   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
525                                InstrStage<1, [A9_MUX0], 0>,
526                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
527                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
528                                InstrStage<1, [A9_NPipe]>],
529                               [4, 1]>,
530   //
531   // Integer to Double-Precision FP Convert
532   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
533                                InstrStage<1, [A9_MUX0], 0>,
534                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
535                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
536                                InstrStage<1, [A9_NPipe]>],
537                               [4, 1]>,
538   //
539   // Single-precision FP ALU
540   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
541                                InstrStage<1, [A9_MUX0], 0>,
542                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
543                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
544                                InstrStage<1, [A9_NPipe]>],
545                               [4, 1, 1]>,
546   //
547   // Double-precision FP ALU
548   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
549                                InstrStage<1, [A9_MUX0], 0>,
550                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
551                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
552                                InstrStage<1, [A9_NPipe]>],
553                               [4, 1, 1]>,
554   //
555   // Single-precision FP Multiply
556   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
557                                InstrStage<1, [A9_MUX0], 0>,
558                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
559                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
560                                InstrStage<1, [A9_NPipe]>],
561                               [5, 1, 1]>,
562   //
563   // Double-precision FP Multiply
564   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
565                                InstrStage<1, [A9_MUX0], 0>,
566                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
567                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
568                                InstrStage<2, [A9_NPipe]>],
569                               [6, 1, 1]>,
570   //
571   // Single-precision FP MAC
572   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
573                                InstrStage<1, [A9_MUX0], 0>,
574                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
575                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
576                                InstrStage<1, [A9_NPipe]>],
577                               [8, 1, 1, 1]>,
578   //
579   // Double-precision FP MAC
580   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
581                                InstrStage<1,  [A9_MUX0], 0>,
582                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
583                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
584                                InstrStage<2,  [A9_NPipe]>],
585                               [9, 1, 1, 1]>,
586   //
587   // Single-precision FP DIV
588   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
589                                InstrStage<1,  [A9_MUX0], 0>,
590                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
591                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
592                                InstrStage<10, [A9_NPipe]>],
593                               [15, 1, 1]>,
594   //
595   // Double-precision FP DIV
596   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
597                                InstrStage<1,  [A9_MUX0], 0>,
598                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
599                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
600                                InstrStage<20, [A9_NPipe]>],
601                               [25, 1, 1]>,
602   //
603   // Single-precision FP SQRT
604   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
605                                InstrStage<1,  [A9_MUX0], 0>,
606                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
607                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
608                                InstrStage<13, [A9_NPipe]>],
609                               [17, 1]>,
610   //
611   // Double-precision FP SQRT
612   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
613                                InstrStage<1,  [A9_MUX0], 0>,
614                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
615                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
616                                InstrStage<28, [A9_NPipe]>],
617                               [32, 1]>,
618
619   //
620   // Integer to Single-precision Move
621   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
622                                InstrStage<1, [A9_MUX0], 0>,
623                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
624                                // Extra 1 latency cycle since wbck is 2 cycles
625                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
626                                InstrStage<1, [A9_NPipe]>],
627                               [1, 1]>,
628   //
629   // Integer to Double-precision Move
630   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
631                                InstrStage<1, [A9_MUX0], 0>,
632                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
633                                // Extra 1 latency cycle since wbck is 2 cycles
634                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
635                                InstrStage<1, [A9_NPipe]>],
636                               [1, 1, 1]>,
637   //
638   // Single-precision to Integer Move
639   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
640                                InstrStage<1, [A9_MUX0], 0>,
641                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
642                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
643                                InstrStage<1, [A9_NPipe]>],
644                               [2, 1]>,
645   //
646   // Double-precision to Integer Move
647   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
648                                InstrStage<1, [A9_MUX0], 0>,
649                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
650                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
651                                InstrStage<1, [A9_NPipe]>],
652                               [2, 1, 1]>,
653   //
654   // Single-precision FP Load
655   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
656                                InstrStage<1, [A9_MUX0], 0>,
657                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
658                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
659                                InstrStage<1, [A9_NPipe]>,
660                                InstrStage<1, [A9_LSUnit]>],
661                               [1, 1]>,
662   //
663   // Double-precision FP Load
664   // FIXME: Result latency is 1 if address is 64-bit aligned.
665   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
666                                InstrStage<1, [A9_MUX0], 0>,
667                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
668                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
669                                InstrStage<1, [A9_NPipe]>,
670                                InstrStage<1, [A9_LSUnit]>],
671                               [2, 1]>,
672   //
673   // FP Load Multiple
674   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
675                                InstrStage<1, [A9_MUX0], 0>,
676                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
677                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
678                                InstrStage<1, [A9_NPipe]>,
679                                InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
680   //
681   // FP Load Multiple + update
682   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
683                                InstrStage<1, [A9_MUX0], 0>,
684                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
685                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
686                                InstrStage<1, [A9_NPipe]>,
687                                InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
688   //
689   // Single-precision FP Store
690   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
691                                InstrStage<1, [A9_MUX0], 0>,
692                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
693                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
694                                InstrStage<1, [A9_NPipe]>,
695                                InstrStage<1, [A9_LSUnit]>],
696                               [1, 1]>,
697   //
698   // Double-precision FP Store
699   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
700                                InstrStage<1, [A9_MUX0], 0>,
701                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
702                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
703                                InstrStage<1, [A9_NPipe]>,
704                                InstrStage<1, [A9_LSUnit]>],
705                               [1, 1]>,
706   //
707   // FP Store Multiple
708   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
709                                InstrStage<1, [A9_MUX0], 0>,
710                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
711                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
712                                InstrStage<1, [A9_NPipe]>,
713                                InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
714   //
715   // FP Store Multiple + update
716   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
717                                 InstrStage<1, [A9_MUX0], 0>,
718                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
719                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
720                                 InstrStage<1, [A9_NPipe]>,
721                                 InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
722   // NEON
723   // VLD1
724   // FIXME: Conservatively assume insufficient alignment.
725   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
726                                InstrStage<1, [A9_MUX0], 0>,
727                                InstrStage<1, [A9_DRegsN],   0, Required>,
728                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
729                                InstrStage<2, [A9_NPipe], 1>,
730                                InstrStage<2, [A9_LSUnit]>],
731                               [2, 1]>,
732   // VLD1x2
733   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
734                                InstrStage<1, [A9_MUX0], 0>,
735                                InstrStage<1, [A9_DRegsN],   0, Required>,
736                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
737                                InstrStage<2, [A9_NPipe], 1>,
738                                InstrStage<2, [A9_LSUnit]>],
739                               [2, 2, 1]>,
740   // VLD1x3
741   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
742                                InstrStage<1, [A9_MUX0], 0>,
743                                InstrStage<1, [A9_DRegsN],   0, Required>,
744                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
745                                InstrStage<3, [A9_NPipe], 1>,
746                                InstrStage<3, [A9_LSUnit]>],
747                               [2, 2, 3, 1]>,
748   // VLD1x4
749   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
750                                InstrStage<1, [A9_MUX0], 0>,
751                                InstrStage<1, [A9_DRegsN],   0, Required>,
752                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
753                                InstrStage<3, [A9_NPipe], 1>,
754                                InstrStage<3, [A9_LSUnit]>],
755                               [2, 2, 3, 3, 1]>,
756   // VLD1u
757   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
758                                InstrStage<1, [A9_MUX0], 0>,
759                                InstrStage<1, [A9_DRegsN],   0, Required>,
760                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
761                                InstrStage<2, [A9_NPipe], 1>,
762                                InstrStage<2, [A9_LSUnit]>],
763                               [2, 2, 1]>,
764   // VLD1x2u
765   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
766                                InstrStage<1, [A9_MUX0], 0>,
767                                InstrStage<1, [A9_DRegsN],   0, Required>,
768                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
769                                InstrStage<2, [A9_NPipe], 1>,
770                                InstrStage<2, [A9_LSUnit]>],
771                               [2, 2, 2, 1]>,
772   // VLD1x3u
773   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
774                                InstrStage<1, [A9_MUX0], 0>,
775                                InstrStage<1, [A9_DRegsN],   0, Required>,
776                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
777                                InstrStage<3, [A9_NPipe], 1>,
778                                InstrStage<3, [A9_LSUnit]>],
779                               [2, 2, 3, 2, 1]>,
780   // VLD1x4u
781   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
782                                InstrStage<1, [A9_MUX0], 0>,
783                                InstrStage<1, [A9_DRegsN],   0, Required>,
784                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
785                                InstrStage<3, [A9_NPipe], 1>,
786                                InstrStage<3, [A9_LSUnit]>],
787                               [2, 2, 3, 3, 2, 1]>,
788   //
789   // VLD2
790   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
791                                InstrStage<1, [A9_MUX0], 0>,
792                                InstrStage<1, [A9_DRegsN],   0, Required>,
793                                // Extra latency cycles since wbck is 7 cycles
794                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
795                                InstrStage<2, [A9_NPipe], 1>,
796                                InstrStage<2, [A9_LSUnit]>],
797                               [3, 3, 1]>,
798   //
799   // VLD2x2
800   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
801                                InstrStage<1, [A9_MUX0], 0>,
802                                InstrStage<1, [A9_DRegsN],   0, Required>,
803                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
804                                InstrStage<3, [A9_NPipe], 1>,
805                                InstrStage<3, [A9_LSUnit]>],
806                               [3, 4, 3, 4, 1]>,
807   //
808   // VLD2ln
809   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
810                                InstrStage<1, [A9_MUX0], 0>,
811                                InstrStage<1, [A9_DRegsN],   0, Required>,
812                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
813                                InstrStage<3, [A9_NPipe], 1>,
814                                InstrStage<3, [A9_LSUnit]>],
815                               [4, 4, 1, 1, 1, 1]>,
816   //
817   // VLD2u
818   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
819                                InstrStage<1, [A9_MUX0], 0>,
820                                InstrStage<1, [A9_DRegsN],   0, Required>,
821                                // Extra latency cycles since wbck is 7 cycles
822                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
823                                InstrStage<2, [A9_NPipe], 1>,
824                                InstrStage<2, [A9_LSUnit]>],
825                               [3, 3, 2, 1, 1, 1]>,
826   //
827   // VLD2x2u
828   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
829                                InstrStage<1, [A9_MUX0], 0>,
830                                InstrStage<1, [A9_DRegsN],   0, Required>,
831                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
832                                InstrStage<3, [A9_NPipe], 1>,
833                                InstrStage<3, [A9_LSUnit]>],
834                               [3, 4, 3, 4, 2, 1]>,
835   //
836   // VLD2lnu
837   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
838                                InstrStage<1, [A9_MUX0], 0>,
839                                InstrStage<1, [A9_DRegsN],   0, Required>,
840                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
841                                InstrStage<3, [A9_NPipe], 1>,
842                                InstrStage<3, [A9_LSUnit]>],
843                               [4, 4, 2, 1, 1, 1, 1, 1]>,
844   //
845   // VLD3
846   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
847                                InstrStage<1, [A9_MUX0], 0>,
848                                InstrStage<1, [A9_DRegsN],   0, Required>,
849                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
850                                InstrStage<4, [A9_NPipe], 1>,
851                                InstrStage<4, [A9_LSUnit]>],
852                               [4, 4, 5, 1]>,
853   //
854   // VLD3ln
855   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
856                                InstrStage<1, [A9_MUX0], 0>,
857                                InstrStage<1, [A9_DRegsN],   0, Required>,
858                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
859                                InstrStage<5, [A9_NPipe], 1>,
860                                InstrStage<5, [A9_LSUnit]>],
861                               [5, 5, 6, 1, 1, 1, 1, 2]>,
862   //
863   // VLD3u
864   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
865                                InstrStage<1, [A9_MUX0], 0>,
866                                InstrStage<1, [A9_DRegsN],   0, Required>,
867                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
868                                InstrStage<4, [A9_NPipe], 1>,
869                                InstrStage<4, [A9_LSUnit]>],
870                               [4, 4, 5, 2, 1]>,
871   //
872   // VLD3lnu
873   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
874                                InstrStage<1, [A9_MUX0], 0>,
875                                InstrStage<1, [A9_DRegsN],   0, Required>,
876                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
877                                InstrStage<5, [A9_NPipe], 1>,
878                                InstrStage<5, [A9_LSUnit]>],
879                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
880   //
881   // VLD4
882   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
883                                InstrStage<1, [A9_MUX0], 0>,
884                                InstrStage<1, [A9_DRegsN],   0, Required>,
885                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
886                                InstrStage<4, [A9_NPipe], 1>,
887                                InstrStage<4, [A9_LSUnit]>],
888                               [4, 4, 5, 5, 1]>,
889   //
890   // VLD4ln
891   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
892                                InstrStage<1, [A9_MUX0], 0>,
893                                InstrStage<1, [A9_DRegsN],   0, Required>,
894                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
895                                InstrStage<5, [A9_NPipe], 1>,
896                                InstrStage<5, [A9_LSUnit]>],
897                               [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
898   //
899   // VLD4u
900   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
901                                InstrStage<1, [A9_MUX0], 0>,
902                                InstrStage<1, [A9_DRegsN],   0, Required>,
903                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
904                                InstrStage<4, [A9_NPipe], 1>,
905                                InstrStage<4, [A9_LSUnit]>],
906                               [4, 4, 5, 5, 2, 1]>,
907   //
908   // VLD4lnu
909   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
910                                InstrStage<1, [A9_MUX0], 0>,
911                                InstrStage<1, [A9_DRegsN],   0, Required>,
912                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
913                                InstrStage<5, [A9_NPipe], 1>,
914                                InstrStage<5, [A9_LSUnit]>],
915                               [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
916   //
917   // VST1
918   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
919                                InstrStage<1, [A9_MUX0], 0>,
920                                InstrStage<1, [A9_DRegsN],   0, Required>,
921                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
922                                InstrStage<2, [A9_NPipe], 1>,
923                                InstrStage<2, [A9_LSUnit]>],
924                               [1, 1, 1]>,
925   //
926   // VST1x2
927   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
928                                InstrStage<1, [A9_MUX0], 0>,
929                                InstrStage<1, [A9_DRegsN],   0, Required>,
930                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
931                                InstrStage<2, [A9_NPipe], 1>,
932                                InstrStage<2, [A9_LSUnit]>],
933                               [1, 1, 1, 1]>,
934   //
935   // VST1x3
936   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
937                                InstrStage<1, [A9_MUX0], 0>,
938                                InstrStage<1, [A9_DRegsN],   0, Required>,
939                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
940                                InstrStage<3, [A9_NPipe], 1>,
941                                InstrStage<3, [A9_LSUnit]>],
942                               [1, 1, 1, 1, 2]>,
943   //
944   // VST1x4
945   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
946                                InstrStage<1, [A9_MUX0], 0>,
947                                InstrStage<1, [A9_DRegsN],   0, Required>,
948                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
949                                InstrStage<3, [A9_NPipe], 1>,
950                                InstrStage<3, [A9_LSUnit]>],
951                               [1, 1, 1, 1, 2, 2]>,
952   //
953   // VST1u
954   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
955                                InstrStage<1, [A9_MUX0], 0>,
956                                InstrStage<1, [A9_DRegsN],   0, Required>,
957                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
958                                InstrStage<2, [A9_NPipe], 1>,
959                                InstrStage<2, [A9_LSUnit]>],
960                               [2, 1, 1, 1, 1]>,
961   //
962   // VST1x2u
963   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
964                                InstrStage<1, [A9_MUX0], 0>,
965                                InstrStage<1, [A9_DRegsN],   0, Required>,
966                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
967                                InstrStage<2, [A9_NPipe], 1>,
968                                InstrStage<2, [A9_LSUnit]>],
969                               [2, 1, 1, 1, 1, 1]>,
970   //
971   // VST1x3u (VFP-side regs stay reserved for all 3 NPipe cycles, as in VST1x3)
972   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
973                                InstrStage<1, [A9_MUX0], 0>,
974                                InstrStage<1, [A9_DRegsN],   0, Required>,
975                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
976                                InstrStage<3, [A9_NPipe], 1>,
977                                InstrStage<3, [A9_LSUnit]>],
978                               [2, 1, 1, 1, 1, 1, 2]>,
979   //
980   // VST1x4u
981   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
982                                InstrStage<1, [A9_MUX0], 0>,
983                                InstrStage<1, [A9_DRegsN],   0, Required>,
984                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
985                                InstrStage<3, [A9_NPipe], 1>,
986                                InstrStage<3, [A9_LSUnit]>],
987                               [2, 1, 1, 1, 1, 1, 2, 2]>,
988   //
989   // VST2
990   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
991                                InstrStage<1, [A9_MUX0], 0>,
992                                InstrStage<1, [A9_DRegsN],   0, Required>,
993                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
994                                InstrStage<2, [A9_NPipe], 1>,
995                                InstrStage<2, [A9_LSUnit]>],
996                               [1, 1, 1, 1]>,
997   //
998   // VST2x2
999   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1000                                InstrStage<1, [A9_MUX0], 0>,
1001                                InstrStage<1, [A9_DRegsN],   0, Required>,
1002                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1003                                InstrStage<3, [A9_NPipe], 1>,
1004                                InstrStage<3, [A9_LSUnit]>],
1005                               [1, 1, 1, 1, 2, 2]>,
1006   //
1007   // VST2u
1008   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1009                                InstrStage<1, [A9_MUX0], 0>,
1010                                InstrStage<1, [A9_DRegsN],   0, Required>,
1011                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1012                                InstrStage<2, [A9_NPipe], 1>,
1013                                InstrStage<2, [A9_LSUnit]>],
1014                               [2, 1, 1, 1, 1, 1]>,
1015   //
1016   // VST2x2u
1017   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1018                                InstrStage<1, [A9_MUX0], 0>,
1019                                InstrStage<1, [A9_DRegsN],   0, Required>,
1020                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1021                                InstrStage<3, [A9_NPipe], 1>,
1022                                InstrStage<3, [A9_LSUnit]>],
1023                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1024   //
1025   // VST2ln
1026   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1027                                InstrStage<1, [A9_MUX0], 0>,
1028                                InstrStage<1, [A9_DRegsN],   0, Required>,
1029                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1030                                InstrStage<2, [A9_NPipe], 1>,
1031                                InstrStage<2, [A9_LSUnit]>],
1032                               [1, 1, 1, 1]>,
1033   //
1034   // VST2lnu
1035   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1036                                InstrStage<1, [A9_MUX0], 0>,
1037                                InstrStage<1, [A9_DRegsN],   0, Required>,
1038                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1039                                InstrStage<3, [A9_NPipe], 1>,
1040                                InstrStage<3, [A9_LSUnit]>],
1041                               [2, 1, 1, 1, 1, 1]>,
1042   //
1043   // VST3
1044   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1045                                InstrStage<1, [A9_MUX0], 0>,
1046                                InstrStage<1, [A9_DRegsN],   0, Required>,
1047                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1048                                InstrStage<3, [A9_NPipe], 1>,
1049                                InstrStage<3, [A9_LSUnit]>],
1050                               [1, 1, 1, 1, 2]>,
1051   //
1052   // VST3u
1053   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1054                                InstrStage<1, [A9_MUX0], 0>,
1055                                InstrStage<1, [A9_DRegsN],   0, Required>,
1056                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1057                                InstrStage<3, [A9_NPipe], 1>,
1058                                InstrStage<3, [A9_LSUnit]>],
1059                               [2, 1, 1, 1, 1, 1, 2]>,
1060   //
1061   // VST3ln
1062   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1063                                InstrStage<1, [A9_MUX0], 0>,
1064                                InstrStage<1, [A9_DRegsN],   0, Required>,
1065                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1066                                InstrStage<3, [A9_NPipe], 1>,
1067                                InstrStage<3, [A9_LSUnit]>],
1068                               [1, 1, 1, 1, 2]>,
1069   //
1070   // VST3lnu
1071   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1072                                InstrStage<1, [A9_MUX0], 0>,
1073                                InstrStage<1, [A9_DRegsN],   0, Required>,
1074                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1075                                InstrStage<3, [A9_NPipe], 1>,
1076                                InstrStage<3, [A9_LSUnit]>],
1077                               [2, 1, 1, 1, 1, 1, 2]>,
1078   //
1079   // VST4
1080   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1081                                InstrStage<1, [A9_MUX0], 0>,
1082                                InstrStage<1, [A9_DRegsN],   0, Required>,
1083                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1084                                InstrStage<3, [A9_NPipe], 1>,
1085                                InstrStage<3, [A9_LSUnit]>],
1086                               [1, 1, 1, 1, 2, 2]>,
1087   //
1088   // VST4u
1089   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1090                                InstrStage<1, [A9_MUX0], 0>,
1091                                InstrStage<1, [A9_DRegsN],   0, Required>,
1092                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1093                                InstrStage<3, [A9_NPipe], 1>,
1094                                InstrStage<3, [A9_LSUnit]>],
1095                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1096   //
1097   // VST4ln
1098   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1099                                InstrStage<1, [A9_MUX0], 0>,
1100                                InstrStage<1, [A9_DRegsN],   0, Required>,
1101                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1102                                InstrStage<3, [A9_NPipe], 1>,
1103                                InstrStage<3, [A9_LSUnit]>],
1104                               [1, 1, 1, 1, 2, 2]>,
1105   //
1106   // VST4lnu
1107   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1108                                InstrStage<1, [A9_MUX0], 0>,
1109                                InstrStage<1, [A9_DRegsN],   0, Required>,
1110                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1111                                InstrStage<3, [A9_NPipe], 1>,
1112                                InstrStage<3, [A9_LSUnit]>],
1113                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1114
1115   //
1116   // Double-register Integer Unary
1117   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1118                                InstrStage<1, [A9_MUX0], 0>,
1119                                InstrStage<1, [A9_DRegsN],   0, Required>,
1120                                // Extra latency cycles since wbck is 6 cycles
1121                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1122                                InstrStage<1, [A9_NPipe]>],
1123                               [4, 2]>,
1124   //
1125   // Quad-register Integer Unary
1126   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1127                                InstrStage<1, [A9_MUX0], 0>,
1128                                InstrStage<1, [A9_DRegsN],   0, Required>,
1129                                // Extra latency cycles since wbck is 6 cycles
1130                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1131                                InstrStage<1, [A9_NPipe]>],
1132                               [4, 2]>,
1133   //
1134   // Double-register Integer Q-Unary
1135   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1136                                InstrStage<1, [A9_MUX0], 0>,
1137                                InstrStage<1, [A9_DRegsN],   0, Required>,
1138                                // Extra latency cycles since wbck is 6 cycles
1139                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1140                                InstrStage<1, [A9_NPipe]>],
1141                               [4, 1]>,
1142   //
1143   // Quad-register Integer Q-Unary
1144   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1145                                InstrStage<1, [A9_MUX0], 0>,
1146                                InstrStage<1, [A9_DRegsN],   0, Required>,
1147                                // Extra latency cycles since wbck is 6 cycles
1148                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1149                                InstrStage<1, [A9_NPipe]>],
1150                               [4, 1]>,
1151   //
1152   // Double-register Integer Binary
1153   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1154                                InstrStage<1, [A9_MUX0], 0>,
1155                                InstrStage<1, [A9_DRegsN],   0, Required>,
1156                                // Extra latency cycles since wbck is 6 cycles
1157                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1158                                InstrStage<1, [A9_NPipe]>],
1159                               [3, 2, 2]>,
1160   //
1161   // Quad-register Integer Binary
1162   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1163                                InstrStage<1, [A9_MUX0], 0>,
1164                                InstrStage<1, [A9_DRegsN],   0, Required>,
1165                                // Extra latency cycles since wbck is 6 cycles
1166                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1167                                InstrStage<1, [A9_NPipe]>],
1168                               [3, 2, 2]>,
1169   //
1170   // Double-register Integer Subtract
1171   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1172                                InstrStage<1, [A9_MUX0], 0>,
1173                                InstrStage<1, [A9_DRegsN],   0, Required>,
1174                                // Extra latency cycles since wbck is 6 cycles
1175                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1176                                InstrStage<1, [A9_NPipe]>],
1177                               [3, 2, 1]>,
1178   //
1179   // Quad-register Integer Subtract
1180   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1181                                InstrStage<1, [A9_MUX0], 0>,
1182                                InstrStage<1, [A9_DRegsN],   0, Required>,
1183                                // Extra latency cycles since wbck is 6 cycles
1184                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1185                                InstrStage<1, [A9_NPipe]>],
1186                               [3, 2, 1]>,
1187   //
1188   // Double-register Integer Shift
1189   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1190                                InstrStage<1, [A9_MUX0], 0>,
1191                                InstrStage<1, [A9_DRegsN],   0, Required>,
1192                                // Extra latency cycles since wbck is 6 cycles
1193                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1194                                InstrStage<1, [A9_NPipe]>],
1195                               [3, 1, 1]>,
1196   //
1197   // Quad-register Integer Shift
1198   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1199                                InstrStage<1, [A9_MUX0], 0>,
1200                                InstrStage<1, [A9_DRegsN],   0, Required>,
1201                                // Extra latency cycles since wbck is 6 cycles
1202                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1203                                InstrStage<1, [A9_NPipe]>],
1204                               [3, 1, 1]>,
1205   //
1206   // Double-register Integer Shift (4 cycle)
1207   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1208                                InstrStage<1, [A9_MUX0], 0>,
1209                                InstrStage<1, [A9_DRegsN],   0, Required>,
1210                                // Extra latency cycles since wbck is 6 cycles
1211                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1212                                InstrStage<1, [A9_NPipe]>],
1213                               [4, 1, 1]>,
1214   //
1215   // Quad-register Integer Shift (4 cycle)
1216   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1217                                InstrStage<1, [A9_MUX0], 0>,
1218                                InstrStage<1, [A9_DRegsN],   0, Required>,
1219                                // Extra latency cycles since wbck is 6 cycles
1220                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1221                                InstrStage<1, [A9_NPipe]>],
1222                               [4, 1, 1]>,
1223   //
1224   // Double-register Integer Binary (4 cycle)
1225   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1226                                InstrStage<1, [A9_MUX0], 0>,
1227                                InstrStage<1, [A9_DRegsN],   0, Required>,
1228                                // Extra latency cycles since wbck is 6 cycles
1229                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1230                                InstrStage<1, [A9_NPipe]>],
1231                               [4, 2, 2]>,
1232   //
1233   // Quad-register Integer Binary (4 cycle)
1234   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1235                                InstrStage<1, [A9_MUX0], 0>,
1236                                InstrStage<1, [A9_DRegsN],   0, Required>,
1237                                // Extra latency cycles since wbck is 6 cycles
1238                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1239                                InstrStage<1, [A9_NPipe]>],
1240                               [4, 2, 2]>,
1241   //
1242   // Double-register Integer Subtract (4 cycle)
1243   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1244                                InstrStage<1, [A9_MUX0], 0>,
1245                                InstrStage<1, [A9_DRegsN],   0, Required>,
1246                                // Extra latency cycles since wbck is 6 cycles
1247                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1248                                InstrStage<1, [A9_NPipe]>],
1249                               [4, 2, 1]>,
1250   //
1251   // Quad-register Integer Subtract (4 cycle)
1252   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1253                                InstrStage<1, [A9_MUX0], 0>,
1254                                InstrStage<1, [A9_DRegsN],   0, Required>,
1255                                // Extra latency cycles since wbck is 6 cycles
1256                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1257                                InstrStage<1, [A9_NPipe]>],
1258                               [4, 2, 1]>,
1259
1260   //
1261   // Double-register Integer Count
1262   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1263                                InstrStage<1, [A9_MUX0], 0>,
1264                                InstrStage<1, [A9_DRegsN],   0, Required>,
1265                                // Extra latency cycles since wbck is 6 cycles
1266                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1267                                InstrStage<1, [A9_NPipe]>],
1268                               [3, 2, 2]>,
1269   //
1270   // Quad-register Integer Count
1271   // Result written in N3, but that is relative to the last cycle of multicycle,
1272   // so we use 4 for those cases
1273   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1274                                InstrStage<1, [A9_MUX0], 0>,
1275                                InstrStage<1, [A9_DRegsN],   0, Required>,
1276                                // Extra latency cycles since wbck is 7 cycles
1277                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1278                                InstrStage<2, [A9_NPipe]>],
1279                               [4, 2, 2]>,
1280   //
1281   // Double-register Absolute Difference and Accumulate
1282   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1283                                InstrStage<1, [A9_MUX0], 0>,
1284                                InstrStage<1, [A9_DRegsN],   0, Required>,
1285                                // Extra latency cycles since wbck is 6 cycles
1286                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1287                                InstrStage<1, [A9_NPipe]>],
1288                               [6, 3, 2, 1]>,
1289   //
1290   // Quad-register Absolute Difference and Accumulate
1291   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1292                                InstrStage<1, [A9_MUX0], 0>,
1293                                InstrStage<1, [A9_DRegsN],   0, Required>,
1294                                // Extra latency cycles since wbck is 6 cycles
1295                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1296                                InstrStage<2, [A9_NPipe]>],
1297                               [6, 3, 2, 1]>,
1298   //
1299   // Double-register Integer Pair Add Long
1300   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1301                                InstrStage<1, [A9_MUX0], 0>,
1302                                InstrStage<1, [A9_DRegsN],   0, Required>,
1303                                // Extra latency cycles since wbck is 6 cycles
1304                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1305                                InstrStage<1, [A9_NPipe]>],
1306                               [6, 3, 1]>,
1307   //
1308   // Quad-register Integer Pair Add Long
1309   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1310                                InstrStage<1, [A9_MUX0], 0>,
1311                                InstrStage<1, [A9_DRegsN],   0, Required>,
1312                                // Extra latency cycles since wbck is 6 cycles
1313                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1314                                InstrStage<2, [A9_NPipe]>],
1315                               [6, 3, 1]>,
1316
1317   //
1318   // Double-register Integer Multiply (.8, .16)
1319   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1320                                InstrStage<1, [A9_MUX0], 0>,
1321                                InstrStage<1, [A9_DRegsN],   0, Required>,
1322                                // Extra latency cycles since wbck is 6 cycles
1323                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1324                                InstrStage<1, [A9_NPipe]>],
1325                               [6, 2, 2]>,
1326   //
1327   // Quad-register Integer Multiply (.8, .16)
1328   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1329                                InstrStage<1, [A9_MUX0], 0>,
1330                                InstrStage<1, [A9_DRegsN],   0, Required>,
1331                                // Extra latency cycles since wbck is 7 cycles
1332                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1333                                InstrStage<2, [A9_NPipe]>],
1334                               [7, 2, 2]>,
1335
1336   //
1337   // Double-register Integer Multiply (.32)
1338   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1339                                InstrStage<1, [A9_MUX0], 0>,
1340                                InstrStage<1, [A9_DRegsN],   0, Required>,
1341                                // Extra latency cycles since wbck is 7 cycles
1342                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1343                                InstrStage<2, [A9_NPipe]>],
1344                               [7, 2, 1]>,
1345   //
1346   // Quad-register Integer Multiply (.32)
1347   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1348                                InstrStage<1, [A9_MUX0], 0>,
1349                                InstrStage<1, [A9_DRegsN],   0, Required>,
1350                                // Extra latency cycles since wbck is 9 cycles
1351                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1352                                InstrStage<4, [A9_NPipe]>],
1353                               [9, 2, 1]>,
1354   //
1355   // Double-register Integer Multiply-Accumulate (.8, .16)
1356   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1357                                InstrStage<1, [A9_MUX0], 0>,
1358                                InstrStage<1, [A9_DRegsN],   0, Required>,
1359                                // Extra latency cycles since wbck is 6 cycles
1360                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1361                                InstrStage<1, [A9_NPipe]>],
1362                               [6, 3, 2, 2]>,
1363   //
1364   // Double-register Integer Multiply-Accumulate (.32)
1365   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1366                                InstrStage<1, [A9_MUX0], 0>,
1367                                InstrStage<1, [A9_DRegsN],   0, Required>,
1368                                // Extra latency cycles since wbck is 7 cycles
1369                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1370                                InstrStage<2, [A9_NPipe]>],
1371                               [7, 3, 2, 1]>,
1372   //
1373   // Quad-register Integer Multiply-Accumulate (.8, .16)
1374   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1375                                InstrStage<1, [A9_MUX0], 0>,
1376                                InstrStage<1, [A9_DRegsN],   0, Required>,
1377                                // Extra latency cycles since wbck is 7 cycles
1378                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1379                                InstrStage<2, [A9_NPipe]>],
1380                               [7, 3, 2, 2]>,
1381   //
1382   // Quad-register Integer Multiply-Accumulate (.32)
1383   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1384                                InstrStage<1, [A9_MUX0], 0>,
1385                                InstrStage<1, [A9_DRegsN],   0, Required>,
1386                                // Extra latency cycles since wbck is 9 cycles
1387                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1388                                InstrStage<4, [A9_NPipe]>],
1389                               [9, 3, 2, 1]>,
1390
1391   //
1392   // Move
1393   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1394                                InstrStage<1, [A9_MUX0], 0>,
1395                                InstrStage<1, [A9_DRegsN],   0, Required>,
1396                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1397                                InstrStage<1, [A9_NPipe]>],
1398                               [1,1]>,
1399   //
1400   // Move Immediate
1401   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1402                                InstrStage<1, [A9_MUX0], 0>,
1403                                InstrStage<1, [A9_DRegsN],   0, Required>,
1404                                // Extra latency cycles since wbck is 6 cycles
1405                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1406                                InstrStage<1, [A9_NPipe]>],
1407                               [3]>,
1408   //
1409   // Double-register Permute Move
1410   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1411                                InstrStage<1, [A9_MUX0], 0>,
1412                                InstrStage<1, [A9_DRegsN],   0, Required>,
1413                                // Extra latency cycles since wbck is 6 cycles
1414                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1415                                InstrStage<1, [A9_NPipe]>],
1416                               [2, 1]>,
1417   //
1418   // Quad-register Permute Move
1419   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1420                                InstrStage<1, [A9_MUX0], 0>,
1421                                InstrStage<1, [A9_DRegsN],   0, Required>,
1422                                // Extra latency cycles since wbck is 6 cycles
1423                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1424                                InstrStage<1, [A9_NPipe]>],
1425                               [2, 1]>,
1426   //
1427   // Integer to Single-precision Move
1428   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1429                                InstrStage<1, [A9_MUX0], 0>,
1430                                InstrStage<1, [A9_DRegsN],   0, Required>,
1431                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1432                                InstrStage<1, [A9_NPipe]>],
1433                               [1, 1]>,
1434   //
1435   // Integer to Double-precision Move
1436   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1437                                InstrStage<1, [A9_MUX0], 0>,
1438                                InstrStage<1, [A9_DRegsN],   0, Required>,
1439                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1440                                InstrStage<1, [A9_NPipe]>],
1441                               [1, 1, 1]>,
1442   //
1443   // Single-precision to Integer Move
1444   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1445                                InstrStage<1, [A9_MUX0], 0>,
1446                                InstrStage<1, [A9_DRegsN],   0, Required>,
1447                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1448                                InstrStage<1, [A9_NPipe]>],
1449                               [2, 1]>,
1450   //
1451   // Double-precision to Integer Move
1452   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1453                                InstrStage<1, [A9_MUX0], 0>,
1454                                InstrStage<1, [A9_DRegsN],   0, Required>,
1455                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1456                                InstrStage<1, [A9_NPipe]>],
1457                               [2, 2, 1]>,
1458   //
1459   // Integer to Lane Move
1460   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1461                                InstrStage<1, [A9_MUX0], 0>,
1462                                InstrStage<1, [A9_DRegsN],   0, Required>,
1463                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1464                                InstrStage<2, [A9_NPipe]>],
1465                               [3, 1, 1]>,
1466
1467   //
1468   // Vector narrow move
1469   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1470                                InstrStage<1, [A9_MUX0], 0>,
1471                                InstrStage<1, [A9_DRegsN],   0, Required>,
1472                                // Extra latency cycles since wbck is 6 cycles
1473                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1474                                InstrStage<1, [A9_NPipe]>],
1475                               [3, 1]>,
1476   //
1477   // Double-register FP Unary
1478   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1479                                InstrStage<1, [A9_MUX0], 0>,
1480                                InstrStage<1, [A9_DRegsN],   0, Required>,
1481                                // Extra latency cycles since wbck is 6 cycles
1482                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1483                                InstrStage<1, [A9_NPipe]>],
1484                               [5, 2]>,
1485   //
1486   // Quad-register FP Unary
1487   // Result written in N5, but that is relative to the last cycle of multicycle,
1488   // so we use 6 for those cases
1489   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1490                                InstrStage<1, [A9_MUX0], 0>,
1491                                InstrStage<1, [A9_DRegsN],   0, Required>,
1492                                // Extra latency cycles since wbck is 7 cycles
1493                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1494                                InstrStage<2, [A9_NPipe]>],
1495                               [6, 2]>,
1496   //
1497   // Double-register FP Binary
1498   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1499   // optimistic.
1500   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1501                                InstrStage<1, [A9_MUX0], 0>,
1502                                InstrStage<1, [A9_DRegsN],   0, Required>,
1503                                // Extra latency cycles since wbck is 6 cycles
1504                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1505                                InstrStage<1, [A9_NPipe]>],
1506                               [5, 2, 2]>,
1507
1508   //
1509   // VPADD, etc.
1510   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1511                                InstrStage<1, [A9_MUX0], 0>,
1512                                InstrStage<1, [A9_DRegsN],   0, Required>,
1513                                // Extra latency cycles since wbck is 6 cycles
1514                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1515                                InstrStage<1, [A9_NPipe]>],
1516                               [5, 1, 1]>,
1517   //
1518   // Double-register FP VMUL
1519   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1520                                InstrStage<1, [A9_MUX0], 0>,
1521                                InstrStage<1, [A9_DRegsN],   0, Required>,
1522                                // Extra latency cycles since wbck is 6 cycles
1523                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1524                                InstrStage<1, [A9_NPipe]>],
1525                               [5, 2, 1]>,
1526   //
1527   // Quad-register FP Binary
1528   // Result written in N5, but that is relative to the last cycle of multicycle,
1529   // so we use 6 for those cases
1530   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1531   // optimistic.
1532   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1533                                InstrStage<1, [A9_MUX0], 0>,
1534                                InstrStage<1, [A9_DRegsN],   0, Required>,
1535                                // Extra latency cycles since wbck is 7 cycles
1536                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1537                                InstrStage<2, [A9_NPipe]>],
1538                               [6, 2, 2]>,
1539   //
1540   // Quad-register FP VMUL
1541   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1542                                InstrStage<1, [A9_MUX0], 0>,
1543                                InstrStage<1, [A9_DRegsN],   0, Required>,
1544                                // Extra latency cycles since wbck is 7 cycles
1545                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1546                                InstrStage<1, [A9_NPipe]>],
1547                               [6, 2, 1]>,
1548   //
1549   // Double-register FP Multiply-Accumulate
1550   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1551                                InstrStage<1, [A9_MUX0], 0>,
1552                                InstrStage<1, [A9_DRegsN],   0, Required>,
1553                                // Extra latency cycles since wbck is 7 cycles
1554                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1555                                InstrStage<2, [A9_NPipe]>],
1556                               [6, 3, 2, 1]>,
1557   //
1558   // Quad-register FP Multiply-Accumulate
1559   // Result written in N9, but that is relative to the last cycle of multicycle,
1560   // so we use 10 for those cases
1561   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1562                                InstrStage<1, [A9_MUX0], 0>,
1563                                InstrStage<1, [A9_DRegsN],   0, Required>,
1564                                // Extra latency cycles since wbck is 9 cycles
1565                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1566                                InstrStage<4, [A9_NPipe]>],
1567                               [8, 4, 2, 1]>,
1568   //
1569   // Double-register Reciprocal Step
1570   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1571                                InstrStage<1, [A9_MUX0], 0>,
1572                                InstrStage<1, [A9_DRegsN],   0, Required>,
1573                                // Extra latency cycles since wbck is 10 cycles
1574                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1575                                InstrStage<1, [A9_NPipe]>],
1576                               [9, 2, 2]>,
1577   //
1578   // Quad-register Reciprocal Step
1579   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1580                                InstrStage<1, [A9_MUX0], 0>,
1581                                InstrStage<1, [A9_DRegsN],   0, Required>,
1582                                // Extra latency cycles since wbck is 11 cycles
1583                                InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1584                                InstrStage<2, [A9_NPipe]>],
1585                               [10, 2, 2]>,
1586   //
1587   // Double-register Permute
1588   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1589                                InstrStage<1, [A9_MUX0], 0>,
1590                                InstrStage<1, [A9_DRegsN],   0, Required>,
1591                                // Extra latency cycles since wbck is 6 cycles
1592                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1593                                InstrStage<1, [A9_NPipe]>],
1594                               [2, 2, 1, 1]>,
1595   //
1596   // Quad-register Permute
1597   // Result written in N2, but that is relative to the last cycle of multicycle,
1598   // so we use 3 for those cases
1599   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1600                                InstrStage<1, [A9_MUX0], 0>,
1601                                InstrStage<1, [A9_DRegsN],   0, Required>,
1602                                // Extra latency cycles since wbck is 7 cycles
1603                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1604                                InstrStage<2, [A9_NPipe]>],
1605                               [3, 3, 1, 1]>,
1606   //
1607   // Quad-register Permute (3 cycle issue)
1608   // Result written in N2, but that is relative to the last cycle of multicycle,
1609   // so we use 4 for those cases
1610   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1611                                InstrStage<1, [A9_MUX0], 0>,
1612                                InstrStage<1, [A9_DRegsN],   0, Required>,
1613                                // Extra latency cycles since wbck is 8 cycles
1614                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1615                                InstrStage<3, [A9_NPipe]>],
1616                               [4, 4, 1, 1]>,
1617
1618   //
1619   // Double-register VEXT
1620   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1621                                InstrStage<1, [A9_MUX0], 0>,
1622                                InstrStage<1, [A9_DRegsN],   0, Required>,
1623                                // Extra latency cycles since wbck is 6 cycles
1624                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1625                                InstrStage<1, [A9_NPipe]>],
1626                               [2, 1, 1]>,
1627   //
1628   // Quad-register VEXT
1629   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1630                                InstrStage<1, [A9_MUX0], 0>,
1631                                InstrStage<1, [A9_DRegsN],   0, Required>,
1632                                // Extra latency cycles since wbck is 7 cycles
1633                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1634                                InstrStage<2, [A9_NPipe]>],
1635                               [3, 1, 2]>,
1636   //
1637   // VTB
1638   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1639                                InstrStage<1, [A9_MUX0], 0>,
1640                                InstrStage<1, [A9_DRegsN],   0, Required>,
1641                                // Extra latency cycles since wbck is 7 cycles
1642                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1643                                InstrStage<2, [A9_NPipe]>],
1644                               [3, 2, 1]>,
1645   InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1646                                InstrStage<1, [A9_MUX0], 0>,
1647                                InstrStage<2, [A9_DRegsN],   0, Required>,
1648                                // Extra latency cycles since wbck is 7 cycles
1649                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1650                                InstrStage<2, [A9_NPipe]>],
1651                               [3, 2, 2, 1]>,
1652   InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1653                                InstrStage<1, [A9_MUX0], 0>,
                                    // NOTE(review): the NEON-side register set
                                    // is Required for 2 cycles here, whereas
                                    // IIC_VTB1 and IIC_VTB4 use 1 - confirm.
1654                                InstrStage<2, [A9_DRegsN],   0, Required>,
1655                                // Extra latency cycles: wbck is 8 cycles, so
                                    // the VFP-side register set is Reserved for 9.
1656                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                    // NEON pipeline is occupied for 3 cycles.
1657                                InstrStage<3, [A9_NPipe]>],
                                   // Operand cycles, one entry per operand.
1658                               [4, 2, 2, 3, 1]>,
1659   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1660                                InstrStage<1, [A9_MUX0], 0>,
                                    // NOTE(review): the NEON-side register set
                                    // is Required for 1 cycle here, whereas
                                    // IIC_VTB2 and IIC_VTB3 use 2 - confirm.
1661                                InstrStage<1, [A9_DRegsN],   0, Required>,
1662                                // Extra latency cycles: wbck is 8 cycles, so
                                    // the VFP-side register set is Reserved for 9.
1663                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                    // NEON pipeline is occupied for 3 cycles.
1664                                InstrStage<3, [A9_NPipe]>],
                                   // Operand cycles, one entry per operand.
1665                               [4, 2, 2, 3, 3, 1]>,
1666   //
1667   // VTBX
1668   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
                                    // A time increment of 0 means the stage that
                                    // follows starts in the same cycle.
1669                                InstrStage<1, [A9_MUX0], 0>,
1670                                InstrStage<1, [A9_DRegsN],   0, Required>,
1671                                // Extra latency cycles: wbck is 7 cycles, so
                                    // the VFP-side register set is Reserved for 8.
1672                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                    // NEON pipeline is occupied for 2 cycles.
1673                                InstrStage<2, [A9_NPipe]>],
                                   // Operand cycles; presumably the result comes
                                   // first, then the sources - TODO confirm.
1674                               [3, 1, 2, 1]>,
1675   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1676                                InstrStage<1, [A9_MUX0], 0>,
1677                                InstrStage<1, [A9_DRegsN],   0, Required>,
1678                                // Extra latency cycles: wbck is 7 cycles, so
                                    // the VFP-side register set is Reserved for 8.
1679                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
                                    // NEON pipeline is occupied for 2 cycles.
1680                                InstrStage<2, [A9_NPipe]>],
                                   // Operand cycles, one entry per operand.
1681                               [3, 1, 2, 2, 1]>,
1682   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1683                                InstrStage<1, [A9_MUX0], 0>,
1684                                InstrStage<1, [A9_DRegsN],   0, Required>,
1685                                // Extra latency cycles: wbck is 8 cycles, so
                                    // the VFP-side register set is Reserved for 9.
1686                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                    // NEON pipeline is occupied for 3 cycles.
1687                                InstrStage<3, [A9_NPipe]>],
                                   // Operand cycles, one entry per operand.
1688                               [4, 1, 2, 2, 3, 1]>,
1689   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1690                                InstrStage<1, [A9_MUX0], 0>,
1691                                InstrStage<1, [A9_DRegsN],   0, Required>,
1692                                // Extra latency cycles: wbck is 8 cycles, so
                                    // the VFP-side register set is Reserved for 9.
1693                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
                                    // NEON pipeline is occupied for 3 cycles,
                                    // matching the other 8-cycle-wbck entries
                                    // (IIC_VTB3, IIC_VTB4, IIC_VTBX3).
1694                                InstrStage<3, [A9_NPipe]>],
                                   // Operand cycles, one entry per operand.
1695                               [4, 1, 2, 2, 3, 3, 1]>
1696 ]>;