Fix the ARM IIC_iCMPsi itinerary and add an important assert.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units
19 def A9_Issue0  : FuncUnit; // Issue 0; with Issue 1, the first stage of the itineraries below
20 def A9_Issue1  : FuncUnit; // Issue 1
21 def A9_Branch  : FuncUnit; // Branch; final stage of the branch, load-multiple+branch and pop+branch itineraries
22 def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0; the only ALU listed by the integer multiply itineraries
23 def A9_ALU1    : FuncUnit; // ALU pipeline 1; not listed by any integer multiply itinerary
24 def A9_AGU     : FuncUnit; // Address generation unit for loads / stores
25 def A9_NPipe   : FuncUnit; // NEON pipeline; the VFP itineraries below also list it as their last stage
26 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer; a stage of both load/store and VFP itineraries
27 def A9_LSUnit  : FuncUnit; // Load/store unit
28 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial FU used to model VFP/NEON cross-domain stalls)
29 def A9_DRegsN  : FuncUnit; // FP register set, NEON side (artificial FU used to model VFP/NEON cross-domain stalls)
30
31 // Bypasses
32 def A9_LdBypass : Bypass; // Bypass named by the load, ALU, MVN and compare itineraries below
33
34 def CortexA9Itineraries : ProcessorItineraries<
35   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
36    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
37   [A9_LdBypass], [
38   // Two fully-pipelined integer ALU pipelines
39
40   //
41   // Move instructions, unconditional
42   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
43                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
44   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
45                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
46   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
48   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
50   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
52                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
53   //
54   // MVN instructions
55   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
56                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
57                               [1]>,
58   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
59                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
60                               [1, 1], [NoBypass, A9_LdBypass]>,
61   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
62                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
63                               [2, 1]>,
64   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
65                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
66                               [3, 1, 1]>,
67   //
68   // No operand cycles
69   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
70                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
71   //
72   // Binary Instructions that produce a result
73   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
74                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
75                             [1, 1], [NoBypass, A9_LdBypass]>,
76   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
77                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
78                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
79   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
80                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
81                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
82   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
83                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
84                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
85   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
86                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
87                             [3, 1, 1, 1],
88                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
89   //
90   // Bitwise Instructions that produce a result
91   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
92                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
93   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
94                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
95   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
97   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
98                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
99   //
100   // Unary Instructions that produce a result
101
102   // CLZ, RBIT, etc.
103   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
104                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
105
106   // BFC, BFI, UBFX, SBFX
107   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
109
110   //
111   // Zero and sign extension instructions
112   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
113                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
114   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
115                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
116   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
117                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
118   //
119   // Compare instructions
120   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
121                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
122                                [1], [A9_LdBypass]>,
123   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
124                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
125                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
126   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
127                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
128                                 [1, 1], [A9_LdBypass, NoBypass]>,
129   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
130                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
131                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
132   //
133   // Test instructions
134   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
136   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
138   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
139                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
140   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
142   //
143   // Move instructions, conditional
144   // FIXME: Correctly model the extra input dep on the destination.
145   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
146                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
147   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
148                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
149   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
150                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
151   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
152                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
153   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
154                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
155                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
156                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
157
158   // Integer multiply pipeline
159   //
160   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
161                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
162   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
163                                InstrStage<2, [A9_ALU0]>],
164                               [3, 1, 1, 1]>,
165   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
167   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
168                                InstrStage<2, [A9_ALU0]>],
169                               [4, 1, 1, 1]>,
170   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
171                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
172   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
173                                InstrStage<3, [A9_ALU0]>],
174                               [4, 5, 1, 1]>,
175   // Integer load pipeline
176   // FIXME: The timings are some rough approximations
177   //
178   // Immediate offset
179   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
180                                  InstrStage<1, [A9_MUX0], 0>,
181                                  InstrStage<1, [A9_AGU], 0>,
182                                  InstrStage<1, [A9_LSUnit]>],
183                                 [3, 1], [A9_LdBypass]>,
184   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
185                                  InstrStage<1, [A9_MUX0], 0>,
186                                  InstrStage<2, [A9_AGU], 0>,
187                                  InstrStage<1, [A9_LSUnit]>],
188                                 [4, 1], [A9_LdBypass]>,
189   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
190   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
191                                  InstrStage<1, [A9_MUX0], 0>,
192                                  InstrStage<2, [A9_AGU], 0>,
193                                  InstrStage<1, [A9_LSUnit]>],
194                                 [3, 3, 1], [A9_LdBypass]>,
195   //
196   // Register offset
197   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
198                                  InstrStage<1, [A9_MUX0], 0>,
199                                  InstrStage<1, [A9_AGU], 0>,
200                                  InstrStage<1, [A9_LSUnit]>],
201                                 [3, 1, 1], [A9_LdBypass]>,
202   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
203                                  InstrStage<1, [A9_MUX0], 0>,
204                                  InstrStage<2, [A9_AGU], 0>,
205                                  InstrStage<1, [A9_LSUnit]>],
206                                 [4, 1, 1], [A9_LdBypass]>,
207   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
208                                  InstrStage<1, [A9_MUX0], 0>,
209                                  InstrStage<2, [A9_AGU], 0>,
210                                  InstrStage<1, [A9_LSUnit]>],
211                                 [3, 3, 1, 1], [A9_LdBypass]>,
212   //
213   // Scaled register offset
214   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
215                                  InstrStage<1, [A9_MUX0], 0>,
216                                  InstrStage<1, [A9_AGU], 0>,
217                                  InstrStage<1, [A9_LSUnit], 0>],
218                                 [4, 1, 1], [A9_LdBypass]>,
219   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
220                                  InstrStage<1, [A9_MUX0], 0>,
221                                  InstrStage<2, [A9_AGU], 0>,
222                                  InstrStage<1, [A9_LSUnit]>],
223                                 [5, 1, 1], [A9_LdBypass]>,
224   //
225   // Immediate offset with update
226   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
227                                  InstrStage<1, [A9_MUX0], 0>,
228                                  InstrStage<1, [A9_AGU], 0>,
229                                  InstrStage<1, [A9_LSUnit]>],
230                                 [3, 2, 1], [A9_LdBypass]>,
231   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
232                                  InstrStage<1, [A9_MUX0], 0>,
233                                  InstrStage<2, [A9_AGU], 0>,
234                                  InstrStage<1, [A9_LSUnit]>],
235                                 [4, 3, 1], [A9_LdBypass]>,
236   //
237   // Register offset with update
238   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
239                                  InstrStage<1, [A9_MUX0], 0>,
240                                  InstrStage<1, [A9_AGU], 0>,
241                                  InstrStage<1, [A9_LSUnit]>],
242                                 [3, 2, 1, 1], [A9_LdBypass]>,
243   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
244                                  InstrStage<1, [A9_MUX0], 0>,
245                                  InstrStage<2, [A9_AGU], 0>,
246                                  InstrStage<1, [A9_LSUnit]>],
247                                 [4, 3, 1, 1], [A9_LdBypass]>,
248   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
249                                  InstrStage<1, [A9_MUX0], 0>,
250                                  InstrStage<2, [A9_AGU], 0>,
251                                  InstrStage<1, [A9_LSUnit]>],
252                                 [3, 3, 1, 1], [A9_LdBypass]>,
253   //
254   // Scaled register offset with update
255   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
256                                  InstrStage<1, [A9_MUX0], 0>,
257                                  InstrStage<1, [A9_AGU], 0>,
258                                  InstrStage<1, [A9_LSUnit]>],
259                                 [4, 3, 1, 1], [A9_LdBypass]>,
260   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
261                                   InstrStage<1, [A9_MUX0], 0>,
262                                   InstrStage<2, [A9_AGU], 0>,
263                                   InstrStage<1, [A9_LSUnit]>],
264                                  [5, 4, 1, 1], [A9_LdBypass]>,
265   //
266   // Load multiple, def is the 5th operand.
267   // FIXME: This assumes 3 to 4 registers.
268   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
269                                 InstrStage<1, [A9_MUX0], 0>,
270                                 InstrStage<2, [A9_AGU], 1>,
271                                 InstrStage<2, [A9_LSUnit]>],
272                                [1, 1, 1, 1, 3],
273                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
274   //
275   // Load multiple + update, defs are the 1st and 5th operands.
276   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
277                                 InstrStage<1, [A9_MUX0], 0>,
278                                 InstrStage<2, [A9_AGU], 1>,
279                                 InstrStage<2, [A9_LSUnit]>],
280                                [2, 1, 1, 1, 3],
281                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
282   //
283   // Load multiple plus branch
284   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
285                                 InstrStage<1, [A9_MUX0], 0>,
286                                 InstrStage<1, [A9_AGU], 1>,
287                                 InstrStage<2, [A9_LSUnit]>,
288                                 InstrStage<1, [A9_Branch]>],
289                                [1, 2, 1, 1, 3],
290                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
291   //
292   // Pop, def is the 3rd operand.
293   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
294                                 InstrStage<1, [A9_MUX0], 0>,
295                                 InstrStage<2, [A9_AGU], 1>,
296                                 InstrStage<2, [A9_LSUnit]>],
297                                [1, 1, 3],
298                                [NoBypass, NoBypass, A9_LdBypass]>,
299   //
300   // Pop + branch, def is the 3rd operand.
301   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
302                                 InstrStage<1, [A9_MUX0], 0>,
303                                 InstrStage<2, [A9_AGU], 1>,
304                                 InstrStage<2, [A9_LSUnit]>,
305                                 InstrStage<1, [A9_Branch]>],
306                                [1, 1, 3],
307                                [NoBypass, NoBypass, A9_LdBypass]>,
308
309   //
310   // iLoadi + iALUr for t2LDRpci_pic.
311   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
312                                 InstrStage<1, [A9_MUX0], 0>,
313                                 InstrStage<1, [A9_AGU], 0>,
314                                 InstrStage<1, [A9_LSUnit]>,
315                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
316                                [2, 1]>,
317
318   // Integer store pipeline
319   //
320   // Immediate offset
321   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322                                  InstrStage<1, [A9_MUX0], 0>,
323                                  InstrStage<1, [A9_AGU], 0>,
324                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
325   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
326                                  InstrStage<1, [A9_MUX0], 0>,
327                                  InstrStage<2, [A9_AGU], 1>,
328                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
329   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
330   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
331                                  InstrStage<1, [A9_MUX0], 0>,
332                                  InstrStage<2, [A9_AGU], 1>,
333                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
334   //
335   // Register offset
336   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
337                                  InstrStage<1, [A9_MUX0], 0>,
338                                  InstrStage<1, [A9_AGU], 0>,
339                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
340   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341                                  InstrStage<1, [A9_MUX0], 0>,
342                                  InstrStage<2, [A9_AGU], 1>,
343                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
344   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
345                                  InstrStage<1, [A9_MUX0], 0>,
346                                  InstrStage<2, [A9_AGU], 1>,
347                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
348   //
349   // Scaled register offset
350   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351                                   InstrStage<1, [A9_MUX0], 0>,
352                                   InstrStage<1, [A9_AGU], 0>,
353                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
354   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
355                                   InstrStage<1, [A9_MUX0], 0>,
356                                   InstrStage<2, [A9_AGU], 1>,
357                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
358   //
359   // Immediate offset with update
360   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361                                   InstrStage<1, [A9_MUX0], 0>,
362                                   InstrStage<1, [A9_AGU], 0>,
363                                   InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
364   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
365                                   InstrStage<1, [A9_MUX0], 0>,
366                                   InstrStage<2, [A9_AGU], 1>,
367                                   InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
368   //
369   // Register offset with update
370   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
371                                   InstrStage<1, [A9_MUX0], 0>,
372                                   InstrStage<1, [A9_AGU], 0>,
373                                   InstrStage<1, [A9_LSUnit]>],
374                                  [2, 1, 1, 1]>,
375   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
376                                   InstrStage<1, [A9_MUX0], 0>,
377                                   InstrStage<2, [A9_AGU], 1>,
378                                   InstrStage<1, [A9_LSUnit]>],
379                                  [3, 1, 1, 1]>,
380   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
381                                   InstrStage<1, [A9_MUX0], 0>,
382                                   InstrStage<2, [A9_AGU], 1>,
383                                   InstrStage<1, [A9_LSUnit]>],
384                                  [3, 1, 1, 1]>,
385   //
386   // Scaled register offset with update
387   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
388                                     InstrStage<1, [A9_MUX0], 0>,
389                                     InstrStage<1, [A9_AGU], 0>,
390                                     InstrStage<1, [A9_LSUnit]>],
391                                    [2, 1, 1, 1]>,
392   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
393                                     InstrStage<1, [A9_MUX0], 0>,
394                                     InstrStage<2, [A9_AGU], 1>,
395                                     InstrStage<1, [A9_LSUnit]>],
396                                    [3, 1, 1, 1]>,
397   //
398   // Store multiple
399   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
400                                 InstrStage<1, [A9_MUX0], 0>,
401                                 InstrStage<1, [A9_AGU], 0>,
402                                 InstrStage<2, [A9_LSUnit]>]>,
403   //
404   // Store multiple + update
405   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
406                                 InstrStage<1, [A9_MUX0], 0>,
407                                 InstrStage<1, [A9_AGU], 0>,
408                                 InstrStage<2, [A9_LSUnit]>], [2]>,
409
410   //
411   // Preload
412   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
413
414   // Branch
415   //
416   // no delay slots, so the latency of a branch is unimportant
417   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
418                                 InstrStage<1, [A9_Issue1], 0>,
419                                 InstrStage<1, [A9_Branch]>]>,
420
421   // VFP and NEON share the same register file. This means that every VFP
422   // instruction should wait for full completion of the consecutive NEON
423   // instruction and vice-versa. We model this behavior with two artificial FUs:
424   // DRegsVFP and DRegsN.
425   //
426   // Every VFP instruction:
427   //  - Acquires DRegsVFP resource for 1 cycle
428   //  - Reserves DRegsN resource for the whole duration (including time to
429   //    register file writeback!).
430   // Every NEON instruction does the same but with FUs swapped.
431   //
432   // Since the reserved FU cannot be acquired, this models precisely
433   // "cross-domain" stalls.
434
435   // VFP
436   // Issue through integer pipeline, and execute in NEON unit.
437
438   // FP Special Register to Integer Register File Move
439   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
440                               InstrStage<1, [A9_MUX0], 0>,
441                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
442                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
443                               InstrStage<1, [A9_NPipe]>],
444                              [1]>,
445   //
446   // Single-precision FP Unary
447   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
448                                InstrStage<1, [A9_MUX0], 0>,
449                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
450                                // Extra latency cycles since wbck is 2 cycles
451                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
452                                InstrStage<1, [A9_NPipe]>],
453                               [1, 1]>,
454   //
455   // Double-precision FP Unary
456   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
457                                InstrStage<1, [A9_MUX0], 0>,
458                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
459                                // Extra latency cycles since wbck is 2 cycles
460                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
461                                InstrStage<1, [A9_NPipe]>],
462                               [1, 1]>,
463
464   //
465   // Single-precision FP Compare
466   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
467                                InstrStage<1, [A9_MUX0], 0>,
468                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
469                                // Extra latency cycles since wbck is 4 cycles
470                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
471                                InstrStage<1, [A9_NPipe]>],
472                               [1, 1]>,
473   //
474   // Double-precision FP Compare
475   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
476                                InstrStage<1, [A9_MUX0], 0>,
477                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
478                                // Extra latency cycles since wbck is 4 cycles
479                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
480                                InstrStage<1, [A9_NPipe]>],
481                               [1, 1]>,
482   //
483   // Single to Double FP Convert
484   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
485                                InstrStage<1, [A9_MUX0], 0>,
486                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
487                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
488                                InstrStage<1, [A9_NPipe]>],
489                               [4, 1]>,
490   //
491   // Double to Single FP Convert
492   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
493                                InstrStage<1, [A9_MUX0], 0>,
494                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
495                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
496                                InstrStage<1, [A9_NPipe]>],
497                               [4, 1]>,
498
499   //
500   // Single to Half FP Convert
501   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
502                                InstrStage<1, [A9_MUX0], 0>,
503                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
504                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
505                                InstrStage<1, [A9_NPipe]>],
506                               [4, 1]>,
507   //
508   // Half to Single FP Convert
509   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
510                                InstrStage<1, [A9_MUX0], 0>,
511                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
512                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
513                                InstrStage<1, [A9_NPipe]>],
514                               [2, 1]>,
515
516   //
517   // Single-Precision FP to Integer Convert
518   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
519                                InstrStage<1, [A9_MUX0], 0>,
520                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
521                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
522                                InstrStage<1, [A9_NPipe]>],
523                               [4, 1]>,
524   //
525   // Double-Precision FP to Integer Convert
526   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
527                                InstrStage<1, [A9_MUX0], 0>,
528                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
529                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
530                                InstrStage<1, [A9_NPipe]>],
531                               [4, 1]>,
532   //
533   // Integer to Single-Precision FP Convert
534   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
535                                InstrStage<1, [A9_MUX0], 0>,
536                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
537                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
538                                InstrStage<1, [A9_NPipe]>],
539                               [4, 1]>,
540   //
541   // Integer to Double-Precision FP Convert
542   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
543                                InstrStage<1, [A9_MUX0], 0>,
544                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
545                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
546                                InstrStage<1, [A9_NPipe]>],
547                               [4, 1]>,
548   //
549   // Single-precision FP ALU
550   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
551                                InstrStage<1, [A9_MUX0], 0>,
552                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
553                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
554                                InstrStage<1, [A9_NPipe]>],
555                               [4, 1, 1]>,
556   //
557   // Double-precision FP ALU
558   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
559                                InstrStage<1, [A9_MUX0], 0>,
560                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
561                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
562                                InstrStage<1, [A9_NPipe]>],
563                               [4, 1, 1]>,
564   //
565   // Single-precision FP Multiply
566   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
567                                InstrStage<1, [A9_MUX0], 0>,
568                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
569                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
570                                InstrStage<1, [A9_NPipe]>],
571                               [5, 1, 1]>,
572   //
573   // Double-precision FP Multiply
574   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
575                                InstrStage<1, [A9_MUX0], 0>,
576                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
577                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
578                                InstrStage<2, [A9_NPipe]>],
579                               [6, 1, 1]>,
580   //
581   // Single-precision FP MAC
582   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
583                                InstrStage<1, [A9_MUX0], 0>,
584                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
585                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
586                                InstrStage<1, [A9_NPipe]>],
587                               [8, 1, 1, 1]>,
588   //
589   // Double-precision FP MAC
590   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
591                                InstrStage<1,  [A9_MUX0], 0>,
592                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
593                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
594                                InstrStage<2,  [A9_NPipe]>],
595                               [9, 1, 1, 1]>,
596   //
597   // Single-precision FP DIV
598   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
599                                InstrStage<1,  [A9_MUX0], 0>,
600                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
601                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
602                                InstrStage<10, [A9_NPipe]>],
603                               [15, 1, 1]>,
604   //
605   // Double-precision FP DIV
606   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
607                                InstrStage<1,  [A9_MUX0], 0>,
608                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
609                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
610                                InstrStage<20, [A9_NPipe]>],
611                               [25, 1, 1]>,
612   //
613   // Single-precision FP SQRT
614   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
615                                InstrStage<1,  [A9_MUX0], 0>,
616                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
617                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
618                                InstrStage<13, [A9_NPipe]>],
619                               [17, 1]>,
620   //
621   // Double-precision FP SQRT
622   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
623                                InstrStage<1,  [A9_MUX0], 0>,
624                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
625                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
626                                InstrStage<28, [A9_NPipe]>],
627                               [32, 1]>,
628
629   //
630   // Integer to Single-precision Move
631   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
632                                InstrStage<1, [A9_MUX0], 0>,
633                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
634                                // 1 extra latency cycle: writeback is 2 cycles
635                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
636                                InstrStage<1, [A9_NPipe]>],
637                               [1, 1]>,
638   //
639   // Integer to Double-precision Move
640   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
641                                InstrStage<1, [A9_MUX0], 0>,
642                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
643                                // 1 extra latency cycle: writeback is 2 cycles
644                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
645                                InstrStage<1, [A9_NPipe]>],
646                               [1, 1, 1]>,
647   //
648   // Single-precision to Integer Move
649   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
650                                InstrStage<1, [A9_MUX0], 0>,
651                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
652                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
653                                InstrStage<1, [A9_NPipe]>],
654                               [2, 1]>,
655   //
656   // Double-precision to Integer Move
657   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
658                                InstrStage<1, [A9_MUX0], 0>,
659                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
660                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
661                                InstrStage<1, [A9_NPipe]>],
662                               [2, 1, 1]>,
663   //
664   // Single-precision FP Load
665   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
666                                InstrStage<1, [A9_MUX0], 0>,
667                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
668                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
669                                InstrStage<1, [A9_NPipe], 0>,
670                                InstrStage<1, [A9_LSUnit]>],
671                               [1, 1]>,
672   //
673   // Double-precision FP Load
674   // FIXME: Result latency is 1 if address is 64-bit aligned.
675   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676                                InstrStage<1, [A9_MUX0], 0>,
677                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
678                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
679                                InstrStage<1, [A9_NPipe], 0>,
680                                InstrStage<1, [A9_LSUnit]>],
681                               [2, 1]>,
682   //
683   // FP Load Multiple
684   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
685                                InstrStage<1, [A9_MUX0], 0>,
686                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
687                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
688                                InstrStage<1, [A9_NPipe], 0>,
689                                InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
690   //
691   // FP Load Multiple + update
692   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
693                                InstrStage<1, [A9_MUX0], 0>,
694                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
695                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
696                                InstrStage<1, [A9_NPipe], 0>,
697                                InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
698   //
699   // Single-precision FP Store
700   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
701                                InstrStage<1, [A9_MUX0], 0>,
702                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
703                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
704                                InstrStage<1, [A9_NPipe], 0>,
705                                InstrStage<1, [A9_LSUnit]>],
706                               [1, 1]>,
707   //
708   // Double-precision FP Store
709   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
710                                InstrStage<1, [A9_MUX0], 0>,
711                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
712                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
713                                InstrStage<1, [A9_NPipe], 0>,
714                                InstrStage<1, [A9_LSUnit]>],
715                               [1, 1]>,
716   //
717   // FP Store Multiple
718   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
719                                InstrStage<1, [A9_MUX0], 0>,
720                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
721                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
722                                InstrStage<1, [A9_NPipe], 0>,
723                                InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
724   //
725   // FP Store Multiple + update
726   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
727                                 InstrStage<1, [A9_MUX0], 0>,
728                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
729                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
730                                 InstrStage<1, [A9_NPipe], 0>,
731                                 InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
732   // NEON
733   // VLD1
734   // FIXME: Conservatively assume insufficient alignment.
735   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
736                                InstrStage<1, [A9_MUX0], 0>,
737                                InstrStage<1, [A9_DRegsN],   0, Required>,
738                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
739                                InstrStage<2, [A9_NPipe], 0>,
740                                InstrStage<2, [A9_LSUnit]>],
741                               [2, 1]>,
742   // VLD1x2
743   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
744                                InstrStage<1, [A9_MUX0], 0>,
745                                InstrStage<1, [A9_DRegsN],   0, Required>,
746                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
747                                InstrStage<2, [A9_NPipe], 0>,
748                                InstrStage<2, [A9_LSUnit]>],
749                               [2, 2, 1]>,
750   // VLD1x3
751   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
752                                InstrStage<1, [A9_MUX0], 0>,
753                                InstrStage<1, [A9_DRegsN],   0, Required>,
754                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
755                                InstrStage<3, [A9_NPipe], 0>,
756                                InstrStage<3, [A9_LSUnit]>],
757                               [2, 2, 3, 1]>,
758   // VLD1x4
759   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
760                                InstrStage<1, [A9_MUX0], 0>,
761                                InstrStage<1, [A9_DRegsN],   0, Required>,
762                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
763                                InstrStage<3, [A9_NPipe], 0>,
764                                InstrStage<3, [A9_LSUnit]>],
765                               [2, 2, 3, 3, 1]>,
766   // VLD1u
767   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
768                                InstrStage<1, [A9_MUX0], 0>,
769                                InstrStage<1, [A9_DRegsN],   0, Required>,
770                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
771                                InstrStage<2, [A9_NPipe], 0>,
772                                InstrStage<2, [A9_LSUnit]>],
773                               [2, 2, 1]>,
774   // VLD1x2u
775   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
776                                InstrStage<1, [A9_MUX0], 0>,
777                                InstrStage<1, [A9_DRegsN],   0, Required>,
778                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
779                                InstrStage<2, [A9_NPipe], 0>,
780                                InstrStage<2, [A9_LSUnit]>],
781                               [2, 2, 2, 1]>,
782   // VLD1x3u
783   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
784                                InstrStage<1, [A9_MUX0], 0>,
785                                InstrStage<1, [A9_DRegsN],   0, Required>,
786                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
787                                InstrStage<3, [A9_NPipe], 0>,
788                                InstrStage<3, [A9_LSUnit]>],
789                               [2, 2, 3, 2, 1]>,
790   // VLD1x4u
791   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
792                                InstrStage<1, [A9_MUX0], 0>,
793                                InstrStage<1, [A9_DRegsN],   0, Required>,
794                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
795                                InstrStage<3, [A9_NPipe], 0>,
796                                InstrStage<3, [A9_LSUnit]>],
797                               [2, 2, 3, 3, 2, 1]>,
798   //
799   // VLD1ln
800   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
801                                InstrStage<1, [A9_MUX0], 0>,
802                                InstrStage<1, [A9_DRegsN],   0, Required>,
803                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
804                                InstrStage<3, [A9_NPipe], 0>,
805                                InstrStage<3, [A9_LSUnit]>],
806                               [4, 1, 1, 1]>,
807   //
808   // VLD1lnu
809   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
810                                InstrStage<1, [A9_MUX0], 0>,
811                                InstrStage<1, [A9_DRegsN],   0, Required>,
812                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
813                                InstrStage<3, [A9_NPipe], 0>,
814                                InstrStage<3, [A9_LSUnit]>],
815                               [4, 2, 1, 1, 1, 1]>,
816   //
817   // VLD1dup
818   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
819                                InstrStage<1, [A9_MUX0], 0>,
820                                InstrStage<1, [A9_DRegsN],   0, Required>,
821                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
822                                InstrStage<2, [A9_NPipe], 0>,
823                                InstrStage<2, [A9_LSUnit]>],
824                               [3, 1]>,
825   //
826   // VLD1dupu
827   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
828                                InstrStage<1, [A9_MUX0], 0>,
829                                InstrStage<1, [A9_DRegsN],   0, Required>,
830                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
831                                InstrStage<2, [A9_NPipe], 0>,
832                                InstrStage<2, [A9_LSUnit]>],
833                               [3, 2, 1, 1]>,
834   //
835   // VLD2
836   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
837                                InstrStage<1, [A9_MUX0], 0>,
838                                InstrStage<1, [A9_DRegsN],   0, Required>,
839                                // Extra latency cycles: writeback is 7 cycles
840                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
841                                InstrStage<2, [A9_NPipe], 0>,
842                                InstrStage<2, [A9_LSUnit]>],
843                               [3, 3, 1]>,
844   //
845   // VLD2x2
846   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
847                                InstrStage<1, [A9_MUX0], 0>,
848                                InstrStage<1, [A9_DRegsN],   0, Required>,
849                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
850                                InstrStage<3, [A9_NPipe], 0>,
851                                InstrStage<3, [A9_LSUnit]>],
852                               [3, 4, 3, 4, 1]>,
853   //
854   // VLD2ln
855   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
856                                InstrStage<1, [A9_MUX0], 0>,
857                                InstrStage<1, [A9_DRegsN],   0, Required>,
858                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
859                                InstrStage<3, [A9_NPipe], 0>,
860                                InstrStage<3, [A9_LSUnit]>],
861                               [4, 4, 1, 1, 1, 1]>,
862   //
863   // VLD2u
864   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
865                                InstrStage<1, [A9_MUX0], 0>,
866                                InstrStage<1, [A9_DRegsN],   0, Required>,
867                                // Extra latency cycles: writeback is 7 cycles
868                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
869                                InstrStage<2, [A9_NPipe], 0>,
870                                InstrStage<2, [A9_LSUnit]>],
871                               [3, 3, 2, 1, 1, 1]>,
872   //
873   // VLD2x2u
874   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
875                                InstrStage<1, [A9_MUX0], 0>,
876                                InstrStage<1, [A9_DRegsN],   0, Required>,
877                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
878                                InstrStage<3, [A9_NPipe], 0>,
879                                InstrStage<3, [A9_LSUnit]>],
880                               [3, 4, 3, 4, 2, 1]>,
881   //
882   // VLD2lnu
883   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
884                                InstrStage<1, [A9_MUX0], 0>,
885                                InstrStage<1, [A9_DRegsN],   0, Required>,
886                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
887                                InstrStage<3, [A9_NPipe], 0>,
888                                InstrStage<3, [A9_LSUnit]>],
889                               [4, 4, 2, 1, 1, 1, 1, 1]>,
890   //
891   // VLD2dup
892   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
893                                InstrStage<1, [A9_MUX0], 0>,
894                                InstrStage<1, [A9_DRegsN],   0, Required>,
895                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
896                                InstrStage<2, [A9_NPipe], 0>,
897                                InstrStage<2, [A9_LSUnit]>],
898                               [3, 3, 1]>,
899   //
900   // VLD2dupu
901   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
902                                InstrStage<1, [A9_MUX0], 0>,
903                                InstrStage<1, [A9_DRegsN],   0, Required>,
904                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
905                                InstrStage<2, [A9_NPipe], 0>,
906                                InstrStage<2, [A9_LSUnit]>],
907                               [3, 3, 2, 1, 1]>,
908   //
909   // VLD3
910   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
911                                InstrStage<1, [A9_MUX0], 0>,
912                                InstrStage<1, [A9_DRegsN],   0, Required>,
913                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
914                                InstrStage<4, [A9_NPipe], 0>,
915                                InstrStage<4, [A9_LSUnit]>],
916                               [4, 4, 5, 1]>,
917   //
918   // VLD3ln
919   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
920                                InstrStage<1, [A9_MUX0], 0>,
921                                InstrStage<1, [A9_DRegsN],   0, Required>,
922                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
923                                InstrStage<5, [A9_NPipe], 0>,
924                                InstrStage<5, [A9_LSUnit]>],
925                               [5, 5, 6, 1, 1, 1, 1, 2]>,
926   //
927   // VLD3u
928   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
929                                InstrStage<1, [A9_MUX0], 0>,
930                                InstrStage<1, [A9_DRegsN],   0, Required>,
931                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
932                                InstrStage<4, [A9_NPipe], 0>,
933                                InstrStage<4, [A9_LSUnit]>],
934                               [4, 4, 5, 2, 1]>,
935   //
936   // VLD3lnu
937   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
938                                InstrStage<1, [A9_MUX0], 0>,
939                                InstrStage<1, [A9_DRegsN],   0, Required>,
940                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
941                                InstrStage<5, [A9_NPipe], 0>,
942                                InstrStage<5, [A9_LSUnit]>],
943                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
944   //
945   // VLD3dup
946   InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
947                                InstrStage<1, [A9_MUX0], 0>,
948                                InstrStage<1, [A9_DRegsN],   0, Required>,
949                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
950                                InstrStage<3, [A9_NPipe], 0>,
951                                InstrStage<3, [A9_LSUnit]>],
952                               [3, 3, 4, 1]>,
953   //
954   // VLD3dupu
955   InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
956                                InstrStage<1, [A9_MUX0], 0>,
957                                InstrStage<1, [A9_DRegsN],   0, Required>,
958                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
959                                InstrStage<3, [A9_NPipe], 0>,
960                                InstrStage<3, [A9_LSUnit]>],
961                               [3, 3, 4, 2, 1, 1]>,
962   //
963   // VLD4
964   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
965                                InstrStage<1, [A9_MUX0], 0>,
966                                InstrStage<1, [A9_DRegsN],   0, Required>,
967                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
968                                InstrStage<4, [A9_NPipe], 0>,
969                                InstrStage<4, [A9_LSUnit]>],
970                               [4, 4, 5, 5, 1]>,
971   //
972   // VLD4ln
973   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
974                                InstrStage<1, [A9_MUX0], 0>,
975                                InstrStage<1, [A9_DRegsN],   0, Required>,
976                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
977                                InstrStage<5, [A9_NPipe], 0>,
978                                InstrStage<5, [A9_LSUnit]>],
979                               [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
980   //
981   // VLD4u
982   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
983                                InstrStage<1, [A9_MUX0], 0>,
984                                InstrStage<1, [A9_DRegsN],   0, Required>,
985                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
986                                InstrStage<4, [A9_NPipe], 0>,
987                                InstrStage<4, [A9_LSUnit]>],
988                               [4, 4, 5, 5, 2, 1]>,
989   //
990   // VLD4lnu
991   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
992                                InstrStage<1, [A9_MUX0], 0>,
993                                InstrStage<1, [A9_DRegsN],   0, Required>,
994                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
995                                InstrStage<5, [A9_NPipe], 0>,
996                                InstrStage<5, [A9_LSUnit]>],
997                               [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
998   //
999   // VLD4dup
1000   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1001                                InstrStage<1, [A9_MUX0], 0>,
1002                                InstrStage<1, [A9_DRegsN],   0, Required>,
1003                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1004                                InstrStage<3, [A9_NPipe], 0>,
1005                                InstrStage<3, [A9_LSUnit]>],
1006                               [3, 3, 4, 4, 1]>,
1007   //
1008   // VLD4dupu
1009   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1010                                InstrStage<1, [A9_MUX0], 0>,
1011                                InstrStage<1, [A9_DRegsN],   0, Required>,
1012                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1013                                InstrStage<3, [A9_NPipe], 0>,
1014                                InstrStage<3, [A9_LSUnit]>],
1015                               [3, 3, 4, 4, 2, 1, 1]>,
1016   //
1017   // VST1
1018   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1019                                InstrStage<1, [A9_MUX0], 0>,
1020                                InstrStage<1, [A9_DRegsN],   0, Required>,
1021                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1022                                InstrStage<2, [A9_NPipe], 0>,
1023                                InstrStage<2, [A9_LSUnit]>],
1024                               [1, 1, 1]>,
1025   //
1026   // VST1x2
1027   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1028                                InstrStage<1, [A9_MUX0], 0>,
1029                                InstrStage<1, [A9_DRegsN],   0, Required>,
1030                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1031                                InstrStage<2, [A9_NPipe], 0>,
1032                                InstrStage<2, [A9_LSUnit]>],
1033                               [1, 1, 1, 1]>,
1034   //
1035   // VST1x3
1036   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1037                                InstrStage<1, [A9_MUX0], 0>,
1038                                InstrStage<1, [A9_DRegsN],   0, Required>,
1039                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1040                                InstrStage<3, [A9_NPipe], 0>,
1041                                InstrStage<3, [A9_LSUnit]>],
1042                               [1, 1, 1, 1, 2]>,
1043   //
1044   // VST1x4
1045   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1046                                InstrStage<1, [A9_MUX0], 0>,
1047                                InstrStage<1, [A9_DRegsN],   0, Required>,
1048                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1049                                InstrStage<3, [A9_NPipe], 0>,
1050                                InstrStage<3, [A9_LSUnit]>],
1051                               [1, 1, 1, 1, 2, 2]>,
1052   //
1053   // VST1u
1054   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1055                                InstrStage<1, [A9_MUX0], 0>,
1056                                InstrStage<1, [A9_DRegsN],   0, Required>,
1057                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1058                                InstrStage<2, [A9_NPipe], 0>,
1059                                InstrStage<2, [A9_LSUnit]>],
1060                               [2, 1, 1, 1, 1]>,
1061   //
1062   // VST1x2u
1063   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1064                                InstrStage<1, [A9_MUX0], 0>,
1065                                InstrStage<1, [A9_DRegsN],   0, Required>,
1066                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1067                                InstrStage<2, [A9_NPipe], 0>,
1068                                InstrStage<2, [A9_LSUnit]>],
1069                               [2, 1, 1, 1, 1, 1]>,
1070   //
1071   // VST1x3u (VFP register set reserved 3 cycles, like VST1x3 and VST1x4u)
1072   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1073                                InstrStage<1, [A9_MUX0], 0>,
1074                                InstrStage<1, [A9_DRegsN],   0, Required>,
1075                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1076                                InstrStage<3, [A9_NPipe], 0>,
1077                                InstrStage<3, [A9_LSUnit]>],
1078                               [2, 1, 1, 1, 1, 1, 2]>,
1079   //
1080   // VST1x4u
1081   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1082                                InstrStage<1, [A9_MUX0], 0>,
1083                                InstrStage<1, [A9_DRegsN],   0, Required>,
1084                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1085                                InstrStage<3, [A9_NPipe], 0>,
1086                                InstrStage<3, [A9_LSUnit]>],
1087                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1088   //
1089   // VST1ln
1090   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1091                                InstrStage<1, [A9_MUX0], 0>,
1092                                InstrStage<1, [A9_DRegsN],   0, Required>,
1093                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1094                                InstrStage<2, [A9_NPipe], 0>,
1095                                InstrStage<2, [A9_LSUnit]>],
1096                               [1, 1, 1]>,
1097   //
1098   // VST1lnu
1099   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1100                                InstrStage<1, [A9_MUX0], 0>,
1101                                InstrStage<1, [A9_DRegsN],   0, Required>,
1102                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1103                                InstrStage<3, [A9_NPipe], 0>,
1104                                InstrStage<3, [A9_LSUnit]>],
1105                               [2, 1, 1, 1, 1]>,
1106   //
1107   // VST2
1108   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1109                                InstrStage<1, [A9_MUX0], 0>,
1110                                InstrStage<1, [A9_DRegsN],   0, Required>,
1111                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1112                                InstrStage<2, [A9_NPipe], 0>,
1113                                InstrStage<2, [A9_LSUnit]>],
1114                               [1, 1, 1, 1]>,
1115   //
1116   // VST2x2 (IIC_VST2x2): A9_DRegsVFP, A9_NPipe and A9_LSUnit are each
       // occupied for 3 cycles; six operand cycles are listed.
1117   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1118                                InstrStage<1, [A9_MUX0], 0>,
1119                                InstrStage<1, [A9_DRegsN],   0, Required>,
1120                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1121                                InstrStage<3, [A9_NPipe], 0>,
1122                                InstrStage<3, [A9_LSUnit]>],
1123                               [1, 1, 1, 1, 2, 2]>,
1124   //
1125   // VST2u (IIC_VST2u): A9_DRegsVFP, A9_NPipe and A9_LSUnit are each occupied
       // for 2 cycles; six operand cycles are listed.
1126   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1127                                InstrStage<1, [A9_MUX0], 0>,
1128                                InstrStage<1, [A9_DRegsN],   0, Required>,
1129                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1130                                InstrStage<2, [A9_NPipe], 0>,
1131                                InstrStage<2, [A9_LSUnit]>],
1132                               [2, 1, 1, 1, 1, 1]>,
1133   //
1134   // VST2x2u (IIC_VST2x2u): A9_DRegsVFP, A9_NPipe and A9_LSUnit are each
       // occupied for 3 cycles; eight operand cycles are listed.
1135   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1136                                InstrStage<1, [A9_MUX0], 0>,
1137                                InstrStage<1, [A9_DRegsN],   0, Required>,
1138                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1139                                InstrStage<3, [A9_NPipe], 0>,
1140                                InstrStage<3, [A9_LSUnit]>],
1141                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1142   //
1143   // VST2ln (IIC_VST2ln): same stages and operand cycles as IIC_VST2, i.e.
       // 2 cycles on A9_DRegsVFP, A9_NPipe and A9_LSUnit and four operand cycles.
1144   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1145                                InstrStage<1, [A9_MUX0], 0>,
1146                                InstrStage<1, [A9_DRegsN],   0, Required>,
1147                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1148                                InstrStage<2, [A9_NPipe], 0>,
1149                                InstrStage<2, [A9_LSUnit]>],
1150                               [1, 1, 1, 1]>,
1151   //
1152   // VST2lnu (IIC_VST2lnu): A9_DRegsVFP, A9_NPipe and A9_LSUnit are each
       // occupied for 3 cycles (IIC_VST2ln uses 2); six operand cycles are listed.
1153   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1154                                InstrStage<1, [A9_MUX0], 0>,
1155                                InstrStage<1, [A9_DRegsN],   0, Required>,
1156                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1157                                InstrStage<3, [A9_NPipe], 0>,
1158                                InstrStage<3, [A9_LSUnit]>],
1159                               [2, 1, 1, 1, 1, 1]>,
1160   //
1161   // VST3 (IIC_VST3): A9_DRegsVFP, A9_NPipe and A9_LSUnit are each occupied
       // for 3 cycles; five operand cycles are listed.
1162   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1163                                InstrStage<1, [A9_MUX0], 0>,
1164                                InstrStage<1, [A9_DRegsN],   0, Required>,
1165                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1166                                InstrStage<3, [A9_NPipe], 0>,
1167                                InstrStage<3, [A9_LSUnit]>],
1168                               [1, 1, 1, 1, 2]>,
1169   //
1170   // VST3u (IIC_VST3u): A9_DRegsVFP, A9_NPipe and A9_LSUnit are each occupied
       // for 3 cycles; seven operand cycles are listed.
1171   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1172                                InstrStage<1, [A9_MUX0], 0>,
1173                                InstrStage<1, [A9_DRegsN],   0, Required>,
1174                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1175                                InstrStage<3, [A9_NPipe], 0>,
1176                                InstrStage<3, [A9_LSUnit]>],
1177                               [2, 1, 1, 1, 1, 1, 2]>,
1178   //
1179   // VST3ln (IIC_VST3ln): same stages and operand cycles as IIC_VST3, i.e.
       // 3 cycles on A9_DRegsVFP, A9_NPipe and A9_LSUnit and five operand cycles.
1180   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1181                                InstrStage<1, [A9_MUX0], 0>,
1182                                InstrStage<1, [A9_DRegsN],   0, Required>,
1183                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1184                                InstrStage<3, [A9_NPipe], 0>,
1185                                InstrStage<3, [A9_LSUnit]>],
1186                               [1, 1, 1, 1, 2]>,
1187   //
1188   // VST3lnu (IIC_VST3lnu): same stages and operand cycles as IIC_VST3u, i.e.
       // 3 cycles on A9_DRegsVFP, A9_NPipe and A9_LSUnit and seven operand cycles.
1189   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1190                                InstrStage<1, [A9_MUX0], 0>,
1191                                InstrStage<1, [A9_DRegsN],   0, Required>,
1192                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1193                                InstrStage<3, [A9_NPipe], 0>,
1194                                InstrStage<3, [A9_LSUnit]>],
1195                               [2, 1, 1, 1, 1, 1, 2]>,
1196   //
1197   // VST4 (IIC_VST4): A9_DRegsVFP, A9_NPipe and A9_LSUnit are each occupied
       // for 3 cycles; six operand cycles are listed.
1198   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1199                                InstrStage<1, [A9_MUX0], 0>,
1200                                InstrStage<1, [A9_DRegsN],   0, Required>,
1201                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1202                                InstrStage<3, [A9_NPipe], 0>,
1203                                InstrStage<3, [A9_LSUnit]>],
1204                               [1, 1, 1, 1, 2, 2]>,
1205   //
1206   // VST4u (IIC_VST4u): A9_DRegsVFP, A9_NPipe and A9_LSUnit are each occupied
       // for 3 cycles; eight operand cycles are listed.
1207   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1208                                InstrStage<1, [A9_MUX0], 0>,
1209                                InstrStage<1, [A9_DRegsN],   0, Required>,
1210                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1211                                InstrStage<3, [A9_NPipe], 0>,
1212                                InstrStage<3, [A9_LSUnit]>],
1213                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1214   //
1215   // VST4ln (IIC_VST4ln): same stages and operand cycles as IIC_VST4, i.e.
       // 3 cycles on A9_DRegsVFP, A9_NPipe and A9_LSUnit and six operand cycles.
1216   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1217                                InstrStage<1, [A9_MUX0], 0>,
1218                                InstrStage<1, [A9_DRegsN],   0, Required>,
1219                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1220                                InstrStage<3, [A9_NPipe], 0>,
1221                                InstrStage<3, [A9_LSUnit]>],
1222                               [1, 1, 1, 1, 2, 2]>,
1223   //
1224   // VST4lnu (IIC_VST4lnu): same stages and operand cycles as IIC_VST4u, i.e.
       // 3 cycles on A9_DRegsVFP, A9_NPipe and A9_LSUnit and eight operand cycles.
1225   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1226                                InstrStage<1, [A9_MUX0], 0>,
1227                                InstrStage<1, [A9_DRegsN],   0, Required>,
1228                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1229                                InstrStage<3, [A9_NPipe], 0>,
1230                                InstrStage<3, [A9_LSUnit]>],
1231                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1232
1233   //
1234   // Double-register Integer Unary (IIC_VUNAiD): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 7 cycles; operand cycles [4, 2].
1235   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1236                                InstrStage<1, [A9_MUX0], 0>,
1237                                InstrStage<1, [A9_DRegsN],   0, Required>,
1238                                // Extra latency cycles since wbck is 6 cycles
1239                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1240                                InstrStage<1, [A9_NPipe]>],
1241                               [4, 2]>,
1242   //
1243   // Quad-register Integer Unary (IIC_VUNAiQ): stages and operand cycles are
       // identical to the double-register entry IIC_VUNAiD.
1244   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1245                                InstrStage<1, [A9_MUX0], 0>,
1246                                InstrStage<1, [A9_DRegsN],   0, Required>,
1247                                // Extra latency cycles since wbck is 6 cycles
1248                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1249                                InstrStage<1, [A9_NPipe]>],
1250                               [4, 2]>,
1251   //
1252   // Double-register Integer Q-Unary (IIC_VQUNAiD): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 7 cycles; operand cycles [4, 1].
1253   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1254                                InstrStage<1, [A9_MUX0], 0>,
1255                                InstrStage<1, [A9_DRegsN],   0, Required>,
1256                                // Extra latency cycles since wbck is 6 cycles
1257                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1258                                InstrStage<1, [A9_NPipe]>],
1259                               [4, 1]>,
1260   //
1261   // Quad-register Integer Q-Unary (IIC_VQUNAiQ): stages and operand cycles
       // are identical to the double-register entry IIC_VQUNAiD.
1262   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1263                                InstrStage<1, [A9_MUX0], 0>,
1264                                InstrStage<1, [A9_DRegsN],   0, Required>,
1265                                // Extra latency cycles since wbck is 6 cycles
1266                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1267                                InstrStage<1, [A9_NPipe]>],
1268                               [4, 1]>,
1269   //
1270   // Double-register Integer Binary (IIC_VBINiD): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 7 cycles; operand cycles [3, 2, 2].
1271   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1272                                InstrStage<1, [A9_MUX0], 0>,
1273                                InstrStage<1, [A9_DRegsN],   0, Required>,
1274                                // Extra latency cycles since wbck is 6 cycles
1275                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1276                                InstrStage<1, [A9_NPipe]>],
1277                               [3, 2, 2]>,
1278   //
1279   // Quad-register Integer Binary (IIC_VBINiQ): stages and operand cycles are
       // identical to the double-register entry IIC_VBINiD.
1280   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1281                                InstrStage<1, [A9_MUX0], 0>,
1282                                InstrStage<1, [A9_DRegsN],   0, Required>,
1283                                // Extra latency cycles since wbck is 6 cycles
1284                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1285                                InstrStage<1, [A9_NPipe]>],
1286                               [3, 2, 2]>,
1287   //
1288   // Double-register Integer Subtract (IIC_VSUBiD): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 7 cycles; operand cycles [3, 2, 1].
1289   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1290                                InstrStage<1, [A9_MUX0], 0>,
1291                                InstrStage<1, [A9_DRegsN],   0, Required>,
1292                                // Extra latency cycles since wbck is 6 cycles
1293                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1294                                InstrStage<1, [A9_NPipe]>],
1295                               [3, 2, 1]>,
1296   //
1297   // Quad-register Integer Subtract (IIC_VSUBiQ): stages and operand cycles
       // are identical to the double-register entry IIC_VSUBiD.
1298   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1299                                InstrStage<1, [A9_MUX0], 0>,
1300                                InstrStage<1, [A9_DRegsN],   0, Required>,
1301                                // Extra latency cycles since wbck is 6 cycles
1302                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1303                                InstrStage<1, [A9_NPipe]>],
1304                               [3, 2, 1]>,
1305   //
1306   // Double-register Integer Shift (IIC_VSHLiD): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 7 cycles; operand cycles [3, 1, 1].
1307   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1308                                InstrStage<1, [A9_MUX0], 0>,
1309                                InstrStage<1, [A9_DRegsN],   0, Required>,
1310                                // Extra latency cycles since wbck is 6 cycles
1311                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1312                                InstrStage<1, [A9_NPipe]>],
1313                               [3, 1, 1]>,
1314   //
1315   // Quad-register Integer Shift (IIC_VSHLiQ): stages and operand cycles are
       // identical to the double-register entry IIC_VSHLiD.
1316   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1317                                InstrStage<1, [A9_MUX0], 0>,
1318                                InstrStage<1, [A9_DRegsN],   0, Required>,
1319                                // Extra latency cycles since wbck is 6 cycles
1320                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1321                                InstrStage<1, [A9_NPipe]>],
1322                               [3, 1, 1]>,
1323   //
1324   // Double-register Integer Shift (4 cycle) (IIC_VSHLi4D): same stages as
       // IIC_VSHLiD, but the first operand cycle is 4 instead of 3.
1325   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1326                                InstrStage<1, [A9_MUX0], 0>,
1327                                InstrStage<1, [A9_DRegsN],   0, Required>,
1328                                // Extra latency cycles since wbck is 6 cycles
1329                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1330                                InstrStage<1, [A9_NPipe]>],
1331                               [4, 1, 1]>,
1332   //
1333   // Quad-register Integer Shift (4 cycle) (IIC_VSHLi4Q): stages and operand
       // cycles are identical to the double-register entry IIC_VSHLi4D.
1334   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1335                                InstrStage<1, [A9_MUX0], 0>,
1336                                InstrStage<1, [A9_DRegsN],   0, Required>,
1337                                // Extra latency cycles since wbck is 6 cycles
1338                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1339                                InstrStage<1, [A9_NPipe]>],
1340                               [4, 1, 1]>,
1341   //
1342   // Double-register Integer Binary (4 cycle) (IIC_VBINi4D): same stages as
       // IIC_VBINiD, but the first operand cycle is 4 instead of 3.
1343   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1344                                InstrStage<1, [A9_MUX0], 0>,
1345                                InstrStage<1, [A9_DRegsN],   0, Required>,
1346                                // Extra latency cycles since wbck is 6 cycles
1347                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1348                                InstrStage<1, [A9_NPipe]>],
1349                               [4, 2, 2]>,
1350   //
1351   // Quad-register Integer Binary (4 cycle) (IIC_VBINi4Q): stages and operand
       // cycles are identical to the double-register entry IIC_VBINi4D.
1352   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1353                                InstrStage<1, [A9_MUX0], 0>,
1354                                InstrStage<1, [A9_DRegsN],   0, Required>,
1355                                // Extra latency cycles since wbck is 6 cycles
1356                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1357                                InstrStage<1, [A9_NPipe]>],
1358                               [4, 2, 2]>,
1359   //
1360   // Double-register Integer Subtract (4 cycle) (IIC_VSUBi4D): same stages as
       // IIC_VSUBiD, but the first operand cycle is 4 instead of 3.
1361   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1362                                InstrStage<1, [A9_MUX0], 0>,
1363                                InstrStage<1, [A9_DRegsN],   0, Required>,
1364                                // Extra latency cycles since wbck is 6 cycles
1365                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1366                                InstrStage<1, [A9_NPipe]>],
1367                               [4, 2, 1]>,
1368   //
1369   // Quad-register Integer Subtract (4 cycle) (IIC_VSUBi4Q): stages and
       // operand cycles are identical to the double-register entry IIC_VSUBi4D.
1370   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1371                                InstrStage<1, [A9_MUX0], 0>,
1372                                InstrStage<1, [A9_DRegsN],   0, Required>,
1373                                // Extra latency cycles since wbck is 6 cycles
1374                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1375                                InstrStage<1, [A9_NPipe]>],
1376                               [4, 2, 1]>,
1377
1378   //
1379   // Double-register Integer Count (IIC_VCNTiD): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 7 cycles; operand cycles [3, 2, 2].
1380   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1381                                InstrStage<1, [A9_MUX0], 0>,
1382                                InstrStage<1, [A9_DRegsN],   0, Required>,
1383                                // Extra latency cycles since wbck is 6 cycles
1384                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1385                                InstrStage<1, [A9_NPipe]>],
1386                               [3, 2, 2]>,
1387   //
1388   // Quad-register Integer Count (IIC_VCNTiQ): 2 cycles on A9_NPipe.
1389   // Result written in N3, but that is relative to the last cycle of multicycle,
1390   // so we use 4 for those cases (the first entry of the operand cycles below).
1391   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1392                                InstrStage<1, [A9_MUX0], 0>,
1393                                InstrStage<1, [A9_DRegsN],   0, Required>,
1394                                // Extra latency cycles since wbck is 7 cycles
1395                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1396                                InstrStage<2, [A9_NPipe]>],
1397                               [4, 2, 2]>,
1398   //
1399   // Double-register Absolute Difference and Accumulate (IIC_VABAD): 1 cycle
       // on A9_NPipe, A9_DRegsVFP reserved for 7 cycles; operand cycles [6, 3, 2, 1].
1400   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1401                                InstrStage<1, [A9_MUX0], 0>,
1402                                InstrStage<1, [A9_DRegsN],   0, Required>,
1403                                // Extra latency cycles since wbck is 6 cycles
1404                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1405                                InstrStage<1, [A9_NPipe]>],
1406                               [6, 3, 2, 1]>,
1407   //
1408   // Quad-register Absolute Difference and Accumulate (IIC_VABAQ): 2 cycles on
       // A9_NPipe; otherwise the same stages and operand cycles as IIC_VABAD.
       // NOTE(review): other entries that hold A9_NPipe for 2 cycles (e.g.
       // IIC_VCNTiQ) use a 7-cycle wbck / 8-cycle reservation; confirm that 6 / 7
       // is intended here.
1409   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1410                                InstrStage<1, [A9_MUX0], 0>,
1411                                InstrStage<1, [A9_DRegsN],   0, Required>,
1412                                // Extra latency cycles since wbck is 6 cycles
1413                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1414                                InstrStage<2, [A9_NPipe]>],
1415                               [6, 3, 2, 1]>,
1416   //
1417   // Double-register Integer Pair Add Long (IIC_VPALiD): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 7 cycles; operand cycles [6, 3, 1].
1418   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1419                                InstrStage<1, [A9_MUX0], 0>,
1420                                InstrStage<1, [A9_DRegsN],   0, Required>,
1421                                // Extra latency cycles since wbck is 6 cycles
1422                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1423                                InstrStage<1, [A9_NPipe]>],
1424                               [6, 3, 1]>,
1425   //
1426   // Quad-register Integer Pair Add Long (IIC_VPALiQ): 2 cycles on A9_NPipe;
       // otherwise the same stages and operand cycles as IIC_VPALiD.
       // NOTE(review): other entries that hold A9_NPipe for 2 cycles (e.g.
       // IIC_VCNTiQ) use a 7-cycle wbck / 8-cycle reservation; confirm that 6 / 7
       // is intended here.
1427   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1428                                InstrStage<1, [A9_MUX0], 0>,
1429                                InstrStage<1, [A9_DRegsN],   0, Required>,
1430                                // Extra latency cycles since wbck is 6 cycles
1431                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1432                                InstrStage<2, [A9_NPipe]>],
1433                               [6, 3, 1]>,
1434
1435   //
1436   // Double-register Integer Multiply (.8, .16) (IIC_VMULi16D): 1 cycle on
       // A9_NPipe, A9_DRegsVFP reserved for 7 cycles; operand cycles [6, 2, 2].
1437   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1438                                InstrStage<1, [A9_MUX0], 0>,
1439                                InstrStage<1, [A9_DRegsN],   0, Required>,
1440                                // Extra latency cycles since wbck is 6 cycles
1441                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1442                                InstrStage<1, [A9_NPipe]>],
1443                               [6, 2, 2]>,
1444   //
1445   // Quad-register Integer Multiply (.8, .16) (IIC_VMULi16Q): 2 cycles on
       // A9_NPipe, A9_DRegsVFP reserved for 8 cycles; operand cycles [7, 2, 2].
1446   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1447                                InstrStage<1, [A9_MUX0], 0>,
1448                                InstrStage<1, [A9_DRegsN],   0, Required>,
1449                                // Extra latency cycles since wbck is 7 cycles
1450                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1451                                InstrStage<2, [A9_NPipe]>],
1452                               [7, 2, 2]>,
1453
1454   //
1455   // Double-register Integer Multiply (.32) (IIC_VMULi32D): 2 cycles on
       // A9_NPipe, A9_DRegsVFP reserved for 8 cycles; operand cycles [7, 2, 1].
1456   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1457                                InstrStage<1, [A9_MUX0], 0>,
1458                                InstrStage<1, [A9_DRegsN],   0, Required>,
1459                                // Extra latency cycles since wbck is 7 cycles
1460                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1461                                InstrStage<2, [A9_NPipe]>],
1462                               [7, 2, 1]>,
1463   //
1464   // Quad-register Integer Multiply (.32) (IIC_VMULi32Q): 4 cycles on
       // A9_NPipe, A9_DRegsVFP reserved for 10 cycles; operand cycles [9, 2, 1].
1465   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1466                                InstrStage<1, [A9_MUX0], 0>,
1467                                InstrStage<1, [A9_DRegsN],   0, Required>,
1468                                // Extra latency cycles since wbck is 9 cycles
1469                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1470                                InstrStage<4, [A9_NPipe]>],
1471                               [9, 2, 1]>,
1472   //
1473   // Double-register Integer Multiply-Accumulate (.8, .16) (IIC_VMACi16D):
       // 1 cycle on A9_NPipe, A9_DRegsVFP reserved for 7 cycles; operand cycles
       // [6, 3, 2, 2].
1474   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1475                                InstrStage<1, [A9_MUX0], 0>,
1476                                InstrStage<1, [A9_DRegsN],   0, Required>,
1477                                // Extra latency cycles since wbck is 6 cycles
1478                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1479                                InstrStage<1, [A9_NPipe]>],
1480                               [6, 3, 2, 2]>,
1481   //
1482   // Double-register Integer Multiply-Accumulate (.32) (IIC_VMACi32D):
       // 2 cycles on A9_NPipe, A9_DRegsVFP reserved for 8 cycles; operand cycles
       // [7, 3, 2, 1].
1483   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1484                                InstrStage<1, [A9_MUX0], 0>,
1485                                InstrStage<1, [A9_DRegsN],   0, Required>,
1486                                // Extra latency cycles since wbck is 7 cycles
1487                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1488                                InstrStage<2, [A9_NPipe]>],
1489                               [7, 3, 2, 1]>,
1490   //
1491   // Quad-register Integer Multiply-Accumulate (.8, .16) (IIC_VMACi16Q):
       // 2 cycles on A9_NPipe, A9_DRegsVFP reserved for 8 cycles; operand cycles
       // [7, 3, 2, 2].
1492   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1493                                InstrStage<1, [A9_MUX0], 0>,
1494                                InstrStage<1, [A9_DRegsN],   0, Required>,
1495                                // Extra latency cycles since wbck is 7 cycles
1496                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1497                                InstrStage<2, [A9_NPipe]>],
1498                               [7, 3, 2, 2]>,
1499   //
1500   // Quad-register Integer Multiply-Accumulate (.32) (IIC_VMACi32Q):
       // 4 cycles on A9_NPipe, A9_DRegsVFP reserved for 10 cycles; operand cycles
       // [9, 3, 2, 1].
1501   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1502                                InstrStage<1, [A9_MUX0], 0>,
1503                                InstrStage<1, [A9_DRegsN],   0, Required>,
1504                                // Extra latency cycles since wbck is 9 cycles
1505                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1506                                InstrStage<4, [A9_NPipe]>],
1507                               [9, 3, 2, 1]>,
1508
1509   //
1510   // Move (IIC_VMOV): every stage lasts a single cycle, including the
       // A9_DRegsVFP reservation; operand cycles [1, 1].
1511   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1512                                InstrStage<1, [A9_MUX0], 0>,
1513                                InstrStage<1, [A9_DRegsN],   0, Required>,
1514                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1515                                InstrStage<1, [A9_NPipe]>],
1516                               [1,1]>,
1517   //
1518   // Move Immediate (IIC_VMOVImm): 1 cycle on A9_NPipe, A9_DRegsVFP reserved
       // for 7 cycles; a single operand cycle (3) is listed.
1519   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1520                                InstrStage<1, [A9_MUX0], 0>,
1521                                InstrStage<1, [A9_DRegsN],   0, Required>,
1522                                // Extra latency cycles since wbck is 6 cycles
1523                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1524                                InstrStage<1, [A9_NPipe]>],
1525                               [3]>,
1526   //
1527   // Double-register Permute Move (IIC_VMOVD): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 7 cycles; operand cycles [2, 1].
1528   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1529                                InstrStage<1, [A9_MUX0], 0>,
1530                                InstrStage<1, [A9_DRegsN],   0, Required>,
1531                                // Extra latency cycles since wbck is 6 cycles
1532                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1533                                InstrStage<1, [A9_NPipe]>],
1534                               [2, 1]>,
1535   //
1536   // Quad-register Permute Move (IIC_VMOVQ): stages and operand cycles are
       // identical to the double-register entry IIC_VMOVD.
1537   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1538                                InstrStage<1, [A9_MUX0], 0>,
1539                                InstrStage<1, [A9_DRegsN],   0, Required>,
1540                                // Extra latency cycles since wbck is 6 cycles
1541                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1542                                InstrStage<1, [A9_NPipe]>],
1543                               [2, 1]>,
1544   //
1545   // Integer to Single-precision Move (IIC_VMOVIS): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 3 cycles; operand cycles [1, 1].
1546   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1547                                InstrStage<1, [A9_MUX0], 0>,
1548                                InstrStage<1, [A9_DRegsN],   0, Required>,
1549                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1550                                InstrStage<1, [A9_NPipe]>],
1551                               [1, 1]>,
1552   //
1553   // Integer to Double-precision Move (IIC_VMOVID): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 3 cycles; operand cycles [1, 1, 1].
1554   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1555                                InstrStage<1, [A9_MUX0], 0>,
1556                                InstrStage<1, [A9_DRegsN],   0, Required>,
1557                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1558                                InstrStage<1, [A9_NPipe]>],
1559                               [1, 1, 1]>,
1560   //
1561   // Single-precision to Integer Move (IIC_VMOVSI): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 3 cycles; operand cycles [2, 1].
1562   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1563                                InstrStage<1, [A9_MUX0], 0>,
1564                                InstrStage<1, [A9_DRegsN],   0, Required>,
1565                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1566                                InstrStage<1, [A9_NPipe]>],
1567                               [2, 1]>,
1568   //
1569   // Double-precision to Integer Move (IIC_VMOVDI): 1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 3 cycles; operand cycles [2, 2, 1].
1570   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1571                                InstrStage<1, [A9_MUX0], 0>,
1572                                InstrStage<1, [A9_DRegsN],   0, Required>,
1573                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1574                                InstrStage<1, [A9_NPipe]>],
1575                               [2, 2, 1]>,
1576   //
1577   // Integer to Lane Move (IIC_VMOVISL): 2 cycles on A9_NPipe, A9_DRegsVFP
       // reserved for 4 cycles; operand cycles [3, 1, 1].
1578   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1579                                InstrStage<1, [A9_MUX0], 0>,
1580                                InstrStage<1, [A9_DRegsN],   0, Required>,
1581                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1582                                InstrStage<2, [A9_NPipe]>],
1583                               [3, 1, 1]>,
1584
1585   //
1586   // Vector narrow move (IIC_VMOVN): 1 cycle on A9_NPipe, A9_DRegsVFP reserved
       // for 7 cycles; operand cycles [3, 1].
1587   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1588                                InstrStage<1, [A9_MUX0], 0>,
1589                                InstrStage<1, [A9_DRegsN],   0, Required>,
1590                                // Extra latency cycles since wbck is 6 cycles
1591                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1592                                InstrStage<1, [A9_NPipe]>],
1593                               [3, 1]>,
1594   //
1595   // Double-register FP Unary (IIC_VUNAD): 1 cycle on A9_NPipe, A9_DRegsVFP
       // reserved for 7 cycles; operand cycles [5, 2].
1596   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1597                                InstrStage<1, [A9_MUX0], 0>,
1598                                InstrStage<1, [A9_DRegsN],   0, Required>,
1599                                // Extra latency cycles since wbck is 6 cycles
1600                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1601                                InstrStage<1, [A9_NPipe]>],
1602                               [5, 2]>,
1603   //
1604   // Quad-register FP Unary (IIC_VUNAQ): 2 cycles on A9_NPipe.
1605   // Result written in N5, but that is relative to the last cycle of multicycle,
1606   // so we use 6 for those cases (the first entry of the operand cycles below).
1607   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1608                                InstrStage<1, [A9_MUX0], 0>,
1609                                InstrStage<1, [A9_DRegsN],   0, Required>,
1610                                // Extra latency cycles since wbck is 7 cycles
1611                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1612                                InstrStage<2, [A9_NPipe]>],
1613                               [6, 2]>,
1614   //
1615   // Double-register FP Binary (IIC_VBIND): 1 cycle on A9_NPipe.
1616   // FIXME: We're using this itin for many instructions and [2, 2] here (the
1617   // trailing two operand cycles of [5, 2, 2]) is too optimistic.
1618   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1619                                InstrStage<1, [A9_MUX0], 0>,
1620                                InstrStage<1, [A9_DRegsN],   0, Required>,
1621                                // Extra latency cycles since wbck is 6 cycles
1622                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1623                                InstrStage<1, [A9_NPipe]>],
1624                               [5, 2, 2]>,
1625
1626   //
1627   // VPADD, etc. (IIC_VPBIND): same stages as IIC_VBIND (1 cycle on A9_NPipe,
       // A9_DRegsVFP reserved for 7 cycles); operand cycles [5, 1, 1].
1628   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1629                                InstrStage<1, [A9_MUX0], 0>,
1630                                InstrStage<1, [A9_DRegsN],   0, Required>,
1631                                // Extra latency cycles since wbck is 6 cycles
1632                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1633                                InstrStage<1, [A9_NPipe]>],
1634                               [5, 1, 1]>,
1635   //
1636   // Double-register FP VMUL (IIC_VFMULD): 1 cycle on A9_NPipe, A9_DRegsVFP
       // reserved for 7 cycles; operand cycles [5, 2, 1].
1637   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1638                                InstrStage<1, [A9_MUX0], 0>,
1639                                InstrStage<1, [A9_DRegsN],   0, Required>,
1640                                // Extra latency cycles since wbck is 6 cycles
1641                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1642                                InstrStage<1, [A9_NPipe]>],
1643                               [5, 2, 1]>,
1644   //
1645   // Quad-register FP Binary (IIC_VBINQ): 2 cycles on A9_NPipe.
1646   // Result written in N5, but that is relative to the last cycle of multicycle,
1647   // so we use 6 for those cases (the first entry of the operand cycles below).
1648   // FIXME: We're using this itin for many instructions and [2, 2] here (the
1649   // trailing two operand cycles of [6, 2, 2]) is too optimistic.
1650   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1651                                InstrStage<1, [A9_MUX0], 0>,
1652                                InstrStage<1, [A9_DRegsN],   0, Required>,
1653                                // Extra latency cycles since wbck is 7 cycles
1654                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1655                                InstrStage<2, [A9_NPipe]>],
1656                               [6, 2, 2]>,
1657   //
1658   // Quad-register FP VMUL (IIC_VFMULQ): A9_DRegsVFP reserved for 8 cycles;
       // operand cycles [6, 2, 1].
       // NOTE(review): A9_NPipe is held for only 1 cycle here, whereas IIC_VBINQ
       // (same 7-cycle wbck / 8-cycle reservation) holds it for 2; confirm that
       // this difference is intended.
1659   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1660                                InstrStage<1, [A9_MUX0], 0>,
1661                                InstrStage<1, [A9_DRegsN],   0, Required>,
1662                                // Extra latency cycles since wbck is 7 cycles
1663                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1664                                InstrStage<1, [A9_NPipe]>],
1665                               [6, 2, 1]>,
1666   //
1667   // Double-register FP Multiply-Accumulate
1668   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1669                                InstrStage<1, [A9_MUX0], 0>,
1670                                InstrStage<1, [A9_DRegsN],   0, Required>,
1671                                // Extra latency cycles since wbck is 7 cycles
1672                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1673                                InstrStage<2, [A9_NPipe]>],
1674                               [6, 3, 2, 1]>,
1675   //
1676   // Quad-register FP Multiply-Accumulate
1677   // Result written in N9, but that is relative to the last cycle of multicycle,
1678   // so we use 10 for those cases
1679   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1680                                InstrStage<1, [A9_MUX0], 0>,
1681                                InstrStage<1, [A9_DRegsN],   0, Required>,
1682                                // Extra latency cycles since wbck is 9 cycles
1683                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1684                                InstrStage<4, [A9_NPipe]>],
1685                               [8, 4, 2, 1]>,
1686   //
1687   // Double-register Reciprocal Step
1688   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1689                                InstrStage<1, [A9_MUX0], 0>,
1690                                InstrStage<1, [A9_DRegsN],   0, Required>,
1691                                // Extra latency cycles since wbck is 10 cycles
1692                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1693                                InstrStage<1, [A9_NPipe]>],
1694                               [9, 2, 2]>,
1695   //
1696   // Quad-register Reciprocal Step
1697   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1698                                InstrStage<1, [A9_MUX0], 0>,
1699                                InstrStage<1, [A9_DRegsN],   0, Required>,
1700                                // Extra latency cycles since wbck is 11 cycles
1701                                InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1702                                InstrStage<2, [A9_NPipe]>],
1703                               [10, 2, 2]>,
1704   //
1705   // Double-register Permute
1706   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1707                                InstrStage<1, [A9_MUX0], 0>,
1708                                InstrStage<1, [A9_DRegsN],   0, Required>,
1709                                // Extra latency cycles since wbck is 6 cycles
1710                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1711                                InstrStage<1, [A9_NPipe]>],
1712                               [2, 2, 1, 1]>,
1713   //
1714   // Quad-register Permute
1715   // Result written in N2, but that is relative to the last cycle of multicycle,
1716   // so we use 3 for those cases
1717   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1718                                InstrStage<1, [A9_MUX0], 0>,
1719                                InstrStage<1, [A9_DRegsN],   0, Required>,
1720                                // Extra latency cycles since wbck is 7 cycles
1721                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1722                                InstrStage<2, [A9_NPipe]>],
1723                               [3, 3, 1, 1]>,
1724   //
1725   // Quad-register Permute (3 cycle issue)
1726   // Result written in N2, but that is relative to the last cycle of multicycle,
1727   // so we use 4 for those cases
1728   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1729                                InstrStage<1, [A9_MUX0], 0>,
1730                                InstrStage<1, [A9_DRegsN],   0, Required>,
1731                                // Extra latency cycles since wbck is 8 cycles
1732                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1733                                InstrStage<3, [A9_NPipe]>],
1734                               [4, 4, 1, 1]>,
1735
1736   //
1737   // Double-register VEXT
1738   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1739                                InstrStage<1, [A9_MUX0], 0>,
1740                                InstrStage<1, [A9_DRegsN],   0, Required>,
1741                                // Extra latency cycles since wbck is 6 cycles
1742                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1743                                InstrStage<1, [A9_NPipe]>],
1744                               [2, 1, 1]>,
1745   //
1746   // Quad-register VEXT
1747   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1748                                InstrStage<1, [A9_MUX0], 0>,
1749                                InstrStage<1, [A9_DRegsN],   0, Required>,
1750                                // Extra latency cycles since wbck is 7 cycles
1751                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1752                                InstrStage<2, [A9_NPipe]>],
1753                               [3, 1, 2]>,
1754   //
1755   // VTB
1756   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1757                                InstrStage<1, [A9_MUX0], 0>,
1758                                InstrStage<1, [A9_DRegsN],   0, Required>,
1759                                // Extra latency cycles since wbck is 7 cycles
1760                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1761                                InstrStage<2, [A9_NPipe]>],
1762                               [3, 2, 1]>,
1763   InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1764                                InstrStage<1, [A9_MUX0], 0>,
1765                                InstrStage<2, [A9_DRegsN],   0, Required>,
1766                                // Extra latency cycles since wbck is 7 cycles
1767                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1768                                InstrStage<2, [A9_NPipe]>],
1769                               [3, 2, 2, 1]>,
1770   InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1771                                InstrStage<1, [A9_MUX0], 0>,
1772                                InstrStage<2, [A9_DRegsN],   0, Required>,
1773                                // Extra latency cycles since wbck is 8 cycles
1774                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1775                                InstrStage<3, [A9_NPipe]>],
1776                               [4, 2, 2, 3, 1]>,
1777   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1778                                InstrStage<1, [A9_MUX0], 0>,
1779                                InstrStage<1, [A9_DRegsN],   0, Required>,
1780                                // Extra latency cycles since wbck is 8 cycles
1781                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1782                                InstrStage<3, [A9_NPipe]>],
1783                               [4, 2, 2, 3, 3, 1]>,
1784   //
1785   // VTBX
1786   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1787                                InstrStage<1, [A9_MUX0], 0>,
1788                                InstrStage<1, [A9_DRegsN],   0, Required>,
1789                                // Extra latency cycles since wbck is 7 cycles
1790                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1791                                InstrStage<2, [A9_NPipe]>],
1792                               [3, 1, 2, 1]>,
1793   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1794                                InstrStage<1, [A9_MUX0], 0>,
1795                                InstrStage<1, [A9_DRegsN],   0, Required>,
1796                                // Extra latency cycles since wbck is 7 cycles
1797                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1798                                InstrStage<2, [A9_NPipe]>],
1799                               [3, 1, 2, 2, 1]>,
1800   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1801                                InstrStage<1, [A9_MUX0], 0>,
1802                                InstrStage<1, [A9_DRegsN],   0, Required>,
1803                                // Extra latency cycles since wbck is 8 cycles
1804                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1805                                InstrStage<3, [A9_NPipe]>],
1806                               [4, 1, 2, 2, 3, 1]>,
1807   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1808                                InstrStage<1, [A9_MUX0], 0>,
1809                                InstrStage<1, [A9_DRegsN],   0, Required>,
1810                                // Extra latency cycles since wbck is 8 cycles
1811                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1812                                InstrStage<2, [A9_NPipe]>],
1813                               [4, 1, 2, 2, 3, 3, 1]>
1814 ]>;