Revert "Make NumMicroOps a variable in the subtarget's instruction itinerary."
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
16 // Reference Manual".
17 //
18 // Functional units
19 def A9_Issue0  : FuncUnit; // Issue slot 0
20 def A9_Issue1  : FuncUnit; // Issue slot 1
21 def A9_Branch  : FuncUnit; // Branch unit
22 def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0 (multiplies use only this one)
23 def A9_ALU1    : FuncUnit; // ALU pipeline 1
24 def A9_AGU     : FuncUnit; // Address generation unit for loads / stores
25 def A9_NPipe   : FuncUnit; // NEON pipeline (VFP itineraries execute here too)
26 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LSUnit  : FuncUnit; // Load/store unit
28 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial unit)
29 def A9_DRegsN  : FuncUnit; // FP register set, NEON side (artificial unit)
30
31 // Bypasses
32 def A9_LdBypass : Bypass; // Named on load results and on ALU/compare operands.
33
34 def CortexA9Itineraries : MultiIssueItineraries<
35   2, // IssueWidth - FIXME: A9_Issue0, A9_Issue1 are now redundant.
36   0, // MinLatency - FIXME: for misched, remove InstrStage for OOO operations.
37   2, // LoadLatency - optimistic, assumes bypass, overridden by OperandCycles.
38   10, // HighLatency - currently unused.
39   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
40    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
41   [A9_LdBypass], [
42   // Two fully-pipelined integer ALU pipelines
43
44   //
45   // Move instructions, unconditional
46   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
48   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
50   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
52   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
53                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
54   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
55                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
56                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
57   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
58                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
59                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
60                                   InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
61   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
62                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
63                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
64                                InstrStage<1, [A9_MUX0], 0>,
65                                InstrStage<1, [A9_AGU], 0>,
66                                InstrStage<1, [A9_LSUnit]>], [5]>,
67   //
68   // MVN instructions
69   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
70                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
71                               [1]>,
72   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
73                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
74                               [1, 1], [NoBypass, A9_LdBypass]>,
75   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
76                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
77                               [2, 1]>,
78   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
79                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
80                               [3, 1, 1]>,
81   //
82   // No operand cycles
83   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
85   //
86   // Binary Instructions that produce a result
87   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
88                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
89                             [1, 1], [NoBypass, A9_LdBypass]>,
90   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
91                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
92                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
93   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
94                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
95                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
96   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
98                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
99   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
100                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
101                             [3, 1, 1, 1],
102                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
103   //
104   // Bitwise Instructions that produce a result
105   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
106                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
107   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
109   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
110                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
111   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
112                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
113   //
114   // Unary Instructions that produce a result
115
116   // CLZ, RBIT, etc.
117   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
119
120   // BFC, BFI, UBFX, SBFX
121   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
122                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
123
124   //
125   // Zero and sign extension instructions
126   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
127                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
128   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
129                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
130   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
131                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
132   //
133   // Compare instructions
134   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
136                                [1], [A9_LdBypass]>,
137   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
138                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
139                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
140   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
142                                 [1, 1], [A9_LdBypass, NoBypass]>,
143   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
144                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
145                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
146   //
147   // Test instructions
148   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
150   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
152   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
153                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
154   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
155                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
156   //
157   // Move instructions, conditional
158   // FIXME: Correctly model the extra input dep on the destination.
159   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
160                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
161   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
163   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
165   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
167   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
168                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
169                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
170                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
171
172   // Integer multiply pipeline
173   //
174   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
175                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
176   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
177                                InstrStage<2, [A9_ALU0]>],
178                               [3, 1, 1, 1]>,
179   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
180                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
181   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
182                                InstrStage<2, [A9_ALU0]>],
183                               [4, 1, 1, 1]>,
184   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
185                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
186   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
187                                InstrStage<3, [A9_ALU0]>],
188                               [4, 5, 1, 1]>,
189   // Integer load pipeline
190   // FIXME: The timings are some rough approximations
191   //
192   // Immediate offset
193   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
194                                  InstrStage<1, [A9_MUX0], 0>,
195                                  InstrStage<1, [A9_AGU], 0>,
196                                  InstrStage<1, [A9_LSUnit]>],
197                                 [3, 1], [A9_LdBypass]>,
198   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
199                                  InstrStage<1, [A9_MUX0], 0>,
200                                  InstrStage<2, [A9_AGU], 0>,
201                                  InstrStage<1, [A9_LSUnit]>],
202                                 [4, 1], [A9_LdBypass]>,
203   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
204   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
205                                  InstrStage<1, [A9_MUX0], 0>,
206                                  InstrStage<2, [A9_AGU], 0>,
207                                  InstrStage<1, [A9_LSUnit]>],
208                                 [3, 3, 1], [A9_LdBypass]>,
209   //
210   // Register offset
211   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
212                                  InstrStage<1, [A9_MUX0], 0>,
213                                  InstrStage<1, [A9_AGU], 0>,
214                                  InstrStage<1, [A9_LSUnit]>],
215                                 [3, 1, 1], [A9_LdBypass]>,
216   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
217                                  InstrStage<1, [A9_MUX0], 0>,
218                                  InstrStage<2, [A9_AGU], 0>,
219                                  InstrStage<1, [A9_LSUnit]>],
220                                 [4, 1, 1], [A9_LdBypass]>,
221   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
222                                  InstrStage<1, [A9_MUX0], 0>,
223                                  InstrStage<2, [A9_AGU], 0>,
224                                  InstrStage<1, [A9_LSUnit]>],
225                                 [3, 3, 1, 1], [A9_LdBypass]>,
226   //
227   // Scaled register offset
228   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
229                                  InstrStage<1, [A9_MUX0], 0>,
230                                  InstrStage<1, [A9_AGU], 0>,
231                                  InstrStage<1, [A9_LSUnit], 0>],
232                                 [4, 1, 1], [A9_LdBypass]>,
233   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
234                                  InstrStage<1, [A9_MUX0], 0>,
235                                  InstrStage<2, [A9_AGU], 0>,
236                                  InstrStage<1, [A9_LSUnit]>],
237                                 [5, 1, 1], [A9_LdBypass]>,
238   //
239   // Immediate offset with update
240   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
241                                  InstrStage<1, [A9_MUX0], 0>,
242                                  InstrStage<1, [A9_AGU], 0>,
243                                  InstrStage<1, [A9_LSUnit]>],
244                                 [3, 2, 1], [A9_LdBypass]>,
245   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
246                                  InstrStage<1, [A9_MUX0], 0>,
247                                  InstrStage<2, [A9_AGU], 0>,
248                                  InstrStage<1, [A9_LSUnit]>],
249                                 [4, 3, 1], [A9_LdBypass]>,
250   //
251   // Register offset with update
252   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
253                                  InstrStage<1, [A9_MUX0], 0>,
254                                  InstrStage<1, [A9_AGU], 0>,
255                                  InstrStage<1, [A9_LSUnit]>],
256                                 [3, 2, 1, 1], [A9_LdBypass]>,
257   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
258                                  InstrStage<1, [A9_MUX0], 0>,
259                                  InstrStage<2, [A9_AGU], 0>,
260                                  InstrStage<1, [A9_LSUnit]>],
261                                 [4, 3, 1, 1], [A9_LdBypass]>,
262   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
263                                  InstrStage<1, [A9_MUX0], 0>,
264                                  InstrStage<2, [A9_AGU], 0>,
265                                  InstrStage<1, [A9_LSUnit]>],
266                                 [3, 3, 1, 1], [A9_LdBypass]>,
267   //
268   // Scaled register offset with update
269   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
270                                  InstrStage<1, [A9_MUX0], 0>,
271                                  InstrStage<1, [A9_AGU], 0>,
272                                  InstrStage<1, [A9_LSUnit]>],
273                                 [4, 3, 1, 1], [A9_LdBypass]>,
274   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
275                                   InstrStage<1, [A9_MUX0], 0>,
276                                   InstrStage<2, [A9_AGU], 0>,
277                                   InstrStage<1, [A9_LSUnit]>],
278                                  [5, 4, 1, 1], [A9_LdBypass]>,
279   //
280   // Load multiple, def is the 5th operand.
281   // FIXME: This assumes 3 to 4 registers.
282   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
283                                 InstrStage<1, [A9_MUX0], 0>,
284                                 InstrStage<2, [A9_AGU], 1>,
285                                 InstrStage<2, [A9_LSUnit]>],
286                                [1, 1, 1, 1, 3],
287                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
288   //
289   // Load multiple + update, defs are the 1st and 5th operands.
290   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
291                                 InstrStage<1, [A9_MUX0], 0>,
292                                 InstrStage<2, [A9_AGU], 1>,
293                                 InstrStage<2, [A9_LSUnit]>],
294                                [2, 1, 1, 1, 3],
295                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
296   //
297   // Load multiple plus branch
298   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
299                                 InstrStage<1, [A9_MUX0], 0>,
300                                 InstrStage<1, [A9_AGU], 1>,
301                                 InstrStage<2, [A9_LSUnit]>,
302                                 InstrStage<1, [A9_Branch]>],
303                                [1, 2, 1, 1, 3],
304                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
305   //
306   // Pop, def is the 3rd operand.
307   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
308                                 InstrStage<1, [A9_MUX0], 0>,
309                                 InstrStage<2, [A9_AGU], 1>,
310                                 InstrStage<2, [A9_LSUnit]>],
311                                [1, 1, 3],
312                                [NoBypass, NoBypass, A9_LdBypass]>,
313   //
314   // Pop + branch, def is the 3rd operand.
315   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
316                                 InstrStage<1, [A9_MUX0], 0>,
317                                 InstrStage<2, [A9_AGU], 1>,
318                                 InstrStage<2, [A9_LSUnit]>,
319                                 InstrStage<1, [A9_Branch]>],
320                                [1, 1, 3],
321                                [NoBypass, NoBypass, A9_LdBypass]>,
322
323   //
324   // iLoadi + iALUr for t2LDRpci_pic.
325   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
326                                 InstrStage<1, [A9_MUX0], 0>,
327                                 InstrStage<1, [A9_AGU], 0>,
328                                 InstrStage<1, [A9_LSUnit]>,
329                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
330                                [2, 1]>,
331
332   // Integer store pipeline
333   //
334   // Immediate offset
335   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
336                                  InstrStage<1, [A9_MUX0], 0>,
337                                  InstrStage<1, [A9_AGU], 0>,
338                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
339   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
340                                  InstrStage<1, [A9_MUX0], 0>,
341                                  InstrStage<2, [A9_AGU], 1>,
342                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
343   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
344   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
345                                  InstrStage<1, [A9_MUX0], 0>,
346                                  InstrStage<2, [A9_AGU], 1>,
347                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
348   //
349   // Register offset
350   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351                                  InstrStage<1, [A9_MUX0], 0>,
352                                  InstrStage<1, [A9_AGU], 0>,
353                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
354   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
355                                  InstrStage<1, [A9_MUX0], 0>,
356                                  InstrStage<2, [A9_AGU], 1>,
357                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
358   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
359                                  InstrStage<1, [A9_MUX0], 0>,
360                                  InstrStage<2, [A9_AGU], 1>,
361                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
362   //
363   // Scaled register offset
364   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
365                                   InstrStage<1, [A9_MUX0], 0>,
366                                   InstrStage<1, [A9_AGU], 0>,
367                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
368   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
369                                   InstrStage<1, [A9_MUX0], 0>,
370                                   InstrStage<2, [A9_AGU], 1>,
371                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
372   //
373   // Immediate offset with update
374   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
375                                   InstrStage<1, [A9_MUX0], 0>,
376                                   InstrStage<1, [A9_AGU], 0>,
377                                   InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
378   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
379                                   InstrStage<1, [A9_MUX0], 0>,
380                                   InstrStage<2, [A9_AGU], 1>,
381                                   InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
382   //
383   // Register offset with update
384   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
385                                   InstrStage<1, [A9_MUX0], 0>,
386                                   InstrStage<1, [A9_AGU], 0>,
387                                   InstrStage<1, [A9_LSUnit]>],
388                                  [2, 1, 1, 1]>,
389   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
390                                   InstrStage<1, [A9_MUX0], 0>,
391                                   InstrStage<2, [A9_AGU], 1>,
392                                   InstrStage<1, [A9_LSUnit]>],
393                                  [3, 1, 1, 1]>,
394   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
395                                   InstrStage<1, [A9_MUX0], 0>,
396                                   InstrStage<2, [A9_AGU], 1>,
397                                   InstrStage<1, [A9_LSUnit]>],
398                                  [3, 1, 1, 1]>,
399   //
400   // Scaled register offset with update
401   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
402                                     InstrStage<1, [A9_MUX0], 0>,
403                                     InstrStage<1, [A9_AGU], 0>,
404                                     InstrStage<1, [A9_LSUnit]>],
405                                    [2, 1, 1, 1]>,
406   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
407                                     InstrStage<1, [A9_MUX0], 0>,
408                                     InstrStage<2, [A9_AGU], 1>,
409                                     InstrStage<1, [A9_LSUnit]>],
410                                    [3, 1, 1, 1]>,
411   //
412   // Store multiple
413   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
414                                 InstrStage<1, [A9_MUX0], 0>,
415                                 InstrStage<1, [A9_AGU], 0>,
416                                 InstrStage<2, [A9_LSUnit]>]>,
417   //
418   // Store multiple + update
419   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
420                                 InstrStage<1, [A9_MUX0], 0>,
421                                 InstrStage<1, [A9_AGU], 0>,
422                                 InstrStage<2, [A9_LSUnit]>], [2]>,
423
424   //
425   // Preload
426   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
427
428   // Branch
429   //
430   // no delay slots, so the latency of a branch is unimportant
431   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
432                                 InstrStage<1, [A9_Issue1], 0>,
433                                 InstrStage<1, [A9_Branch]>]>,
434
435   // VFP and NEON share the same register file. This means that every VFP
436   // instruction should wait for full completion of the consecutive NEON
437   // instruction and vice-versa. We model this behavior with two artificial FUs:
438   // DRegsVFP and DRegsN.
439   //
440   // Every VFP instruction:
441   //  - Acquires DRegsVFP resource for 1 cycle
442   //  - Reserves DRegsN resource for the whole duration (including time to
443   //    register file writeback!).
444   // Every NEON instruction does the same but with FUs swapped.
445   //
446   // Since the reserved FU cannot be acquired, this models precisely
447   // "cross-domain" stalls.
448
449   // VFP
450   // Issue through integer pipeline, and execute in NEON unit.
451
452   // FP Special Register to Integer Register File Move
453   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
454                               InstrStage<1, [A9_MUX0], 0>,
455                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
456                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
457                               InstrStage<1, [A9_NPipe]>],
458                              [1]>,
459   //
460   // Single-precision FP Unary
461   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
462                                InstrStage<1, [A9_MUX0], 0>,
463                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
464                                // Extra latency cycles since wbck is 2 cycles
465                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
466                                InstrStage<1, [A9_NPipe]>],
467                               [1, 1]>,
468   //
469   // Double-precision FP Unary
470   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
471                                InstrStage<1, [A9_MUX0], 0>,
472                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
473                                // Extra latency cycles since wbck is 2 cycles
474                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
475                                InstrStage<1, [A9_NPipe]>],
476                               [1, 1]>,
477
478   //
479   // Single-precision FP Compare
480   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
481                                InstrStage<1, [A9_MUX0], 0>,
482                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
483                                // Extra latency cycles since wbck is 4 cycles
484                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
485                                InstrStage<1, [A9_NPipe]>],
486                               [1, 1]>,
487   //
488   // Double-precision FP Compare
489   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
490                                InstrStage<1, [A9_MUX0], 0>,
491                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
492                                // Extra latency cycles since wbck is 4 cycles
493                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
494                                InstrStage<1, [A9_NPipe]>],
495                               [1, 1]>,
496   //
497   // Single to Double FP Convert
498   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
499                                InstrStage<1, [A9_MUX0], 0>,
500                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
501                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
502                                InstrStage<1, [A9_NPipe]>],
503                               [4, 1]>,
504   //
505   // Double to Single FP Convert
506   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
507                                InstrStage<1, [A9_MUX0], 0>,
508                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
509                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
510                                InstrStage<1, [A9_NPipe]>],
511                               [4, 1]>,
512
513   //
514   // Single to Half FP Convert
515   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
516                                InstrStage<1, [A9_MUX0], 0>,
517                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
518                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
519                                InstrStage<1, [A9_NPipe]>],
520                               [4, 1]>,
521   //
522   // Half to Single FP Convert
523   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
524                                InstrStage<1, [A9_MUX0], 0>,
525                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
526                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
527                                InstrStage<1, [A9_NPipe]>],
528                               [2, 1]>,
529
530   //
531   // Single-Precision FP to Integer Convert
532   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
533                                InstrStage<1, [A9_MUX0], 0>,
534                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
535                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
536                                InstrStage<1, [A9_NPipe]>],
537                               [4, 1]>,
538   //
539   // Double-Precision FP to Integer Convert
540   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
541                                InstrStage<1, [A9_MUX0], 0>,
542                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
543                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
544                                InstrStage<1, [A9_NPipe]>],
545                               [4, 1]>,
546   //
547   // Integer to Single-Precision FP Convert
548   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
549                                InstrStage<1, [A9_MUX0], 0>,
550                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
551                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
552                                InstrStage<1, [A9_NPipe]>],
553                               [4, 1]>,
554   //
555   // Integer to Double-Precision FP Convert
556   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
557                                InstrStage<1, [A9_MUX0], 0>,
558                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
559                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
560                                InstrStage<1, [A9_NPipe]>],
561                               [4, 1]>,
562   //
563   // Single-precision FP ALU
564   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
565                                InstrStage<1, [A9_MUX0], 0>,
566                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
567                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
568                                InstrStage<1, [A9_NPipe]>],
569                               [4, 1, 1]>,
570   //
571   // Double-precision FP ALU
572   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
573                                InstrStage<1, [A9_MUX0], 0>,
574                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
575                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
576                                InstrStage<1, [A9_NPipe]>],
577                               [4, 1, 1]>,
578   //
579   // Single-precision FP Multiply
580   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
581                                InstrStage<1, [A9_MUX0], 0>,
582                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
583                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
584                                InstrStage<1, [A9_NPipe]>],
585                               [5, 1, 1]>,
586   //
587   // Double-precision FP Multiply
588   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
589                                InstrStage<1, [A9_MUX0], 0>,
590                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
591                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
592                                InstrStage<2, [A9_NPipe]>],
593                               [6, 1, 1]>,
594   //
595   // Single-precision FP MAC
596   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
597                                InstrStage<1, [A9_MUX0], 0>,
598                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
599                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
600                                InstrStage<1, [A9_NPipe]>],
601                               [8, 1, 1, 1]>,
602   //
603   // Double-precision FP MAC
604   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
605                                InstrStage<1,  [A9_MUX0], 0>,
606                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
607                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
608                                InstrStage<2,  [A9_NPipe]>],
609                               [9, 1, 1, 1]>,
610   //
611   // Single-precision Fused FP MAC
612   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
613                                InstrStage<1, [A9_MUX0], 0>,
614                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
615                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
616                                InstrStage<1, [A9_NPipe]>],
617                               [8, 1, 1, 1]>,
618   //
619   // Double-precision Fused FP MAC
620   InstrItinData<IIC_fpFMAC64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
621                                InstrStage<1,  [A9_MUX0], 0>,
622                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
623                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
624                                InstrStage<2,  [A9_NPipe]>],
625                               [9, 1, 1, 1]>,
626   //
627   // Single-precision FP DIV
628   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
629                                InstrStage<1,  [A9_MUX0], 0>,
630                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
631                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
632                                InstrStage<10, [A9_NPipe]>],
633                               [15, 1, 1]>,
634   //
635   // Double-precision FP DIV
636   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
637                                InstrStage<1,  [A9_MUX0], 0>,
638                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
639                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
640                                InstrStage<20, [A9_NPipe]>],
641                               [25, 1, 1]>,
642   //
643   // Single-precision FP SQRT
644   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
645                                InstrStage<1,  [A9_MUX0], 0>,
646                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
647                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
648                                InstrStage<13, [A9_NPipe]>],
649                               [17, 1]>,
650   //
651   // Double-precision FP SQRT
652   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
653                                InstrStage<1,  [A9_MUX0], 0>,
654                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
655                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
656                                InstrStage<28, [A9_NPipe]>],
657                               [32, 1]>,
658
659   //
660   // Integer to Single-precision Move
661   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
662                                InstrStage<1, [A9_MUX0], 0>,
663                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
664                                // Extra latency cycle: writeback takes 2 cycles
665                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
666                                InstrStage<1, [A9_NPipe]>],
667                               [1, 1]>,
668   //
669   // Integer to Double-precision Move
670   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
671                                InstrStage<1, [A9_MUX0], 0>,
672                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
673                                // Extra latency cycle: writeback takes 2 cycles
674                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
675                                InstrStage<1, [A9_NPipe]>],
676                               [1, 1, 1]>,
677   //
678   // Single-precision to Integer Move
679   //
680   // On A9 move-from-VFP is free to issue with no stall if other VFP
681   // operations are in flight. I assume it still can't dual-issue though.
682   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
683                                InstrStage<1, [A9_MUX0], 0>],
684                               [2, 1]>,
685   //
686   // Double-precision to Integer Move
687   //
688   // On A9 move-from-VFP is free to issue with no stall if other VFP
689   // operations are in flight. I assume it still can't dual-issue though.
690   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
691                                InstrStage<1, [A9_MUX0], 0>],
692                               [2, 1, 1]>,
693   //
694   // Single-precision FP Load
695   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
696                                InstrStage<1, [A9_MUX0], 0>,
697                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
698                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
699                                InstrStage<1, [A9_NPipe], 0>,
700                                InstrStage<1, [A9_LSUnit]>],
701                               [1, 1]>,
702   //
703   // Double-precision FP Load
704   // FIXME: Result latency is 1 if address is 64-bit aligned.
705   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
706                                InstrStage<1, [A9_MUX0], 0>,
707                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
708                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
709                                InstrStage<1, [A9_NPipe], 0>,
710                                InstrStage<1, [A9_LSUnit]>],
711                               [2, 1]>,
712   //
713   // FP Load Multiple
714   // FIXME: Assumes 2 doubles are loaded, which requires 2 LS cycles.
715   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
716                                InstrStage<1, [A9_MUX0], 0>,
717                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
718                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
719                                InstrStage<1, [A9_NPipe], 0>,
720                                InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
721   //
722   // FP Load Multiple + update
723   // FIXME: Assumes 2 doubles are loaded, which requires 2 LS cycles.
724   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
725                                InstrStage<1, [A9_MUX0], 0>,
726                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
727                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
728                                InstrStage<1, [A9_NPipe], 0>,
729                                InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
730   //
731   // Single-precision FP Store
732   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
733                                InstrStage<1, [A9_MUX0], 0>,
734                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
735                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
736                                InstrStage<1, [A9_NPipe], 0>,
737                                InstrStage<1, [A9_LSUnit]>],
738                               [1, 1]>,
739   //
740   // Double-precision FP Store
741   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
742                                InstrStage<1, [A9_MUX0], 0>,
743                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
744                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
745                                InstrStage<1, [A9_NPipe], 0>,
746                                InstrStage<1, [A9_LSUnit]>],
747                               [1, 1]>,
748   //
749   // FP Store Multiple
750   // FIXME: Assumes 2 doubles are stored, which requires 2 LS cycles.
751   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
752                                InstrStage<1, [A9_MUX0], 0>,
753                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
754                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
755                                InstrStage<1, [A9_NPipe], 0>,
756                                InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
757   //
758   // FP Store Multiple + update
759   // FIXME: Assumes 2 doubles are stored, which requires 2 LS cycles.
760   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
761                                 InstrStage<1, [A9_MUX0], 0>,
762                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
763                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
764                                 InstrStage<1, [A9_NPipe], 0>,
765                                 InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
766   // NEON
767   // VLD1
768   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
769                                InstrStage<1, [A9_MUX0], 0>,
770                                InstrStage<1, [A9_DRegsN],   0, Required>,
771                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
772                                InstrStage<1, [A9_NPipe], 0>,
773                                InstrStage<1, [A9_LSUnit]>],
774                               [1, 1]>,
775   // VLD1x2
776   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
777                                InstrStage<1, [A9_MUX0], 0>,
778                                InstrStage<1, [A9_DRegsN],   0, Required>,
779                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
780                                InstrStage<1, [A9_NPipe], 0>,
781                                InstrStage<1, [A9_LSUnit]>],
782                               [1, 1, 1]>,
783   // VLD1x3
784   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
785                                InstrStage<1, [A9_MUX0], 0>,
786                                InstrStage<1, [A9_DRegsN],   0, Required>,
787                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
788                                InstrStage<2, [A9_NPipe], 0>,
789                                InstrStage<2, [A9_LSUnit]>],
790                               [1, 1, 2, 1]>,
791   // VLD1x4
792   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
793                                InstrStage<1, [A9_MUX0], 0>,
794                                InstrStage<1, [A9_DRegsN],   0, Required>,
795                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
796                                InstrStage<2, [A9_NPipe], 0>,
797                                InstrStage<2, [A9_LSUnit]>],
798                               [1, 1, 2, 2, 1]>,
799   // VLD1u
800   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
801                                InstrStage<1, [A9_MUX0], 0>,
802                                InstrStage<1, [A9_DRegsN],   0, Required>,
803                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
804                                InstrStage<1, [A9_NPipe], 0>,
805                                InstrStage<1, [A9_LSUnit]>],
806                               [1, 2, 1]>,
807   // VLD1x2u
808   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
809                                InstrStage<1, [A9_MUX0], 0>,
810                                InstrStage<1, [A9_DRegsN],   0, Required>,
811                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
812                                InstrStage<1, [A9_NPipe], 0>,
813                                InstrStage<1, [A9_LSUnit]>],
814                               [1, 1, 2, 1]>,
815   // VLD1x3u
816   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
817                                InstrStage<1, [A9_MUX0], 0>,
818                                InstrStage<1, [A9_DRegsN],   0, Required>,
819                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
820                                InstrStage<2, [A9_NPipe], 0>,
821                                InstrStage<2, [A9_LSUnit]>],
822                               [1, 1, 2, 2, 1]>,
823   // VLD1x4u
824   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
825                                InstrStage<1, [A9_MUX0], 0>,
826                                InstrStage<1, [A9_DRegsN],   0, Required>,
827                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
828                                InstrStage<2, [A9_NPipe], 0>,
829                                InstrStage<2, [A9_LSUnit]>],
830                               [1, 1, 2, 2, 2, 1]>,
831   //
832   // VLD1ln
833   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
834                                InstrStage<1, [A9_MUX0], 0>,
835                                InstrStage<1, [A9_DRegsN],   0, Required>,
836                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
837                                InstrStage<2, [A9_NPipe], 0>,
838                                InstrStage<2, [A9_LSUnit]>],
839                               [3, 1, 1, 1]>,
840   //
841   // VLD1lnu
842   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
843                                InstrStage<1, [A9_MUX0], 0>,
844                                InstrStage<1, [A9_DRegsN],   0, Required>,
845                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
846                                InstrStage<2, [A9_NPipe], 0>,
847                                InstrStage<2, [A9_LSUnit]>],
848                               [3, 2, 1, 1, 1, 1]>,
849   //
850   // VLD1dup
851   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
852                                InstrStage<1, [A9_MUX0], 0>,
853                                InstrStage<1, [A9_DRegsN],   0, Required>,
854                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
855                                InstrStage<1, [A9_NPipe], 0>,
856                                InstrStage<1, [A9_LSUnit]>],
857                               [2, 1]>,
858   //
859   // VLD1dupu
860   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
861                                InstrStage<1, [A9_MUX0], 0>,
862                                InstrStage<1, [A9_DRegsN],   0, Required>,
863                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
864                                InstrStage<1, [A9_NPipe], 0>,
865                                InstrStage<1, [A9_LSUnit]>],
866                               [2, 2, 1, 1]>,
867   //
868   // VLD2
869   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
870                                InstrStage<1, [A9_MUX0], 0>,
871                                InstrStage<1, [A9_DRegsN],   0, Required>,
872                                // Extra latency cycles: writeback takes 7 cycles
873                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
874                                InstrStage<1, [A9_NPipe], 0>,
875                                InstrStage<1, [A9_LSUnit]>],
876                               [2, 2, 1]>,
877   //
878   // VLD2x2
879   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
880                                InstrStage<1, [A9_MUX0], 0>,
881                                InstrStage<1, [A9_DRegsN],   0, Required>,
882                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
883                                InstrStage<2, [A9_NPipe], 0>,
884                                InstrStage<2, [A9_LSUnit]>],
885                               [2, 3, 2, 3, 1]>,
886   //
887   // VLD2ln
888   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
889                                InstrStage<1, [A9_MUX0], 0>,
890                                InstrStage<1, [A9_DRegsN],   0, Required>,
891                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
892                                InstrStage<2, [A9_NPipe], 0>,
893                                InstrStage<2, [A9_LSUnit]>],
894                               [3, 3, 1, 1, 1, 1]>,
895   //
896   // VLD2u
897   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
898                                InstrStage<1, [A9_MUX0], 0>,
899                                InstrStage<1, [A9_DRegsN],   0, Required>,
900                                // Extra latency cycles: writeback takes 7 cycles
901                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
902                                InstrStage<1, [A9_NPipe], 0>,
903                                InstrStage<1, [A9_LSUnit]>],
904                               [2, 2, 2, 1, 1, 1]>,
905   //
906   // VLD2x2u
907   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
908                                InstrStage<1, [A9_MUX0], 0>,
909                                InstrStage<1, [A9_DRegsN],   0, Required>,
910                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
911                                InstrStage<2, [A9_NPipe], 0>,
912                                InstrStage<2, [A9_LSUnit]>],
913                               [2, 3, 2, 3, 2, 1]>,
914   //
915   // VLD2lnu
916   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
917                                InstrStage<1, [A9_MUX0], 0>,
918                                InstrStage<1, [A9_DRegsN],   0, Required>,
919                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
920                                InstrStage<2, [A9_NPipe], 0>,
921                                InstrStage<2, [A9_LSUnit]>],
922                               [3, 3, 2, 1, 1, 1, 1, 1]>,
923   //
924   // VLD2dup
925   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
926                                InstrStage<1, [A9_MUX0], 0>,
927                                InstrStage<1, [A9_DRegsN],   0, Required>,
928                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
929                                InstrStage<1, [A9_NPipe], 0>,
930                                InstrStage<1, [A9_LSUnit]>],
931                               [2, 2, 1]>,
932   //
933   // VLD2dupu
934   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
935                                InstrStage<1, [A9_MUX0], 0>,
936                                InstrStage<1, [A9_DRegsN],   0, Required>,
937                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
938                                InstrStage<1, [A9_NPipe], 0>,
939                                InstrStage<1, [A9_LSUnit]>],
940                               [2, 2, 2, 1, 1]>,
941   //
942   // VLD3
943   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
944                                InstrStage<1, [A9_MUX0], 0>,
945                                InstrStage<1, [A9_DRegsN],   0, Required>,
946                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
947                                InstrStage<3, [A9_NPipe], 0>,
948                                InstrStage<3, [A9_LSUnit]>],
949                               [3, 3, 4, 1]>,
950   //
951   // VLD3ln
952   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
953                                InstrStage<1, [A9_MUX0], 0>,
954                                InstrStage<1, [A9_DRegsN],   0, Required>,
955                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
956                                InstrStage<5, [A9_NPipe], 0>,
957                                InstrStage<5, [A9_LSUnit]>],
958                               [5, 5, 6, 1, 1, 1, 1, 2]>,
959   //
960   // VLD3u
961   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
962                                InstrStage<1, [A9_MUX0], 0>,
963                                InstrStage<1, [A9_DRegsN],   0, Required>,
964                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
965                                InstrStage<3, [A9_NPipe], 0>,
966                                InstrStage<3, [A9_LSUnit]>],
967                               [3, 3, 4, 2, 1]>,
968   //
969   // VLD3lnu
970   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
971                                InstrStage<1, [A9_MUX0], 0>,
972                                InstrStage<1, [A9_DRegsN],   0, Required>,
973                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
974                                InstrStage<5, [A9_NPipe], 0>,
975                                InstrStage<5, [A9_LSUnit]>],
976                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
977   //
978   // VLD3dup
979   InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
980                                InstrStage<1, [A9_MUX0], 0>,
981                                InstrStage<1, [A9_DRegsN],   0, Required>,
982                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
983                                InstrStage<3, [A9_NPipe], 0>,
984                                InstrStage<3, [A9_LSUnit]>],
985                               [3, 3, 4, 1]>,
986   //
987   // VLD3dupu
988   InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
989                                InstrStage<1, [A9_MUX0], 0>,
990                                InstrStage<1, [A9_DRegsN],   0, Required>,
991                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
992                                InstrStage<3, [A9_NPipe], 0>,
993                                InstrStage<3, [A9_LSUnit]>],
994                               [3, 3, 4, 2, 1, 1]>,
995   //
996   // VLD4
997   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
998                                InstrStage<1, [A9_MUX0], 0>,
999                                InstrStage<1, [A9_DRegsN],   0, Required>,
1000                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1001                                InstrStage<3, [A9_NPipe], 0>,
1002                                InstrStage<3, [A9_LSUnit]>],
1003                               [3, 3, 4, 4, 1]>,
1004   //
1005   // VLD4ln
1006   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1007                                InstrStage<1, [A9_MUX0], 0>,
1008                                InstrStage<1, [A9_DRegsN],   0, Required>,
1009                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1010                                InstrStage<4, [A9_NPipe], 0>,
1011                                InstrStage<4, [A9_LSUnit]>],
1012                               [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
1013   //
1014   // VLD4u
1015   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1016                                InstrStage<1, [A9_MUX0], 0>,
1017                                InstrStage<1, [A9_DRegsN],   0, Required>,
1018                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1019                                InstrStage<3, [A9_NPipe], 0>,
1020                                InstrStage<3, [A9_LSUnit]>],
1021                               [3, 3, 4, 4, 2, 1]>,
1022   //
1023   // VLD4lnu
1024   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1025                                InstrStage<1, [A9_MUX0], 0>,
1026                                InstrStage<1, [A9_DRegsN],   0, Required>,
1027                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1028                                InstrStage<4, [A9_NPipe], 0>,
1029                                InstrStage<4, [A9_LSUnit]>],
1030                               [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
1031   //
1032   // VLD4dup
1033   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1034                                InstrStage<1, [A9_MUX0], 0>,
1035                                InstrStage<1, [A9_DRegsN],   0, Required>,
1036                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1037                                InstrStage<2, [A9_NPipe], 0>,
1038                                InstrStage<2, [A9_LSUnit]>],
1039                               [2, 2, 3, 3, 1]>,
1040   //
1041   // VLD4dupu
1042   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1043                                InstrStage<1, [A9_MUX0], 0>,
1044                                InstrStage<1, [A9_DRegsN],   0, Required>,
1045                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1046                                InstrStage<2, [A9_NPipe], 0>,
1047                                InstrStage<2, [A9_LSUnit]>],
1048                               [2, 2, 3, 3, 2, 1, 1]>,
1049   //
1050   // VST1
1051   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1052                                InstrStage<1, [A9_MUX0], 0>,
1053                                InstrStage<1, [A9_DRegsN],   0, Required>,
1054                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1055                                InstrStage<1, [A9_NPipe], 0>,
1056                                InstrStage<1, [A9_LSUnit]>],
1057                               [1, 1, 1]>,
1058   //
1059   // VST1x2
1060   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1061                                InstrStage<1, [A9_MUX0], 0>,
1062                                InstrStage<1, [A9_DRegsN],   0, Required>,
1063                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1064                                InstrStage<1, [A9_NPipe], 0>,
1065                                InstrStage<1, [A9_LSUnit]>],
1066                               [1, 1, 1, 1]>,
1067   //
1068   // VST1x3
1069   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1070                                InstrStage<1, [A9_MUX0], 0>,
1071                                InstrStage<1, [A9_DRegsN],   0, Required>,
1072                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1073                                InstrStage<2, [A9_NPipe], 0>,
1074                                InstrStage<2, [A9_LSUnit]>],
1075                               [1, 1, 1, 1, 2]>,
1076   //
1077   // VST1x4
1078   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1079                                InstrStage<1, [A9_MUX0], 0>,
1080                                InstrStage<1, [A9_DRegsN],   0, Required>,
1081                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1082                                InstrStage<2, [A9_NPipe], 0>,
1083                                InstrStage<2, [A9_LSUnit]>],
1084                               [1, 1, 1, 1, 2, 2]>,
1085   //
1086   // VST1u
1087   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1088                                InstrStage<1, [A9_MUX0], 0>,
1089                                InstrStage<1, [A9_DRegsN],   0, Required>,
1090                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1091                                InstrStage<1, [A9_NPipe], 0>,
1092                                InstrStage<1, [A9_LSUnit]>],
1093                               [2, 1, 1, 1, 1]>,
1094   //
1095   // VST1x2u
1096   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1097                                InstrStage<1, [A9_MUX0], 0>,
1098                                InstrStage<1, [A9_DRegsN],   0, Required>,
1099                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1100                                InstrStage<1, [A9_NPipe], 0>,
1101                                InstrStage<1, [A9_LSUnit]>],
1102                               [2, 1, 1, 1, 1, 1]>,
1103   //
1104   // VST1x3u
1105   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1106                                InstrStage<1, [A9_MUX0], 0>,
1107                                InstrStage<1, [A9_DRegsN],   0, Required>,
1108                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1109                                InstrStage<2, [A9_NPipe], 0>,
1110                                InstrStage<2, [A9_LSUnit]>],
1111                               [2, 1, 1, 1, 1, 1, 2]>,
1112   //
1113   // VST1x4u
1114   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1115                                InstrStage<1, [A9_MUX0], 0>,
1116                                InstrStage<1, [A9_DRegsN],   0, Required>,
1117                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1118                                InstrStage<2, [A9_NPipe], 0>,
1119                                InstrStage<2, [A9_LSUnit]>],
1120                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1121   //
1122   // VST1ln
1123   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1124                                InstrStage<1, [A9_MUX0], 0>,
1125                                InstrStage<1, [A9_DRegsN],   0, Required>,
1126                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1127                                InstrStage<1, [A9_NPipe], 0>,
1128                                InstrStage<1, [A9_LSUnit]>],
1129                               [1, 1, 1]>,
1130   //
1131   // VST1lnu
1132   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1133                                InstrStage<1, [A9_MUX0], 0>,
1134                                InstrStage<1, [A9_DRegsN],   0, Required>,
1135                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1136                                InstrStage<1, [A9_NPipe], 0>,
1137                                InstrStage<1, [A9_LSUnit]>],
1138                               [2, 1, 1, 1, 1]>,
1139   //
1140   // VST2
1141   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1142                                InstrStage<1, [A9_MUX0], 0>,
1143                                InstrStage<1, [A9_DRegsN],   0, Required>,
1144                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1145                                InstrStage<1, [A9_NPipe], 0>,
1146                                InstrStage<1, [A9_LSUnit]>],
1147                               [1, 1, 1, 1]>,
1148   //
1149   // VST2x2
1150   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1151                                InstrStage<1, [A9_MUX0], 0>,
1152                                InstrStage<1, [A9_DRegsN],   0, Required>,
1153                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1154                                InstrStage<3, [A9_NPipe], 0>,
1155                                InstrStage<3, [A9_LSUnit]>],
1156                               [1, 1, 1, 1, 2, 2]>,
1157   //
1158   // VST2u
1159   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1160                                InstrStage<1, [A9_MUX0], 0>,
1161                                InstrStage<1, [A9_DRegsN],   0, Required>,
1162                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1163                                InstrStage<1, [A9_NPipe], 0>,
1164                                InstrStage<1, [A9_LSUnit]>],
1165                               [2, 1, 1, 1, 1, 1]>,
1166   //
1167   // VST2x2u
1168   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1169                                InstrStage<1, [A9_MUX0], 0>,
1170                                InstrStage<1, [A9_DRegsN],   0, Required>,
1171                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1172                                InstrStage<3, [A9_NPipe], 0>,
1173                                InstrStage<3, [A9_LSUnit]>],
1174                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1175   //
1176   // VST2ln
1177   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1178                                InstrStage<1, [A9_MUX0], 0>,
1179                                InstrStage<1, [A9_DRegsN],   0, Required>,
1180                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1181                                InstrStage<1, [A9_NPipe], 0>,
1182                                InstrStage<1, [A9_LSUnit]>],
1183                               [1, 1, 1, 1]>,
1184   //
1185   // VST2lnu
1186   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1187                                InstrStage<1, [A9_MUX0], 0>,
1188                                InstrStage<1, [A9_DRegsN],   0, Required>,
1189                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1190                                InstrStage<1, [A9_NPipe], 0>,
1191                                InstrStage<1, [A9_LSUnit]>],
1192                               [2, 1, 1, 1, 1, 1]>,
1193   //
1194   // VST3
1195   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1196                                InstrStage<1, [A9_MUX0], 0>,
1197                                InstrStage<1, [A9_DRegsN],   0, Required>,
1198                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1199                                InstrStage<2, [A9_NPipe], 0>,
1200                                InstrStage<2, [A9_LSUnit]>],
1201                               [1, 1, 1, 1, 2]>,
1202   //
1203   // VST3u
1204   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1205                                InstrStage<1, [A9_MUX0], 0>,
1206                                InstrStage<1, [A9_DRegsN],   0, Required>,
1207                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1208                                InstrStage<2, [A9_NPipe], 0>,
1209                                InstrStage<2, [A9_LSUnit]>],
1210                               [2, 1, 1, 1, 1, 1, 2]>,
1211   //
1212   // VST3ln
1213   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1214                                InstrStage<1, [A9_MUX0], 0>,
1215                                InstrStage<1, [A9_DRegsN],   0, Required>,
1216                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1217                                InstrStage<3, [A9_NPipe], 0>,
1218                                InstrStage<3, [A9_LSUnit]>],
1219                               [1, 1, 1, 1, 2]>,
1220   //
1221   // VST3lnu
1222   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1223                                InstrStage<1, [A9_MUX0], 0>,
1224                                InstrStage<1, [A9_DRegsN],   0, Required>,
1225                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1226                                InstrStage<3, [A9_NPipe], 0>,
1227                                InstrStage<3, [A9_LSUnit]>],
1228                               [2, 1, 1, 1, 1, 1, 2]>,
1229   //
1230   // VST4
1231   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1232                                InstrStage<1, [A9_MUX0], 0>,
1233                                InstrStage<1, [A9_DRegsN],   0, Required>,
1234                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1235                                InstrStage<2, [A9_NPipe], 0>,
1236                                InstrStage<2, [A9_LSUnit]>],
1237                               [1, 1, 1, 1, 2, 2]>,
1238   //
1239   // VST4u
1240   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1241                                InstrStage<1, [A9_MUX0], 0>,
1242                                InstrStage<1, [A9_DRegsN],   0, Required>,
1243                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1244                                InstrStage<2, [A9_NPipe], 0>,
1245                                InstrStage<2, [A9_LSUnit]>],
1246                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1247   //
1248   // VST4ln
1249   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1250                                InstrStage<1, [A9_MUX0], 0>,
1251                                InstrStage<1, [A9_DRegsN],   0, Required>,
1252                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1253                                InstrStage<2, [A9_NPipe], 0>,
1254                                InstrStage<2, [A9_LSUnit]>],
1255                               [1, 1, 1, 1, 2, 2]>,
1256   //
1257   // VST4lnu
1258   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1259                                InstrStage<1, [A9_MUX0], 0>,
1260                                InstrStage<1, [A9_DRegsN],   0, Required>,
1261                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1262                                InstrStage<2, [A9_NPipe], 0>,
1263                                InstrStage<2, [A9_LSUnit]>],
1264                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1265
1266   //
1267   // Double-register Integer Unary
1268   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1269                                InstrStage<1, [A9_MUX0], 0>,
1270                                InstrStage<1, [A9_DRegsN],   0, Required>,
1271                                // Extra latency cycles since wbck is 6 cycles
1272                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1273                                InstrStage<1, [A9_NPipe]>],
1274                               [4, 2]>,
1275   //
1276   // Quad-register Integer Unary
1277   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1278                                InstrStage<1, [A9_MUX0], 0>,
1279                                InstrStage<1, [A9_DRegsN],   0, Required>,
1280                                // Extra latency cycles since wbck is 6 cycles
1281                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1282                                InstrStage<1, [A9_NPipe]>],
1283                               [4, 2]>,
1284   //
1285   // Double-register Integer Q-Unary
1286   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1287                                InstrStage<1, [A9_MUX0], 0>,
1288                                InstrStage<1, [A9_DRegsN],   0, Required>,
1289                                // Extra latency cycles since wbck is 6 cycles
1290                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1291                                InstrStage<1, [A9_NPipe]>],
1292                               [4, 1]>,
1293   //
1294   // Quad-register Integer Q-Unary
1295   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1296                                InstrStage<1, [A9_MUX0], 0>,
1297                                InstrStage<1, [A9_DRegsN],   0, Required>,
1298                                // Extra latency cycles since wbck is 6 cycles
1299                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1300                                InstrStage<1, [A9_NPipe]>],
1301                               [4, 1]>,
1302   //
1303   // Double-register Integer Binary
1304   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1305                                InstrStage<1, [A9_MUX0], 0>,
1306                                InstrStage<1, [A9_DRegsN],   0, Required>,
1307                                // Extra latency cycles since wbck is 6 cycles
1308                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1309                                InstrStage<1, [A9_NPipe]>],
1310                               [3, 2, 2]>,
1311   //
1312   // Quad-register Integer Binary
1313   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1314                                InstrStage<1, [A9_MUX0], 0>,
1315                                InstrStage<1, [A9_DRegsN],   0, Required>,
1316                                // Extra latency cycles since wbck is 6 cycles
1317                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1318                                InstrStage<1, [A9_NPipe]>],
1319                               [3, 2, 2]>,
1320   //
1321   // Double-register Integer Subtract
1322   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1323                                InstrStage<1, [A9_MUX0], 0>,
1324                                InstrStage<1, [A9_DRegsN],   0, Required>,
1325                                // Extra latency cycles since wbck is 6 cycles
1326                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1327                                InstrStage<1, [A9_NPipe]>],
1328                               [3, 2, 1]>,
1329   //
1330   // Quad-register Integer Subtract
1331   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1332                                InstrStage<1, [A9_MUX0], 0>,
1333                                InstrStage<1, [A9_DRegsN],   0, Required>,
1334                                // Extra latency cycles since wbck is 6 cycles
1335                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1336                                InstrStage<1, [A9_NPipe]>],
1337                               [3, 2, 1]>,
1338   //
1339   // Double-register Integer Shift
1340   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1341                                InstrStage<1, [A9_MUX0], 0>,
1342                                InstrStage<1, [A9_DRegsN],   0, Required>,
1343                                // Extra latency cycles since wbck is 6 cycles
1344                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1345                                InstrStage<1, [A9_NPipe]>],
1346                               [3, 1, 1]>,
1347   //
1348   // Quad-register Integer Shift
1349   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1350                                InstrStage<1, [A9_MUX0], 0>,
1351                                InstrStage<1, [A9_DRegsN],   0, Required>,
1352                                // Extra latency cycles since wbck is 6 cycles
1353                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1354                                InstrStage<1, [A9_NPipe]>],
1355                               [3, 1, 1]>,
1356   //
1357   // Double-register Integer Shift (4 cycle)
1358   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1359                                InstrStage<1, [A9_MUX0], 0>,
1360                                InstrStage<1, [A9_DRegsN],   0, Required>,
1361                                // Extra latency cycles since wbck is 6 cycles
1362                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1363                                InstrStage<1, [A9_NPipe]>],
1364                               [4, 1, 1]>,
1365   //
1366   // Quad-register Integer Shift (4 cycle)
1367   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1368                                InstrStage<1, [A9_MUX0], 0>,
1369                                InstrStage<1, [A9_DRegsN],   0, Required>,
1370                                // Extra latency cycles since wbck is 6 cycles
1371                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1372                                InstrStage<1, [A9_NPipe]>],
1373                               [4, 1, 1]>,
1374   //
1375   // Double-register Integer Binary (4 cycle)
1376   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1377                                InstrStage<1, [A9_MUX0], 0>,
1378                                InstrStage<1, [A9_DRegsN],   0, Required>,
1379                                // Extra latency cycles since wbck is 6 cycles
1380                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1381                                InstrStage<1, [A9_NPipe]>],
1382                               [4, 2, 2]>,
1383   //
1384   // Quad-register Integer Binary (4 cycle)
1385   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1386                                InstrStage<1, [A9_MUX0], 0>,
1387                                InstrStage<1, [A9_DRegsN],   0, Required>,
1388                                // Extra latency cycles since wbck is 6 cycles
1389                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1390                                InstrStage<1, [A9_NPipe]>],
1391                               [4, 2, 2]>,
1392   //
1393   // Double-register Integer Subtract (4 cycle)
1394   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1395                                InstrStage<1, [A9_MUX0], 0>,
1396                                InstrStage<1, [A9_DRegsN],   0, Required>,
1397                                // Extra latency cycles since wbck is 6 cycles
1398                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1399                                InstrStage<1, [A9_NPipe]>],
1400                               [4, 2, 1]>,
1401   //
1402   // Quad-register Integer Subtract (4 cycle)
1403   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1404                                InstrStage<1, [A9_MUX0], 0>,
1405                                InstrStage<1, [A9_DRegsN],   0, Required>,
1406                                // Extra latency cycles since wbck is 6 cycles
1407                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1408                                InstrStage<1, [A9_NPipe]>],
1409                               [4, 2, 1]>,
1410
1411   //
1412   // Double-register Integer Count
1413   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1414                                InstrStage<1, [A9_MUX0], 0>,
1415                                InstrStage<1, [A9_DRegsN],   0, Required>,
1416                                // Extra latency cycles since wbck is 6 cycles
1417                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1418                                InstrStage<1, [A9_NPipe]>],
1419                               [3, 2, 2]>,
1420   //
1421   // Quad-register Integer Count
1422   // Result written in N3, but that is relative to the last cycle of a
1423   // multicycle instruction, so we use 4 for those cases.
1424   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1425                                InstrStage<1, [A9_MUX0], 0>,
1426                                InstrStage<1, [A9_DRegsN],   0, Required>,
1427                                // Extra latency cycles since wbck is 7 cycles
1428                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1429                                InstrStage<2, [A9_NPipe]>],
1430                               [4, 2, 2]>,
1431   //
1432   // Double-register Absolute Difference and Accumulate
1433   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1434                                InstrStage<1, [A9_MUX0], 0>,
1435                                InstrStage<1, [A9_DRegsN],   0, Required>,
1436                                // Extra latency cycles since wbck is 6 cycles
1437                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1438                                InstrStage<1, [A9_NPipe]>],
1439                               [6, 3, 2, 1]>,
1440   //
1441   // Quad-register Absolute Difference and Accumulate
1442   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1443                                InstrStage<1, [A9_MUX0], 0>,
1444                                InstrStage<1, [A9_DRegsN],   0, Required>,
1445                                // Extra latency cycles since wbck is 6 cycles
1446                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1447                                InstrStage<2, [A9_NPipe]>],
1448                               [6, 3, 2, 1]>,
1449   //
1450   // Double-register Integer Pair Add Long
1451   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1452                                InstrStage<1, [A9_MUX0], 0>,
1453                                InstrStage<1, [A9_DRegsN],   0, Required>,
1454                                // Extra latency cycles since wbck is 6 cycles
1455                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1456                                InstrStage<1, [A9_NPipe]>],
1457                               [6, 3, 1]>,
1458   //
1459   // Quad-register Integer Pair Add Long
1460   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1461                                InstrStage<1, [A9_MUX0], 0>,
1462                                InstrStage<1, [A9_DRegsN],   0, Required>,
1463                                // Extra latency cycles since wbck is 6 cycles
1464                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1465                                InstrStage<2, [A9_NPipe]>],
1466                               [6, 3, 1]>,
1467
1468   //
1469   // Double-register Integer Multiply (.8, .16)
1470   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1471                                InstrStage<1, [A9_MUX0], 0>,
1472                                InstrStage<1, [A9_DRegsN],   0, Required>,
1473                                // Extra latency cycles since wbck is 6 cycles
1474                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1475                                InstrStage<1, [A9_NPipe]>],
1476                               [6, 2, 2]>,
1477   //
1478   // Quad-register Integer Multiply (.8, .16)
1479   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1480                                InstrStage<1, [A9_MUX0], 0>,
1481                                InstrStage<1, [A9_DRegsN],   0, Required>,
1482                                // Extra latency cycles since wbck is 7 cycles
1483                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1484                                InstrStage<2, [A9_NPipe]>],
1485                               [7, 2, 2]>,
1486
1487   //
1488   // Double-register Integer Multiply (.32)
1489   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1490                                InstrStage<1, [A9_MUX0], 0>,
1491                                InstrStage<1, [A9_DRegsN],   0, Required>,
1492                                // Extra latency cycles since wbck is 7 cycles
1493                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1494                                InstrStage<2, [A9_NPipe]>],
1495                               [7, 2, 1]>,
1496   //
1497   // Quad-register Integer Multiply (.32)
1498   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1499                                InstrStage<1, [A9_MUX0], 0>,
1500                                InstrStage<1, [A9_DRegsN],   0, Required>,
1501                                // Extra latency cycles since wbck is 9 cycles
1502                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1503                                InstrStage<4, [A9_NPipe]>],
1504                               [9, 2, 1]>,
1505   //
1506   // Double-register Integer Multiply-Accumulate (.8, .16)
1507   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1508                                InstrStage<1, [A9_MUX0], 0>,
1509                                InstrStage<1, [A9_DRegsN],   0, Required>,
1510                                // Extra latency cycles since wbck is 6 cycles
1511                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1512                                InstrStage<1, [A9_NPipe]>],
1513                               [6, 3, 2, 2]>,
1514   //
1515   // Double-register Integer Multiply-Accumulate (.32)
1516   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1517                                InstrStage<1, [A9_MUX0], 0>,
1518                                InstrStage<1, [A9_DRegsN],   0, Required>,
1519                                // Extra latency cycles since wbck is 7 cycles
1520                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1521                                InstrStage<2, [A9_NPipe]>],
1522                               [7, 3, 2, 1]>,
1523   //
1524   // Quad-register Integer Multiply-Accumulate (.8, .16)
1525   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1526                                InstrStage<1, [A9_MUX0], 0>,
1527                                InstrStage<1, [A9_DRegsN],   0, Required>,
1528                                // Extra latency cycles since wbck is 7 cycles
1529                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1530                                InstrStage<2, [A9_NPipe]>],
1531                               [7, 3, 2, 2]>,
1532   //
1533   // Quad-register Integer Multiply-Accumulate (.32)
1534   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1535                                InstrStage<1, [A9_MUX0], 0>,
1536                                InstrStage<1, [A9_DRegsN],   0, Required>,
1537                                // Extra latency cycles since wbck is 9 cycles
1538                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1539                                InstrStage<4, [A9_NPipe]>],
1540                               [9, 3, 2, 1]>,
1541
1542   //
1543   // Move
1544   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1545                                InstrStage<1, [A9_MUX0], 0>,
1546                                InstrStage<1, [A9_DRegsN],   0, Required>,
1547                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1548                                InstrStage<1, [A9_NPipe]>],
1549                               [1,1]>,
1550   //
1551   // Move Immediate
1552   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1553                                InstrStage<1, [A9_MUX0], 0>,
1554                                InstrStage<1, [A9_DRegsN],   0, Required>,
1555                                // Extra latency cycles since wbck is 6 cycles
1556                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1557                                InstrStage<1, [A9_NPipe]>],
1558                               [3]>,
1559   //
1560   // Double-register Permute Move
1561   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1562                                InstrStage<1, [A9_MUX0], 0>,
1563                                InstrStage<1, [A9_DRegsN],   0, Required>,
1564                                // Extra latency cycles since wbck is 6 cycles
1565                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1566                                InstrStage<1, [A9_NPipe]>],
1567                               [2, 1]>,
1568   //
1569   // Quad-register Permute Move
1570   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1571                                InstrStage<1, [A9_MUX0], 0>,
1572                                InstrStage<1, [A9_DRegsN],   0, Required>,
1573                                // Extra latency cycles since wbck is 6 cycles
1574                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1575                                InstrStage<1, [A9_NPipe]>],
1576                               [2, 1]>,
1577   //
1578   // Integer to Single-precision Move
1579   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1580                                InstrStage<1, [A9_MUX0], 0>,
1581                                InstrStage<1, [A9_DRegsN],   0, Required>,
1582                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1583                                InstrStage<1, [A9_NPipe]>],
1584                               [1, 1]>,
1585   //
1586   // Integer to Double-precision Move
1587   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1588                                InstrStage<1, [A9_MUX0], 0>,
1589                                InstrStage<1, [A9_DRegsN],   0, Required>,
1590                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1591                                InstrStage<1, [A9_NPipe]>],
1592                               [1, 1, 1]>,
1593   //
1594   // Single-precision to Integer Move
1595   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1596                                InstrStage<1, [A9_MUX0], 0>,
1597                                InstrStage<1, [A9_DRegsN],   0, Required>,
1598                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1599                                InstrStage<1, [A9_NPipe]>],
1600                               [2, 1]>,
1601   //
1602   // Double-precision to Integer Move
1603   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1604                                InstrStage<1, [A9_MUX0], 0>,
1605                                InstrStage<1, [A9_DRegsN],   0, Required>,
1606                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1607                                InstrStage<1, [A9_NPipe]>],
1608                               [2, 2, 1]>,
1609   //
1610   // Integer to Lane Move
1611   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1612                                InstrStage<1, [A9_MUX0], 0>,
1613                                InstrStage<1, [A9_DRegsN],   0, Required>,
1614                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1615                                InstrStage<2, [A9_NPipe]>],
1616                               [3, 1, 1]>,
1617
1618   //
1619   // Vector narrow move
1620   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1621                                InstrStage<1, [A9_MUX0], 0>,
1622                                InstrStage<1, [A9_DRegsN],   0, Required>,
1623                                // Extra latency cycles since wbck is 6 cycles
1624                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1625                                InstrStage<1, [A9_NPipe]>],
1626                               [3, 1]>,
1627   //
1628   // Double-register FP Unary
1629   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1630                                InstrStage<1, [A9_MUX0], 0>,
1631                                InstrStage<1, [A9_DRegsN],   0, Required>,
1632                                // Extra latency cycles since wbck is 6 cycles
1633                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1634                                InstrStage<1, [A9_NPipe]>],
1635                               [5, 2]>,
1636   //
1637   // Quad-register FP Unary
1638   // Result written in N5, but that is relative to the last cycle of a
1639   // multicycle instruction, so we use 6 for those cases.
1640   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1641                                InstrStage<1, [A9_MUX0], 0>,
1642                                InstrStage<1, [A9_DRegsN],   0, Required>,
1643                                // Extra latency cycles since wbck is 7 cycles
1644                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1645                                InstrStage<2, [A9_NPipe]>],
1646                               [6, 2]>,
1647   //
1648   // Double-register FP Binary
1649   // FIXME: This itinerary is shared by many instructions, and the trailing
1650   // [2, 2] in its operand-cycle list is too optimistic.
1651   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1652                                InstrStage<1, [A9_MUX0], 0>,
1653                                InstrStage<1, [A9_DRegsN],   0, Required>,
1654                                // Extra latency cycles since wbck is 6 cycles
1655                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1656                                InstrStage<1, [A9_NPipe]>],
1657                               [5, 2, 2]>,
1658
1659   //
1660   // VPADD, etc.
1661   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1662                                InstrStage<1, [A9_MUX0], 0>,
1663                                InstrStage<1, [A9_DRegsN],   0, Required>,
1664                                // Extra latency cycles since wbck is 6 cycles
1665                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1666                                InstrStage<1, [A9_NPipe]>],
1667                               [5, 1, 1]>,
1668   //
1669   // Double-register FP VMUL
1670   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1671                                InstrStage<1, [A9_MUX0], 0>,
1672                                InstrStage<1, [A9_DRegsN],   0, Required>,
1673                                // Extra latency cycles since wbck is 6 cycles
1674                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1675                                InstrStage<1, [A9_NPipe]>],
1676                               [5, 2, 1]>,
1677   //
1678   // Quad-register FP Binary
1679   // Result written in N5, but that is relative to the last cycle of multicycle,
1680   // so we use 6 for those cases
1681   // FIXME: This itinerary is shared by many instructions, and the trailing
1682   // [2, 2] operand cycles here are too optimistic.
1683   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1684                                InstrStage<1, [A9_MUX0], 0>,
1685                                InstrStage<1, [A9_DRegsN],   0, Required>,
1686                                // Extra latency cycles since wbck is 7 cycles
1687                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1688                                InstrStage<2, [A9_NPipe]>],
1689                               [6, 2, 2]>,
1690   //
1691   // Quad-register FP VMUL
1692   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1693                                InstrStage<1, [A9_MUX0], 0>,
1694                                InstrStage<1, [A9_DRegsN],   0, Required>,
1695                                // Extra latency cycles since wbck is 7 cycles
1696                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1697                                InstrStage<1, [A9_NPipe]>],
1698                               [6, 2, 1]>,
1699   //
1700   // Double-register FP Multiple-Accumulate
1701   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1702                                InstrStage<1, [A9_MUX0], 0>,
1703                                InstrStage<1, [A9_DRegsN],   0, Required>,
1704                                // Extra latency cycles since wbck is 7 cycles
1705                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1706                                InstrStage<2, [A9_NPipe]>],
1707                               [6, 3, 2, 1]>,
1708   //
1709   // Quad-register FP Multiple-Accumulate
1710   // Result written in N9, but that is relative to the last cycle of multicycle,
1711   // so we use 10 for those cases (FIXME: the result operand cycle below is 8).
1712   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1713                                InstrStage<1, [A9_MUX0], 0>,
1714                                InstrStage<1, [A9_DRegsN],   0, Required>,
1715                                // Extra latency cycles since wbck is 9 cycles
1716                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1717                                InstrStage<4, [A9_NPipe]>],
1718                               [8, 4, 2, 1]>,
1719   //
1720   // Double-register Fused FP Multiple-Accumulate
1721   InstrItinData<IIC_VFMACD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1722                                InstrStage<1, [A9_MUX0], 0>,
1723                                InstrStage<1, [A9_DRegsN],   0, Required>,
1724                                // Extra latency cycles since wbck is 7 cycles
1725                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1726                                InstrStage<2, [A9_NPipe]>],
1727                               [6, 3, 2, 1]>,
1728   //
1729   // Quad-register Fused FP Multiple-Accumulate
1730   // Result written in N9, but that is relative to the last cycle of multicycle,
1731   // so we use 10 for those cases (FIXME: the result operand cycle below is 8).
1732   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1733                                InstrStage<1, [A9_MUX0], 0>,
1734                                InstrStage<1, [A9_DRegsN],   0, Required>,
1735                                // Extra latency cycles since wbck is 9 cycles
1736                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1737                                InstrStage<4, [A9_NPipe]>],
1738                               [8, 4, 2, 1]>,
1739   //
1740   // Double-register Reciprocal Step
1741   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1742                                InstrStage<1, [A9_MUX0], 0>,
1743                                InstrStage<1, [A9_DRegsN],   0, Required>,
1744                                // Extra latency cycles since wbck is 10 cycles
1745                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1746                                InstrStage<1, [A9_NPipe]>],
1747                               [9, 2, 2]>,
1748   //
1749   // Quad-register Reciprocal Step
1750   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1751                                InstrStage<1, [A9_MUX0], 0>,
1752                                InstrStage<1, [A9_DRegsN],   0, Required>,
1753                                // Extra latency cycles since wbck is 11 cycles
1754                                InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1755                                InstrStage<2, [A9_NPipe]>],
1756                               [10, 2, 2]>,
1757   //
1758   // Double-register Permute
1759   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1760                                InstrStage<1, [A9_MUX0], 0>,
1761                                InstrStage<1, [A9_DRegsN],   0, Required>,
1762                                // Extra latency cycles since wbck is 6 cycles
1763                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1764                                InstrStage<1, [A9_NPipe]>],
1765                               [2, 2, 1, 1]>,
1766   //
1767   // Quad-register Permute
1768   // Result written in N2, but that is relative to the last cycle of multicycle,
1769   // so we use 3 for those cases
1770   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1771                                InstrStage<1, [A9_MUX0], 0>,
1772                                InstrStage<1, [A9_DRegsN],   0, Required>,
1773                                // Extra latency cycles since wbck is 7 cycles
1774                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1775                                InstrStage<2, [A9_NPipe]>],
1776                               [3, 3, 1, 1]>,
1777   //
1778   // Quad-register Permute (3 cycle issue)
1779   // Result written in N2, but that is relative to the last cycle of multicycle,
1780   // so we use 4 for those cases
1781   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1782                                InstrStage<1, [A9_MUX0], 0>,
1783                                InstrStage<1, [A9_DRegsN],   0, Required>,
1784                                // Extra latency cycles since wbck is 8 cycles
1785                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1786                                InstrStage<3, [A9_NPipe]>],
1787                               [4, 4, 1, 1]>,
1788
1789   //
1790   // Double-register VEXT
1791   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1792                                InstrStage<1, [A9_MUX0], 0>,
1793                                InstrStage<1, [A9_DRegsN],   0, Required>,
1794                                // Extra latency cycles since wbck is 6 cycles
1795                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1796                                InstrStage<1, [A9_NPipe]>],
1797                               [2, 1, 1]>,
1798   //
1799   // Quad-register VEXT
1800   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1801                                InstrStage<1, [A9_MUX0], 0>,
1802                                InstrStage<1, [A9_DRegsN],   0, Required>,
1803                                // Extra latency cycles since wbck is 7 cycles
1804                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1805                                InstrStage<2, [A9_NPipe]>],
1806                               [3, 1, 2]>,
1807   //
1808   // VTB
1809   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1810                                InstrStage<1, [A9_MUX0], 0>,
1811                                InstrStage<1, [A9_DRegsN],   0, Required>,
1812                                // Extra latency cycles since wbck is 7 cycles
1813                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1814                                InstrStage<2, [A9_NPipe]>],
1815                               [3, 2, 1]>,
1816   InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1817                                InstrStage<1, [A9_MUX0], 0>,
1818                                InstrStage<2, [A9_DRegsN],   0, Required>,
1819                                // Extra latency cycles since wbck is 7 cycles
1820                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1821                                InstrStage<2, [A9_NPipe]>],
1822                               [3, 2, 2, 1]>,
1823   InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1824                                InstrStage<1, [A9_MUX0], 0>,
1825                                InstrStage<2, [A9_DRegsN],   0, Required>,
1826                                // Extra latency cycles since wbck is 8 cycles
1827                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1828                                InstrStage<3, [A9_NPipe]>],
1829                               [4, 2, 2, 3, 1]>,
1830   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1831                                InstrStage<1, [A9_MUX0], 0>,
1832                                InstrStage<1, [A9_DRegsN],   0, Required>,
1833                                // Extra latency cycles since wbck is 8 cycles
1834                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1835                                InstrStage<3, [A9_NPipe]>],
1836                               [4, 2, 2, 3, 3, 1]>,
1837   //
1838   // VTBX
1839   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1840                                InstrStage<1, [A9_MUX0], 0>,
1841                                InstrStage<1, [A9_DRegsN],   0, Required>,
1842                                // Extra latency cycles since wbck is 7 cycles
1843                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1844                                InstrStage<2, [A9_NPipe]>],
1845                               [3, 1, 2, 1]>,
1846   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1847                                InstrStage<1, [A9_MUX0], 0>,
1848                                InstrStage<1, [A9_DRegsN],   0, Required>,
1849                                // Extra latency cycles since wbck is 7 cycles
1850                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1851                                InstrStage<2, [A9_NPipe]>],
1852                               [3, 1, 2, 2, 1]>,
1853   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1854                                InstrStage<1, [A9_MUX0], 0>,
1855                                InstrStage<1, [A9_DRegsN],   0, Required>,
1856                                // Extra latency cycles since wbck is 8 cycles
1857                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1858                                InstrStage<3, [A9_NPipe]>],
1859                               [4, 1, 2, 2, 3, 1]>,
1860   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1861                                InstrStage<1, [A9_MUX0], 0>,
1862                                InstrStage<1, [A9_DRegsN],   0, Required>,
1863                                // Extra latency cycles since wbck is 8 cycles
1864                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1865                                InstrStage<2, [A9_NPipe]>],
1866                               [4, 1, 2, 2, 3, 3, 1]>
1867 ]>;