eab6b98a846344ca2e70ae409a6055815c720c0b
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units
19 def A9_Issue0  : FuncUnit; // Issue slot 0
20 def A9_Issue1  : FuncUnit; // Issue slot 1
21 def A9_Branch  : FuncUnit; // Branch unit
22 def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0 (multiplies use only this)
23 def A9_ALU1    : FuncUnit; // ALU pipeline 1 (no multiplies scheduled here)
24 def A9_AGU     : FuncUnit; // Address generation unit for loads / stores
25 def A9_NPipe   : FuncUnit; // NEON pipeline (VFP ops below execute here too)
26 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LSUnit  : FuncUnit; // Load / store unit
28 def A9_DRegsVFP: FuncUnit; // Artificial FU: FP register set, VFP side
29 def A9_DRegsN  : FuncUnit; // Artificial FU: FP register set, NEON side
30
31 // Bypasses
32 def A9_LdBypass : Bypass; // Named on load defs and on ALU/compare sources
33
34 def CortexA9Itineraries : ProcessorItineraries<
35   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
36    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
37   [A9_LdBypass], [
38   // Two fully-pipelined integer ALU pipelines
39
40   //
41   // Move instructions, unconditional
42   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
43                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
44   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
45                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
46   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
48   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
50   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
52                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
53   //
54   // MVN instructions
55   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
56                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
57                               [1]>,
58   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
59                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
60                               [1, 1], [NoBypass, A9_LdBypass]>,
61   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
62                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
63                               [2, 1]>,
64   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
65                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
66                               [3, 1, 1]>,
67   //
68   // No operand cycles
69   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
70                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
71   //
72   // Binary Instructions that produce a result
73   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
74                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
75                             [1, 1], [NoBypass, A9_LdBypass]>,
76   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
77                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
78                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
79   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
80                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
81                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
82   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
83                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
84                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
85   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
86                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
87                             [3, 1, 1, 1],
88                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
89   //
90   // Bitwise Instructions that produce a result
91   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
92                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
93   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
94                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
95   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
97   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
98                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
99   //
100   // Unary Instructions that produce a result
101
102   // CLZ, RBIT, etc.
103   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
104                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
105
106   // BFC, BFI, UBFX, SBFX
107   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
109
110   //
111   // Zero and sign extension instructions
112   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
113                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
114   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
115                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
116   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
117                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
118   //
119   // Compare instructions
120   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
121                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
122                                [1], [A9_LdBypass]>,
123   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
124                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
125                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
126   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
127     InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1], [A9_LdBypass, NoBypass]>,
128   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
129                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
130                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
131   //
132   // Test instructions
133   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
134                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
135   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
136                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
137   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
138                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
139   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
140                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
141   //
142   // Move instructions, conditional
143   // FIXME: Correctly model the extra input dep on the destination.
144   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
145                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
146   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
147                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
148   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
150   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
152   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
153                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
154                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
155                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
156
157   // Integer multiply pipeline
158   //
159   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
160                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
161   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                                InstrStage<2, [A9_ALU0]>],
163                               [3, 1, 1, 1]>,
164   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
165                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
166   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
167                                InstrStage<2, [A9_ALU0]>],
168                               [4, 1, 1, 1]>,
169   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
170                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
171   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
172                                InstrStage<3, [A9_ALU0]>],
173                               [4, 5, 1, 1]>,
174   // Integer load pipeline
175   // FIXME: The timings are some rough approximations
176   //
177   // Immediate offset
178   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
179                                  InstrStage<1, [A9_MUX0], 0>,
180                                  InstrStage<1, [A9_AGU], 0>,
181                                  InstrStage<1, [A9_LSUnit]>],
182                                 [3, 1], [A9_LdBypass]>,
183   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
184                                  InstrStage<1, [A9_MUX0], 0>,
185                                  InstrStage<2, [A9_AGU], 0>,
186                                  InstrStage<1, [A9_LSUnit]>],
187                                 [4, 1], [A9_LdBypass]>,
188   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
189   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
190                                  InstrStage<1, [A9_MUX0], 0>,
191                                  InstrStage<2, [A9_AGU], 0>,
192                                  InstrStage<1, [A9_LSUnit]>],
193                                 [3, 3, 1], [A9_LdBypass]>,
194   //
195   // Register offset
196   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
197                                  InstrStage<1, [A9_MUX0], 0>,
198                                  InstrStage<1, [A9_AGU], 0>,
199                                  InstrStage<1, [A9_LSUnit]>],
200                                 [3, 1, 1], [A9_LdBypass]>,
201   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
202                                  InstrStage<1, [A9_MUX0], 0>,
203                                  InstrStage<2, [A9_AGU], 0>,
204                                  InstrStage<1, [A9_LSUnit]>],
205                                 [4, 1, 1], [A9_LdBypass]>,
206   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
207                                  InstrStage<1, [A9_MUX0], 0>,
208                                  InstrStage<2, [A9_AGU], 0>,
209                                  InstrStage<1, [A9_LSUnit]>],
210                                 [3, 3, 1, 1], [A9_LdBypass]>,
211   //
212   // Scaled register offset
213   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
214                                  InstrStage<1, [A9_MUX0], 0>,
215                                  InstrStage<1, [A9_AGU], 0>,
216                                  InstrStage<1, [A9_LSUnit]>],
217                                 [4, 1, 1], [A9_LdBypass]>,
218   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
219                                  InstrStage<1, [A9_MUX0], 0>,
220                                  InstrStage<2, [A9_AGU], 0>,
221                                  InstrStage<1, [A9_LSUnit]>],
222                                 [5, 1, 1], [A9_LdBypass]>,
223   //
224   // Immediate offset with update
225   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
226                                  InstrStage<1, [A9_MUX0], 0>,
227                                  InstrStage<1, [A9_AGU], 0>,
228                                  InstrStage<1, [A9_LSUnit]>],
229                                 [3, 2, 1], [A9_LdBypass]>,
230   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
231                                  InstrStage<1, [A9_MUX0], 0>,
232                                  InstrStage<2, [A9_AGU], 0>,
233                                  InstrStage<1, [A9_LSUnit]>],
234                                 [4, 3, 1], [A9_LdBypass]>,
235   //
236   // Register offset with update
237   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
238                                  InstrStage<1, [A9_MUX0], 0>,
239                                  InstrStage<1, [A9_AGU], 0>,
240                                  InstrStage<1, [A9_LSUnit]>],
241                                 [3, 2, 1, 1], [A9_LdBypass]>,
242   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
243                                  InstrStage<1, [A9_MUX0], 0>,
244                                  InstrStage<2, [A9_AGU], 0>,
245                                  InstrStage<1, [A9_LSUnit]>],
246                                 [4, 3, 1, 1], [A9_LdBypass]>,
247   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
248                                  InstrStage<1, [A9_MUX0], 0>,
249                                  InstrStage<2, [A9_AGU], 0>,
250                                  InstrStage<1, [A9_LSUnit]>],
251                                 [3, 3, 1, 1], [A9_LdBypass]>,
252   //
253   // Scaled register offset with update
254   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
255                                  InstrStage<1, [A9_MUX0], 0>,
256                                  InstrStage<1, [A9_AGU], 0>,
257                                  InstrStage<1, [A9_LSUnit]>],
258                                 [4, 3, 1, 1], [A9_LdBypass]>,
259   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
260                                   InstrStage<1, [A9_MUX0], 0>,
261                                   InstrStage<2, [A9_AGU], 0>,
262                                   InstrStage<1, [A9_LSUnit]>],
263                                  [5, 4, 1, 1], [A9_LdBypass]>,
264   //
265   // Load multiple, def is the 5th operand.
266   // FIXME: This assumes 3 to 4 registers.
267   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
268                                 InstrStage<1, [A9_MUX0], 0>,
269                                 InstrStage<2, [A9_AGU], 1>,
270                                 InstrStage<2, [A9_LSUnit]>],
271                                [1, 1, 1, 1, 3],
272                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
273   //
274   // Load multiple + update, defs are the 1st and 5th operands.
275   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
276                                 InstrStage<1, [A9_MUX0], 0>,
277                                 InstrStage<2, [A9_AGU], 1>,
278                                 InstrStage<2, [A9_LSUnit]>],
279                                [2, 1, 1, 1, 3],
280                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
281   //
282   // Load multiple plus branch
283   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
284                                 InstrStage<1, [A9_MUX0], 0>,
285                                 InstrStage<1, [A9_AGU], 1>,
286                                 InstrStage<2, [A9_LSUnit]>,
287                                 InstrStage<1, [A9_Branch]>],
288                                [1, 2, 1, 1, 3],
289                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
290   //
291   // Pop, def is the 3rd operand.
292   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
293                                 InstrStage<1, [A9_MUX0], 0>,
294                                 InstrStage<2, [A9_AGU], 1>,
295                                 InstrStage<2, [A9_LSUnit]>],
296                                [1, 1, 3],
297                                [NoBypass, NoBypass, A9_LdBypass]>,
298   //
299   // Pop + branch, def is the 3rd operand.
300   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
301                                 InstrStage<1, [A9_MUX0], 0>,
302                                 InstrStage<2, [A9_AGU], 1>,
303                                 InstrStage<2, [A9_LSUnit]>,
304                                 InstrStage<1, [A9_Branch]>],
305                                [1, 1, 3],
306                                [NoBypass, NoBypass, A9_LdBypass]>,
307
308   //
309   // iLoadi + iALUr for t2LDRpci_pic.
310   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
311                                 InstrStage<1, [A9_MUX0], 0>,
312                                 InstrStage<1, [A9_AGU], 0>,
313                                 InstrStage<1, [A9_LSUnit]>,
314                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
315                                [2, 1]>,
316
317   // Integer store pipeline
318   //
319   // Immediate offset
320   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
321                                  InstrStage<1, [A9_MUX0], 0>,
322                                  InstrStage<1, [A9_AGU], 0>,
323                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
324   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
325                                  InstrStage<1, [A9_MUX0], 0>,
326                                  InstrStage<2, [A9_AGU], 1>,
327                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
328   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
329   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
330                                  InstrStage<1, [A9_MUX0], 0>,
331                                  InstrStage<2, [A9_AGU], 1>,
332                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
333   //
334   // Register offset
335   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
336                                  InstrStage<1, [A9_MUX0], 0>,
337                                  InstrStage<1, [A9_AGU], 0>,
338                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
339   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
340                                  InstrStage<1, [A9_MUX0], 0>,
341                                  InstrStage<2, [A9_AGU], 1>,
342                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
343   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
344                                  InstrStage<1, [A9_MUX0], 0>,
345                                  InstrStage<2, [A9_AGU], 1>,
346                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
347   //
348   // Scaled register offset
349   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
350                                   InstrStage<1, [A9_MUX0], 0>,
351                                   InstrStage<1, [A9_AGU], 0>,
352                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
353   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
354                                   InstrStage<1, [A9_MUX0], 0>,
355                                   InstrStage<2, [A9_AGU], 1>,
356                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
357   //
358   // Immediate offset with update
359   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
360                                   InstrStage<1, [A9_MUX0], 0>,
361                                   InstrStage<1, [A9_AGU], 0>,
362                                   InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
363   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
364                                   InstrStage<1, [A9_MUX0], 0>,
365                                   InstrStage<2, [A9_AGU], 1>,
366                                   InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
367   //
368   // Register offset with update
369   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
370                                   InstrStage<1, [A9_MUX0], 0>,
371                                   InstrStage<1, [A9_AGU], 0>,
372                                   InstrStage<1, [A9_LSUnit]>],
373                                  [2, 1, 1, 1]>,
374   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
375                                   InstrStage<1, [A9_MUX0], 0>,
376                                   InstrStage<2, [A9_AGU], 1>,
377                                   InstrStage<1, [A9_LSUnit]>],
378                                  [3, 1, 1, 1]>,
379   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
380                                   InstrStage<1, [A9_MUX0], 0>,
381                                   InstrStage<2, [A9_AGU], 1>,
382                                   InstrStage<1, [A9_LSUnit]>],
383                                  [3, 1, 1, 1]>,
384   //
385   // Scaled register offset with update
386   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
387                                     InstrStage<1, [A9_MUX0], 0>,
388                                     InstrStage<1, [A9_AGU], 0>,
389                                     InstrStage<1, [A9_LSUnit]>],
390                                    [2, 1, 1, 1]>,
391   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
392                                     InstrStage<1, [A9_MUX0], 0>,
393                                     InstrStage<2, [A9_AGU], 1>,
394                                     InstrStage<1, [A9_LSUnit]>],
395                                    [3, 1, 1, 1]>,
396   //
397   // Store multiple
398   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
399                                 InstrStage<1, [A9_MUX0], 0>,
400                                 InstrStage<1, [A9_AGU], 0>,
401                                 InstrStage<2, [A9_LSUnit]>]>,
402   //
403   // Store multiple + update
404   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
405                                 InstrStage<1, [A9_MUX0], 0>,
406                                 InstrStage<1, [A9_AGU], 0>,
407                                 InstrStage<2, [A9_LSUnit]>], [2]>,
408
409   //
410   // Preload
411   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
412
413   // Branch
414   //
415   // no delay slots, so the latency of a branch is unimportant
416   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
417                                 InstrStage<1, [A9_Issue1], 0>,
418                                 InstrStage<1, [A9_Branch]>]>,
419
420   // VFP and NEON share the same register file. This means that every VFP
421   // instruction should wait for full completion of the consecutive NEON
422   // instruction and vice-versa. We model this behavior with two artificial FUs:
423   // DRegsVFP and DRegsN.
424   //
425   // Every VFP instruction:
426   //  - Acquires DRegsVFP resource for 1 cycle
427   //  - Reserves DRegsN resource for the whole duration (including time to
428   //    register file writeback!).
429   // Every NEON instruction does the same but with FUs swapped.
430   //
431   // Since the reserved FU cannot be acquired, this models precisely
432   // "cross-domain" stalls.
433
434   // VFP
435   // Issue through integer pipeline, and execute in NEON unit.
436
437   // FP Special Register to Integer Register File Move
438   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
439                               InstrStage<1, [A9_MUX0], 0>,
440                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
441                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
442                               InstrStage<1, [A9_NPipe]>],
443                              [1]>,
444   //
445   // Single-precision FP Unary
446   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
447                                InstrStage<1, [A9_MUX0], 0>,
448                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
449                                // Extra latency cycles since wbck is 2 cycles
450                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
451                                InstrStage<1, [A9_NPipe]>],
452                               [1, 1]>,
453   //
454   // Double-precision FP Unary
455   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
456                                InstrStage<1, [A9_MUX0], 0>,
457                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
458                                // Extra latency cycles since wbck is 2 cycles
459                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
460                                InstrStage<1, [A9_NPipe]>],
461                               [1, 1]>,
462
463   //
464   // Single-precision FP Compare
465   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
466                                InstrStage<1, [A9_MUX0], 0>,
467                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
468                                // Extra latency cycles since wbck is 4 cycles
469                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
470                                InstrStage<1, [A9_NPipe]>],
471                               [1, 1]>,
472   //
473   // Double-precision FP Compare
474   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
475                                InstrStage<1, [A9_MUX0], 0>,
476                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
477                                // Extra latency cycles since wbck is 4 cycles
478                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
479                                InstrStage<1, [A9_NPipe]>],
480                               [1, 1]>,
481   //
482   // Single to Double FP Convert
483   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
484                                InstrStage<1, [A9_MUX0], 0>,
485                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
486                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
487                                InstrStage<1, [A9_NPipe]>],
488                               [4, 1]>,
489   //
490   // Double to Single FP Convert
491   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
492                                InstrStage<1, [A9_MUX0], 0>,
493                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
494                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
495                                InstrStage<1, [A9_NPipe]>],
496                               [4, 1]>,
497
498   //
499   // Single to Half FP Convert
500   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
501                                InstrStage<1, [A9_MUX0], 0>,
502                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
503                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
504                                InstrStage<1, [A9_NPipe]>],
505                               [4, 1]>,
506   //
507   // Half to Single FP Convert
508   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
509                                InstrStage<1, [A9_MUX0], 0>,
510                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
511                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
512                                InstrStage<1, [A9_NPipe]>],
513                               [2, 1]>,
514
515   //
516   // Single-Precision FP to Integer Convert
517   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
518                                InstrStage<1, [A9_MUX0], 0>,
519                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
520                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
521                                InstrStage<1, [A9_NPipe]>],
522                               [4, 1]>,
523   //
524   // Double-Precision FP to Integer Convert
525   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
526                                InstrStage<1, [A9_MUX0], 0>,
527                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
528                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
529                                InstrStage<1, [A9_NPipe]>],
530                               [4, 1]>,
531   //
532   // Integer to Single-Precision FP Convert
533   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
534                                InstrStage<1, [A9_MUX0], 0>,
535                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
536                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
537                                InstrStage<1, [A9_NPipe]>],
538                               [4, 1]>,
539   //
540   // Integer to Double-Precision FP Convert
541   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
542                                InstrStage<1, [A9_MUX0], 0>,
543                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
544                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
545                                InstrStage<1, [A9_NPipe]>],
546                               [4, 1]>,
547   //
548   // Single-precision FP ALU
549   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
550                                InstrStage<1, [A9_MUX0], 0>,
551                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
552                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
553                                InstrStage<1, [A9_NPipe]>],
554                               [4, 1, 1]>,
555   //
556   // Double-precision FP ALU
557   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
558                                InstrStage<1, [A9_MUX0], 0>,
559                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
560                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
561                                InstrStage<1, [A9_NPipe]>],
562                               [4, 1, 1]>,
563   //
564   // Single-precision FP Multiply
565   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
566                                InstrStage<1, [A9_MUX0], 0>,
567                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
568                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
569                                InstrStage<1, [A9_NPipe]>],
570                               [5, 1, 1]>,
571   //
572   // Double-precision FP Multiply
573   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
574                                InstrStage<1, [A9_MUX0], 0>,
575                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
576                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
577                                InstrStage<2, [A9_NPipe]>],
578                               [6, 1, 1]>,
579   //
580   // Single-precision FP MAC
581   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
582                                InstrStage<1, [A9_MUX0], 0>,
583                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
584                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
585                                InstrStage<1, [A9_NPipe]>],
586                               [8, 1, 1, 1]>,
587   //
588   // Double-precision FP MAC
589   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
590                                InstrStage<1,  [A9_MUX0], 0>,
591                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
592                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
593                                InstrStage<2,  [A9_NPipe]>],
594                               [9, 1, 1, 1]>,
595   //
596   // Single-precision FP DIV
597   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
598                                InstrStage<1,  [A9_MUX0], 0>,
599                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
600                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
601                                InstrStage<10, [A9_NPipe]>],
602                               [15, 1, 1]>,
603   //
604   // Double-precision FP DIV
605   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
606                                InstrStage<1,  [A9_MUX0], 0>,
607                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
608                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
609                                InstrStage<20, [A9_NPipe]>],
610                               [25, 1, 1]>,
611   //
612   // Single-precision FP SQRT
613   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
614                                InstrStage<1,  [A9_MUX0], 0>,
615                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
616                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
617                                InstrStage<13, [A9_NPipe]>],
618                               [17, 1]>,
619   //
620   // Double-precision FP SQRT
621   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
622                                InstrStage<1,  [A9_MUX0], 0>,
623                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
624                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
625                                InstrStage<28, [A9_NPipe]>],
626                               [32, 1]>,
627
628   //
629   // Integer to Single-precision Move
630   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
631                                InstrStage<1, [A9_MUX0], 0>,
632                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
633                                // 1 extra latency cycle: writeback is 2 cycles
634                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
635                                InstrStage<1, [A9_NPipe]>],
636                               [1, 1]>,
637   //
638   // Integer to Double-precision Move
639   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
640                                InstrStage<1, [A9_MUX0], 0>,
641                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
642                                // 1 extra latency cycle: writeback is 2 cycles
643                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
644                                InstrStage<1, [A9_NPipe]>],
645                               [1, 1, 1]>,
646   //
647   // Single-precision to Integer Move
648   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
649                                InstrStage<1, [A9_MUX0], 0>,
650                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
651                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
652                                InstrStage<1, [A9_NPipe]>],
653                               [2, 1]>,
654   //
655   // Double-precision to Integer Move
656   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
657                                InstrStage<1, [A9_MUX0], 0>,
658                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
659                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
660                                InstrStage<1, [A9_NPipe]>],
661                               [2, 1, 1]>,
662   //
663   // Single-precision FP Load
664   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
665                                InstrStage<1, [A9_MUX0], 0>,
666                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
667                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
668                                InstrStage<1, [A9_NPipe], 0>,
669                                InstrStage<1, [A9_LSUnit]>],
670                               [1, 1]>,
671   //
672   // Double-precision FP Load
673   // FIXME: Result latency is 1 if address is 64-bit aligned.
674   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
675                                InstrStage<1, [A9_MUX0], 0>,
676                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
677                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
678                                InstrStage<1, [A9_NPipe], 0>,
679                                InstrStage<1, [A9_LSUnit]>],
680                               [2, 1]>,
681   //
682   // FP Load Multiple
683   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
684                                InstrStage<1, [A9_MUX0], 0>,
685                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
686                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
687                                InstrStage<1, [A9_NPipe], 0>,
688                                InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
689   //
690   // FP Load Multiple + update
691   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
692                                InstrStage<1, [A9_MUX0], 0>,
693                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
694                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
695                                InstrStage<1, [A9_NPipe], 0>,
696                                InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
697   //
698   // Single-precision FP Store
699   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
700                                InstrStage<1, [A9_MUX0], 0>,
701                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
702                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
703                                InstrStage<1, [A9_NPipe], 0>,
704                                InstrStage<1, [A9_LSUnit]>],
705                               [1, 1]>,
706   //
707   // Double-precision FP Store
708   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
709                                InstrStage<1, [A9_MUX0], 0>,
710                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
711                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
712                                InstrStage<1, [A9_NPipe], 0>,
713                                InstrStage<1, [A9_LSUnit]>],
714                               [1, 1]>,
715   //
716   // FP Store Multiple
717   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
718                                InstrStage<1, [A9_MUX0], 0>,
719                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
720                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
721                                InstrStage<1, [A9_NPipe], 0>,
722                                InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>,
723   //
724   // FP Store Multiple + update
725   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
726                                 InstrStage<1, [A9_MUX0], 0>,
727                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
728                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
729                                 InstrStage<1, [A9_NPipe], 0>,
730                                 InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>,
731   // NEON
732   // VLD1
733   // FIXME: Conservatively assume insufficient alignment.
734   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
735                                InstrStage<1, [A9_MUX0], 0>,
736                                InstrStage<1, [A9_DRegsN],   0, Required>,
737                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
738                                InstrStage<2, [A9_NPipe], 0>,
739                                InstrStage<2, [A9_LSUnit]>],
740                               [2, 1]>,
741   // VLD1x2
742   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
743                                InstrStage<1, [A9_MUX0], 0>,
744                                InstrStage<1, [A9_DRegsN],   0, Required>,
745                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
746                                InstrStage<2, [A9_NPipe], 0>,
747                                InstrStage<2, [A9_LSUnit]>],
748                               [2, 2, 1]>,
749   // VLD1x3
750   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
751                                InstrStage<1, [A9_MUX0], 0>,
752                                InstrStage<1, [A9_DRegsN],   0, Required>,
753                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
754                                InstrStage<3, [A9_NPipe], 0>,
755                                InstrStage<3, [A9_LSUnit]>],
756                               [2, 2, 3, 1]>,
757   // VLD1x4
758   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
759                                InstrStage<1, [A9_MUX0], 0>,
760                                InstrStage<1, [A9_DRegsN],   0, Required>,
761                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
762                                InstrStage<3, [A9_NPipe], 0>,
763                                InstrStage<3, [A9_LSUnit]>],
764                               [2, 2, 3, 3, 1]>,
765   // VLD1u
766   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
767                                InstrStage<1, [A9_MUX0], 0>,
768                                InstrStage<1, [A9_DRegsN],   0, Required>,
769                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
770                                InstrStage<2, [A9_NPipe], 0>,
771                                InstrStage<2, [A9_LSUnit]>],
772                               [2, 2, 1]>,
773   // VLD1x2u
774   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
775                                InstrStage<1, [A9_MUX0], 0>,
776                                InstrStage<1, [A9_DRegsN],   0, Required>,
777                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
778                                InstrStage<2, [A9_NPipe], 0>,
779                                InstrStage<2, [A9_LSUnit]>],
780                               [2, 2, 2, 1]>,
781   // VLD1x3u
782   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
783                                InstrStage<1, [A9_MUX0], 0>,
784                                InstrStage<1, [A9_DRegsN],   0, Required>,
785                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
786                                InstrStage<3, [A9_NPipe], 0>,
787                                InstrStage<3, [A9_LSUnit]>],
788                               [2, 2, 3, 2, 1]>,
789   // VLD1x4u
790   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
791                                InstrStage<1, [A9_MUX0], 0>,
792                                InstrStage<1, [A9_DRegsN],   0, Required>,
793                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
794                                InstrStage<3, [A9_NPipe], 0>,
795                                InstrStage<3, [A9_LSUnit]>],
796                               [2, 2, 3, 3, 2, 1]>,
797   //
798   // VLD1ln
799   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
800                                InstrStage<1, [A9_MUX0], 0>,
801                                InstrStage<1, [A9_DRegsN],   0, Required>,
802                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
803                                InstrStage<3, [A9_NPipe], 0>,
804                                InstrStage<3, [A9_LSUnit]>],
805                               [4, 1, 1, 1]>,
806   //
807   // VLD1lnu
808   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
809                                InstrStage<1, [A9_MUX0], 0>,
810                                InstrStage<1, [A9_DRegsN],   0, Required>,
811                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
812                                InstrStage<3, [A9_NPipe], 0>,
813                                InstrStage<3, [A9_LSUnit]>],
814                               [4, 2, 1, 1, 1, 1]>,
815   //
816   // VLD1dup
817   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
818                                InstrStage<1, [A9_MUX0], 0>,
819                                InstrStage<1, [A9_DRegsN],   0, Required>,
820                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
821                                InstrStage<2, [A9_NPipe], 0>,
822                                InstrStage<2, [A9_LSUnit]>],
823                               [3, 1]>,
824   //
825   // VLD1dupu
826   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
827                                InstrStage<1, [A9_MUX0], 0>,
828                                InstrStage<1, [A9_DRegsN],   0, Required>,
829                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
830                                InstrStage<2, [A9_NPipe], 0>,
831                                InstrStage<2, [A9_LSUnit]>],
832                               [3, 2, 1, 1]>,
833   //
834   // VLD2
835   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
836                                InstrStage<1, [A9_MUX0], 0>,
837                                InstrStage<1, [A9_DRegsN],   0, Required>,
838                                // Extra latency cycles: writeback is 7 cycles
839                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
840                                InstrStage<2, [A9_NPipe], 0>,
841                                InstrStage<2, [A9_LSUnit]>],
842                               [3, 3, 1]>,
843   //
844   // VLD2x2
845   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
846                                InstrStage<1, [A9_MUX0], 0>,
847                                InstrStage<1, [A9_DRegsN],   0, Required>,
848                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
849                                InstrStage<3, [A9_NPipe], 0>,
850                                InstrStage<3, [A9_LSUnit]>],
851                               [3, 4, 3, 4, 1]>,
852   //
853   // VLD2ln
854   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
855                                InstrStage<1, [A9_MUX0], 0>,
856                                InstrStage<1, [A9_DRegsN],   0, Required>,
857                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
858                                InstrStage<3, [A9_NPipe], 0>,
859                                InstrStage<3, [A9_LSUnit]>],
860                               [4, 4, 1, 1, 1, 1]>,
861   //
862   // VLD2u
863   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
864                                InstrStage<1, [A9_MUX0], 0>,
865                                InstrStage<1, [A9_DRegsN],   0, Required>,
866                                // Extra latency cycles: writeback is 7 cycles
867                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
868                                InstrStage<2, [A9_NPipe], 0>,
869                                InstrStage<2, [A9_LSUnit]>],
870                               [3, 3, 2, 1, 1, 1]>,
871   //
872   // VLD2x2u
873   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
874                                InstrStage<1, [A9_MUX0], 0>,
875                                InstrStage<1, [A9_DRegsN],   0, Required>,
876                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
877                                InstrStage<3, [A9_NPipe], 0>,
878                                InstrStage<3, [A9_LSUnit]>],
879                               [3, 4, 3, 4, 2, 1]>,
880   //
881   // VLD2lnu
882   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
883                                InstrStage<1, [A9_MUX0], 0>,
884                                InstrStage<1, [A9_DRegsN],   0, Required>,
885                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
886                                InstrStage<3, [A9_NPipe], 0>,
887                                InstrStage<3, [A9_LSUnit]>],
888                               [4, 4, 2, 1, 1, 1, 1, 1]>,
889   //
890   // VLD2dup
891   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
892                                InstrStage<1, [A9_MUX0], 0>,
893                                InstrStage<1, [A9_DRegsN],   0, Required>,
894                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
895                                InstrStage<2, [A9_NPipe], 0>,
896                                InstrStage<2, [A9_LSUnit]>],
897                               [3, 3, 1]>,
898   //
899   // VLD2dupu
900   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
901                                InstrStage<1, [A9_MUX0], 0>,
902                                InstrStage<1, [A9_DRegsN],   0, Required>,
903                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
904                                InstrStage<2, [A9_NPipe], 0>,
905                                InstrStage<2, [A9_LSUnit]>],
906                               [3, 3, 2, 1, 1]>,
907   //
908   // VLD3
909   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
910                                InstrStage<1, [A9_MUX0], 0>,
911                                InstrStage<1, [A9_DRegsN],   0, Required>,
912                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
913                                InstrStage<4, [A9_NPipe], 0>,
914                                InstrStage<4, [A9_LSUnit]>],
915                               [4, 4, 5, 1]>,
916   //
917   // VLD3ln
918   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
919                                InstrStage<1, [A9_MUX0], 0>,
920                                InstrStage<1, [A9_DRegsN],   0, Required>,
921                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
922                                InstrStage<5, [A9_NPipe], 0>,
923                                InstrStage<5, [A9_LSUnit]>],
924                               [5, 5, 6, 1, 1, 1, 1, 2]>,
925   //
926   // VLD3u
927   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
928                                InstrStage<1, [A9_MUX0], 0>,
929                                InstrStage<1, [A9_DRegsN],   0, Required>,
930                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
931                                InstrStage<4, [A9_NPipe], 0>,
932                                InstrStage<4, [A9_LSUnit]>],
933                               [4, 4, 5, 2, 1]>,
934   //
935   // VLD3lnu
936   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
937                                InstrStage<1, [A9_MUX0], 0>,
938                                InstrStage<1, [A9_DRegsN],   0, Required>,
939                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
940                                InstrStage<5, [A9_NPipe], 0>,
941                                InstrStage<5, [A9_LSUnit]>],
942                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
943   //
944   // VLD3dup
945   InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
946                                InstrStage<1, [A9_MUX0], 0>,
947                                InstrStage<1, [A9_DRegsN],   0, Required>,
948                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
949                                InstrStage<3, [A9_NPipe], 0>,
950                                InstrStage<3, [A9_LSUnit]>],
951                               [3, 3, 4, 1]>,
952   //
953   // VLD3dupu
954   InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
955                                InstrStage<1, [A9_MUX0], 0>,
956                                InstrStage<1, [A9_DRegsN],   0, Required>,
957                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
958                                InstrStage<3, [A9_NPipe], 0>,
959                                InstrStage<3, [A9_LSUnit]>],
960                               [3, 3, 4, 2, 1, 1]>,
961   //
962   // VLD4
963   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
964                                InstrStage<1, [A9_MUX0], 0>,
965                                InstrStage<1, [A9_DRegsN],   0, Required>,
966                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
967                                InstrStage<4, [A9_NPipe], 0>,
968                                InstrStage<4, [A9_LSUnit]>],
969                               [4, 4, 5, 5, 1]>,
970   //
971   // VLD4ln
972   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
973                                InstrStage<1, [A9_MUX0], 0>,
974                                InstrStage<1, [A9_DRegsN],   0, Required>,
975                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
976                                InstrStage<5, [A9_NPipe], 0>,
977                                InstrStage<5, [A9_LSUnit]>],
978                               [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>,
979   //
980   // VLD4u
981   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
982                                InstrStage<1, [A9_MUX0], 0>,
983                                InstrStage<1, [A9_DRegsN],   0, Required>,
984                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
985                                InstrStage<4, [A9_NPipe], 0>,
986                                InstrStage<4, [A9_LSUnit]>],
987                               [4, 4, 5, 5, 2, 1]>,
988   //
989   // VLD4lnu
990   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
991                                InstrStage<1, [A9_MUX0], 0>,
992                                InstrStage<1, [A9_DRegsN],   0, Required>,
993                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
994                                InstrStage<5, [A9_NPipe], 0>,
995                                InstrStage<5, [A9_LSUnit]>],
996                               [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>,
997   //
998   // VLD4dup
999   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1000                                InstrStage<1, [A9_MUX0], 0>,
1001                                InstrStage<1, [A9_DRegsN],   0, Required>,
1002                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1003                                InstrStage<3, [A9_NPipe], 0>,
1004                                InstrStage<3, [A9_LSUnit]>],
1005                               [3, 3, 4, 4, 1]>,
1006   //
1007   // VLD4dupu
1008   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1009                                InstrStage<1, [A9_MUX0], 0>,
1010                                InstrStage<1, [A9_DRegsN],   0, Required>,
1011                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1012                                InstrStage<3, [A9_NPipe], 0>,
1013                                InstrStage<3, [A9_LSUnit]>],
1014                               [3, 3, 4, 4, 2, 1, 1]>,
1015   //
1016   // VST1
1017   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1018                                InstrStage<1, [A9_MUX0], 0>,
1019                                InstrStage<1, [A9_DRegsN],   0, Required>,
1020                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1021                                InstrStage<2, [A9_NPipe], 0>,
1022                                InstrStage<2, [A9_LSUnit]>],
1023                               [1, 1, 1]>,
1024   //
1025   // VST1x2
1026   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1027                                InstrStage<1, [A9_MUX0], 0>,
1028                                InstrStage<1, [A9_DRegsN],   0, Required>,
1029                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1030                                InstrStage<2, [A9_NPipe], 0>,
1031                                InstrStage<2, [A9_LSUnit]>],
1032                               [1, 1, 1, 1]>,
1033   //
1034   // VST1x3
1035   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1036                                InstrStage<1, [A9_MUX0], 0>,
1037                                InstrStage<1, [A9_DRegsN],   0, Required>,
1038                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1039                                InstrStage<3, [A9_NPipe], 0>,
1040                                InstrStage<3, [A9_LSUnit]>],
1041                               [1, 1, 1, 1, 2]>,
1042   //
1043   // VST1x4
1044   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1045                                InstrStage<1, [A9_MUX0], 0>,
1046                                InstrStage<1, [A9_DRegsN],   0, Required>,
1047                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1048                                InstrStage<3, [A9_NPipe], 0>,
1049                                InstrStage<3, [A9_LSUnit]>],
1050                               [1, 1, 1, 1, 2, 2]>,
1051   //
1052   // VST1u
1053   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1054                                InstrStage<1, [A9_MUX0], 0>,
1055                                InstrStage<1, [A9_DRegsN],   0, Required>,
1056                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1057                                InstrStage<2, [A9_NPipe], 0>,
1058                                InstrStage<2, [A9_LSUnit]>],
1059                               [2, 1, 1, 1, 1]>,
1060   //
1061   // VST1x2u
1062   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1063                                InstrStage<1, [A9_MUX0], 0>,
1064                                InstrStage<1, [A9_DRegsN],   0, Required>,
1065                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1066                                InstrStage<2, [A9_NPipe], 0>,
1067                                InstrStage<2, [A9_LSUnit]>],
1068                               [2, 1, 1, 1, 1, 1]>,
1069   //
1070   // VST1x3u (DRegsVFP reserved for 3 cycles to match NPipe, as VST1x3)
1071   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1072                                InstrStage<1, [A9_MUX0], 0>,
1073                                InstrStage<1, [A9_DRegsN],   0, Required>,
1074                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1075                                InstrStage<3, [A9_NPipe], 0>,
1076                                InstrStage<3, [A9_LSUnit]>],
1077                               [2, 1, 1, 1, 1, 1, 2]>,
1078   //
1079   // VST1x4u
1080   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1081                                InstrStage<1, [A9_MUX0], 0>,
1082                                InstrStage<1, [A9_DRegsN],   0, Required>,
1083                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1084                                InstrStage<3, [A9_NPipe], 0>,
1085                                InstrStage<3, [A9_LSUnit]>],
1086                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1087   //
1088   // VST1ln
1089   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1090                                InstrStage<1, [A9_MUX0], 0>,
1091                                InstrStage<1, [A9_DRegsN],   0, Required>,
1092                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1093                                InstrStage<2, [A9_NPipe], 0>,
1094                                InstrStage<2, [A9_LSUnit]>],
1095                               [1, 1, 1]>,
1096   //
1097   // VST1lnu
1098   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1099                                InstrStage<1, [A9_MUX0], 0>,
1100                                InstrStage<1, [A9_DRegsN],   0, Required>,
1101                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1102                                InstrStage<3, [A9_NPipe], 0>,
1103                                InstrStage<3, [A9_LSUnit]>],
1104                               [2, 1, 1, 1, 1]>,
1105   //
1106   // VST2
1107   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1108                                InstrStage<1, [A9_MUX0], 0>,
1109                                InstrStage<1, [A9_DRegsN],   0, Required>,
1110                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1111                                InstrStage<2, [A9_NPipe], 0>,
1112                                InstrStage<2, [A9_LSUnit]>],
1113                               [1, 1, 1, 1]>,
1114   //
1115   // VST2x2
1116   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1117                                InstrStage<1, [A9_MUX0], 0>,
1118                                InstrStage<1, [A9_DRegsN],   0, Required>,
1119                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1120                                InstrStage<3, [A9_NPipe], 0>,
1121                                InstrStage<3, [A9_LSUnit]>],
1122                               [1, 1, 1, 1, 2, 2]>,
1123   //
1124   // VST2u
1125   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1126                                InstrStage<1, [A9_MUX0], 0>,
1127                                InstrStage<1, [A9_DRegsN],   0, Required>,
1128                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1129                                InstrStage<2, [A9_NPipe], 0>,
1130                                InstrStage<2, [A9_LSUnit]>],
1131                               [2, 1, 1, 1, 1, 1]>,
1132   //
1133   // VST2x2u
1134   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1135                                InstrStage<1, [A9_MUX0], 0>,
1136                                InstrStage<1, [A9_DRegsN],   0, Required>,
1137                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1138                                InstrStage<3, [A9_NPipe], 0>,
1139                                InstrStage<3, [A9_LSUnit]>],
1140                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1141   //
1142   // VST2ln
1143   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1144                                InstrStage<1, [A9_MUX0], 0>,
1145                                InstrStage<1, [A9_DRegsN],   0, Required>,
1146                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1147                                InstrStage<2, [A9_NPipe], 0>,
1148                                InstrStage<2, [A9_LSUnit]>],
1149                               [1, 1, 1, 1]>,
1150   //
1151   // VST2lnu
1152   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1153                                InstrStage<1, [A9_MUX0], 0>,
1154                                InstrStage<1, [A9_DRegsN],   0, Required>,
1155                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1156                                InstrStage<3, [A9_NPipe], 0>,
1157                                InstrStage<3, [A9_LSUnit]>],
1158                               [2, 1, 1, 1, 1, 1]>,
1159   //
1160   // VST3
1161   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1162                                InstrStage<1, [A9_MUX0], 0>,
1163                                InstrStage<1, [A9_DRegsN],   0, Required>,
1164                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1165                                InstrStage<3, [A9_NPipe], 0>,
1166                                InstrStage<3, [A9_LSUnit]>],
1167                               [1, 1, 1, 1, 2]>,
1168   //
1169   // VST3u
1170   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1171                                InstrStage<1, [A9_MUX0], 0>,
1172                                InstrStage<1, [A9_DRegsN],   0, Required>,
1173                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1174                                InstrStage<3, [A9_NPipe], 0>,
1175                                InstrStage<3, [A9_LSUnit]>],
1176                               [2, 1, 1, 1, 1, 1, 2]>,
1177   //
1178   // VST3ln
1179   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1180                                InstrStage<1, [A9_MUX0], 0>,
1181                                InstrStage<1, [A9_DRegsN],   0, Required>,
1182                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1183                                InstrStage<3, [A9_NPipe], 0>,
1184                                InstrStage<3, [A9_LSUnit]>],
1185                               [1, 1, 1, 1, 2]>,
1186   //
1187   // VST3lnu
1188   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1189                                InstrStage<1, [A9_MUX0], 0>,
1190                                InstrStage<1, [A9_DRegsN],   0, Required>,
1191                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1192                                InstrStage<3, [A9_NPipe], 0>,
1193                                InstrStage<3, [A9_LSUnit]>],
1194                               [2, 1, 1, 1, 1, 1, 2]>,
1195   //
1196   // VST4
1197   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1198                                InstrStage<1, [A9_MUX0], 0>,
1199                                InstrStage<1, [A9_DRegsN],   0, Required>,
1200                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1201                                InstrStage<3, [A9_NPipe], 0>,
1202                                InstrStage<3, [A9_LSUnit]>],
1203                               [1, 1, 1, 1, 2, 2]>,
1204   //
1205   // VST4u
1206   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1207                                InstrStage<1, [A9_MUX0], 0>,
1208                                InstrStage<1, [A9_DRegsN],   0, Required>,
1209                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1210                                InstrStage<3, [A9_NPipe], 0>,
1211                                InstrStage<3, [A9_LSUnit]>],
1212                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1213   //
1214   // VST4ln
1215   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1216                                InstrStage<1, [A9_MUX0], 0>,
1217                                InstrStage<1, [A9_DRegsN],   0, Required>,
1218                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1219                                InstrStage<3, [A9_NPipe], 0>,
1220                                InstrStage<3, [A9_LSUnit]>],
1221                               [1, 1, 1, 1, 2, 2]>,
1222   //
1223   // VST4lnu
1224   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1225                                InstrStage<1, [A9_MUX0], 0>,
1226                                InstrStage<1, [A9_DRegsN],   0, Required>,
1227                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1228                                InstrStage<3, [A9_NPipe], 0>,
1229                                InstrStage<3, [A9_LSUnit]>],
1230                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1231
1232   //
1233   // Double-register Integer Unary
1234   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1235                                InstrStage<1, [A9_MUX0], 0>,
1236                                InstrStage<1, [A9_DRegsN],   0, Required>,
1237                                // Extra latency cycles since wbck is 6 cycles
1238                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1239                                InstrStage<1, [A9_NPipe]>],
1240                               [4, 2]>,
1241   //
1242   // Quad-register Integer Unary
1243   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1244                                InstrStage<1, [A9_MUX0], 0>,
1245                                InstrStage<1, [A9_DRegsN],   0, Required>,
1246                                // Extra latency cycles since wbck is 6 cycles
1247                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1248                                InstrStage<1, [A9_NPipe]>],
1249                               [4, 2]>,
1250   //
1251   // Double-register Integer Q-Unary
1252   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1253                                InstrStage<1, [A9_MUX0], 0>,
1254                                InstrStage<1, [A9_DRegsN],   0, Required>,
1255                                // Extra latency cycles since wbck is 6 cycles
1256                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1257                                InstrStage<1, [A9_NPipe]>],
1258                               [4, 1]>,
1259   //
1260   // Quad-register Integer Q-Unary
1261   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1262                                InstrStage<1, [A9_MUX0], 0>,
1263                                InstrStage<1, [A9_DRegsN],   0, Required>,
1264                                // Extra latency cycles since wbck is 6 cycles
1265                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1266                                InstrStage<1, [A9_NPipe]>],
1267                               [4, 1]>,
1268   //
1269   // Double-register Integer Binary
1270   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1271                                InstrStage<1, [A9_MUX0], 0>,
1272                                InstrStage<1, [A9_DRegsN],   0, Required>,
1273                                // Extra latency cycles since wbck is 6 cycles
1274                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1275                                InstrStage<1, [A9_NPipe]>],
1276                               [3, 2, 2]>,
1277   //
1278   // Quad-register Integer Binary
1279   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1280                                InstrStage<1, [A9_MUX0], 0>,
1281                                InstrStage<1, [A9_DRegsN],   0, Required>,
1282                                // Extra latency cycles since wbck is 6 cycles
1283                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1284                                InstrStage<1, [A9_NPipe]>],
1285                               [3, 2, 2]>,
1286   //
1287   // Double-register Integer Subtract
1288   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1289                                InstrStage<1, [A9_MUX0], 0>,
1290                                InstrStage<1, [A9_DRegsN],   0, Required>,
1291                                // Extra latency cycles since wbck is 6 cycles
1292                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1293                                InstrStage<1, [A9_NPipe]>],
1294                               [3, 2, 1]>,
1295   //
1296   // Quad-register Integer Subtract
1297   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1298                                InstrStage<1, [A9_MUX0], 0>,
1299                                InstrStage<1, [A9_DRegsN],   0, Required>,
1300                                // Extra latency cycles since wbck is 6 cycles
1301                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1302                                InstrStage<1, [A9_NPipe]>],
1303                               [3, 2, 1]>,
1304   //
1305   // Double-register Integer Shift
1306   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1307                                InstrStage<1, [A9_MUX0], 0>,
1308                                InstrStage<1, [A9_DRegsN],   0, Required>,
1309                                // Extra latency cycles since wbck is 6 cycles
1310                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1311                                InstrStage<1, [A9_NPipe]>],
1312                               [3, 1, 1]>,
1313   //
1314   // Quad-register Integer Shift
1315   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1316                                InstrStage<1, [A9_MUX0], 0>,
1317                                InstrStage<1, [A9_DRegsN],   0, Required>,
1318                                // Extra latency cycles since wbck is 6 cycles
1319                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1320                                InstrStage<1, [A9_NPipe]>],
1321                               [3, 1, 1]>,
1322   //
1323   // Double-register Integer Shift (4 cycle)
1324   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1325                                InstrStage<1, [A9_MUX0], 0>,
1326                                InstrStage<1, [A9_DRegsN],   0, Required>,
1327                                // Extra latency cycles since wbck is 6 cycles
1328                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1329                                InstrStage<1, [A9_NPipe]>],
1330                               [4, 1, 1]>,
1331   //
1332   // Quad-register Integer Shift (4 cycle)
1333   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1334                                InstrStage<1, [A9_MUX0], 0>,
1335                                InstrStage<1, [A9_DRegsN],   0, Required>,
1336                                // Extra latency cycles since wbck is 6 cycles
1337                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1338                                InstrStage<1, [A9_NPipe]>],
1339                               [4, 1, 1]>,
1340   //
1341   // Double-register Integer Binary (4 cycle)
1342   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1343                                InstrStage<1, [A9_MUX0], 0>,
1344                                InstrStage<1, [A9_DRegsN],   0, Required>,
1345                                // Extra latency cycles since wbck is 6 cycles
1346                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1347                                InstrStage<1, [A9_NPipe]>],
1348                               [4, 2, 2]>,
1349   //
1350   // Quad-register Integer Binary (4 cycle)
1351   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1352                                InstrStage<1, [A9_MUX0], 0>,
1353                                InstrStage<1, [A9_DRegsN],   0, Required>,
1354                                // Extra latency cycles since wbck is 6 cycles
1355                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1356                                InstrStage<1, [A9_NPipe]>],
1357                               [4, 2, 2]>,
1358   //
1359   // Double-register Integer Subtract (4 cycle)
1360   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1361                                InstrStage<1, [A9_MUX0], 0>,
1362                                InstrStage<1, [A9_DRegsN],   0, Required>,
1363                                // Extra latency cycles since wbck is 6 cycles
1364                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1365                                InstrStage<1, [A9_NPipe]>],
1366                               [4, 2, 1]>,
1367   //
1368   // Quad-register Integer Subtract (4 cycle)
1369   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1370                                InstrStage<1, [A9_MUX0], 0>,
1371                                InstrStage<1, [A9_DRegsN],   0, Required>,
1372                                // Extra latency cycles since wbck is 6 cycles
1373                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1374                                InstrStage<1, [A9_NPipe]>],
1375                               [4, 2, 1]>,
1376
1377   //
1378   // Double-register Integer Count
1379   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1380                                InstrStage<1, [A9_MUX0], 0>,
1381                                InstrStage<1, [A9_DRegsN],   0, Required>,
1382                                // Extra latency cycles since wbck is 6 cycles
1383                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1384                                InstrStage<1, [A9_NPipe]>],
1385                               [3, 2, 2]>,
1386   //
1387   // Quad-register Integer Count
1388   // Result written in N3, but that is relative to the last cycle of multicycle,
1389   // so we use 4 for those cases
1390   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1391                                InstrStage<1, [A9_MUX0], 0>,
1392                                InstrStage<1, [A9_DRegsN],   0, Required>,
1393                                // Extra latency cycles since wbck is 7 cycles
1394                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1395                                InstrStage<2, [A9_NPipe]>],
1396                               [4, 2, 2]>,
1397   //
1398   // Double-register Absolute Difference and Accumulate
1399   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1400                                InstrStage<1, [A9_MUX0], 0>,
1401                                InstrStage<1, [A9_DRegsN],   0, Required>,
1402                                // Extra latency cycles since wbck is 6 cycles
1403                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1404                                InstrStage<1, [A9_NPipe]>],
1405                               [6, 3, 2, 1]>,
1406   //
1407   // Quad-register Absolute Difference and Accumulate
1408   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1409                                InstrStage<1, [A9_MUX0], 0>,
1410                                InstrStage<1, [A9_DRegsN],   0, Required>,
1411                                // Extra latency cycles since wbck is 6 cycles
1412                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1413                                InstrStage<2, [A9_NPipe]>],
1414                               [6, 3, 2, 1]>,
1415   //
1416   // Double-register Integer Pair Add Long
1417   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1418                                InstrStage<1, [A9_MUX0], 0>,
1419                                InstrStage<1, [A9_DRegsN],   0, Required>,
1420                                // Extra latency cycles since wbck is 6 cycles
1421                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1422                                InstrStage<1, [A9_NPipe]>],
1423                               [6, 3, 1]>,
1424   //
1425   // Quad-register Integer Pair Add Long
1426   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1427                                InstrStage<1, [A9_MUX0], 0>,
1428                                InstrStage<1, [A9_DRegsN],   0, Required>,
1429                                // Extra latency cycles since wbck is 6 cycles
1430                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1431                                InstrStage<2, [A9_NPipe]>],
1432                               [6, 3, 1]>,
1433
1434   //
1435   // Double-register Integer Multiply (.8, .16)
1436   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1437                                InstrStage<1, [A9_MUX0], 0>,
1438                                InstrStage<1, [A9_DRegsN],   0, Required>,
1439                                // Extra latency cycles since wbck is 6 cycles
1440                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1441                                InstrStage<1, [A9_NPipe]>],
1442                               [6, 2, 2]>,
1443   //
1444   // Quad-register Integer Multiply (.8, .16)
1445   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1446                                InstrStage<1, [A9_MUX0], 0>,
1447                                InstrStage<1, [A9_DRegsN],   0, Required>,
1448                                // Extra latency cycles since wbck is 7 cycles
1449                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1450                                InstrStage<2, [A9_NPipe]>],
1451                               [7, 2, 2]>,
1452
1453   //
1454   // Double-register Integer Multiply (.32)
1455   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1456                                InstrStage<1, [A9_MUX0], 0>,
1457                                InstrStage<1, [A9_DRegsN],   0, Required>,
1458                                // Extra latency cycles since wbck is 7 cycles
1459                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1460                                InstrStage<2, [A9_NPipe]>],
1461                               [7, 2, 1]>,
1462   //
1463   // Quad-register Integer Multiply (.32)
1464   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1465                                InstrStage<1, [A9_MUX0], 0>,
1466                                InstrStage<1, [A9_DRegsN],   0, Required>,
1467                                // Extra latency cycles since wbck is 9 cycles
1468                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1469                                InstrStage<4, [A9_NPipe]>],
1470                               [9, 2, 1]>,
1471   //
1472   // Double-register Integer Multiply-Accumulate (.8, .16)
1473   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1474                                InstrStage<1, [A9_MUX0], 0>,
1475                                InstrStage<1, [A9_DRegsN],   0, Required>,
1476                                // Extra latency cycles since wbck is 6 cycles
1477                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1478                                InstrStage<1, [A9_NPipe]>],
1479                               [6, 3, 2, 2]>,
1480   //
1481   // Double-register Integer Multiply-Accumulate (.32)
1482   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1483                                InstrStage<1, [A9_MUX0], 0>,
1484                                InstrStage<1, [A9_DRegsN],   0, Required>,
1485                                // Extra latency cycles since wbck is 7 cycles
1486                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1487                                InstrStage<2, [A9_NPipe]>],
1488                               [7, 3, 2, 1]>,
1489   //
1490   // Quad-register Integer Multiply-Accumulate (.8, .16)
1491   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1492                                InstrStage<1, [A9_MUX0], 0>,
1493                                InstrStage<1, [A9_DRegsN],   0, Required>,
1494                                // Extra latency cycles since wbck is 7 cycles
1495                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1496                                InstrStage<2, [A9_NPipe]>],
1497                               [7, 3, 2, 2]>,
1498   //
1499   // Quad-register Integer Multiply-Accumulate (.32)
1500   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1501                                InstrStage<1, [A9_MUX0], 0>,
1502                                InstrStage<1, [A9_DRegsN],   0, Required>,
1503                                // Extra latency cycles since wbck is 9 cycles
1504                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1505                                InstrStage<4, [A9_NPipe]>],
1506                               [9, 3, 2, 1]>,
1507
1508   //
1509   // Move
1510   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1511                                InstrStage<1, [A9_MUX0], 0>,
1512                                InstrStage<1, [A9_DRegsN],   0, Required>,
1513                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1514                                InstrStage<1, [A9_NPipe]>],
1515                               [1, 1]>,
1516   //
1517   // Move Immediate
1518   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1519                                InstrStage<1, [A9_MUX0], 0>,
1520                                InstrStage<1, [A9_DRegsN],   0, Required>,
1521                                // Extra latency cycles since wbck is 6 cycles
1522                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1523                                InstrStage<1, [A9_NPipe]>],
1524                               [3]>,
1525   //
1526   // Double-register Permute Move
1527   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1528                                InstrStage<1, [A9_MUX0], 0>,
1529                                InstrStage<1, [A9_DRegsN],   0, Required>,
1530                                // Extra latency cycles since wbck is 6 cycles
1531                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1532                                InstrStage<1, [A9_NPipe]>],
1533                               [2, 1]>,
1534   //
1535   // Quad-register Permute Move
1536   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1537                                InstrStage<1, [A9_MUX0], 0>,
1538                                InstrStage<1, [A9_DRegsN],   0, Required>,
1539                                // Extra latency cycles since wbck is 6 cycles
1540                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1541                                InstrStage<1, [A9_NPipe]>],
1542                               [2, 1]>,
1543   //
1544   // Integer to Single-precision Move
1545   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1546                                InstrStage<1, [A9_MUX0], 0>,
1547                                InstrStage<1, [A9_DRegsN],   0, Required>,
1548                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1549                                InstrStage<1, [A9_NPipe]>],
1550                               [1, 1]>,
1551   //
1552   // Integer to Double-precision Move
1553   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1554                                InstrStage<1, [A9_MUX0], 0>,
1555                                InstrStage<1, [A9_DRegsN],   0, Required>,
1556                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1557                                InstrStage<1, [A9_NPipe]>],
1558                               [1, 1, 1]>,
1559   //
1560   // Single-precision to Integer Move
1561   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1562                                InstrStage<1, [A9_MUX0], 0>,
1563                                InstrStage<1, [A9_DRegsN],   0, Required>,
1564                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1565                                InstrStage<1, [A9_NPipe]>],
1566                               [2, 1]>,
1567   //
1568   // Double-precision to Integer Move
1569   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1570                                InstrStage<1, [A9_MUX0], 0>,
1571                                InstrStage<1, [A9_DRegsN],   0, Required>,
1572                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1573                                InstrStage<1, [A9_NPipe]>],
1574                               [2, 2, 1]>,
1575   //
1576   // Integer to Lane Move
1577   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1578                                InstrStage<1, [A9_MUX0], 0>,
1579                                InstrStage<1, [A9_DRegsN],   0, Required>,
1580                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1581                                InstrStage<2, [A9_NPipe]>],
1582                               [3, 1, 1]>,
1583
1584   //
1585   // Vector narrow move
1586   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1587                                InstrStage<1, [A9_MUX0], 0>,
1588                                InstrStage<1, [A9_DRegsN],   0, Required>,
1589                                // Extra latency cycles since wbck is 6 cycles
1590                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1591                                InstrStage<1, [A9_NPipe]>],
1592                               [3, 1]>,
1593   //
1594   // Double-register FP Unary
1595   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1596                                InstrStage<1, [A9_MUX0], 0>,
1597                                InstrStage<1, [A9_DRegsN],   0, Required>,
1598                                // Extra latency cycles since wbck is 6 cycles
1599                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1600                                InstrStage<1, [A9_NPipe]>],
1601                               [5, 2]>,
1602   //
1603   // Quad-register FP Unary
1604   // Result written in N5, but that is relative to the last cycle of multicycle,
1605   // so we use 6 for those cases
1606   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1607                                InstrStage<1, [A9_MUX0], 0>,
1608                                InstrStage<1, [A9_DRegsN],   0, Required>,
1609                                // Extra latency cycles since wbck is 7 cycles
1610                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1611                                InstrStage<2, [A9_NPipe]>],
1612                               [6, 2]>,
1613   //
1614   // Double-register FP Binary
1615   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1616   // optimistic.
1617   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1618                                InstrStage<1, [A9_MUX0], 0>,
1619                                InstrStage<1, [A9_DRegsN],   0, Required>,
1620                                // Extra latency cycles since wbck is 6 cycles
1621                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1622                                InstrStage<1, [A9_NPipe]>],
1623                               [5, 2, 2]>,
1624
1625   //
1626   // VPADD, etc.
1627   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1628                                InstrStage<1, [A9_MUX0], 0>,
1629                                InstrStage<1, [A9_DRegsN],   0, Required>,
1630                                // Extra latency cycles since wbck is 6 cycles
1631                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1632                                InstrStage<1, [A9_NPipe]>],
1633                               [5, 1, 1]>,
1634   //
1635   // Double-register FP VMUL
1636   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1637                                InstrStage<1, [A9_MUX0], 0>,
1638                                InstrStage<1, [A9_DRegsN],   0, Required>,
1639                                // Extra latency cycles since wbck is 6 cycles
1640                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1641                                InstrStage<1, [A9_NPipe]>],
1642                               [5, 2, 1]>,
1643   //
1644   // Quad-register FP Binary
1645   // Result written in N5, but that is relative to the last cycle of multicycle,
1646   // so we use 6 for those cases
1647   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1648   // optimistic.
1649   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1650                                InstrStage<1, [A9_MUX0], 0>,
1651                                InstrStage<1, [A9_DRegsN],   0, Required>,
1652                                // Extra latency cycles since wbck is 7 cycles
1653                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1654                                InstrStage<2, [A9_NPipe]>],
1655                               [6, 2, 2]>,
1656   //
1657   // Quad-register FP VMUL (two NEON pipe cycles, as for IIC_VBINQ)
1658   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1659                                InstrStage<1, [A9_MUX0], 0>,
1660                                InstrStage<1, [A9_DRegsN],   0, Required>,
1661                                // Extra latency cycles since wbck is 7 cycles
1662                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1663                                InstrStage<2, [A9_NPipe]>],
1664                               [6, 2, 1]>,
1665   //
1666   // Double-register FP Multiply-Accumulate
1667   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1668                                InstrStage<1, [A9_MUX0], 0>,
1669                                InstrStage<1, [A9_DRegsN],   0, Required>,
1670                                // Extra latency cycles since wbck is 7 cycles
1671                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1672                                InstrStage<2, [A9_NPipe]>],
1673                               [6, 3, 2, 1]>,
1674   //
1675   // Quad-register FP Multiply-Accumulate
1676   // Result written in N9, but that is relative to the last cycle of multicycle,
1677   // so we use 10 for those cases (FIXME: operand cycles below use 8; confirm)
1678   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1679                                InstrStage<1, [A9_MUX0], 0>,
1680                                InstrStage<1, [A9_DRegsN],   0, Required>,
1681                                // Extra latency cycles since wbck is 9 cycles
1682                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1683                                InstrStage<4, [A9_NPipe]>],
1684                               [8, 4, 2, 1]>,
1685   //
1686   // Double-register Reciprocal Step
1687   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1688                                InstrStage<1, [A9_MUX0], 0>,
1689                                InstrStage<1, [A9_DRegsN],   0, Required>,
1690                                // Extra latency cycles since wbck is 10 cycles
1691                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1692                                InstrStage<1, [A9_NPipe]>],
1693                               [9, 2, 2]>,
1694   //
1695   // Quad-register Reciprocal Step
1696   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1697                                InstrStage<1, [A9_MUX0], 0>,
1698                                InstrStage<1, [A9_DRegsN],   0, Required>,
1699                                // Extra latency cycles since wbck is 11 cycles
1700                                InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1701                                InstrStage<2, [A9_NPipe]>],
1702                               [10, 2, 2]>,
1703   //
1704   // Double-register Permute
1705   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1706                                InstrStage<1, [A9_MUX0], 0>,
1707                                InstrStage<1, [A9_DRegsN],   0, Required>,
1708                                // Extra latency cycles since wbck is 6 cycles
1709                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1710                                InstrStage<1, [A9_NPipe]>],
1711                               [2, 2, 1, 1]>,
1712   //
1713   // Quad-register Permute
1714   // Result written in N2, but that is relative to the last cycle of multicycle,
1715   // so we use 3 for those cases
1716   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1717                                InstrStage<1, [A9_MUX0], 0>,
1718                                InstrStage<1, [A9_DRegsN],   0, Required>,
1719                                // Extra latency cycles since wbck is 7 cycles
1720                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1721                                InstrStage<2, [A9_NPipe]>],
1722                               [3, 3, 1, 1]>,
1723   //
1724   // Quad-register Permute (3 cycle issue)
1725   // Result written in N2, but that is relative to the last cycle of multicycle,
1726   // so we use 4 for those cases
1727   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1728                                InstrStage<1, [A9_MUX0], 0>,
1729                                InstrStage<1, [A9_DRegsN],   0, Required>,
1730                                // Extra latency cycles since wbck is 8 cycles
1731                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1732                                InstrStage<3, [A9_NPipe]>],
1733                               [4, 4, 1, 1]>,
1734
1735   //
1736   // Double-register VEXT
1737   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1738                                InstrStage<1, [A9_MUX0], 0>,
1739                                InstrStage<1, [A9_DRegsN],   0, Required>,
1740                                // Extra latency cycles since wbck is 6 cycles
1741                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1742                                InstrStage<1, [A9_NPipe]>],
1743                               [2, 1, 1]>,
1744   //
1745   // Quad-register VEXT
1746   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1747                                InstrStage<1, [A9_MUX0], 0>,
1748                                InstrStage<1, [A9_DRegsN],   0, Required>,
1749                                // Extra latency cycles since wbck is 7 cycles
1750                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1751                                InstrStage<2, [A9_NPipe]>],
1752                               [3, 1, 2]>,
1753   //
1754   // VTB
1755   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1756                                InstrStage<1, [A9_MUX0], 0>,
1757                                InstrStage<1, [A9_DRegsN],   0, Required>,
1758                                // Extra latency cycles since wbck is 7 cycles
1759                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1760                                InstrStage<2, [A9_NPipe]>],
1761                               [3, 2, 1]>,
1762   InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1763                                InstrStage<1, [A9_MUX0], 0>,
1764                                InstrStage<2, [A9_DRegsN],   0, Required>,
1765                                // Extra latency cycles since wbck is 7 cycles
1766                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1767                                InstrStage<2, [A9_NPipe]>],
1768                               [3, 2, 2, 1]>,
1769   InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1770                                InstrStage<1, [A9_MUX0], 0>,
1771                                InstrStage<2, [A9_DRegsN],   0, Required>,
1772                                // Extra latency cycles since wbck is 8 cycles
1773                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1774                                InstrStage<3, [A9_NPipe]>],
1775                               [4, 2, 2, 3, 1]>,
1776   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1777                                InstrStage<1, [A9_MUX0], 0>,
1778                                InstrStage<1, [A9_DRegsN],   0, Required>,
1779                                // Extra latency cycles since wbck is 8 cycles
1780                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1781                                InstrStage<3, [A9_NPipe]>],
1782                               [4, 2, 2, 3, 3, 1]>,
1783   //
1784   // VTBX
1785   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1786                                InstrStage<1, [A9_MUX0], 0>,
1787                                InstrStage<1, [A9_DRegsN],   0, Required>,
1788                                // Extra latency cycles since wbck is 7 cycles
1789                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1790                                InstrStage<2, [A9_NPipe]>],
1791                               [3, 1, 2, 1]>,
1792   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1793                                InstrStage<1, [A9_MUX0], 0>,
1794                                InstrStage<1, [A9_DRegsN],   0, Required>,
1795                                // Extra latency cycles since wbck is 7 cycles
1796                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1797                                InstrStage<2, [A9_NPipe]>],
1798                               [3, 1, 2, 2, 1]>,
1799   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1800                                InstrStage<1, [A9_MUX0], 0>,
1801                                InstrStage<1, [A9_DRegsN],   0, Required>,
1802                                // Extra latency cycles since wbck is 8 cycles
1803                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1804                                InstrStage<3, [A9_NPipe]>],
1805                               [4, 1, 2, 2, 3, 1]>,
1806   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1807                                InstrStage<1, [A9_MUX0], 0>,
1808                                InstrStage<1, [A9_DRegsN],   0, Required>,
1809                                // Extra latency cycles since wbck is 8 cycles
1810                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1811                                InstrStage<2, [A9_NPipe]>],
1812                               [4, 1, 2, 2, 3, 3, 1]>
1813 ]>;