Added the missing bit definition for the 4th bit of the STR (post reg) instruction...
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
16 // Reference Manual".
17 //
18 // Functional units, reserved by the InstrStage entries of the itineraries below
19 def A9_Issue0  : FuncUnit; // Issue slot 0
20 def A9_Issue1  : FuncUnit; // Issue slot 1
21 def A9_Branch  : FuncUnit; // Branch
22 def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0 (multiplies use only this ALU)
23 def A9_ALU1    : FuncUnit; // ALU pipeline 1
24 def A9_AGU     : FuncUnit; // Address generation unit for ld / st
25 def A9_NPipe   : FuncUnit; // NEON pipeline
26 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LSUnit  : FuncUnit; // Load/store unit
28 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial unit)
29 def A9_DRegsN  : FuncUnit; // FP register set, NEON side (artificial unit)
30
31 // Bypasses, named in the per-operand bypass lists of the itineraries below
32 def A9_LdBypass : Bypass; // Listed on load defs and ALU/compare operands
33
34 def CortexA9Itineraries : ProcessorItineraries<
35   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
36    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
37   [A9_LdBypass], [
38   // Two fully-pipelined integer ALU pipelines
39
40   //
41   // Move instructions, unconditional
42   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
43                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
44   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
45                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
46   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
48   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
50   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
52                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
53   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
54                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
55                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
56                                   InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
57   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
58                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
59                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
60                                InstrStage<1, [A9_MUX0], 0>,
61                                InstrStage<1, [A9_AGU], 0>,
62                                InstrStage<1, [A9_LSUnit]>], [5]>,
63   //
64   // MVN instructions
65   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
66                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
67                               [1]>,
68   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
69                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
70                               [1, 1], [NoBypass, A9_LdBypass]>,
71   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
72                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
73                               [2, 1]>,
74   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
76                               [3, 1, 1]>,
77   //
78   // No operand cycles
79   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
80                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
81   //
82   // Binary Instructions that produce a result
83   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
85                             [1, 1], [NoBypass, A9_LdBypass]>,
86   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
88                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
89   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
90                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
91                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
92   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
94                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
95   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
97                             [3, 1, 1, 1],
98                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
99   //
100   // Bitwise Instructions that produce a result
101   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
102                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
103   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
104                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
105   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
106                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
107   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
109   //
110   // Unary Instructions that produce a result
111
112   // CLZ, RBIT, etc.
113   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
115
116   // BFC, BFI, UBFX, SBFX
117   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
119
120   //
121   // Zero and sign extension instructions
122   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
123                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
124   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
125                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
126   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
127                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
128   //
129   // Compare instructions
130   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
131                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
132                                [1], [A9_LdBypass]>,
133   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
134                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
135                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
136   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
138                                 [1, 1], [A9_LdBypass, NoBypass]>,
139   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
140                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
141                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
142   //
143   // Test instructions
144   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
145                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
146   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
147                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
148   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
150   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
152   //
153   // Move instructions, conditional
154   // FIXME: Correctly model the extra input dep on the destination.
155   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
156                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
157   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
158                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
159   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
160                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
161   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
163   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
165                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
167
168   // Integer multiply pipeline
169   //
170   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
171                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
172   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
173                                InstrStage<2, [A9_ALU0]>],
174                               [3, 1, 1, 1]>,
175   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
177   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
178                                InstrStage<2, [A9_ALU0]>],
179                               [4, 1, 1, 1]>,
180   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
182   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
183                                InstrStage<3, [A9_ALU0]>],
184                               [4, 5, 1, 1]>,
185   // Integer load pipeline
186   // FIXME: The timings are rough approximations.
187   //
188   // Immediate offset
189   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
190                                  InstrStage<1, [A9_MUX0], 0>,
191                                  InstrStage<1, [A9_AGU], 0>,
192                                  InstrStage<1, [A9_LSUnit]>],
193                                 [3, 1], [A9_LdBypass]>,
194   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
195                                  InstrStage<1, [A9_MUX0], 0>,
196                                  InstrStage<2, [A9_AGU], 0>,
197                                  InstrStage<1, [A9_LSUnit]>],
198                                 [4, 1], [A9_LdBypass]>,
199   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
200   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
201                                  InstrStage<1, [A9_MUX0], 0>,
202                                  InstrStage<2, [A9_AGU], 0>,
203                                  InstrStage<1, [A9_LSUnit]>],
204                                 [3, 3, 1], [A9_LdBypass]>,
205   //
206   // Register offset
207   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
208                                  InstrStage<1, [A9_MUX0], 0>,
209                                  InstrStage<1, [A9_AGU], 0>,
210                                  InstrStage<1, [A9_LSUnit]>],
211                                 [3, 1, 1], [A9_LdBypass]>,
212   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
213                                  InstrStage<1, [A9_MUX0], 0>,
214                                  InstrStage<2, [A9_AGU], 0>,
215                                  InstrStage<1, [A9_LSUnit]>],
216                                 [4, 1, 1], [A9_LdBypass]>,
217   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
218                                  InstrStage<1, [A9_MUX0], 0>,
219                                  InstrStage<2, [A9_AGU], 0>,
220                                  InstrStage<1, [A9_LSUnit]>],
221                                 [3, 3, 1, 1], [A9_LdBypass]>,
222   //
223   // Scaled register offset
224   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
225                                  InstrStage<1, [A9_MUX0], 0>,
226                                  InstrStage<1, [A9_AGU], 0>,
227                                  InstrStage<1, [A9_LSUnit], 0>],
228                                 [4, 1, 1], [A9_LdBypass]>,
229   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
230                                  InstrStage<1, [A9_MUX0], 0>,
231                                  InstrStage<2, [A9_AGU], 0>,
232                                  InstrStage<1, [A9_LSUnit]>],
233                                 [5, 1, 1], [A9_LdBypass]>,
234   //
235   // Immediate offset with update
236   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
237                                  InstrStage<1, [A9_MUX0], 0>,
238                                  InstrStage<1, [A9_AGU], 0>,
239                                  InstrStage<1, [A9_LSUnit]>],
240                                 [3, 2, 1], [A9_LdBypass]>,
241   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
242                                  InstrStage<1, [A9_MUX0], 0>,
243                                  InstrStage<2, [A9_AGU], 0>,
244                                  InstrStage<1, [A9_LSUnit]>],
245                                 [4, 3, 1], [A9_LdBypass]>,
246   //
247   // Register offset with update
248   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
249                                  InstrStage<1, [A9_MUX0], 0>,
250                                  InstrStage<1, [A9_AGU], 0>,
251                                  InstrStage<1, [A9_LSUnit]>],
252                                 [3, 2, 1, 1], [A9_LdBypass]>,
253   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
254                                  InstrStage<1, [A9_MUX0], 0>,
255                                  InstrStage<2, [A9_AGU], 0>,
256                                  InstrStage<1, [A9_LSUnit]>],
257                                 [4, 3, 1, 1], [A9_LdBypass]>,
258   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
259                                  InstrStage<1, [A9_MUX0], 0>,
260                                  InstrStage<2, [A9_AGU], 0>,
261                                  InstrStage<1, [A9_LSUnit]>],
262                                 [3, 3, 1, 1], [A9_LdBypass]>,
263   //
264   // Scaled register offset with update
265   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
266                                  InstrStage<1, [A9_MUX0], 0>,
267                                  InstrStage<1, [A9_AGU], 0>,
268                                  InstrStage<1, [A9_LSUnit]>],
269                                 [4, 3, 1, 1], [A9_LdBypass]>,
270   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
271                                   InstrStage<1, [A9_MUX0], 0>,
272                                   InstrStage<2, [A9_AGU], 0>,
273                                   InstrStage<1, [A9_LSUnit]>],
274                                  [5, 4, 1, 1], [A9_LdBypass]>,
275   //
276   // Load multiple, def is the 5th operand.
277   // FIXME: This assumes 3 to 4 registers.
278   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
279                                 InstrStage<1, [A9_MUX0], 0>,
280                                 InstrStage<2, [A9_AGU], 1>,
281                                 InstrStage<2, [A9_LSUnit]>],
282                                [1, 1, 1, 1, 3],
283                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
284   //
285   // Load multiple + update, defs are the 1st and 5th operands.
286   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
287                                 InstrStage<1, [A9_MUX0], 0>,
288                                 InstrStage<2, [A9_AGU], 1>,
289                                 InstrStage<2, [A9_LSUnit]>],
290                                [2, 1, 1, 1, 3],
291                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
292   //
293   // Load multiple plus branch
294   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
295                                 InstrStage<1, [A9_MUX0], 0>,
296                                 InstrStage<1, [A9_AGU], 1>,
297                                 InstrStage<2, [A9_LSUnit]>,
298                                 InstrStage<1, [A9_Branch]>],
299                                [1, 2, 1, 1, 3],
300                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
301   //
302   // Pop, def is the 3rd operand.
303   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
304                                 InstrStage<1, [A9_MUX0], 0>,
305                                 InstrStage<2, [A9_AGU], 1>,
306                                 InstrStage<2, [A9_LSUnit]>],
307                                [1, 1, 3],
308                                [NoBypass, NoBypass, A9_LdBypass]>,
309   //
310   // Pop + branch, def is the 3rd operand.
311   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
312                                 InstrStage<1, [A9_MUX0], 0>,
313                                 InstrStage<2, [A9_AGU], 1>,
314                                 InstrStage<2, [A9_LSUnit]>,
315                                 InstrStage<1, [A9_Branch]>],
316                                [1, 1, 3],
317                                [NoBypass, NoBypass, A9_LdBypass]>,
318
319   //
320   // iLoadi + iALUr for t2LDRpci_pic.
321   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322                                 InstrStage<1, [A9_MUX0], 0>,
323                                 InstrStage<1, [A9_AGU], 0>,
324                                 InstrStage<1, [A9_LSUnit]>,
325                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
326                                [2, 1]>,
327
328   // Integer store pipeline
329   //
330   // Immediate offset
331   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
332                                  InstrStage<1, [A9_MUX0], 0>,
333                                  InstrStage<1, [A9_AGU], 0>,
334                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
335   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
336                                  InstrStage<1, [A9_MUX0], 0>,
337                                  InstrStage<2, [A9_AGU], 1>,
338                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
339   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
340   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341                                  InstrStage<1, [A9_MUX0], 0>,
342                                  InstrStage<2, [A9_AGU], 1>,
343                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
344   //
345   // Register offset
346   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
347                                  InstrStage<1, [A9_MUX0], 0>,
348                                  InstrStage<1, [A9_AGU], 0>,
349                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
350   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351                                  InstrStage<1, [A9_MUX0], 0>,
352                                  InstrStage<2, [A9_AGU], 1>,
353                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
354   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
355                                  InstrStage<1, [A9_MUX0], 0>,
356                                  InstrStage<2, [A9_AGU], 1>,
357                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
358   //
359   // Scaled register offset
360   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361                                   InstrStage<1, [A9_MUX0], 0>,
362                                   InstrStage<1, [A9_AGU], 0>,
363                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
364   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
365                                   InstrStage<1, [A9_MUX0], 0>,
366                                   InstrStage<2, [A9_AGU], 1>,
367                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
368   //
369   // Immediate offset with update
370   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
371                                   InstrStage<1, [A9_MUX0], 0>,
372                                   InstrStage<1, [A9_AGU], 0>,
373                                   InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
374   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
375                                   InstrStage<1, [A9_MUX0], 0>,
376                                   InstrStage<2, [A9_AGU], 1>,
377                                   InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
378   //
379   // Register offset with update
380   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
381                                   InstrStage<1, [A9_MUX0], 0>,
382                                   InstrStage<1, [A9_AGU], 0>,
383                                   InstrStage<1, [A9_LSUnit]>],
384                                  [2, 1, 1, 1]>,
385   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
386                                   InstrStage<1, [A9_MUX0], 0>,
387                                   InstrStage<2, [A9_AGU], 1>,
388                                   InstrStage<1, [A9_LSUnit]>],
389                                  [3, 1, 1, 1]>,
390   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
391                                   InstrStage<1, [A9_MUX0], 0>,
392                                   InstrStage<2, [A9_AGU], 1>,
393                                   InstrStage<1, [A9_LSUnit]>],
394                                  [3, 1, 1, 1]>,
395   //
396   // Scaled register offset with update
397   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
398                                     InstrStage<1, [A9_MUX0], 0>,
399                                     InstrStage<1, [A9_AGU], 0>,
400                                     InstrStage<1, [A9_LSUnit]>],
401                                    [2, 1, 1, 1]>,
402   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
403                                     InstrStage<1, [A9_MUX0], 0>,
404                                     InstrStage<2, [A9_AGU], 1>,
405                                     InstrStage<1, [A9_LSUnit]>],
406                                    [3, 1, 1, 1]>,
407   //
408   // Store multiple
409   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
410                                 InstrStage<1, [A9_MUX0], 0>,
411                                 InstrStage<1, [A9_AGU], 0>,
412                                 InstrStage<2, [A9_LSUnit]>]>,
413   //
414   // Store multiple + update
415   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
416                                 InstrStage<1, [A9_MUX0], 0>,
417                                 InstrStage<1, [A9_AGU], 0>,
418                                 InstrStage<2, [A9_LSUnit]>], [2]>,
419
420   //
421   // Preload
422   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
423
424   // Branch
425   //
426   // no delay slots, so the latency of a branch is unimportant
427   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
428                                 InstrStage<1, [A9_Issue1], 0>,
429                                 InstrStage<1, [A9_Branch]>]>,
430
431   // VFP and NEON share the same register file. This means that every VFP
432   // instruction should wait for full completion of the consecutive NEON
433   // instruction and vice-versa. We model this behavior with two artificial FUs:
434   // DRegsVFP and DRegsN.
435   //
436   // Every VFP instruction:
437   //  - Acquires DRegsVFP resource for 1 cycle
438   //  - Reserves DRegsN resource for the whole duration (including time to
439   //    register file writeback!).
440   // Every NEON instruction does the same but with FUs swapped.
441   //
442   // Since the reserved FU cannot be acquired, this models precisely
443   // "cross-domain" stalls.
444
445   // VFP
446   // Issue through integer pipeline, and execute in NEON unit.
447
448   // FP Special Register to Integer Register File Move
449   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
450                               InstrStage<1, [A9_MUX0], 0>,
451                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
452                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
453                               InstrStage<1, [A9_NPipe]>],
454                              [1]>,
455   //
456   // Single-precision FP Unary
457   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
458                                InstrStage<1, [A9_MUX0], 0>,
459                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
460                                // Extra latency cycles since wbck is 2 cycles
461                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
462                                InstrStage<1, [A9_NPipe]>],
463                               [1, 1]>,
464   //
465   // Double-precision FP Unary
466   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
467                                InstrStage<1, [A9_MUX0], 0>,
468                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
469                                // Extra latency cycles since wbck is 2 cycles
470                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
471                                InstrStage<1, [A9_NPipe]>],
472                               [1, 1]>,
473
474   //
475   // Single-precision FP Compare
476   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
477                                InstrStage<1, [A9_MUX0], 0>,
478                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
479                                // Extra latency cycles since wbck is 4 cycles
480                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
481                                InstrStage<1, [A9_NPipe]>],
482                               [1, 1]>,
483   //
484   // Double-precision FP Compare
485   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486                                InstrStage<1, [A9_MUX0], 0>,
487                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
488                                // Extra latency cycles since wbck is 4 cycles
489                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
490                                InstrStage<1, [A9_NPipe]>],
491                               [1, 1]>,
492   //
493   // Single to Double FP Convert
494   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495                                InstrStage<1, [A9_MUX0], 0>,
496                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
497                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
498                                InstrStage<1, [A9_NPipe]>],
499                               [4, 1]>,
500   //
501   // Double to Single FP Convert
502   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503                                InstrStage<1, [A9_MUX0], 0>,
504                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
505                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
506                                InstrStage<1, [A9_NPipe]>],
507                               [4, 1]>,
508
509   //
510   // Single to Half FP Convert
511   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512                                InstrStage<1, [A9_MUX0], 0>,
513                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
514                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
515                                InstrStage<1, [A9_NPipe]>],
516                               [4, 1]>,
517   //
518   // Half to Single FP Convert
519   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520                                InstrStage<1, [A9_MUX0], 0>,
521                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
522                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
523                                InstrStage<1, [A9_NPipe]>],
524                               [2, 1]>,
525
526   //
527   // Single-Precision FP to Integer Convert
528   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
529                                InstrStage<1, [A9_MUX0], 0>,
530                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
531                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
532                                InstrStage<1, [A9_NPipe]>],
533                               [4, 1]>,
534   //
535   // Double-Precision FP to Integer Convert
536   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
537                                InstrStage<1, [A9_MUX0], 0>,
538                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
539                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
540                                InstrStage<1, [A9_NPipe]>],
541                               [4, 1]>,
542   //
543   // Integer to Single-Precision FP Convert
544   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
545                                InstrStage<1, [A9_MUX0], 0>,
546                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
547                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
548                                InstrStage<1, [A9_NPipe]>],
549                               [4, 1]>,
550   //
551   // Integer to Double-Precision FP Convert
552   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
553                                InstrStage<1, [A9_MUX0], 0>,
554                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
555                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
556                                InstrStage<1, [A9_NPipe]>],
557                               [4, 1]>,
558   //
559   // Single-precision FP ALU
560   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
561                                InstrStage<1, [A9_MUX0], 0>,
562                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
563                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
564                                InstrStage<1, [A9_NPipe]>],
565                               [4, 1, 1]>,
566   //
567   // Double-precision FP ALU
568   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
569                                InstrStage<1, [A9_MUX0], 0>,
570                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
571                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
572                                InstrStage<1, [A9_NPipe]>],
573                               [4, 1, 1]>,
574   //
575   // Single-precision FP Multiply
576   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
577                                InstrStage<1, [A9_MUX0], 0>,
578                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
579                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
580                                InstrStage<1, [A9_NPipe]>],
581                               [5, 1, 1]>,
582   //
583   // Double-precision FP Multiply
584   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
585                                InstrStage<1, [A9_MUX0], 0>,
586                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
587                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
588                                InstrStage<2, [A9_NPipe]>],
589                               [6, 1, 1]>,
590   //
591   // Single-precision FP MAC
592   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
593                                InstrStage<1, [A9_MUX0], 0>,
594                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
595                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
596                                InstrStage<1, [A9_NPipe]>],
597                               [8, 1, 1, 1]>,
598   //
599   // Double-precision FP MAC
600   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
601                                InstrStage<1,  [A9_MUX0], 0>,
602                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
603                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
604                                InstrStage<2,  [A9_NPipe]>],
605                               [9, 1, 1, 1]>,
606   //
607   // Single-precision Fused FP MAC
608   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
609                                InstrStage<1, [A9_MUX0], 0>,
610                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
611                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
612                                InstrStage<1, [A9_NPipe]>],
613                               [8, 1, 1, 1]>,
614   //
615   // Double-precision Fused FP MAC
616   InstrItinData<IIC_fpFMAC64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
617                                InstrStage<1,  [A9_MUX0], 0>,
618                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
619                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
620                                InstrStage<2,  [A9_NPipe]>],
621                               [9, 1, 1, 1]>,
622   //
623   // Single-precision FP DIV
624   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
625                                InstrStage<1,  [A9_MUX0], 0>,
626                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
627                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
628                                InstrStage<10, [A9_NPipe]>],
629                               [15, 1, 1]>,
630   //
631   // Double-precision FP DIV
632   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
633                                InstrStage<1,  [A9_MUX0], 0>,
634                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
635                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
636                                InstrStage<20, [A9_NPipe]>],
637                               [25, 1, 1]>,
638   //
639   // Single-precision FP SQRT
640   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
641                                InstrStage<1,  [A9_MUX0], 0>,
642                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
643                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
644                                InstrStage<13, [A9_NPipe]>],
645                               [17, 1]>,
646   //
647   // Double-precision FP SQRT
648   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
649                                InstrStage<1,  [A9_MUX0], 0>,
650                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
651                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
652                                InstrStage<28, [A9_NPipe]>],
653                               [32, 1]>,
654
655   //
656   // Integer to Single-precision Move
657   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
658                                InstrStage<1, [A9_MUX0], 0>,
659                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
660                                // Extra latency cycle: writeback takes 2 cycles
661                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
662                                InstrStage<1, [A9_NPipe]>],
663                               [1, 1]>,
664   //
665   // Integer to Double-precision Move
666   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
667                                InstrStage<1, [A9_MUX0], 0>,
668                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
669                                // Extra latency cycle: writeback takes 2 cycles
670                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
671                                InstrStage<1, [A9_NPipe]>],
672                               [1, 1, 1]>,
673   //
674   // Single-precision to Integer Move
675   //
676   // On A9 move-from-VFP is free to issue with no stall if other VFP
677   // operations are in flight. I assume it still can't dual-issue though.
678   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
679                                InstrStage<1, [A9_MUX0], 0>],
680                               [2, 1]>,
681   //
682   // Double-precision to Integer Move
683   //
684   // On A9 move-from-VFP is free to issue with no stall if other VFP
685   // operations are in flight. I assume it still can't dual-issue though.
686   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
687                                InstrStage<1, [A9_MUX0], 0>],
688                               [2, 1, 1]>,
689   //
690   // Single-precision FP Load
691   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
692                                InstrStage<1, [A9_MUX0], 0>,
693                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
694                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
695                                InstrStage<1, [A9_NPipe], 0>,
696                                InstrStage<1, [A9_LSUnit]>],
697                               [1, 1]>,
698   //
699   // Double-precision FP Load
700   // FIXME: Result latency is 1 if address is 64-bit aligned.
701   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
702                                InstrStage<1, [A9_MUX0], 0>,
703                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
704                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
705                                InstrStage<1, [A9_NPipe], 0>,
706                                InstrStage<1, [A9_LSUnit]>],
707                               [2, 1]>,
708   //
709   // FP Load Multiple
710   // FIXME: assumes 2 doubles which requires 2 LS cycles.
711   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
712                                InstrStage<1, [A9_MUX0], 0>,
713                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
714                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
715                                InstrStage<1, [A9_NPipe], 0>,
716                                InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
717   //
718   // FP Load Multiple + update
719   // FIXME: assumes 2 doubles which requires 2 LS cycles.
720   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
721                                InstrStage<1, [A9_MUX0], 0>,
722                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
723                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
724                                InstrStage<1, [A9_NPipe], 0>,
725                                InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
726   //
727   // Single-precision FP Store
728   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
729                                InstrStage<1, [A9_MUX0], 0>,
730                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
731                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
732                                InstrStage<1, [A9_NPipe], 0>,
733                                InstrStage<1, [A9_LSUnit]>],
734                               [1, 1]>,
735   //
736   // Double-precision FP Store
737   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
738                                InstrStage<1, [A9_MUX0], 0>,
739                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
740                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
741                                InstrStage<1, [A9_NPipe], 0>,
742                                InstrStage<1, [A9_LSUnit]>],
743                               [1, 1]>,
744   //
745   // FP Store Multiple
746   // FIXME: assumes 2 doubles which requires 2 LS cycles.
747   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
748                                InstrStage<1, [A9_MUX0], 0>,
749                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
750                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
751                                InstrStage<1, [A9_NPipe], 0>,
752                                InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
753   //
754   // FP Store Multiple + update
755   // FIXME: assumes 2 doubles which requires 2 LS cycles.
756   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
757                                 InstrStage<1, [A9_MUX0], 0>,
758                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
759                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
760                                 InstrStage<1, [A9_NPipe], 0>,
761                                 InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
762   // NEON
763   // VLD1
764   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
765                                InstrStage<1, [A9_MUX0], 0>,
766                                InstrStage<1, [A9_DRegsN],   0, Required>,
767                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
768                                InstrStage<1, [A9_NPipe], 0>,
769                                InstrStage<1, [A9_LSUnit]>],
770                               [1, 1]>,
771   // VLD1x2
772   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
773                                InstrStage<1, [A9_MUX0], 0>,
774                                InstrStage<1, [A9_DRegsN],   0, Required>,
775                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
776                                InstrStage<1, [A9_NPipe], 0>,
777                                InstrStage<1, [A9_LSUnit]>],
778                               [1, 1, 1]>,
779   // VLD1x3
780   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
781                                InstrStage<1, [A9_MUX0], 0>,
782                                InstrStage<1, [A9_DRegsN],   0, Required>,
783                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
784                                InstrStage<2, [A9_NPipe], 0>,
785                                InstrStage<2, [A9_LSUnit]>],
786                               [1, 1, 2, 1]>,
787   // VLD1x4
788   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
789                                InstrStage<1, [A9_MUX0], 0>,
790                                InstrStage<1, [A9_DRegsN],   0, Required>,
791                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
792                                InstrStage<2, [A9_NPipe], 0>,
793                                InstrStage<2, [A9_LSUnit]>],
794                               [1, 1, 2, 2, 1]>,
795   // VLD1u
796   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
797                                InstrStage<1, [A9_MUX0], 0>,
798                                InstrStage<1, [A9_DRegsN],   0, Required>,
799                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
800                                InstrStage<1, [A9_NPipe], 0>,
801                                InstrStage<1, [A9_LSUnit]>],
802                               [1, 2, 1]>,
803   // VLD1x2u
804   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
805                                InstrStage<1, [A9_MUX0], 0>,
806                                InstrStage<1, [A9_DRegsN],   0, Required>,
807                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
808                                InstrStage<1, [A9_NPipe], 0>,
809                                InstrStage<1, [A9_LSUnit]>],
810                               [1, 1, 2, 1]>,
811   // VLD1x3u
812   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
813                                InstrStage<1, [A9_MUX0], 0>,
814                                InstrStage<1, [A9_DRegsN],   0, Required>,
815                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
816                                InstrStage<2, [A9_NPipe], 0>,
817                                InstrStage<2, [A9_LSUnit]>],
818                               [1, 1, 2, 2, 1]>,
819   // VLD1x4u
820   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
821                                InstrStage<1, [A9_MUX0], 0>,
822                                InstrStage<1, [A9_DRegsN],   0, Required>,
823                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
824                                InstrStage<2, [A9_NPipe], 0>,
825                                InstrStage<2, [A9_LSUnit]>],
826                               [1, 1, 2, 2, 2, 1]>,
827   //
828   // VLD1ln
829   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
830                                InstrStage<1, [A9_MUX0], 0>,
831                                InstrStage<1, [A9_DRegsN],   0, Required>,
832                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
833                                InstrStage<2, [A9_NPipe], 0>,
834                                InstrStage<2, [A9_LSUnit]>],
835                               [3, 1, 1, 1]>,
836   //
837   // VLD1lnu
838   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
839                                InstrStage<1, [A9_MUX0], 0>,
840                                InstrStage<1, [A9_DRegsN],   0, Required>,
841                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
842                                InstrStage<2, [A9_NPipe], 0>,
843                                InstrStage<2, [A9_LSUnit]>],
844                               [3, 2, 1, 1, 1, 1]>,
845   //
846   // VLD1dup
847   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
848                                InstrStage<1, [A9_MUX0], 0>,
849                                InstrStage<1, [A9_DRegsN],   0, Required>,
850                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
851                                InstrStage<1, [A9_NPipe], 0>,
852                                InstrStage<1, [A9_LSUnit]>],
853                               [2, 1]>,
854   //
855   // VLD1dupu
856   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
857                                InstrStage<1, [A9_MUX0], 0>,
858                                InstrStage<1, [A9_DRegsN],   0, Required>,
859                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
860                                InstrStage<1, [A9_NPipe], 0>,
861                                InstrStage<1, [A9_LSUnit]>],
862                               [2, 2, 1, 1]>,
863   //
864   // VLD2
865   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
866                                InstrStage<1, [A9_MUX0], 0>,
867                                InstrStage<1, [A9_DRegsN],   0, Required>,
868                                // Extra latency cycles: writeback takes 7 cycles
869                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
870                                InstrStage<1, [A9_NPipe], 0>,
871                                InstrStage<1, [A9_LSUnit]>],
872                               [2, 2, 1]>,
873   //
874   // VLD2x2
875   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
876                                InstrStage<1, [A9_MUX0], 0>,
877                                InstrStage<1, [A9_DRegsN],   0, Required>,
878                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
879                                InstrStage<2, [A9_NPipe], 0>,
880                                InstrStage<2, [A9_LSUnit]>],
881                               [2, 3, 2, 3, 1]>,
882   //
883   // VLD2ln
884   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
885                                InstrStage<1, [A9_MUX0], 0>,
886                                InstrStage<1, [A9_DRegsN],   0, Required>,
887                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
888                                InstrStage<2, [A9_NPipe], 0>,
889                                InstrStage<2, [A9_LSUnit]>],
890                               [3, 3, 1, 1, 1, 1]>,
891   //
892   // VLD2u
893   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
894                                InstrStage<1, [A9_MUX0], 0>,
895                                InstrStage<1, [A9_DRegsN],   0, Required>,
896                                // Extra latency cycles: writeback takes 7 cycles
897                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
898                                InstrStage<1, [A9_NPipe], 0>,
899                                InstrStage<1, [A9_LSUnit]>],
900                               [2, 2, 2, 1, 1, 1]>,
901   //
902   // VLD2x2u
903   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
904                                InstrStage<1, [A9_MUX0], 0>,
905                                InstrStage<1, [A9_DRegsN],   0, Required>,
906                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
907                                InstrStage<2, [A9_NPipe], 0>,
908                                InstrStage<2, [A9_LSUnit]>],
909                               [2, 3, 2, 3, 2, 1]>,
910   //
911   // VLD2lnu
912   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
913                                InstrStage<1, [A9_MUX0], 0>,
914                                InstrStage<1, [A9_DRegsN],   0, Required>,
915                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
916                                InstrStage<2, [A9_NPipe], 0>,
917                                InstrStage<2, [A9_LSUnit]>],
918                               [3, 3, 2, 1, 1, 1, 1, 1]>,
919   //
920   // VLD2dup
921   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
922                                InstrStage<1, [A9_MUX0], 0>,
923                                InstrStage<1, [A9_DRegsN],   0, Required>,
924                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
925                                InstrStage<1, [A9_NPipe], 0>,
926                                InstrStage<1, [A9_LSUnit]>],
927                               [2, 2, 1]>,
928   //
929   // VLD2dupu
930   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
931                                InstrStage<1, [A9_MUX0], 0>,
932                                InstrStage<1, [A9_DRegsN],   0, Required>,
933                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
934                                InstrStage<1, [A9_NPipe], 0>,
935                                InstrStage<1, [A9_LSUnit]>],
936                               [2, 2, 2, 1, 1]>,
937   //
938   // VLD3
939   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
940                                InstrStage<1, [A9_MUX0], 0>,
941                                InstrStage<1, [A9_DRegsN],   0, Required>,
942                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
943                                InstrStage<3, [A9_NPipe], 0>,
944                                InstrStage<3, [A9_LSUnit]>],
945                               [3, 3, 4, 1]>,
946   //
947   // VLD3ln
948   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
949                                InstrStage<1, [A9_MUX0], 0>,
950                                InstrStage<1, [A9_DRegsN],   0, Required>,
951                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
952                                InstrStage<5, [A9_NPipe], 0>,
953                                InstrStage<5, [A9_LSUnit]>],
954                               [5, 5, 6, 1, 1, 1, 1, 2]>,
955   //
956   // VLD3u
957   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
958                                InstrStage<1, [A9_MUX0], 0>,
959                                InstrStage<1, [A9_DRegsN],   0, Required>,
960                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
961                                InstrStage<3, [A9_NPipe], 0>,
962                                InstrStage<3, [A9_LSUnit]>],
963                               [3, 3, 4, 2, 1]>,
964   //
965   // VLD3lnu
966   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
967                                InstrStage<1, [A9_MUX0], 0>,
968                                InstrStage<1, [A9_DRegsN],   0, Required>,
969                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
970                                InstrStage<5, [A9_NPipe], 0>,
971                                InstrStage<5, [A9_LSUnit]>],
972                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
973   //
974   // VLD3dup
975   InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
976                                InstrStage<1, [A9_MUX0], 0>,
977                                InstrStage<1, [A9_DRegsN],   0, Required>,
978                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
979                                InstrStage<3, [A9_NPipe], 0>,
980                                InstrStage<3, [A9_LSUnit]>],
981                               [3, 3, 4, 1]>,
982   //
983   // VLD3dupu
984   InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
985                                InstrStage<1, [A9_MUX0], 0>,
986                                InstrStage<1, [A9_DRegsN],   0, Required>,
987                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
988                                InstrStage<3, [A9_NPipe], 0>,
989                                InstrStage<3, [A9_LSUnit]>],
990                               [3, 3, 4, 2, 1, 1]>,
991   //
992   // VLD4
993   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
994                                InstrStage<1, [A9_MUX0], 0>,
995                                InstrStage<1, [A9_DRegsN],   0, Required>,
996                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
997                                InstrStage<3, [A9_NPipe], 0>,
998                                InstrStage<3, [A9_LSUnit]>],
999                               [3, 3, 4, 4, 1]>,
1000   //
1001   // VLD4ln
1002   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1003                                InstrStage<1, [A9_MUX0], 0>,
1004                                InstrStage<1, [A9_DRegsN],   0, Required>,
1005                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1006                                InstrStage<4, [A9_NPipe], 0>,
1007                                InstrStage<4, [A9_LSUnit]>],
1008                               [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
1009   //
1010   // VLD4u
1011   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1012                                InstrStage<1, [A9_MUX0], 0>,
1013                                InstrStage<1, [A9_DRegsN],   0, Required>,
1014                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1015                                InstrStage<3, [A9_NPipe], 0>,
1016                                InstrStage<3, [A9_LSUnit]>],
1017                               [3, 3, 4, 4, 2, 1]>,
1018   //
1019   // VLD4lnu
1020   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1021                                InstrStage<1, [A9_MUX0], 0>,
1022                                InstrStage<1, [A9_DRegsN],   0, Required>,
1023                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1024                                InstrStage<4, [A9_NPipe], 0>,
1025                                InstrStage<4, [A9_LSUnit]>],
1026                               [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
1027   //
1028   // VLD4dup
1029   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1030                                InstrStage<1, [A9_MUX0], 0>,
1031                                InstrStage<1, [A9_DRegsN],   0, Required>,
1032                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1033                                InstrStage<2, [A9_NPipe], 0>,
1034                                InstrStage<2, [A9_LSUnit]>],
1035                               [2, 2, 3, 3, 1]>,
1036   //
1037   // VLD4dupu
1038   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1039                                InstrStage<1, [A9_MUX0], 0>,
1040                                InstrStage<1, [A9_DRegsN],   0, Required>,
1041                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1042                                InstrStage<2, [A9_NPipe], 0>,
1043                                InstrStage<2, [A9_LSUnit]>],
1044                               [2, 2, 3, 3, 2, 1, 1]>,
1045   //
1046   // VST1
1047   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1048                                InstrStage<1, [A9_MUX0], 0>,
1049                                InstrStage<1, [A9_DRegsN],   0, Required>,
1050                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1051                                InstrStage<1, [A9_NPipe], 0>,
1052                                InstrStage<1, [A9_LSUnit]>],
1053                               [1, 1, 1]>,
1054   //
1055   // VST1x2
1056   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1057                                InstrStage<1, [A9_MUX0], 0>,
1058                                InstrStage<1, [A9_DRegsN],   0, Required>,
1059                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1060                                InstrStage<1, [A9_NPipe], 0>,
1061                                InstrStage<1, [A9_LSUnit]>],
1062                               [1, 1, 1, 1]>,
1063   //
1064   // VST1x3
1065   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1066                                InstrStage<1, [A9_MUX0], 0>,
1067                                InstrStage<1, [A9_DRegsN],   0, Required>,
1068                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1069                                InstrStage<2, [A9_NPipe], 0>,
1070                                InstrStage<2, [A9_LSUnit]>],
1071                               [1, 1, 1, 1, 2]>,
1072   //
1073   // VST1x4
1074   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1075                                InstrStage<1, [A9_MUX0], 0>,
1076                                InstrStage<1, [A9_DRegsN],   0, Required>,
1077                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1078                                InstrStage<2, [A9_NPipe], 0>,
1079                                InstrStage<2, [A9_LSUnit]>],
1080                               [1, 1, 1, 1, 2, 2]>,
1081   //
1082   // VST1u
1083   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1084                                InstrStage<1, [A9_MUX0], 0>,
1085                                InstrStage<1, [A9_DRegsN],   0, Required>,
1086                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1087                                InstrStage<1, [A9_NPipe], 0>,
1088                                InstrStage<1, [A9_LSUnit]>],
1089                               [2, 1, 1, 1, 1]>,
1090   //
1091   // VST1x2u
1092   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1093                                InstrStage<1, [A9_MUX0], 0>,
1094                                InstrStage<1, [A9_DRegsN],   0, Required>,
1095                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1096                                InstrStage<1, [A9_NPipe], 0>,
1097                                InstrStage<1, [A9_LSUnit]>],
1098                               [2, 1, 1, 1, 1, 1]>,
1099   //
1100   // VST1x3u
1101   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1102                                InstrStage<1, [A9_MUX0], 0>,
1103                                InstrStage<1, [A9_DRegsN],   0, Required>,
1104                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1105                                InstrStage<2, [A9_NPipe], 0>,
1106                                InstrStage<2, [A9_LSUnit]>],
1107                               [2, 1, 1, 1, 1, 1, 2]>,
1108   //
1109   // VST1x4u
1110   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1111                                InstrStage<1, [A9_MUX0], 0>,
1112                                InstrStage<1, [A9_DRegsN],   0, Required>,
1113                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1114                                InstrStage<2, [A9_NPipe], 0>,
1115                                InstrStage<2, [A9_LSUnit]>],
1116                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1117   //
1118   // VST1ln
1119   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1120                                InstrStage<1, [A9_MUX0], 0>,
1121                                InstrStage<1, [A9_DRegsN],   0, Required>,
1122                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1123                                InstrStage<1, [A9_NPipe], 0>,
1124                                InstrStage<1, [A9_LSUnit]>],
1125                               [1, 1, 1]>,
1126   //
1127   // VST1lnu
1128   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1129                                InstrStage<1, [A9_MUX0], 0>,
1130                                InstrStage<1, [A9_DRegsN],   0, Required>,
1131                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1132                                InstrStage<1, [A9_NPipe], 0>,
1133                                InstrStage<1, [A9_LSUnit]>],
1134                               [2, 1, 1, 1, 1]>,
1135   //
1136   // VST2
1137   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1138                                InstrStage<1, [A9_MUX0], 0>,
1139                                InstrStage<1, [A9_DRegsN],   0, Required>,
1140                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1141                                InstrStage<1, [A9_NPipe], 0>,
1142                                InstrStage<1, [A9_LSUnit]>],
1143                               [1, 1, 1, 1]>,
1144   //
1145   // VST2x2
1146   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1147                                InstrStage<1, [A9_MUX0], 0>,
1148                                InstrStage<1, [A9_DRegsN],   0, Required>,
1149                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1150                                InstrStage<3, [A9_NPipe], 0>,
1151                                InstrStage<3, [A9_LSUnit]>],
1152                               [1, 1, 1, 1, 2, 2]>,
1153   //
1154   // VST2u
1155   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1156                                InstrStage<1, [A9_MUX0], 0>,
1157                                InstrStage<1, [A9_DRegsN],   0, Required>,
1158                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1159                                InstrStage<1, [A9_NPipe], 0>,
1160                                InstrStage<1, [A9_LSUnit]>],
1161                               [2, 1, 1, 1, 1, 1]>,
1162   //
1163   // VST2x2u
1164   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1165                                InstrStage<1, [A9_MUX0], 0>,
1166                                InstrStage<1, [A9_DRegsN],   0, Required>,
1167                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1168                                InstrStage<3, [A9_NPipe], 0>,
1169                                InstrStage<3, [A9_LSUnit]>],
1170                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1171   //
1172   // VST2ln
1173   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1174                                InstrStage<1, [A9_MUX0], 0>,
1175                                InstrStage<1, [A9_DRegsN],   0, Required>,
1176                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1177                                InstrStage<1, [A9_NPipe], 0>,
1178                                InstrStage<1, [A9_LSUnit]>],
1179                               [1, 1, 1, 1]>,
1180   //
1181   // VST2lnu
1182   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1183                                InstrStage<1, [A9_MUX0], 0>,
1184                                InstrStage<1, [A9_DRegsN],   0, Required>,
1185                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1186                                InstrStage<1, [A9_NPipe], 0>,
1187                                InstrStage<1, [A9_LSUnit]>],
1188                               [2, 1, 1, 1, 1, 1]>,
1189   //
1190   // VST3
1191   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1192                                InstrStage<1, [A9_MUX0], 0>,
1193                                InstrStage<1, [A9_DRegsN],   0, Required>,
1194                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1195                                InstrStage<2, [A9_NPipe], 0>,
1196                                InstrStage<2, [A9_LSUnit]>],
1197                               [1, 1, 1, 1, 2]>,
1198   //
1199   // VST3u
1200   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1201                                InstrStage<1, [A9_MUX0], 0>,
1202                                InstrStage<1, [A9_DRegsN],   0, Required>,
1203                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1204                                InstrStage<2, [A9_NPipe], 0>,
1205                                InstrStage<2, [A9_LSUnit]>],
1206                               [2, 1, 1, 1, 1, 1, 2]>,
1207   //
1208   // VST3ln
1209   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1210                                InstrStage<1, [A9_MUX0], 0>,
1211                                InstrStage<1, [A9_DRegsN],   0, Required>,
1212                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1213                                InstrStage<3, [A9_NPipe], 0>,
1214                                InstrStage<3, [A9_LSUnit]>],
1215                               [1, 1, 1, 1, 2]>,
1216   //
1217   // VST3lnu
1218   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1219                                InstrStage<1, [A9_MUX0], 0>,
1220                                InstrStage<1, [A9_DRegsN],   0, Required>,
1221                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1222                                InstrStage<3, [A9_NPipe], 0>,
1223                                InstrStage<3, [A9_LSUnit]>],
1224                               [2, 1, 1, 1, 1, 1, 2]>,
1225   //
1226   // VST4
1227   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1228                                InstrStage<1, [A9_MUX0], 0>,
1229                                InstrStage<1, [A9_DRegsN],   0, Required>,
1230                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1231                                InstrStage<2, [A9_NPipe], 0>,
1232                                InstrStage<2, [A9_LSUnit]>],
1233                               [1, 1, 1, 1, 2, 2]>,
1234   //
1235   // VST4u
1236   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1237                                InstrStage<1, [A9_MUX0], 0>,
1238                                InstrStage<1, [A9_DRegsN],   0, Required>,
1239                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1240                                InstrStage<2, [A9_NPipe], 0>,
1241                                InstrStage<2, [A9_LSUnit]>],
1242                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1243   //
1244   // VST4ln
1245   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1246                                InstrStage<1, [A9_MUX0], 0>,
1247                                InstrStage<1, [A9_DRegsN],   0, Required>,
1248                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1249                                InstrStage<2, [A9_NPipe], 0>,
1250                                InstrStage<2, [A9_LSUnit]>],
1251                               [1, 1, 1, 1, 2, 2]>,
1252   //
1253   // VST4lnu
1254   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1255                                InstrStage<1, [A9_MUX0], 0>,
1256                                InstrStage<1, [A9_DRegsN],   0, Required>,
1257                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1258                                InstrStage<2, [A9_NPipe], 0>,
1259                                InstrStage<2, [A9_LSUnit]>],
1260                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1261
1262   //
1263   // Double-register Integer Unary
1264   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1265                                InstrStage<1, [A9_MUX0], 0>,
1266                                InstrStage<1, [A9_DRegsN],   0, Required>,
1267                                // Extra latency cycles since wbck is 6 cycles
1268                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1269                                InstrStage<1, [A9_NPipe]>],
1270                               [4, 2]>,
1271   //
1272   // Quad-register Integer Unary
1273   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1274                                InstrStage<1, [A9_MUX0], 0>,
1275                                InstrStage<1, [A9_DRegsN],   0, Required>,
1276                                // Extra latency cycles since wbck is 6 cycles
1277                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1278                                InstrStage<1, [A9_NPipe]>],
1279                               [4, 2]>,
1280   //
1281   // Double-register Integer Q-Unary
1282   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1283                                InstrStage<1, [A9_MUX0], 0>,
1284                                InstrStage<1, [A9_DRegsN],   0, Required>,
1285                                // Extra latency cycles since wbck is 6 cycles
1286                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1287                                InstrStage<1, [A9_NPipe]>],
1288                               [4, 1]>,
1289   //
1290   // Quad-register Integer Q-Unary
1291   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1292                                InstrStage<1, [A9_MUX0], 0>,
1293                                InstrStage<1, [A9_DRegsN],   0, Required>,
1294                                // Extra latency cycles since wbck is 6 cycles
1295                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1296                                InstrStage<1, [A9_NPipe]>],
1297                               [4, 1]>,
1298   //
1299   // Double-register Integer Binary
1300   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1301                                InstrStage<1, [A9_MUX0], 0>,
1302                                InstrStage<1, [A9_DRegsN],   0, Required>,
1303                                // Extra latency cycles since wbck is 6 cycles
1304                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1305                                InstrStage<1, [A9_NPipe]>],
1306                               [3, 2, 2]>,
1307   //
1308   // Quad-register Integer Binary
1309   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1310                                InstrStage<1, [A9_MUX0], 0>,
1311                                InstrStage<1, [A9_DRegsN],   0, Required>,
1312                                // Extra latency cycles since wbck is 6 cycles
1313                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1314                                InstrStage<1, [A9_NPipe]>],
1315                               [3, 2, 2]>,
1316   //
1317   // Double-register Integer Subtract
1318   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1319                                InstrStage<1, [A9_MUX0], 0>,
1320                                InstrStage<1, [A9_DRegsN],   0, Required>,
1321                                // Extra latency cycles since wbck is 6 cycles
1322                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1323                                InstrStage<1, [A9_NPipe]>],
1324                               [3, 2, 1]>,
1325   //
1326   // Quad-register Integer Subtract
1327   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1328                                InstrStage<1, [A9_MUX0], 0>,
1329                                InstrStage<1, [A9_DRegsN],   0, Required>,
1330                                // Extra latency cycles since wbck is 6 cycles
1331                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1332                                InstrStage<1, [A9_NPipe]>],
1333                               [3, 2, 1]>,
1334   //
1335   // Double-register Integer Shift
1336   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1337                                InstrStage<1, [A9_MUX0], 0>,
1338                                InstrStage<1, [A9_DRegsN],   0, Required>,
1339                                // Extra latency cycles since wbck is 6 cycles
1340                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1341                                InstrStage<1, [A9_NPipe]>],
1342                               [3, 1, 1]>,
1343   //
1344   // Quad-register Integer Shift
1345   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1346                                InstrStage<1, [A9_MUX0], 0>,
1347                                InstrStage<1, [A9_DRegsN],   0, Required>,
1348                                // Extra latency cycles since wbck is 6 cycles
1349                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1350                                InstrStage<1, [A9_NPipe]>],
1351                               [3, 1, 1]>,
1352   //
1353   // Double-register Integer Shift (4 cycle)
1354   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1355                                InstrStage<1, [A9_MUX0], 0>,
1356                                InstrStage<1, [A9_DRegsN],   0, Required>,
1357                                // Extra latency cycles since wbck is 6 cycles
1358                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1359                                InstrStage<1, [A9_NPipe]>],
1360                               [4, 1, 1]>,
1361   //
1362   // Quad-register Integer Shift (4 cycle)
1363   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1364                                InstrStage<1, [A9_MUX0], 0>,
1365                                InstrStage<1, [A9_DRegsN],   0, Required>,
1366                                // Extra latency cycles since wbck is 6 cycles
1367                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1368                                InstrStage<1, [A9_NPipe]>],
1369                               [4, 1, 1]>,
1370   //
1371   // Double-register Integer Binary (4 cycle)
1372   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1373                                InstrStage<1, [A9_MUX0], 0>,
1374                                InstrStage<1, [A9_DRegsN],   0, Required>,
1375                                // Extra latency cycles since wbck is 6 cycles
1376                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1377                                InstrStage<1, [A9_NPipe]>],
1378                               [4, 2, 2]>,
1379   //
1380   // Quad-register Integer Binary (4 cycle)
1381   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1382                                InstrStage<1, [A9_MUX0], 0>,
1383                                InstrStage<1, [A9_DRegsN],   0, Required>,
1384                                // Extra latency cycles since wbck is 6 cycles
1385                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1386                                InstrStage<1, [A9_NPipe]>],
1387                               [4, 2, 2]>,
1388   //
1389   // Double-register Integer Subtract (4 cycle)
1390   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1391                                InstrStage<1, [A9_MUX0], 0>,
1392                                InstrStage<1, [A9_DRegsN],   0, Required>,
1393                                // Extra latency cycles since wbck is 6 cycles
1394                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1395                                InstrStage<1, [A9_NPipe]>],
1396                               [4, 2, 1]>,
1397   //
1398   // Quad-register Integer Subtract (4 cycle)
1399   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1400                                InstrStage<1, [A9_MUX0], 0>,
1401                                InstrStage<1, [A9_DRegsN],   0, Required>,
1402                                // Extra latency cycles since wbck is 6 cycles
1403                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1404                                InstrStage<1, [A9_NPipe]>],
1405                               [4, 2, 1]>,
1406
1407   //
1408   // Double-register Integer Count
1409   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1410                                InstrStage<1, [A9_MUX0], 0>,
1411                                InstrStage<1, [A9_DRegsN],   0, Required>,
1412                                // Extra latency cycles since wbck is 6 cycles
1413                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1414                                InstrStage<1, [A9_NPipe]>],
1415                               [3, 2, 2]>,
1416   //
1417   // Quad-register Integer Count
1418   // Result written in N3, but that is relative to the last cycle of multicycle,
1419   // so we use 4 for those cases
1420   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1421                                InstrStage<1, [A9_MUX0], 0>,
1422                                InstrStage<1, [A9_DRegsN],   0, Required>,
1423                                // Extra latency cycles since wbck is 7 cycles
1424                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1425                                InstrStage<2, [A9_NPipe]>],
1426                               [4, 2, 2]>,
1427   //
1428   // Double-register Absolute Difference and Accumulate
1429   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1430                                InstrStage<1, [A9_MUX0], 0>,
1431                                InstrStage<1, [A9_DRegsN],   0, Required>,
1432                                // Extra latency cycles since wbck is 6 cycles
1433                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1434                                InstrStage<1, [A9_NPipe]>],
1435                               [6, 3, 2, 1]>,
1436   //
1437   // Quad-register Absolute Difference and Accumulate
1438   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1439                                InstrStage<1, [A9_MUX0], 0>,
1440                                InstrStage<1, [A9_DRegsN],   0, Required>,
1441                                // Extra latency cycles since wbck is 6 cycles
1442                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1443                                InstrStage<2, [A9_NPipe]>],
1444                               [6, 3, 2, 1]>,
1445   //
1446   // Double-register Integer Pair Add Long
1447   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1448                                InstrStage<1, [A9_MUX0], 0>,
1449                                InstrStage<1, [A9_DRegsN],   0, Required>,
1450                                // Extra latency cycles since wbck is 6 cycles
1451                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1452                                InstrStage<1, [A9_NPipe]>],
1453                               [6, 3, 1]>,
1454   //
1455   // Quad-register Integer Pair Add Long
1456   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1457                                InstrStage<1, [A9_MUX0], 0>,
1458                                InstrStage<1, [A9_DRegsN],   0, Required>,
1459                                // Extra latency cycles since wbck is 6 cycles
1460                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1461                                InstrStage<2, [A9_NPipe]>],
1462                               [6, 3, 1]>,
1463
1464   //
1465   // Double-register Integer Multiply (.8, .16)
1466   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1467                                InstrStage<1, [A9_MUX0], 0>,
1468                                InstrStage<1, [A9_DRegsN],   0, Required>,
1469                                // Extra latency cycles since wbck is 6 cycles
1470                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1471                                InstrStage<1, [A9_NPipe]>],
1472                               [6, 2, 2]>,
1473   //
1474   // Quad-register Integer Multiply (.8, .16)
1475   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1476                                InstrStage<1, [A9_MUX0], 0>,
1477                                InstrStage<1, [A9_DRegsN],   0, Required>,
1478                                // Extra latency cycles since wbck is 7 cycles
1479                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1480                                InstrStage<2, [A9_NPipe]>],
1481                               [7, 2, 2]>,
1482
1483   //
1484   // Double-register Integer Multiply (.32)
1485   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1486                                InstrStage<1, [A9_MUX0], 0>,
1487                                InstrStage<1, [A9_DRegsN],   0, Required>,
1488                                // Extra latency cycles since wbck is 7 cycles
1489                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1490                                InstrStage<2, [A9_NPipe]>],
1491                               [7, 2, 1]>,
1492   //
1493   // Quad-register Integer Multiply (.32)
1494   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1495                                InstrStage<1, [A9_MUX0], 0>,
1496                                InstrStage<1, [A9_DRegsN],   0, Required>,
1497                                // Extra latency cycles since wbck is 9 cycles
1498                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1499                                InstrStage<4, [A9_NPipe]>],
1500                               [9, 2, 1]>,
1501   //
1502   // Double-register Integer Multiply-Accumulate (.8, .16)
1503   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1504                                InstrStage<1, [A9_MUX0], 0>,
1505                                InstrStage<1, [A9_DRegsN],   0, Required>,
1506                                // Extra latency cycles since wbck is 6 cycles
1507                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1508                                InstrStage<1, [A9_NPipe]>],
1509                               [6, 3, 2, 2]>,
1510   //
1511   // Double-register Integer Multiply-Accumulate (.32)
1512   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1513                                InstrStage<1, [A9_MUX0], 0>,
1514                                InstrStage<1, [A9_DRegsN],   0, Required>,
1515                                // Extra latency cycles since wbck is 7 cycles
1516                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1517                                InstrStage<2, [A9_NPipe]>],
1518                               [7, 3, 2, 1]>,
1519   //
1520   // Quad-register Integer Multiply-Accumulate (.8, .16)
1521   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1522                                InstrStage<1, [A9_MUX0], 0>,
1523                                InstrStage<1, [A9_DRegsN],   0, Required>,
1524                                // Extra latency cycles since wbck is 7 cycles
1525                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1526                                InstrStage<2, [A9_NPipe]>],
1527                               [7, 3, 2, 2]>,
1528   //
1529   // Quad-register Integer Multiply-Accumulate (.32)
1530   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1531                                InstrStage<1, [A9_MUX0], 0>,
1532                                InstrStage<1, [A9_DRegsN],   0, Required>,
1533                                // Extra latency cycles since wbck is 9 cycles
1534                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1535                                InstrStage<4, [A9_NPipe]>],
1536                               [9, 3, 2, 1]>,
1537
1538   //
1539   // Move
1540   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1541                                InstrStage<1, [A9_MUX0], 0>,
1542                                InstrStage<1, [A9_DRegsN],   0, Required>,
1543                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1544                                InstrStage<1, [A9_NPipe]>],
1545                               [1,1]>,
1546   //
1547   // Move Immediate
1548   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1549                                InstrStage<1, [A9_MUX0], 0>,
1550                                InstrStage<1, [A9_DRegsN],   0, Required>,
1551                                // Extra latency cycles since wbck is 6 cycles
1552                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1553                                InstrStage<1, [A9_NPipe]>],
1554                               [3]>,
1555   //
1556   // Double-register Permute Move
1557   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1558                                InstrStage<1, [A9_MUX0], 0>,
1559                                InstrStage<1, [A9_DRegsN],   0, Required>,
1560                                // Extra latency cycles since wbck is 6 cycles
1561                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1562                                InstrStage<1, [A9_NPipe]>],
1563                               [2, 1]>,
1564   //
1565   // Quad-register Permute Move
1566   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1567                                InstrStage<1, [A9_MUX0], 0>,
1568                                InstrStage<1, [A9_DRegsN],   0, Required>,
1569                                // Extra latency cycles since wbck is 6 cycles
1570                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1571                                InstrStage<1, [A9_NPipe]>],
1572                               [2, 1]>,
1573   //
1574   // Integer to Single-precision Move
1575   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1576                                InstrStage<1, [A9_MUX0], 0>,
1577                                InstrStage<1, [A9_DRegsN],   0, Required>,
1578                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1579                                InstrStage<1, [A9_NPipe]>],
1580                               [1, 1]>,
1581   //
1582   // Integer to Double-precision Move
1583   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1584                                InstrStage<1, [A9_MUX0], 0>,
1585                                InstrStage<1, [A9_DRegsN],   0, Required>,
1586                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1587                                InstrStage<1, [A9_NPipe]>],
1588                               [1, 1, 1]>,
1589   //
1590   // Single-precision to Integer Move
1591   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1592                                InstrStage<1, [A9_MUX0], 0>,
1593                                InstrStage<1, [A9_DRegsN],   0, Required>,
1594                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1595                                InstrStage<1, [A9_NPipe]>],
1596                               [2, 1]>,
1597   //
1598   // Double-precision to Integer Move
1599   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1600                                InstrStage<1, [A9_MUX0], 0>,
1601                                InstrStage<1, [A9_DRegsN],   0, Required>,
1602                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1603                                InstrStage<1, [A9_NPipe]>],
1604                               [2, 2, 1]>,
1605   //
1606   // Integer to Lane Move
1607   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1608                                InstrStage<1, [A9_MUX0], 0>,
1609                                InstrStage<1, [A9_DRegsN],   0, Required>,
1610                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1611                                InstrStage<2, [A9_NPipe]>],
1612                               [3, 1, 1]>,
1613
1614   //
1615   // Vector narrow move
1616   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1617                                InstrStage<1, [A9_MUX0], 0>,
1618                                InstrStage<1, [A9_DRegsN],   0, Required>,
1619                                // Extra latency cycles since wbck is 6 cycles
1620                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1621                                InstrStage<1, [A9_NPipe]>],
1622                               [3, 1]>,
1623   //
1624   // Double-register FP Unary
1625   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1626                                InstrStage<1, [A9_MUX0], 0>,
1627                                InstrStage<1, [A9_DRegsN],   0, Required>,
1628                                // Extra latency cycles since wbck is 6 cycles
1629                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1630                                InstrStage<1, [A9_NPipe]>],
1631                               [5, 2]>,
1632   //
1633   // Quad-register FP Unary
1634   // Result written in N5, but that is relative to the last cycle of multicycle,
1635   // so we use 6 for those cases
1636   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1637                                InstrStage<1, [A9_MUX0], 0>,
1638                                InstrStage<1, [A9_DRegsN],   0, Required>,
1639                                // Extra latency cycles since wbck is 7 cycles
1640                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1641                                InstrStage<2, [A9_NPipe]>],
1642                               [6, 2]>,
1643   //
1644   // Double-register FP Binary
1645   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1646   // optimistic.
1647   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1648                                InstrStage<1, [A9_MUX0], 0>,
1649                                InstrStage<1, [A9_DRegsN],   0, Required>,
1650                                // Extra latency cycles since wbck is 6 cycles
1651                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1652                                InstrStage<1, [A9_NPipe]>],
1653                               [5, 2, 2]>,
1654
1655   //
1656   // VPADD, etc.
1657   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1658                                InstrStage<1, [A9_MUX0], 0>,
1659                                InstrStage<1, [A9_DRegsN],   0, Required>,
1660                                // Extra latency cycles since wbck is 6 cycles
1661                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1662                                InstrStage<1, [A9_NPipe]>],
1663                               [5, 1, 1]>,
1664   //
1665   // Double-register FP VMUL
1666   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1667                                InstrStage<1, [A9_MUX0], 0>,
1668                                InstrStage<1, [A9_DRegsN],   0, Required>,
1669                                // Extra latency cycles since wbck is 6 cycles
1670                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1671                                InstrStage<1, [A9_NPipe]>],
1672                               [5, 2, 1]>,
1673   //
1674   // Quad-register FP Binary
1675   // Result is written in N5, but that is relative to the last cycle of a
1676   // multicycle instruction, so we use 6 for those cases
1677   // FIXME: We're using this itinerary for many instructions and the [2, 2]
1678   // operand cycles here are too optimistic.
1679   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1680                                InstrStage<1, [A9_MUX0], 0>,
1681                                InstrStage<1, [A9_DRegsN],   0, Required>,
1682                                // Extra latency cycles since wbck is 7 cycles
1683                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1684                                InstrStage<2, [A9_NPipe]>],
1685                               [6, 2, 2]>,
1686   //
1687   // Quad-register FP VMUL
1688   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1689                                InstrStage<1, [A9_MUX0], 0>,
1690                                InstrStage<1, [A9_DRegsN],   0, Required>,
1691                                // Extra latency cycles since wbck is 7 cycles
1692                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1693                                InstrStage<1, [A9_NPipe]>],
1694                               [6, 2, 1]>,
1695   //
1696   // Double-register FP Multiply-Accumulate
1697   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1698                                InstrStage<1, [A9_MUX0], 0>,
1699                                InstrStage<1, [A9_DRegsN],   0, Required>,
1700                                // Extra latency cycles since wbck is 7 cycles
1701                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1702                                InstrStage<2, [A9_NPipe]>],
1703                               [6, 3, 2, 1]>,
1704   //
1705   // Quad-register FP Multiply-Accumulate
1706   // Result is written in N9, but that is relative to the last cycle of a
1707   // multicycle instruction, so we use 10 for those cases
1708   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1709                                InstrStage<1, [A9_MUX0], 0>,
1710                                InstrStage<1, [A9_DRegsN],   0, Required>,
1711                                // Extra latency cycles since wbck is 9 cycles
1712                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1713                                InstrStage<4, [A9_NPipe]>],
1714                               [8, 4, 2, 1]>,
1715   //
1716   // Double-register Fused FP Multiply-Accumulate
1717   InstrItinData<IIC_VFMACD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1718                                InstrStage<1, [A9_MUX0], 0>,
1719                                InstrStage<1, [A9_DRegsN],   0, Required>,
1720                                // Extra latency cycles since wbck is 7 cycles
1721                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1722                                InstrStage<2, [A9_NPipe]>],
1723                               [6, 3, 2, 1]>,
1724   //
1725   // Quad-register Fused FP Multiply-Accumulate
1726   // Result is written in N9, but that is relative to the last cycle of a
1727   // multicycle instruction, so we use 10 for those cases
1728   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1729                                InstrStage<1, [A9_MUX0], 0>,
1730                                InstrStage<1, [A9_DRegsN],   0, Required>,
1731                                // Extra latency cycles since wbck is 9 cycles
1732                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1733                                InstrStage<4, [A9_NPipe]>],
1734                               [8, 4, 2, 1]>,
1735   //
1736   // Double-register Reciprocal Step
1737   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1738                                InstrStage<1, [A9_MUX0], 0>,
1739                                InstrStage<1, [A9_DRegsN],   0, Required>,
1740                                // Extra latency cycles since wbck is 10 cycles
1741                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1742                                InstrStage<1, [A9_NPipe]>],
1743                               [9, 2, 2]>,
1744   //
1745   // Quad-register Reciprocal Step
1746   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1747                                InstrStage<1, [A9_MUX0], 0>,
1748                                InstrStage<1, [A9_DRegsN],   0, Required>,
1749                                // Extra latency cycles since wbck is 11 cycles
1750                                InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1751                                InstrStage<2, [A9_NPipe]>],
1752                               [10, 2, 2]>,
1753   //
1754   // Double-register Permute
1755   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1756                                InstrStage<1, [A9_MUX0], 0>,
1757                                InstrStage<1, [A9_DRegsN],   0, Required>,
1758                                // Extra latency cycles since wbck is 6 cycles
1759                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1760                                InstrStage<1, [A9_NPipe]>],
1761                               [2, 2, 1, 1]>,
1762   //
1763   // Quad-register Permute
1764   // Result is written in N2, but that is relative to the last cycle of a
1765   // multicycle instruction, so we use 3 for those cases
1766   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1767                                InstrStage<1, [A9_MUX0], 0>,
1768                                InstrStage<1, [A9_DRegsN],   0, Required>,
1769                                // Extra latency cycles since wbck is 7 cycles
1770                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1771                                InstrStage<2, [A9_NPipe]>],
1772                               [3, 3, 1, 1]>,
1773   //
1774   // Quad-register Permute (3 cycle issue)
1775   // Result is written in N2, but that is relative to the last cycle of a
1776   // multicycle instruction, so we use 4 for those cases
1777   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1778                                InstrStage<1, [A9_MUX0], 0>,
1779                                InstrStage<1, [A9_DRegsN],   0, Required>,
1780                                // Extra latency cycles since wbck is 8 cycles
1781                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1782                                InstrStage<3, [A9_NPipe]>],
1783                               [4, 4, 1, 1]>,
1784
1785   //
1786   // Double-register VEXT
1787   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1788                                InstrStage<1, [A9_MUX0], 0>,
1789                                InstrStage<1, [A9_DRegsN],   0, Required>,
1790                                // Extra latency cycles since wbck is 6 cycles
1791                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1792                                InstrStage<1, [A9_NPipe]>],
1793                               [2, 1, 1]>,
1794   //
1795   // Quad-register VEXT
1796   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1797                                InstrStage<1, [A9_MUX0], 0>,
1798                                InstrStage<1, [A9_DRegsN],   0, Required>,
1799                                // Extra latency cycles since wbck is 7 cycles
1800                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1801                                InstrStage<2, [A9_NPipe]>],
1802                               [3, 1, 2]>,
1803   //
1804   // VTB
1805   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1806                                InstrStage<1, [A9_MUX0], 0>,
1807                                InstrStage<1, [A9_DRegsN],   0, Required>,
1808                                // Extra latency cycles since wbck is 7 cycles
1809                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1810                                InstrStage<2, [A9_NPipe]>],
1811                               [3, 2, 1]>,
1812   InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1813                                InstrStage<1, [A9_MUX0], 0>,
1814                                InstrStage<2, [A9_DRegsN],   0, Required>,
1815                                // Extra latency cycles since wbck is 7 cycles
1816                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1817                                InstrStage<2, [A9_NPipe]>],
1818                               [3, 2, 2, 1]>,
1819   InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1820                                InstrStage<1, [A9_MUX0], 0>,
1821                                InstrStage<2, [A9_DRegsN],   0, Required>,
1822                                // Extra latency cycles since wbck is 8 cycles
1823                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1824                                InstrStage<3, [A9_NPipe]>],
1825                               [4, 2, 2, 3, 1]>,
1826   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1827                                InstrStage<1, [A9_MUX0], 0>,
1828                                InstrStage<1, [A9_DRegsN],   0, Required>,
1829                                // Extra latency cycles since wbck is 8 cycles
1830                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1831                                InstrStage<3, [A9_NPipe]>],
1832                               [4, 2, 2, 3, 3, 1]>,
1833   //
1834   // VTBX
1835   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1836                                InstrStage<1, [A9_MUX0], 0>,
1837                                InstrStage<1, [A9_DRegsN],   0, Required>,
1838                                // Extra latency cycles since wbck is 7 cycles
1839                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1840                                InstrStage<2, [A9_NPipe]>],
1841                               [3, 1, 2, 1]>,
1842   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1843                                InstrStage<1, [A9_MUX0], 0>,
1844                                InstrStage<1, [A9_DRegsN],   0, Required>,
1845                                // Extra latency cycles since wbck is 7 cycles
1846                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1847                                InstrStage<2, [A9_NPipe]>],
1848                               [3, 1, 2, 2, 1]>,
1849   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1850                                InstrStage<1, [A9_MUX0], 0>,
1851                                InstrStage<1, [A9_DRegsN],   0, Required>,
1852                                // Extra latency cycles since wbck is 8 cycles
1853                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1854                                InstrStage<3, [A9_NPipe]>],
1855                               [4, 1, 2, 2, 3, 1]>,
1856   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1857                                InstrStage<1, [A9_MUX0], 0>,
1858                                InstrStage<1, [A9_DRegsN],   0, Required>,
1859                                // Extra latency cycles since wbck is 8 cycles
1860                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1861                                InstrStage<2, [A9_NPipe]>],
1862                               [4, 1, 2, 2, 3, 3, 1]>
1863 ]>;