Finish vld3 and vld4.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units
19 def A9_Issue0  : FuncUnit; // Issue 0
20 def A9_Issue1  : FuncUnit; // Issue 1
21 def A9_Branch  : FuncUnit; // Branch
22 def A9_ALU0    : FuncUnit; // ALU / MUL pipeline 0 (multiplies use ALU0 only)
23 def A9_ALU1    : FuncUnit; // ALU pipeline 1
24 def A9_AGU     : FuncUnit; // Address generation unit for ld / st
25 def A9_NPipe   : FuncUnit; // NEON pipeline; VFP itineraries also use it
26 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LS0     : FuncUnit; // L/S Units, 32-bit per unit. Fake FU to limit l/s.
28 def A9_LS1     : FuncUnit; // L/S Units, 32-bit per unit.
29 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial FU)
30 def A9_DRegsN  : FuncUnit; // FP register set, NEON side (artificial FU)
31
32 // Bypasses (named in the operand bypass lists of the itineraries below)
33 def A9_LdBypass : Bypass;
34
35 def CortexA9Itineraries : ProcessorItineraries<
36   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
37    A9_LS0, A9_LS1, A9_DRegsVFP, A9_DRegsN],
38   [A9_LdBypass], [
39   // Two fully-pipelined integer ALU pipelines
40
41   //
42   // Move instructions, unconditional
43   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
44                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
45   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
46                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
47   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
48                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
49   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
50                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
51   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
52                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
53                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
54   //
55   // MVN instructions
56   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
57                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
58                               [1]>,
59   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
60                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
61                               [1, 1], [NoBypass, A9_LdBypass]>,
62   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
63                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
64                               [2, 1]>,
65   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
66                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
67                               [3, 1, 1]>,
68   //
69   // No operand cycles
70   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
71                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
72   //
73   // Binary Instructions that produce a result
74   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
76                             [1, 1], [NoBypass, A9_LdBypass]>,
77   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
78                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
79                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
80   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
81                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
82                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
83   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
85                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
86   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
88                             [3, 1, 1, 1],
89                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
90   //
91   // Bitwise Instructions that produce a result
92   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
94   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
95                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
96   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
98   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
99                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
100   //
101   // Unary Instructions that produce a result
102
103   // CLZ, RBIT, etc.
104   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
105                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
106
107   // BFC, BFI, UBFX, SBFX
108   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
109                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
110
111   //
112   // Zero and sign extension instructions
113   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
115   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
116                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
117   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
119   //
120   // Compare instructions
121   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
122                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
123                                [1], [A9_LdBypass]>,
124   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
125                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
126                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
127   InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_ALU0, A9_ALU1]>],
128                                 [1, 1], [A9_LdBypass, NoBypass]>,
129   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
130                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
131                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
132   //
133   // Test instructions
134   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
136   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
138   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
139                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
140   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
142   //
143   // Move instructions, conditional
144   // FIXME: Correctly model the extra input dep on the destination.
145   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
146                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
147   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
148                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
149   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
150                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
151   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
152                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
153
154   // Integer multiply pipeline
155   //
156   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
157                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
158   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
159                                InstrStage<2, [A9_ALU0]>],
160                               [3, 1, 1, 1]>,
161   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
163   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164                                InstrStage<2, [A9_ALU0]>],
165                               [4, 1, 1, 1]>,
166   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
167                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
168   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
169                                InstrStage<3, [A9_ALU0]>],
170                               [4, 5, 1, 1]>,
171   // Integer load pipeline
172   // FIXME: The timings are some rough approximations
173   //
174   // Immediate offset
175   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176                                  InstrStage<1, [A9_MUX0], 0>,
177                                  InstrStage<1, [A9_AGU]>,
178                                  InstrStage<1, [A9_LS0, A9_LS1]>],
179                                 [3, 1], [A9_LdBypass]>,
180   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181                                  InstrStage<1, [A9_MUX0], 0>,
182                                  InstrStage<2, [A9_AGU]>,
183                                  InstrStage<1, [A9_LS0, A9_LS1]>],
184                                 [4, 1], [A9_LdBypass]>,
185   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
186   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
187                                  InstrStage<1, [A9_MUX0], 0>,
188                                  InstrStage<2, [A9_AGU]>,
189                                  InstrStage<1, [A9_LS0, A9_LS1]>],
190                                 [3, 3, 1], [A9_LdBypass]>,
191   //
192   // Register offset
193   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
194                                  InstrStage<1, [A9_MUX0], 0>,
195                                  InstrStage<1, [A9_AGU]>,
196                                  InstrStage<1, [A9_LS0, A9_LS1]>],
197                                 [3, 1, 1], [A9_LdBypass]>,
198   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
199                                  InstrStage<1, [A9_MUX0], 0>,
200                                  InstrStage<2, [A9_AGU]>,
201                                  InstrStage<1, [A9_LS0, A9_LS1]>],
202                                 [4, 1, 1], [A9_LdBypass]>,
203   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
204                                  InstrStage<1, [A9_MUX0], 0>,
205                                  InstrStage<2, [A9_AGU]>,
206                                  InstrStage<1, [A9_LS0, A9_LS1]>],
207                                 [3, 3, 1, 1], [A9_LdBypass]>,
208   //
209   // Scaled register offset
210   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
211                                  InstrStage<1, [A9_MUX0], 0>,
212                                  InstrStage<1, [A9_AGU]>,
213                                  InstrStage<1, [A9_LS0, A9_LS1]>],
214                                 [4, 1, 1], [A9_LdBypass]>,
215   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
216                                  InstrStage<1, [A9_MUX0], 0>,
217                                  InstrStage<2, [A9_AGU]>,
218                                  InstrStage<1, [A9_LS0, A9_LS1]>],
219                                 [5, 1, 1], [A9_LdBypass]>,
220   //
221   // Immediate offset with update
222   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
223                                  InstrStage<1, [A9_MUX0], 0>,
224                                  InstrStage<1, [A9_AGU]>,
225                                  InstrStage<1, [A9_LS0, A9_LS1]>],
226                                 [3, 2, 1], [A9_LdBypass]>,
227   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
228                                  InstrStage<1, [A9_MUX0], 0>,
229                                  InstrStage<2, [A9_AGU]>,
230                                  InstrStage<1, [A9_LS0, A9_LS1]>],
231                                 [4, 3, 1], [A9_LdBypass]>,
232   //
233   // Register offset with update
234   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
235                                  InstrStage<1, [A9_MUX0], 0>,
236                                  InstrStage<1, [A9_AGU]>,
237                                  InstrStage<1, [A9_LS0, A9_LS1]>],
238                                 [3, 2, 1, 1], [A9_LdBypass]>,
239   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
240                                  InstrStage<1, [A9_MUX0], 0>,
241                                  InstrStage<2, [A9_AGU]>,
242                                  InstrStage<1, [A9_LS0, A9_LS1]>],
243                                 [4, 3, 1, 1], [A9_LdBypass]>,
244   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
245                                  InstrStage<1, [A9_MUX0], 0>,
246                                  InstrStage<2, [A9_AGU]>,
247                                  InstrStage<1, [A9_LS0, A9_LS1]>],
248                                 [3, 3, 1, 1], [A9_LdBypass]>,
249   //
250   // Scaled register offset with update
251   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
252                                  InstrStage<1, [A9_MUX0], 0>,
253                                  InstrStage<1, [A9_AGU]>,
254                                  InstrStage<1, [A9_LS0, A9_LS1]>],
255                                 [4, 3, 1, 1], [A9_LdBypass]>,
256   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
257                                   InstrStage<1, [A9_MUX0], 0>,
258                                   InstrStage<2, [A9_AGU]>,
259                                   InstrStage<1, [A9_LS0, A9_LS1]>],
260                                  [5, 4, 1, 1], [A9_LdBypass]>,
261   //
262   // Load multiple, def is the 5th operand.
263   // FIXME: This assumes 3 to 4 registers.
264   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
265                                 InstrStage<1, [A9_MUX0], 0>,
266                                 InstrStage<2, [A9_AGU]>,
267                                 InstrStage<2, [A9_LS0, A9_LS1]>],
268                                [1, 1, 1, 1, 3],
269                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
270   //
271   // Load multiple + update, defs are the 1st and 5th operands.
272   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
273                                 InstrStage<1, [A9_MUX0], 0>,
274                                 InstrStage<2, [A9_AGU]>,
275                                 InstrStage<2, [A9_LS0, A9_LS1]>],
276                                [2, 1, 1, 1, 3],
277                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
278   //
279   // Load multiple plus branch
280   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
281                                 InstrStage<1, [A9_MUX0], 0>,
282                                 InstrStage<1, [A9_AGU]>,
283                                 InstrStage<2, [A9_LS0, A9_LS1]>,
284                                 InstrStage<1, [A9_Branch]>],
285                                [1, 2, 1, 1, 3],
286                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
287   //
288   // Pop, def is the 3rd operand.
289   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
290                                 InstrStage<1, [A9_MUX0], 0>,
291                                 InstrStage<2, [A9_AGU]>,
292                                 InstrStage<2, [A9_LS0, A9_LS1]>],
293                                [1, 1, 3],
294                                [NoBypass, NoBypass, A9_LdBypass]>,
295   //
296   // Pop + branch, def is the 3rd operand.
297   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
298                                 InstrStage<1, [A9_MUX0], 0>,
299                                 InstrStage<2, [A9_AGU]>,
300                                 InstrStage<2, [A9_LS0, A9_LS1]>,
301                                 InstrStage<1, [A9_Branch]>],
302                                [1, 1, 3],
303                                [NoBypass, NoBypass, A9_LdBypass]>,
304
305   //
306   // iLoadi + iALUr for t2LDRpci_pic.
307   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
308                                 InstrStage<1, [A9_MUX0], 0>,
309                                 InstrStage<1, [A9_AGU]>,
310                                 InstrStage<1, [A9_LS0, A9_LS1]>,
311                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
312                                [2, 1]>,
313
314   // Integer store pipeline
315   //
316   // Immediate offset
317   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
318                                  InstrStage<1, [A9_MUX0], 0>,
319                                  InstrStage<1, [A9_AGU]>,
320                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
321   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322                                  InstrStage<1, [A9_MUX0], 0>,
323                                  InstrStage<2, [A9_AGU]>,
324                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
325   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
326   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
327                                  InstrStage<1, [A9_MUX0], 0>,
328                                  InstrStage<2, [A9_AGU]>,
329                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
330   //
331   // Register offset
332   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
333                                  InstrStage<1, [A9_MUX0], 0>,
334                                  InstrStage<1, [A9_AGU]>,
335                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
336   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
337                                  InstrStage<1, [A9_MUX0], 0>,
338                                  InstrStage<2, [A9_AGU]>,
339                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
340   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341                                  InstrStage<1, [A9_MUX0], 0>,
342                                  InstrStage<2, [A9_AGU]>,
343                                  InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
344   //
345   // Scaled register offset
346   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
347                                   InstrStage<1, [A9_MUX0], 0>,
348                                   InstrStage<1, [A9_AGU]>,
349                                   InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
350   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351                                   InstrStage<1, [A9_MUX0], 0>,
352                                   InstrStage<2, [A9_AGU]>,
353                                   InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
354   //
355   // Immediate offset with update
356   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
357                                   InstrStage<1, [A9_MUX0], 0>,
358                                   InstrStage<1, [A9_AGU]>,
359                                   InstrStage<1, [A9_LS0, A9_LS1]>], [2, 1, 1]>,
360   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361                                   InstrStage<1, [A9_MUX0], 0>,
362                                   InstrStage<2, [A9_AGU]>,
363                                   InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1]>,
364   //
365   // Register offset with update
366   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
367                                   InstrStage<1, [A9_MUX0], 0>,
368                                   InstrStage<1, [A9_AGU]>,
369                                   InstrStage<1, [A9_LS0, A9_LS1]>],
370                                  [2, 1, 1, 1]>,
371   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
372                                   InstrStage<1, [A9_MUX0], 0>,
373                                   InstrStage<2, [A9_AGU]>,
374                                   InstrStage<1, [A9_LS0, A9_LS1]>],
375                                  [3, 1, 1, 1]>,
376   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
377                                   InstrStage<1, [A9_MUX0], 0>,
378                                   InstrStage<2, [A9_AGU]>,
379                                   InstrStage<1, [A9_LS0, A9_LS1]>],
380                                  [3, 1, 1, 1]>,
381   //
382   // Scaled register offset with update
383   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
384                                     InstrStage<1, [A9_MUX0], 0>,
385                                     InstrStage<1, [A9_AGU]>,
386                                     InstrStage<1, [A9_LS0, A9_LS1]>],
387                                    [2, 1, 1, 1]>,
388   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
389                                     InstrStage<1, [A9_MUX0], 0>,
390                                     InstrStage<2, [A9_AGU]>,
391                                     InstrStage<1, [A9_LS0, A9_LS1]>],
392                                    [3, 1, 1, 1]>,
393   //
394   // Store multiple
395   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
396                                 InstrStage<1, [A9_MUX0], 0>,
397                                 InstrStage<1, [A9_AGU]>,
398                                 InstrStage<2, [A9_LS0, A9_LS1]>]>,
399   //
400   // Store multiple + update
401   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
402                                 InstrStage<1, [A9_MUX0], 0>,
403                                 InstrStage<1, [A9_AGU]>,
404                                 InstrStage<2, [A9_LS0, A9_LS1]>], [2]>,
405
406   // Branch
407   //
408   // no delay slots, so the latency of a branch is unimportant
409   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
410                                 InstrStage<1, [A9_Issue1], 0>,
411                                 InstrStage<1, [A9_Branch]>]>,
412
413   // VFP and NEON share the same register file. This means that every VFP
414   // instruction should wait for full completion of the consecutive NEON
415   // instruction and vice-versa. We model this behavior with two artificial FUs:
416   // DRegsVFP and DRegsN.
417   //
418   // Every VFP instruction:
419   //  - Acquires DRegsVFP resource for 1 cycle
420   //  - Reserves DRegsN resource for the whole duration (including time to
421   //    register file writeback!).
422   // Every NEON instruction does the same but with FUs swapped.
423   //
424   // Since the reserved FU cannot be acquired, this models precisely
425   // "cross-domain" stalls.
426
427   // VFP
428   // Issue through integer pipeline, and execute in NEON unit.
429
430   // FP Special Register to Integer Register File Move
431   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
432                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
433                               InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
434                               InstrStage<1, [A9_MUX0], 0>,
435                               InstrStage<1, [A9_NPipe]>]>,
436   //
437   // Single-precision FP Unary
438   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
439                                // Extra latency cycles since wbck is 2 cycles
440                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
441                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
442                                InstrStage<1, [A9_MUX0], 0>,
443                                InstrStage<1, [A9_NPipe]>],
444                               [1, 1]>,
445   //
446   // Double-precision FP Unary
447   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
448                                // Extra latency cycles since wbck is 2 cycles
449                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
450                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
451                                InstrStage<1, [A9_MUX0], 0>,
452                                InstrStage<1, [A9_NPipe]>],
453                               [1, 1]>,
454
455   //
456   // Single-precision FP Compare
457   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
458                                // Extra latency cycles since wbck is 4 cycles
459                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
460                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
461                                InstrStage<1, [A9_MUX0], 0>,
462                                InstrStage<1, [A9_NPipe]>],
463                               [1, 1]>,
464   //
465   // Double-precision FP Compare
466   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
467                                // Extra latency cycles since wbck is 4 cycles
468                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
469                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
470                                InstrStage<1, [A9_MUX0], 0>,
471                                InstrStage<1, [A9_NPipe]>],
472                               [1, 1]>,
473   //
474   // Single to Double FP Convert
475   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
476                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
477                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
478                                InstrStage<1, [A9_MUX0], 0>,
479                                InstrStage<1, [A9_NPipe]>],
480                               [4, 1]>,
481   //
482   // Double to Single FP Convert
483   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
484                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
485                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486                                InstrStage<1, [A9_MUX0], 0>,
487                                InstrStage<1, [A9_NPipe]>],
488                               [4, 1]>,
489
490   //
491   // Single to Half FP Convert
492   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
493                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
494                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495                                InstrStage<1, [A9_MUX0], 0>,
496                                InstrStage<1, [A9_NPipe]>],
497                               [4, 1]>,
498   //
499   // Half to Single FP Convert
500   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
501                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
502                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503                                InstrStage<1, [A9_MUX0], 0>,
504                                InstrStage<1, [A9_NPipe]>],
505                               [2, 1]>,
506
507   //
508   // Single-Precision FP to Integer Convert
509   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
510                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
511                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512                                InstrStage<1, [A9_MUX0], 0>,
513                                InstrStage<1, [A9_NPipe]>],
514                               [4, 1]>,
515   //
516   // Double-Precision FP to Integer Convert
517   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
518                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
519                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520                                InstrStage<1, [A9_MUX0], 0>,
521                                InstrStage<1, [A9_NPipe]>],
522                               [4, 1]>,
523   //
524   // Integer to Single-Precision FP Convert
525   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
526                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
527                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
528                                InstrStage<1, [A9_MUX0], 0>,
529                                InstrStage<1, [A9_NPipe]>],
530                               [4, 1]>,
531   //
532   // Integer to Double-Precision FP Convert
533   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
534                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
535                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
536                                InstrStage<1, [A9_MUX0], 0>,
537                                InstrStage<1, [A9_NPipe]>],
538                               [4, 1]>,
539   //
540   // Single-precision FP ALU
541   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
542                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
543                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
544                                InstrStage<1, [A9_MUX0], 0>,
545                                InstrStage<1, [A9_NPipe]>],
546                               [4, 1, 1]>,
547   //
548   // Double-precision FP ALU
549   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
550                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
551                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
552                                InstrStage<1, [A9_MUX0], 0>,
553                                InstrStage<1, [A9_NPipe]>],
554                               [4, 1, 1]>,
555   //
556   // Single-precision FP Multiply
557   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
558                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
559                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
560                                InstrStage<1, [A9_MUX0], 0>,
561                                InstrStage<1, [A9_NPipe]>],
562                               [5, 1, 1]>,
563   //
564   // Double-precision FP Multiply
565   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
566                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
567                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
568                                InstrStage<1, [A9_MUX0], 0>,
569                                InstrStage<2, [A9_NPipe]>],
570                               [6, 1, 1]>,
571   //
572   // Single-precision FP MAC
573   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
574                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
575                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
576                                InstrStage<1, [A9_MUX0], 0>,
577                                InstrStage<1, [A9_NPipe]>],
578                               [8, 0, 1, 1]>,
579   //
580   // Double-precision FP MAC
581   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
582                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
583                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
584                                InstrStage<1,  [A9_MUX0], 0>,
585                                InstrStage<2,  [A9_NPipe]>],
586                               [9, 0, 1, 1]>,
587   //
588   // Single-precision FP DIV
589   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
590                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
591                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
592                                InstrStage<1,  [A9_MUX0], 0>,
593                                InstrStage<10, [A9_NPipe]>],
594                               [15, 1, 1]>,
595   //
596   // Double-precision FP DIV
597   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
598                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
599                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
600                                InstrStage<1,  [A9_MUX0], 0>,
601                                InstrStage<20, [A9_NPipe]>],
602                               [25, 1, 1]>,
603   //
604   // Single-precision FP SQRT
605   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
606                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
607                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
608                                InstrStage<1,  [A9_MUX0], 0>,
609                                InstrStage<13, [A9_NPipe]>],
610                               [17, 1]>,
611   //
612   // Double-precision FP SQRT
613   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
614                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
615                                InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
616                                InstrStage<1,  [A9_MUX0], 0>,
617                                InstrStage<28, [A9_NPipe]>],
618                               [32, 1]>,
619
620   //
621   // Integer to Single-precision Move
622   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
623                                // Extra 1 latency cycle since wbck is 2 cycles
624                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
625                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
626                                InstrStage<1, [A9_MUX0], 0>,
627                                InstrStage<1, [A9_NPipe]>],
628                               [1, 1]>,
629   //
630   // Integer to Double-precision Move
631   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
632                                // Extra 1 latency cycle since wbck is 2 cycles
633                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
634                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
635                                InstrStage<1, [A9_MUX0], 0>,
636                                InstrStage<1, [A9_NPipe]>],
637                               [1, 1, 1]>,
638   //
639   // Single-precision to Integer Move
640   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
641                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
642                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
643                                InstrStage<1, [A9_MUX0], 0>,
644                                InstrStage<1, [A9_NPipe]>],
645                               [1, 1]>,
646   //
647   // Double-precision to Integer Move
648   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
649                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
650                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
651                                InstrStage<1, [A9_MUX0], 0>,
652                                InstrStage<1, [A9_NPipe]>],
653                               [1, 1, 1]>,
654   //
655   // Single-precision FP Load
656   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
657                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
658                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
659                                InstrStage<1, [A9_MUX0], 0>,
660                                InstrStage<1, [A9_NPipe]>],
661                               [1, 1]>,
662   //
663   // Double-precision FP Load
664   // FIXME: Result latency is 1 if address is 64-bit aligned.
665   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
666                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
667                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
668                                InstrStage<1, [A9_MUX0], 0>,
669                                InstrStage<1, [A9_NPipe]>],
670                               [2, 1]>,
671   //
672   // FP Load Multiple
673   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
674                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
675                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676                                InstrStage<1, [A9_MUX0], 0>,
677                                InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
678   //
679   // FP Load Multiple + update
680   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
681                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
682                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
683                                InstrStage<1, [A9_MUX0], 0>,
684                                InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
685   //
686   // Single-precision FP Store
687   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
688                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
689                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
690                                InstrStage<1, [A9_MUX0], 0>,
691                                InstrStage<1, [A9_NPipe]>],
692                               [1, 1]>,
693   //
694   // Double-precision FP Store
695   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
696                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
697                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
698                                InstrStage<1, [A9_MUX0], 0>,
699                                InstrStage<1, [A9_NPipe]>],
700                               [1, 1]>,
701   //
702   // FP Store Multiple
703   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
704                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
705                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
706                                InstrStage<1, [A9_MUX0], 0>,
707                                InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
708   //
709   // FP Store Multiple + update
710   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
711                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
712                                 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
713                                 InstrStage<1, [A9_MUX0], 0>,
714                                 InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
715   // NEON
716   // VLD1
717   // FIXME: Conservatively assume insufficient alignment.
718   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
719                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
720                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
721                                InstrStage<1, [A9_MUX0], 0>,
722                                InstrStage<2, [A9_NPipe]>],
723                               [2, 1]>,
724   // VLD1x2
725   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_DRegsN],   0, Required>,
726                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
727                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
728                                InstrStage<1, [A9_MUX0], 0>,
729                                InstrStage<2, [A9_NPipe]>],
730                               [2, 2, 1]>,
731   // VLD1x3
732   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_DRegsN],   0, Required>,
733                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
734                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
735                                InstrStage<1, [A9_MUX0], 0>,
736                                InstrStage<3, [A9_NPipe]>],
737                               [2, 2, 3, 1]>,
738   // VLD1x4
739   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_DRegsN],   0, Required>,
740                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
741                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
742                                InstrStage<1, [A9_MUX0], 0>,
743                                InstrStage<3, [A9_NPipe]>],
744                               [2, 2, 3, 3, 1]>,
745   // VLD1u
746   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_DRegsN],   0, Required>,
747                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
748                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
749                                InstrStage<1, [A9_MUX0], 0>,
750                                InstrStage<2, [A9_NPipe]>],
751                               [2, 2, 1]>,
752   // VLD1x2u
753   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
754                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
755                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
756                                InstrStage<1, [A9_MUX0], 0>,
757                                InstrStage<2, [A9_NPipe]>],
758                               [2, 2, 2, 1]>,
759   // VLD1x3u
760   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
761                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
762                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
763                                InstrStage<1, [A9_MUX0], 0>,
764                                InstrStage<3, [A9_NPipe]>],
765                               [2, 2, 3, 2, 1]>,
766   // VLD1x4u
767   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
768                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
769                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
770                                InstrStage<1, [A9_MUX0], 0>,
771                                InstrStage<3, [A9_NPipe]>],
772                               [2, 2, 3, 3, 2, 1]>,
773   //
774   // VLD2
775   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
776                                // Extra latency cycles since wbck is 7 cycles
777                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
778                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
779                                InstrStage<1, [A9_MUX0], 0>,
780                                InstrStage<2, [A9_NPipe]>],
781                               [3, 3, 1]>,
782   //
783   // VLD2x2
784   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_DRegsN],   0, Required>,
785                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
786                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
787                                InstrStage<1, [A9_MUX0], 0>,
788                                InstrStage<3, [A9_NPipe]>],
789                               [3, 4, 3, 4, 1]>,
790   //
791   // VLD2ln
792   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_DRegsN],   0, Required>,
793                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
794                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
795                                InstrStage<1, [A9_MUX0], 0>,
796                                InstrStage<3, [A9_NPipe]>],
797                               [4, 4, 1, 1, 1, 1]>,
798   //
799   // VLD2u
800   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_DRegsN],   0, Required>,
801                                // Extra latency cycles since wbck is 7 cycles
802                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
803                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
804                                InstrStage<1, [A9_MUX0], 0>,
805                                InstrStage<2, [A9_NPipe]>],
806                               [3, 3, 2, 1, 1, 1]>,
807   //
808   // VLD2x2u
809   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_DRegsN],   0, Required>,
810                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
811                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
812                                InstrStage<1, [A9_MUX0], 0>,
813                                InstrStage<3, [A9_NPipe]>],
814                               [3, 4, 3, 4, 2, 1]>,
815   //
816   // VLD2lnu
817   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_DRegsN],   0, Required>,
818                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
819                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
820                                InstrStage<1, [A9_MUX0], 0>,
821                                InstrStage<3, [A9_NPipe]>],
822                               [4, 4, 2, 1, 1, 1, 1, 1]>,
823   //
824   // VLD3
825   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
826                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
827                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
828                                InstrStage<1, [A9_MUX0], 0>,
829                                InstrStage<4, [A9_NPipe]>],
830                               [4, 4, 5, 1]>,
831   //
832   // VLD3ln
833   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_DRegsN],   0, Required>,
834                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
835                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
836                                InstrStage<1, [A9_MUX0], 0>,
837                                InstrStage<5, [A9_NPipe]>],
838                               [5, 5, 6, 1, 1, 1, 1, 2]>,
839   //
840   // VLD3u
841   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_DRegsN],   0, Required>,
842                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
843                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
844                                InstrStage<1, [A9_MUX0], 0>,
845                                InstrStage<4, [A9_NPipe]>],
846                               [4, 4, 5, 2, 1]>,
847   //
848   // VLD3lnu
849   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_DRegsN],   0, Required>,
850                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
851                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
852                                InstrStage<1, [A9_MUX0], 0>,
853                                InstrStage<5, [A9_NPipe]>],
854                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
855   //
856   // VLD4
857   // FIXME: We don't model this instruction properly
858   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
859                                // Extra latency cycles since wbck is 6 cycles
860                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
861                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
862                                InstrStage<1, [A9_MUX0], 0>,
863                                InstrStage<1, [A9_NPipe]>],
864                               [2, 2, 2, 2, 1]>,
865   //
866   // VST
867   // FIXME: We don't model this instruction properly
868   InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
869                                // Extra latency cycles since wbck is 6 cycles
870                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
871                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
872                                InstrStage<1, [A9_MUX0], 0>,
873                                InstrStage<1, [A9_NPipe]>]>,
874   //
875   // Double-register Integer Unary
876   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
877                                // Extra latency cycles since wbck is 6 cycles
878                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
879                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
880                                InstrStage<1, [A9_MUX0], 0>,
881                                InstrStage<1, [A9_NPipe]>],
882                               [4, 2]>,
883   //
884   // Quad-register Integer Unary
885   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
886                                // Extra latency cycles since wbck is 6 cycles
887                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
888                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
889                                InstrStage<1, [A9_MUX0], 0>,
890                                InstrStage<1, [A9_NPipe]>],
891                               [4, 2]>,
892   //
893   // Double-register Integer Q-Unary
894   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
895                                // Extra latency cycles since wbck is 6 cycles
896                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
897                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
898                                InstrStage<1, [A9_MUX0], 0>,
899                                InstrStage<1, [A9_NPipe]>],
900                               [4, 1]>,
901   //
902   // Quad-register Integer Q-Unary
903   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
904                                // Extra latency cycles since wbck is 6 cycles
905                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
906                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
907                                InstrStage<1, [A9_MUX0], 0>,
908                                InstrStage<1, [A9_NPipe]>],
909                               [4, 1]>,
910   //
911   // Double-register Integer Binary
912   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
913                                // Extra latency cycles since wbck is 6 cycles
914                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
915                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
916                                InstrStage<1, [A9_MUX0], 0>,
917                                InstrStage<1, [A9_NPipe]>],
918                               [3, 2, 2]>,
919   //
920   // Quad-register Integer Binary
921   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
922                                // Extra latency cycles since wbck is 6 cycles
923                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
924                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
925                                InstrStage<1, [A9_MUX0], 0>,
926                                InstrStage<1, [A9_NPipe]>],
927                               [3, 2, 2]>,
928   //
929   // Double-register Integer Subtract
930   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
931                                // Extra latency cycles since wbck is 6 cycles
932                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
933                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
934                                InstrStage<1, [A9_MUX0], 0>,
935                                InstrStage<1, [A9_NPipe]>],
936                               [3, 2, 1]>,
937   //
938   // Quad-register Integer Subtract
939   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
940                                // Extra latency cycles since wbck is 6 cycles
941                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
942                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
943                                InstrStage<1, [A9_MUX0], 0>,
944                                InstrStage<1, [A9_NPipe]>],
945                               [3, 2, 1]>,
946   //
947   // Double-register Integer Shift
948   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
949                                // Extra latency cycles since wbck is 6 cycles
950                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
951                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
952                                InstrStage<1, [A9_MUX0], 0>,
953                                InstrStage<1, [A9_NPipe]>],
954                               [3, 1, 1]>,
955   //
956   // Quad-register Integer Shift
957   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
958                                // Extra latency cycles since wbck is 6 cycles
959                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
960                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
961                                InstrStage<1, [A9_MUX0], 0>,
962                                InstrStage<1, [A9_NPipe]>],
963                               [3, 1, 1]>,
964   //
965   // Double-register Integer Shift (4 cycle)
966   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
967                                // Extra latency cycles since wbck is 6 cycles
968                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
969                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
970                                InstrStage<1, [A9_MUX0], 0>,
971                                InstrStage<1, [A9_NPipe]>],
972                               [4, 1, 1]>,
973   //
974   // Quad-register Integer Shift (4 cycle)
975   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
976                                // Extra latency cycles since wbck is 6 cycles
977                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
978                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
979                                InstrStage<1, [A9_MUX0], 0>,
980                                InstrStage<1, [A9_NPipe]>],
981                               [4, 1, 1]>,
982   //
983   // Double-register Integer Binary (4 cycle)
984   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
985                                // Extra latency cycles since wbck is 6 cycles
986                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
987                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
988                                InstrStage<1, [A9_MUX0], 0>,
989                                InstrStage<1, [A9_NPipe]>],
990                               [4, 2, 2]>,
991   //
992   // Quad-register Integer Binary (4 cycle)
993   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
994                                // Extra latency cycles since wbck is 6 cycles
995                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
996                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
997                                InstrStage<1, [A9_MUX0], 0>,
998                                InstrStage<1, [A9_NPipe]>],
999                               [4, 2, 2]>,
1000   //
1001   // Double-register Integer Subtract (4 cycle)
1002   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1003                                // Extra latency cycles since wbck is 6 cycles
1004                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1005                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1006                                InstrStage<1, [A9_MUX0], 0>,
1007                                InstrStage<1, [A9_NPipe]>],
1008                               [4, 2, 1]>,
1009   //
1010   // Quad-register Integer Subtract (4 cycle)
1011   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1012                                // Extra latency cycles since wbck is 6 cycles
1013                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1014                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1015                                InstrStage<1, [A9_MUX0], 0>,
1016                                InstrStage<1, [A9_NPipe]>],
1017                               [4, 2, 1]>,
1018
1019   //
1020   // Double-register Integer Count
1021   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1022                                // Extra latency cycles since wbck is 6 cycles
1023                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1024                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1025                                InstrStage<1, [A9_MUX0], 0>,
1026                                InstrStage<1, [A9_NPipe]>],
1027                               [3, 2, 2]>,
1028   //
1029   // Quad-register Integer Count
1030   // Result written in N3, but that is relative to the last cycle of multicycle,
1031   // so we use 4 for those cases
1032   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1033                                // Extra latency cycles since wbck is 7 cycles
1034                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1035                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1036                                InstrStage<1, [A9_MUX0], 0>,
1037                                InstrStage<2, [A9_NPipe]>],
1038                               [4, 2, 2]>,
1039   //
1040   // Double-register Absolute Difference and Accumulate
1041   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1042                                // Extra latency cycles since wbck is 6 cycles
1043                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1044                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1045                                InstrStage<1, [A9_MUX0], 0>,
1046                                InstrStage<1, [A9_NPipe]>],
1047                               [6, 3, 2, 1]>,
1048   //
1049   // Quad-register Absolute Difference and Accumulate
1050   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1051                                // Extra latency cycles since wbck is 6 cycles
1052                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1053                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1054                                InstrStage<1, [A9_MUX0], 0>,
1055                                InstrStage<2, [A9_NPipe]>],
1056                               [6, 3, 2, 1]>,
1057   //
1058   // Double-register Integer Pair Add Long
1059   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1060                                // Extra latency cycles since wbck is 6 cycles
1061                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1062                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1063                                InstrStage<1, [A9_MUX0], 0>,
1064                                InstrStage<1, [A9_NPipe]>],
1065                               [6, 3, 1]>,
1066   //
1067   // Quad-register Integer Pair Add Long
1068   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1069                                // Extra latency cycles since wbck is 6 cycles
1070                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1071                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1072                                InstrStage<1, [A9_MUX0], 0>,
1073                                InstrStage<2, [A9_NPipe]>],
1074                               [6, 3, 1]>,
1075
1076   //
1077   // Double-register Integer Multiply (.8, .16)
1078   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1079                                // Extra latency cycles since wbck is 6 cycles
1080                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1081                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1082                                InstrStage<1, [A9_MUX0], 0>,
1083                                InstrStage<1, [A9_NPipe]>],
1084                               [6, 2, 2]>,
1085   //
1086   // Quad-register Integer Multiply (.8, .16)
1087   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1088                                // Extra latency cycles since wbck is 7 cycles
1089                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1090                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1091                                InstrStage<1, [A9_MUX0], 0>,
1092                                InstrStage<2, [A9_NPipe]>],
1093                               [7, 2, 2]>,
1094
1095   //
1096   // Double-register Integer Multiply (.32)
1097   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1098                                // Extra latency cycles since wbck is 7 cycles
1099                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1100                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1101                                InstrStage<1, [A9_MUX0], 0>,
1102                                InstrStage<2, [A9_NPipe]>],
1103                               [7, 2, 1]>,
1104   //
1105   // Quad-register Integer Multiply (.32)
1106   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1107                                // Extra latency cycles since wbck is 9 cycles
1108                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1109                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1110                                InstrStage<1, [A9_MUX0], 0>,
1111                                InstrStage<4, [A9_NPipe]>],
1112                               [9, 2, 1]>,
1113   //
1114   // Double-register Integer Multiply-Accumulate (.8, .16)
1115   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1116                                // Extra latency cycles since wbck is 6 cycles
1117                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1118                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1119                                InstrStage<1, [A9_MUX0], 0>,
1120                                InstrStage<1, [A9_NPipe]>],
1121                               [6, 3, 2, 2]>,
1122   //
1123   // Double-register Integer Multiply-Accumulate (.32)
1124   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
1125                                // Extra latency cycles since wbck is 7 cycles
1126                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1127                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1128                                InstrStage<1, [A9_MUX0], 0>,
1129                                InstrStage<2, [A9_NPipe]>],
1130                               [7, 3, 2, 1]>,
1131   //
1132   // Quad-register Integer Multiply-Accumulate (.8, .16)
1133   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1134                                // Extra latency cycles since wbck is 7 cycles
1135                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1136                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1137                                InstrStage<1, [A9_MUX0], 0>,
1138                                InstrStage<2, [A9_NPipe]>],
1139                               [7, 3, 2, 2]>,
1140   //
1141   // Quad-register Integer Multiply-Accumulate (.32)
1142   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
1143                                // Extra latency cycles since wbck is 9 cycles
1144                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1145                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1146                                InstrStage<1, [A9_MUX0], 0>,
1147                                InstrStage<4, [A9_NPipe]>],
1148                               [9, 3, 2, 1]>,
1149
1150   //
1151   // Move
1152   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_DRegsN],   0, Required>,
1153                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1154                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1155                                InstrStage<1, [A9_MUX0], 0>,
1156                                InstrStage<1, [A9_NPipe]>],
1157                               [1,1]>,
1158   //
1159   // Move Immediate
1160   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1161                                // Extra latency cycles since wbck is 6 cycles
1162                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1163                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1164                                InstrStage<1, [A9_MUX0], 0>,
1165                                InstrStage<1, [A9_NPipe]>],
1166                               [3]>,
1167   //
1168   // Double-register Permute Move
1169   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1170   // FIXME: all latencies are arbitrary, no information is available
1171                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1172                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1173                                InstrStage<1, [A9_MUX0], 0>,
1174                                InstrStage<1, [A9_NPipe]>],
1175                               [2, 1]>,
1176   //
1177   // Quad-register Permute Move
1178   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1179   // FIXME: all latencies are arbitrary, no information is available
1180                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1181                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1182                                InstrStage<1, [A9_MUX0], 0>,
1183                                InstrStage<1, [A9_NPipe]>],
1184                               [2, 1]>,
1185   //
1186   // Integer to Single-precision Move
1187   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1188   // FIXME: all latencies are arbitrary, no information is available
1189                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1190                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1191                                InstrStage<1, [A9_MUX0], 0>,
1192                                InstrStage<1, [A9_NPipe]>],
1193                               [2, 1]>,
1194   //
1195   // Integer to Double-precision Move
1196   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1197   // FIXME: all latencies are arbitrary, no information is available
1198                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1199                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1200                                InstrStage<1, [A9_MUX0], 0>,
1201                                InstrStage<1, [A9_NPipe]>],
1202                               [2, 1, 1]>,
1203   //
1204   // Single-precision to Integer Move
1205   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1206   // FIXME: all latencies are arbitrary, no information is available
1207                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1208                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1209                                InstrStage<1, [A9_MUX0], 0>,
1210                                InstrStage<1, [A9_NPipe]>],
1211                               [2, 1]>,
1212   //
1213   // Double-precision to Integer Move
1214   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1215   // FIXME: all latencies are arbitrary, no information is available
1216                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1217                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1218                                InstrStage<1, [A9_MUX0], 0>,
1219                                InstrStage<1, [A9_NPipe]>],
1220                               [2, 2, 1]>,
1221   //
1222   // Integer to Lane Move
1223   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
1224   // FIXME: all latencies are arbitrary, no information is available
1225                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1226                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1227                                InstrStage<1, [A9_MUX0], 0>,
1228                                InstrStage<2, [A9_NPipe]>],
1229                               [3, 1, 1]>,
1230
1231   //
1232   // Vector narrow move
1233   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1234                                // Extra latency cycles since wbck is 6 cycles
1235                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1236                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1237                                InstrStage<1, [A9_MUX0], 0>,
1238                                InstrStage<1, [A9_NPipe]>],
1239                               [3, 1]>,
1240   //
1241   // Double-register FP Unary
1242   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1243                                // Extra latency cycles since wbck is 6 cycles
1244                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1245                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1246                                InstrStage<1, [A9_MUX0], 0>,
1247                                InstrStage<1, [A9_NPipe]>],
1248                               [5, 2]>,
1249   //
1250   // Quad-register FP Unary
1251   // Result written in N5, but that is relative to the last cycle of multicycle,
1252   // so we use 6 for those cases
1253   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1254                                // Extra latency cycles since wbck is 7 cycles
1255                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1256                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1257                                InstrStage<1, [A9_MUX0], 0>,
1258                                InstrStage<2, [A9_NPipe]>],
1259                               [6, 2]>,
1260   //
1261   // Double-register FP Binary
1262   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1263   // optimistic.
1264   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1265                                // Extra latency cycles since wbck is 6 cycles
1266                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1267                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1268                                InstrStage<1, [A9_MUX0], 0>,
1269                                InstrStage<1, [A9_NPipe]>],
1270                               [5, 2, 2]>,
1271   //
1272   // Quad-register FP Binary
1273   // Result written in N5, but that is relative to the last cycle of multicycle,
1274   // so we use 6 for those cases
1275   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1276   // optimistic.
1277   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1278                                // Extra latency cycles since wbck is 7 cycles
1279                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1280                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1281                                InstrStage<1, [A9_MUX0], 0>,
1282                                InstrStage<2, [A9_NPipe]>],
1283                               [6, 2, 2]>,
1284   //
1285   // Double-register FP Multiply-Accumulate
1286   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1287                                // Extra latency cycles since wbck is 7 cycles
1288                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1289                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1290                                InstrStage<1, [A9_MUX0], 0>,
1291                                InstrStage<2, [A9_NPipe]>],
1292                               [6, 3, 2, 1]>,
1293   //
1294   // Quad-register FP Multiply-Accumulate
1295   // Result written in N9, but that is relative to the last cycle of multicycle,
1296   // so we use 10 for those cases (FIXME: itinerary below uses 8; confirm)
1297   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1298                                // Extra latency cycles since wbck is 9 cycles
1299                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1300                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1301                                InstrStage<1, [A9_MUX0], 0>,
1302                                InstrStage<4, [A9_NPipe]>],
1303                               [8, 4, 2, 1]>,
1304   //
1305   // Double-register Reciprocal Step
1306   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1307                                // Extra latency cycles since wbck is 7 cycles
1308                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1309                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1310                                InstrStage<1, [A9_MUX0], 0>,
1311                                InstrStage<2, [A9_NPipe]>],
1312                               [6, 2, 2]>,
1313   //
1314   // Quad-register Reciprocal Step
1315   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1316                                // Extra latency cycles since wbck is 9 cycles
1317                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1318                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1319                                InstrStage<1, [A9_MUX0], 0>,
1320                                InstrStage<4, [A9_NPipe]>],
1321                               [8, 2, 2]>,
1322   //
1323   // Double-register Permute
1324   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1325                                // Extra latency cycles since wbck is 6 cycles
1326                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1327                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1328                                InstrStage<1, [A9_MUX0], 0>,
1329                                InstrStage<1, [A9_NPipe]>],
1330                               [2, 2, 1, 1]>,
1331   //
1332   // Quad-register Permute
1333   // Result written in N2, but that is relative to the last cycle of multicycle,
1334   // so we use 3 for those cases
1335   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
1336                                // Extra latency cycles since wbck is 7 cycles
1337                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1338                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1339                                InstrStage<1, [A9_MUX0], 0>,
1340                                InstrStage<2, [A9_NPipe]>],
1341                               [3, 3, 1, 1]>,
1342   //
1343   // Quad-register Permute (3 cycle issue)
1344   // Result written in N2, but that is relative to the last cycle of multicycle,
1345   // so we use 4 for those cases
1346   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
1347                                // Extra latency cycles since wbck is 8 cycles
1348                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1349                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1350                                InstrStage<1, [A9_MUX0], 0>,
1351                                InstrStage<3, [A9_NPipe]>],
1352                               [4, 4, 1, 1]>,
1353
1354   //
1355   // Double-register VEXT
1356   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1357                                // Extra latency cycles since wbck is 6 cycles
1358                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1359                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1360                                InstrStage<1, [A9_MUX0], 0>,
1361                                InstrStage<1, [A9_NPipe]>],
1362                               [2, 1, 1]>,
1363   //
1364   // Quad-register VEXT
1365   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1366                                // Extra latency cycles since wbck is 7 cycles
1367                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1368                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1369                                InstrStage<1, [A9_MUX0], 0>,
1370                                InstrStage<2, [A9_NPipe]>],
1371                               [3, 1, 1]>,
1372   //
1373   // VTB
1374   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
1375                                // Extra latency cycles since wbck is 7 cycles
1376                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1377                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1378                                InstrStage<1, [A9_MUX0], 0>,
1379                                InstrStage<2, [A9_NPipe]>],
1380                               [3, 2, 1]>,
1381   InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
1382                                // Extra latency cycles since wbck is 7 cycles
1383                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1384                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1385                                InstrStage<1, [A9_MUX0], 0>,
1386                                InstrStage<2, [A9_NPipe]>],
1387                               [3, 2, 2, 1]>,
1388   InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
1389                                // Extra latency cycles since wbck is 8 cycles
1390                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1391                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1392                                InstrStage<1, [A9_MUX0], 0>,
1393                                InstrStage<3, [A9_NPipe]>],
1394                               [4, 2, 2, 3, 1]>,
1395   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
1396                                // Extra latency cycles since wbck is 8 cycles
1397                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1398                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1399                                InstrStage<1, [A9_MUX0], 0>,
1400                                InstrStage<3, [A9_NPipe]>],
1401                               [4, 2, 2, 3, 3, 1]>,
1402   //
1403   // VTBX
1404   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1405                                // Extra latency cycles since wbck is 7 cycles
1406                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1407                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1408                                InstrStage<1, [A9_MUX0], 0>,
1409                                InstrStage<2, [A9_NPipe]>],
1410                               [3, 1, 2, 1]>,
1411   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1412                                // Extra latency cycles since wbck is 7 cycles
1413                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1414                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1415                                InstrStage<1, [A9_MUX0], 0>,
1416                                InstrStage<2, [A9_NPipe]>],
1417                               [3, 1, 2, 2, 1]>,
1418   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1419                                // Extra latency cycles since wbck is 8 cycles
1420                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1421                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1422                                InstrStage<1, [A9_MUX0], 0>,
1423                                InstrStage<3, [A9_NPipe]>],
1424                               [4, 1, 2, 2, 3, 1]>,
1425   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
1426                                // Extra latency cycles since wbck is 8 cycles
1427                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1428                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1429                                InstrStage<1, [A9_MUX0], 0>,
1430                                InstrStage<2, [A9_NPipe]>],
1431                               [4, 1, 2, 2, 3, 3, 1]>
1432 ]>;