Thumb2 asm aliases for wide bitwise w/ immediate instructions.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units
19 def A9_Issue0  : FuncUnit; // Issue 0
20 def A9_Issue1  : FuncUnit; // Issue 1
21 def A9_Branch  : FuncUnit; // Branch
22 def A9_ALU0    : FuncUnit; // ALU pipeline 0; the only unit multiplies use
23 def A9_ALU1    : FuncUnit; // ALU pipeline 1
24 def A9_AGU     : FuncUnit; // Address generation unit for ld / st
25 def A9_NPipe   : FuncUnit; // NEON pipeline; the VFP entries also occupy it
26 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LSUnit  : FuncUnit; // Load/store unit
28 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial unit)
29 def A9_DRegsN  : FuncUnit; // FP register set, NEON side (artificial unit)
30
31 // Bypasses
32 def A9_LdBypass : Bypass; // Tagged on load results and ALU/compare inputs
33
34 def CortexA9Itineraries : ProcessorItineraries<
35   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
36    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
37   [A9_LdBypass], [
38   // Two fully-pipelined integer ALU pipelines
39
40   //
41   // Move instructions, unconditional
42   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
43                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
44   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
45                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
46   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
48   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
50   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
52                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
53   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
54                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
55                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
56                                   InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
57   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
58                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
59                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
60                                InstrStage<1, [A9_MUX0], 0>,
61                                InstrStage<1, [A9_AGU], 0>,
62                                InstrStage<1, [A9_LSUnit]>], [5]>,
63   //
64   // MVN instructions
65   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
66                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
67                               [1]>,
68   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
69                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
70                               [1, 1], [NoBypass, A9_LdBypass]>,
71   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
72                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
73                               [2, 1]>,
74   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
76                               [3, 1, 1]>,
77   //
78   // No operand cycles
79   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
80                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
81   //
82   // Binary Instructions that produce a result
83   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
85                             [1, 1], [NoBypass, A9_LdBypass]>,
86   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
88                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
89   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
90                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
91                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
92   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
94                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
95   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
96                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
97                             [3, 1, 1, 1],
98                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
99   //
100   // Bitwise Instructions that produce a result
101   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
102                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
103   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
104                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
105   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
106                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
107   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
109   //
110   // Unary Instructions that produce a result
111
112   // CLZ, RBIT, etc.
113   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
115
116   // BFC, BFI, UBFX, SBFX
117   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
119
120   //
121   // Zero and sign extension instructions
122   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
123                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
124   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
125                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
126   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
127                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
128   //
129   // Compare instructions
130   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
131                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
132                                [1], [A9_LdBypass]>,
133   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
134                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
135                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
136   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
138                                 [1, 1], [A9_LdBypass, NoBypass]>,
139   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
140                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
141                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
142   //
143   // Test instructions
144   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
145                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
146   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
147                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
148   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
150   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
152   //
153   // Move instructions, conditional
154   // FIXME: Correctly model the extra input dep on the destination.
155   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
156                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
157   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
158                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
159   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
160                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
161   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
163   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
165                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
167
168   // Integer multiply pipeline
169   //
170   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
171                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
172   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
173                                InstrStage<2, [A9_ALU0]>],
174                               [3, 1, 1, 1]>,
175   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
177   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
178                                InstrStage<2, [A9_ALU0]>],
179                               [4, 1, 1, 1]>,
180   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
182   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
183                                InstrStage<3, [A9_ALU0]>],
184                               [4, 5, 1, 1]>,
185   // Integer load pipeline
186   // FIXME: The timings are some rough approximations
187   //
188   // Immediate offset
189   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
190                                  InstrStage<1, [A9_MUX0], 0>,
191                                  InstrStage<1, [A9_AGU], 0>,
192                                  InstrStage<1, [A9_LSUnit]>],
193                                 [3, 1], [A9_LdBypass]>,
194   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
195                                  InstrStage<1, [A9_MUX0], 0>,
196                                  InstrStage<2, [A9_AGU], 0>,
197                                  InstrStage<1, [A9_LSUnit]>],
198                                 [4, 1], [A9_LdBypass]>,
199   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
200   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
201                                  InstrStage<1, [A9_MUX0], 0>,
202                                  InstrStage<2, [A9_AGU], 0>,
203                                  InstrStage<1, [A9_LSUnit]>],
204                                 [3, 3, 1], [A9_LdBypass]>,
205   //
206   // Register offset
207   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
208                                  InstrStage<1, [A9_MUX0], 0>,
209                                  InstrStage<1, [A9_AGU], 0>,
210                                  InstrStage<1, [A9_LSUnit]>],
211                                 [3, 1, 1], [A9_LdBypass]>,
212   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
213                                  InstrStage<1, [A9_MUX0], 0>,
214                                  InstrStage<2, [A9_AGU], 0>,
215                                  InstrStage<1, [A9_LSUnit]>],
216                                 [4, 1, 1], [A9_LdBypass]>,
217   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
218                                  InstrStage<1, [A9_MUX0], 0>,
219                                  InstrStage<2, [A9_AGU], 0>,
220                                  InstrStage<1, [A9_LSUnit]>],
221                                 [3, 3, 1, 1], [A9_LdBypass]>,
222   //
223   // Scaled register offset
224   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
225                                  InstrStage<1, [A9_MUX0], 0>,
226                                  InstrStage<1, [A9_AGU], 0>,
227                                  InstrStage<1, [A9_LSUnit], 0>],
228                                 [4, 1, 1], [A9_LdBypass]>,
229   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
230                                  InstrStage<1, [A9_MUX0], 0>,
231                                  InstrStage<2, [A9_AGU], 0>,
232                                  InstrStage<1, [A9_LSUnit]>],
233                                 [5, 1, 1], [A9_LdBypass]>,
234   //
235   // Immediate offset with update
236   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
237                                  InstrStage<1, [A9_MUX0], 0>,
238                                  InstrStage<1, [A9_AGU], 0>,
239                                  InstrStage<1, [A9_LSUnit]>],
240                                 [3, 2, 1], [A9_LdBypass]>,
241   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
242                                  InstrStage<1, [A9_MUX0], 0>,
243                                  InstrStage<2, [A9_AGU], 0>,
244                                  InstrStage<1, [A9_LSUnit]>],
245                                 [4, 3, 1], [A9_LdBypass]>,
246   //
247   // Register offset with update
248   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
249                                  InstrStage<1, [A9_MUX0], 0>,
250                                  InstrStage<1, [A9_AGU], 0>,
251                                  InstrStage<1, [A9_LSUnit]>],
252                                 [3, 2, 1, 1], [A9_LdBypass]>,
253   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
254                                  InstrStage<1, [A9_MUX0], 0>,
255                                  InstrStage<2, [A9_AGU], 0>,
256                                  InstrStage<1, [A9_LSUnit]>],
257                                 [4, 3, 1, 1], [A9_LdBypass]>,
258   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
259                                  InstrStage<1, [A9_MUX0], 0>,
260                                  InstrStage<2, [A9_AGU], 0>,
261                                  InstrStage<1, [A9_LSUnit]>],
262                                 [3, 3, 1, 1], [A9_LdBypass]>,
263   //
264   // Scaled register offset with update
265   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
266                                  InstrStage<1, [A9_MUX0], 0>,
267                                  InstrStage<1, [A9_AGU], 0>,
268                                  InstrStage<1, [A9_LSUnit]>],
269                                 [4, 3, 1, 1], [A9_LdBypass]>,
270   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
271                                   InstrStage<1, [A9_MUX0], 0>,
272                                   InstrStage<2, [A9_AGU], 0>,
273                                   InstrStage<1, [A9_LSUnit]>],
274                                  [5, 4, 1, 1], [A9_LdBypass]>,
275   //
276   // Load multiple, def is the 5th operand.
277   // FIXME: This assumes 3 to 4 registers.
278   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
279                                 InstrStage<1, [A9_MUX0], 0>,
280                                 InstrStage<2, [A9_AGU], 1>,
281                                 InstrStage<2, [A9_LSUnit]>],
282                                [1, 1, 1, 1, 3],
283                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
284   //
285   // Load multiple + update, defs are the 1st and 5th operands.
286   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
287                                 InstrStage<1, [A9_MUX0], 0>,
288                                 InstrStage<2, [A9_AGU], 1>,
289                                 InstrStage<2, [A9_LSUnit]>],
290                                [2, 1, 1, 1, 3],
291                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
292   //
293   // Load multiple plus branch
294   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
295                                 InstrStage<1, [A9_MUX0], 0>,
296                                 InstrStage<1, [A9_AGU], 1>,
297                                 InstrStage<2, [A9_LSUnit]>,
298                                 InstrStage<1, [A9_Branch]>],
299                                [1, 2, 1, 1, 3],
300                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
301   //
302   // Pop, def is the 3rd operand.
303   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
304                                 InstrStage<1, [A9_MUX0], 0>,
305                                 InstrStage<2, [A9_AGU], 1>,
306                                 InstrStage<2, [A9_LSUnit]>],
307                                [1, 1, 3],
308                                [NoBypass, NoBypass, A9_LdBypass]>,
309   //
310   // Pop + branch, def is the 3rd operand.
311   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
312                                 InstrStage<1, [A9_MUX0], 0>,
313                                 InstrStage<2, [A9_AGU], 1>,
314                                 InstrStage<2, [A9_LSUnit]>,
315                                 InstrStage<1, [A9_Branch]>],
316                                [1, 1, 3],
317                                [NoBypass, NoBypass, A9_LdBypass]>,
318
319   //
320   // iLoadi + iALUr for t2LDRpci_pic.
321   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322                                 InstrStage<1, [A9_MUX0], 0>,
323                                 InstrStage<1, [A9_AGU], 0>,
324                                 InstrStage<1, [A9_LSUnit]>,
325                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
326                                [2, 1]>,
327
328   // Integer store pipeline
329   //
330   // Immediate offset
331   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
332                                  InstrStage<1, [A9_MUX0], 0>,
333                                  InstrStage<1, [A9_AGU], 0>,
334                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
335   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
336                                  InstrStage<1, [A9_MUX0], 0>,
337                                  InstrStage<2, [A9_AGU], 1>,
338                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
339   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
340   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341                                  InstrStage<1, [A9_MUX0], 0>,
342                                  InstrStage<2, [A9_AGU], 1>,
343                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
344   //
345   // Register offset
346   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
347                                  InstrStage<1, [A9_MUX0], 0>,
348                                  InstrStage<1, [A9_AGU], 0>,
349                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
350   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351                                  InstrStage<1, [A9_MUX0], 0>,
352                                  InstrStage<2, [A9_AGU], 1>,
353                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
354   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
355                                  InstrStage<1, [A9_MUX0], 0>,
356                                  InstrStage<2, [A9_AGU], 1>,
357                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
358   //
359   // Scaled register offset
360   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361                                   InstrStage<1, [A9_MUX0], 0>,
362                                   InstrStage<1, [A9_AGU], 0>,
363                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
364   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
365                                   InstrStage<1, [A9_MUX0], 0>,
366                                   InstrStage<2, [A9_AGU], 1>,
367                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
368   //
369   // Immediate offset with update
370   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
371                                   InstrStage<1, [A9_MUX0], 0>,
372                                   InstrStage<1, [A9_AGU], 0>,
373                                   InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
374   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
375                                   InstrStage<1, [A9_MUX0], 0>,
376                                   InstrStage<2, [A9_AGU], 1>,
377                                   InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
378   //
379   // Register offset with update
380   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
381                                   InstrStage<1, [A9_MUX0], 0>,
382                                   InstrStage<1, [A9_AGU], 0>,
383                                   InstrStage<1, [A9_LSUnit]>],
384                                  [2, 1, 1, 1]>,
385   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
386                                   InstrStage<1, [A9_MUX0], 0>,
387                                   InstrStage<2, [A9_AGU], 1>,
388                                   InstrStage<1, [A9_LSUnit]>],
389                                  [3, 1, 1, 1]>,
390   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
391                                   InstrStage<1, [A9_MUX0], 0>,
392                                   InstrStage<2, [A9_AGU], 1>,
393                                   InstrStage<1, [A9_LSUnit]>],
394                                  [3, 1, 1, 1]>,
395   //
396   // Scaled register offset with update
397   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
398                                     InstrStage<1, [A9_MUX0], 0>,
399                                     InstrStage<1, [A9_AGU], 0>,
400                                     InstrStage<1, [A9_LSUnit]>],
401                                    [2, 1, 1, 1]>,
402   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
403                                     InstrStage<1, [A9_MUX0], 0>,
404                                     InstrStage<2, [A9_AGU], 1>,
405                                     InstrStage<1, [A9_LSUnit]>],
406                                    [3, 1, 1, 1]>,
407   //
408   // Store multiple
409   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
410                                 InstrStage<1, [A9_MUX0], 0>,
411                                 InstrStage<1, [A9_AGU], 0>,
412                                 InstrStage<2, [A9_LSUnit]>]>,
413   //
414   // Store multiple + update
415   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
416                                 InstrStage<1, [A9_MUX0], 0>,
417                                 InstrStage<1, [A9_AGU], 0>,
418                                 InstrStage<2, [A9_LSUnit]>], [2]>,
419
420   //
421   // Preload
422   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
423
424   // Branch
425   //
426   // no delay slots, so the latency of a branch is unimportant
427   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
428                                 InstrStage<1, [A9_Issue1], 0>,
429                                 InstrStage<1, [A9_Branch]>]>,
430
431   // VFP and NEON share the same register file. This means that every VFP
432   // instruction should wait for full completion of any preceding NEON
433   // instruction and vice-versa. We model this behavior with two artificial FUs:
434   // DRegsVFP and DRegsN.
435   //
436   // Every VFP instruction:
437   //  - Acquires DRegsVFP resource for 1 cycle
438   //  - Reserves DRegsN resource for the whole duration (including time to
439   //    register file writeback!).
440   // Every NEON instruction does the same but with FUs swapped.
441   //
442   // Since the reserved FU cannot be acquired, this precisely models
443   // "cross-domain" stalls.
444
445   // VFP
446   // Issue through integer pipeline, and execute in NEON unit.
447
448   // FP Special Register to Integer Register File Move
449   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
450                               InstrStage<1, [A9_MUX0], 0>,
451                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
452                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
453                               InstrStage<1, [A9_NPipe]>],
454                              [1]>,
455   //
456   // Single-precision FP Unary
457   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
458                                InstrStage<1, [A9_MUX0], 0>,
459                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
460                                // Extra latency cycles since wbck is 2 cycles
461                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
462                                InstrStage<1, [A9_NPipe]>],
463                               [1, 1]>,
464   //
465   // Double-precision FP Unary
466   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
467                                InstrStage<1, [A9_MUX0], 0>,
468                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
469                                // Extra latency cycles since wbck is 2 cycles
470                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
471                                InstrStage<1, [A9_NPipe]>],
472                               [1, 1]>,
473
474   //
475   // Single-precision FP Compare
476   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
477                                InstrStage<1, [A9_MUX0], 0>,
478                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
479                                // Extra latency cycles since wbck is 4 cycles
480                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
481                                InstrStage<1, [A9_NPipe]>],
482                               [1, 1]>,
483   //
484   // Double-precision FP Compare
485   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486                                InstrStage<1, [A9_MUX0], 0>,
487                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
488                                // Extra latency cycles since wbck is 4 cycles
489                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
490                                InstrStage<1, [A9_NPipe]>],
491                               [1, 1]>,
492   //
493   // Single to Double FP Convert
494   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495                                InstrStage<1, [A9_MUX0], 0>,
496                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
497                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
498                                InstrStage<1, [A9_NPipe]>],
499                               [4, 1]>,
500   //
501   // Double to Single FP Convert
502   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503                                InstrStage<1, [A9_MUX0], 0>,
504                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
505                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
506                                InstrStage<1, [A9_NPipe]>],
507                               [4, 1]>,
508
509   //
510   // Single to Half FP Convert
511   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512                                InstrStage<1, [A9_MUX0], 0>,
513                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
514                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
515                                InstrStage<1, [A9_NPipe]>],
516                               [4, 1]>,
517   //
518   // Half to Single FP Convert
519   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520                                InstrStage<1, [A9_MUX0], 0>,
521                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
522                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
523                                InstrStage<1, [A9_NPipe]>],
524                               [2, 1]>,
525
526   //
527   // Single-Precision FP to Integer Convert
528   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
529                                InstrStage<1, [A9_MUX0], 0>,
530                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
531                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
532                                InstrStage<1, [A9_NPipe]>],
533                               [4, 1]>,
534   //
535   // Double-Precision FP to Integer Convert
536   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
537                                InstrStage<1, [A9_MUX0], 0>,
538                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
539                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
540                                InstrStage<1, [A9_NPipe]>],
541                               [4, 1]>,
542   //
543   // Integer to Single-Precision FP Convert
544   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
545                                InstrStage<1, [A9_MUX0], 0>,
546                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
547                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
548                                InstrStage<1, [A9_NPipe]>],
549                               [4, 1]>,
550   //
551   // Integer to Double-Precision FP Convert
552   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
553                                InstrStage<1, [A9_MUX0], 0>,
554                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
555                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
556                                InstrStage<1, [A9_NPipe]>],
557                               [4, 1]>,
558   //
559   // Single-precision FP ALU
560   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
561                                InstrStage<1, [A9_MUX0], 0>,
562                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
563                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
564                                InstrStage<1, [A9_NPipe]>],
565                               [4, 1, 1]>,
566   //
567   // Double-precision FP ALU
568   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
569                                InstrStage<1, [A9_MUX0], 0>,
570                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
571                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
572                                InstrStage<1, [A9_NPipe]>],
573                               [4, 1, 1]>,
574   //
575   // Single-precision FP Multiply
576   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
577                                InstrStage<1, [A9_MUX0], 0>,
578                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
579                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
580                                InstrStage<1, [A9_NPipe]>],
581                               [5, 1, 1]>,
582   //
583   // Double-precision FP Multiply
584   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
585                                InstrStage<1, [A9_MUX0], 0>,
586                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
587                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
588                                InstrStage<2, [A9_NPipe]>],
589                               [6, 1, 1]>,
590   //
591   // Single-precision FP MAC
592   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
593                                InstrStage<1, [A9_MUX0], 0>,
594                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
595                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
596                                InstrStage<1, [A9_NPipe]>],
597                               [8, 1, 1, 1]>,
598   //
599   // Double-precision FP MAC
600   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
601                                InstrStage<1,  [A9_MUX0], 0>,
602                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
603                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
604                                InstrStage<2,  [A9_NPipe]>],
605                               [9, 1, 1, 1]>,
606   //
607   // Single-precision FP DIV
608   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
609                                InstrStage<1,  [A9_MUX0], 0>,
610                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
611                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
612                                InstrStage<10, [A9_NPipe]>],
613                               [15, 1, 1]>,
614   //
615   // Double-precision FP DIV
616   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
617                                InstrStage<1,  [A9_MUX0], 0>,
618                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
619                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
620                                InstrStage<20, [A9_NPipe]>],
621                               [25, 1, 1]>,
622   //
623   // Single-precision FP SQRT
624   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
625                                InstrStage<1,  [A9_MUX0], 0>,
626                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
627                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
628                                InstrStage<13, [A9_NPipe]>],
629                               [17, 1]>,
630   //
631   // Double-precision FP SQRT
632   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
633                                InstrStage<1,  [A9_MUX0], 0>,
634                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
635                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
636                                InstrStage<28, [A9_NPipe]>],
637                               [32, 1]>,
638
639   //
640   // Integer to Single-precision Move
641   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
642                                InstrStage<1, [A9_MUX0], 0>,
643                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
644                                // Extra 1 latency cycle since writeback (wbck) is 2 cycles
645                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
646                                InstrStage<1, [A9_NPipe]>],
647                               [1, 1]>,
648   //
649   // Integer to Double-precision Move
650   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
651                                InstrStage<1, [A9_MUX0], 0>,
652                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
653                                // Extra 1 latency cycle since writeback (wbck) is 2 cycles
654                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
655                                InstrStage<1, [A9_NPipe]>],
656                               [1, 1, 1]>,
657   //
658   // Single-precision to Integer Move
659   // (only an issue slot and A9_MUX0 are modelled; no DRegs / NPipe stages)
660   // On A9 move-from-VFP is free to issue with no stall if other VFP
661   // operations are in flight. Assumed (unconfirmed): it cannot dual-issue.
662   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
663                                InstrStage<1, [A9_MUX0], 0>],
664                               [2, 1]>,
665   //
666   // Double-precision to Integer Move
667   // (only an issue slot and A9_MUX0 are modelled; no DRegs / NPipe stages)
668   // On A9 move-from-VFP is free to issue with no stall if other VFP
669   // operations are in flight. Assumed (unconfirmed): it cannot dual-issue.
670   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
671                                InstrStage<1, [A9_MUX0], 0>],
672                               [2, 1, 1]>,
673   //
674   // Single-precision FP Load
675   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676                                InstrStage<1, [A9_MUX0], 0>,
677                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
678                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
679                                InstrStage<1, [A9_NPipe], 0>,
680                                InstrStage<1, [A9_LSUnit]>],
681                               [1, 1]>,
682   //
683   // Double-precision FP Load
684   // FIXME: Result latency is 1 if address is 64-bit aligned.
685   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
686                                InstrStage<1, [A9_MUX0], 0>,
687                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
688                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
689                                InstrStage<1, [A9_NPipe], 0>,
690                                InstrStage<1, [A9_LSUnit]>],
691                               [2, 1]>,
692   //
693   // FP Load Multiple
694   // FIXME: assumes 2 doubles which requires 2 LS cycles.
695   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
696                                InstrStage<1, [A9_MUX0], 0>,
697                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
698                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
699                                InstrStage<1, [A9_NPipe], 0>,
700                                InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
701   //
702   // FP Load Multiple + update
703   // FIXME: assumes 2 doubles which requires 2 LS cycles.
704   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
705                                InstrStage<1, [A9_MUX0], 0>,
706                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
707                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
708                                InstrStage<1, [A9_NPipe], 0>,
709                                InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
710   //
711   // Single-precision FP Store
712   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
713                                InstrStage<1, [A9_MUX0], 0>,
714                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
715                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
716                                InstrStage<1, [A9_NPipe], 0>,
717                                InstrStage<1, [A9_LSUnit]>],
718                               [1, 1]>,
719   //
720   // Double-precision FP Store
721   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
722                                InstrStage<1, [A9_MUX0], 0>,
723                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
724                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
725                                InstrStage<1, [A9_NPipe], 0>,
726                                InstrStage<1, [A9_LSUnit]>],
727                               [1, 1]>,
728   //
729   // FP Store Multiple
730   // FIXME: assumes 2 doubles which requires 2 LS cycles.
731   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
732                                InstrStage<1, [A9_MUX0], 0>,
733                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
734                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
735                                InstrStage<1, [A9_NPipe], 0>,
736                                InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>,
737   //
738   // FP Store Multiple + update
739   // FIXME: assumes 2 doubles which requires 2 LS cycles.
740   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
741                                 InstrStage<1, [A9_MUX0], 0>,
742                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
743                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
744                                 InstrStage<1, [A9_NPipe], 0>,
745                                 InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>,
746   // NEON -- A9_DRegsN Required / A9_DRegsVFP Reserved (VFP roles swapped)
747   // VLD1
748   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
749                                InstrStage<1, [A9_MUX0], 0>,
750                                InstrStage<1, [A9_DRegsN],   0, Required>,
751                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
752                                InstrStage<1, [A9_NPipe], 0>,
753                                InstrStage<1, [A9_LSUnit]>],
754                               [1, 1]>,
755   // VLD1x2
756   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
757                                InstrStage<1, [A9_MUX0], 0>,
758                                InstrStage<1, [A9_DRegsN],   0, Required>,
759                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
760                                InstrStage<1, [A9_NPipe], 0>,
761                                InstrStage<1, [A9_LSUnit]>],
762                               [1, 1, 1]>,
763   // VLD1x3
764   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
765                                InstrStage<1, [A9_MUX0], 0>,
766                                InstrStage<1, [A9_DRegsN],   0, Required>,
767                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
768                                InstrStage<2, [A9_NPipe], 0>,
769                                InstrStage<2, [A9_LSUnit]>],
770                               [1, 1, 2, 1]>,
771   // VLD1x4
772   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
773                                InstrStage<1, [A9_MUX0], 0>,
774                                InstrStage<1, [A9_DRegsN],   0, Required>,
775                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
776                                InstrStage<2, [A9_NPipe], 0>,
777                                InstrStage<2, [A9_LSUnit]>],
778                               [1, 1, 2, 2, 1]>,
779   // VLD1u
780   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
781                                InstrStage<1, [A9_MUX0], 0>,
782                                InstrStage<1, [A9_DRegsN],   0, Required>,
783                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
784                                InstrStage<1, [A9_NPipe], 0>,
785                                InstrStage<1, [A9_LSUnit]>],
786                               [1, 2, 1]>,
787   // VLD1x2u
788   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
789                                InstrStage<1, [A9_MUX0], 0>,
790                                InstrStage<1, [A9_DRegsN],   0, Required>,
791                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
792                                InstrStage<1, [A9_NPipe], 0>,
793                                InstrStage<1, [A9_LSUnit]>],
794                               [1, 1, 2, 1]>,
795   // VLD1x3u
796   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
797                                InstrStage<1, [A9_MUX0], 0>,
798                                InstrStage<1, [A9_DRegsN],   0, Required>,
799                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
800                                InstrStage<2, [A9_NPipe], 0>,
801                                InstrStage<2, [A9_LSUnit]>],
802                               [1, 1, 2, 2, 1]>,
803   // VLD1x4u
804   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
805                                InstrStage<1, [A9_MUX0], 0>,
806                                InstrStage<1, [A9_DRegsN],   0, Required>,
807                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
808                                InstrStage<2, [A9_NPipe], 0>,
809                                InstrStage<2, [A9_LSUnit]>],
810                               [1, 1, 2, 2, 2, 1]>,
811   //
812   // VLD1ln
813   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
814                                InstrStage<1, [A9_MUX0], 0>,
815                                InstrStage<1, [A9_DRegsN],   0, Required>,
816                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
817                                InstrStage<2, [A9_NPipe], 0>,
818                                InstrStage<2, [A9_LSUnit]>],
819                               [3, 1, 1, 1]>,
820   //
821   // VLD1lnu
822   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
823                                InstrStage<1, [A9_MUX0], 0>,
824                                InstrStage<1, [A9_DRegsN],   0, Required>,
825                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
826                                InstrStage<2, [A9_NPipe], 0>,
827                                InstrStage<2, [A9_LSUnit]>],
828                               [3, 2, 1, 1, 1, 1]>,
829   //
830   // VLD1dup
831   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
832                                InstrStage<1, [A9_MUX0], 0>,
833                                InstrStage<1, [A9_DRegsN],   0, Required>,
834                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
835                                InstrStage<1, [A9_NPipe], 0>,
836                                InstrStage<1, [A9_LSUnit]>],
837                               [2, 1]>,
838   //
839   // VLD1dupu
840   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
841                                InstrStage<1, [A9_MUX0], 0>,
842                                InstrStage<1, [A9_DRegsN],   0, Required>,
843                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
844                                InstrStage<1, [A9_NPipe], 0>,
845                                InstrStage<1, [A9_LSUnit]>],
846                               [2, 2, 1, 1]>,
847   //
848   // VLD2
849   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
850                                InstrStage<1, [A9_MUX0], 0>,
851                                InstrStage<1, [A9_DRegsN],   0, Required>,
852                                // Extra latency cycles since writeback (wbck) is 7 cycles
853                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
854                                InstrStage<1, [A9_NPipe], 0>,
855                                InstrStage<1, [A9_LSUnit]>],
856                               [2, 2, 1]>,
857   //
858   // VLD2x2
859   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
860                                InstrStage<1, [A9_MUX0], 0>,
861                                InstrStage<1, [A9_DRegsN],   0, Required>,
862                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
863                                InstrStage<2, [A9_NPipe], 0>,
864                                InstrStage<2, [A9_LSUnit]>],
865                               [2, 3, 2, 3, 1]>,
866   //
867   // VLD2ln
868   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
869                                InstrStage<1, [A9_MUX0], 0>,
870                                InstrStage<1, [A9_DRegsN],   0, Required>,
871                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
872                                InstrStage<2, [A9_NPipe], 0>,
873                                InstrStage<2, [A9_LSUnit]>],
874                               [3, 3, 1, 1, 1, 1]>,
875   //
876   // VLD2u
877   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
878                                InstrStage<1, [A9_MUX0], 0>,
879                                InstrStage<1, [A9_DRegsN],   0, Required>,
880                                // Extra latency cycles since writeback (wbck) is 7 cycles
881                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
882                                InstrStage<1, [A9_NPipe], 0>,
883                                InstrStage<1, [A9_LSUnit]>],
884                               [2, 2, 2, 1, 1, 1]>,
885   //
886   // VLD2x2u
887   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
888                                InstrStage<1, [A9_MUX0], 0>,
889                                InstrStage<1, [A9_DRegsN],   0, Required>,
890                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
891                                InstrStage<2, [A9_NPipe], 0>,
892                                InstrStage<2, [A9_LSUnit]>],
893                               [2, 3, 2, 3, 2, 1]>,
894   //
895   // VLD2lnu
896   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
897                                InstrStage<1, [A9_MUX0], 0>,
898                                InstrStage<1, [A9_DRegsN],   0, Required>,
899                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
900                                InstrStage<2, [A9_NPipe], 0>,
901                                InstrStage<2, [A9_LSUnit]>],
902                               [3, 3, 2, 1, 1, 1, 1, 1]>,
903   //
904   // VLD2dup
905   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
906                                InstrStage<1, [A9_MUX0], 0>,
907                                InstrStage<1, [A9_DRegsN],   0, Required>,
908                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
909                                InstrStage<1, [A9_NPipe], 0>,
910                                InstrStage<1, [A9_LSUnit]>],
911                               [2, 2, 1]>,
912   //
913   // VLD2dupu
914   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
915                                InstrStage<1, [A9_MUX0], 0>,
916                                InstrStage<1, [A9_DRegsN],   0, Required>,
917                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
918                                InstrStage<1, [A9_NPipe], 0>,
919                                InstrStage<1, [A9_LSUnit]>],
920                               [2, 2, 2, 1, 1]>,
921   //
922   // VLD3
923   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
924                                InstrStage<1, [A9_MUX0], 0>,
925                                InstrStage<1, [A9_DRegsN],   0, Required>,
926                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
927                                InstrStage<3, [A9_NPipe], 0>,
928                                InstrStage<3, [A9_LSUnit]>],
929                               [3, 3, 4, 1]>,
930   //
931   // VLD3ln
932   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
933                                InstrStage<1, [A9_MUX0], 0>,
934                                InstrStage<1, [A9_DRegsN],   0, Required>,
935                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
936                                InstrStage<5, [A9_NPipe], 0>,
937                                InstrStage<5, [A9_LSUnit]>],
938                               [5, 5, 6, 1, 1, 1, 1, 2]>,
939   //
940   // VLD3u
941   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
942                                InstrStage<1, [A9_MUX0], 0>,
943                                InstrStage<1, [A9_DRegsN],   0, Required>,
944                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
945                                InstrStage<3, [A9_NPipe], 0>,
946                                InstrStage<3, [A9_LSUnit]>],
947                               [3, 3, 4, 2, 1]>,
948   //
949   // VLD3lnu
950   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
951                                InstrStage<1, [A9_MUX0], 0>,
952                                InstrStage<1, [A9_DRegsN],   0, Required>,
953                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
954                                InstrStage<5, [A9_NPipe], 0>,
955                                InstrStage<5, [A9_LSUnit]>],
956                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
957   //
958   // VLD3dup
959   InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
960                                InstrStage<1, [A9_MUX0], 0>,
961                                InstrStage<1, [A9_DRegsN],   0, Required>,
962                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
963                                InstrStage<3, [A9_NPipe], 0>,
964                                InstrStage<3, [A9_LSUnit]>],
965                               [3, 3, 4, 1]>,
966   //
967   // VLD3dupu
968   InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
969                                InstrStage<1, [A9_MUX0], 0>,
970                                InstrStage<1, [A9_DRegsN],   0, Required>,
971                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
972                                InstrStage<3, [A9_NPipe], 0>,
973                                InstrStage<3, [A9_LSUnit]>],
974                               [3, 3, 4, 2, 1, 1]>,
975   //
976   // VLD4
977   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
978                                InstrStage<1, [A9_MUX0], 0>,
979                                InstrStage<1, [A9_DRegsN],   0, Required>,
980                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
981                                InstrStage<3, [A9_NPipe], 0>,
982                                InstrStage<3, [A9_LSUnit]>],
983                               [3, 3, 4, 4, 1]>,
984   //
985   // VLD4ln
986   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
987                                InstrStage<1, [A9_MUX0], 0>,
988                                InstrStage<1, [A9_DRegsN],   0, Required>,
989                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
990                                InstrStage<4, [A9_NPipe], 0>,
991                                InstrStage<4, [A9_LSUnit]>],
992                               [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
993   //
994   // VLD4u
995   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
996                                InstrStage<1, [A9_MUX0], 0>,
997                                InstrStage<1, [A9_DRegsN],   0, Required>,
998                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
999                                InstrStage<3, [A9_NPipe], 0>,
1000                                InstrStage<3, [A9_LSUnit]>],
1001                               [3, 3, 4, 4, 2, 1]>,
1002   //
1003   // VLD4lnu
1004   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1005                                InstrStage<1, [A9_MUX0], 0>,
1006                                InstrStage<1, [A9_DRegsN],   0, Required>,
1007                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1008                                InstrStage<4, [A9_NPipe], 0>,
1009                                InstrStage<4, [A9_LSUnit]>],
1010                               [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
1011   //
1012   // VLD4dup
1013   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1014                                InstrStage<1, [A9_MUX0], 0>,
1015                                InstrStage<1, [A9_DRegsN],   0, Required>,
1016                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1017                                InstrStage<2, [A9_NPipe], 0>,
1018                                InstrStage<2, [A9_LSUnit]>],
1019                               [2, 2, 3, 3, 1]>,
1020   //
1021   // VLD4dupu
1022   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1023                                InstrStage<1, [A9_MUX0], 0>,
1024                                InstrStage<1, [A9_DRegsN],   0, Required>,
1025                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1026                                InstrStage<2, [A9_NPipe], 0>,
1027                                InstrStage<2, [A9_LSUnit]>],
1028                               [2, 2, 3, 3, 2, 1, 1]>,
1029   //
1030   // VST1
1031   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1032                                InstrStage<1, [A9_MUX0], 0>,
1033                                InstrStage<1, [A9_DRegsN],   0, Required>,
1034                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1035                                InstrStage<1, [A9_NPipe], 0>,
1036                                InstrStage<1, [A9_LSUnit]>],
1037                               [1, 1, 1]>,
1038   //
1039   // VST1x2
1040   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1041                                InstrStage<1, [A9_MUX0], 0>,
1042                                InstrStage<1, [A9_DRegsN],   0, Required>,
1043                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1044                                InstrStage<1, [A9_NPipe], 0>,
1045                                InstrStage<1, [A9_LSUnit]>],
1046                               [1, 1, 1, 1]>,
1047   //
1048   // VST1x3
1049   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1050                                InstrStage<1, [A9_MUX0], 0>,
1051                                InstrStage<1, [A9_DRegsN],   0, Required>,
1052                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1053                                InstrStage<2, [A9_NPipe], 0>,
1054                                InstrStage<2, [A9_LSUnit]>],
1055                               [1, 1, 1, 1, 2]>,
1056   //
1057   // VST1x4
1058   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1059                                InstrStage<1, [A9_MUX0], 0>,
1060                                InstrStage<1, [A9_DRegsN],   0, Required>,
1061                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1062                                InstrStage<2, [A9_NPipe], 0>,
1063                                InstrStage<2, [A9_LSUnit]>],
1064                               [1, 1, 1, 1, 2, 2]>,
1065   //
1066   // VST1u
1067   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1068                                InstrStage<1, [A9_MUX0], 0>,
1069                                InstrStage<1, [A9_DRegsN],   0, Required>,
1070                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1071                                InstrStage<1, [A9_NPipe], 0>,
1072                                InstrStage<1, [A9_LSUnit]>],
1073                               [2, 1, 1, 1, 1]>,
1074   //
1075   // VST1x2u
1076   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1077                                InstrStage<1, [A9_MUX0], 0>,
1078                                InstrStage<1, [A9_DRegsN],   0, Required>,
1079                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1080                                InstrStage<1, [A9_NPipe], 0>,
1081                                InstrStage<1, [A9_LSUnit]>],
1082                               [2, 1, 1, 1, 1, 1]>,
1083   //
1084   // VST1x3u
1085   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1086                                InstrStage<1, [A9_MUX0], 0>,
1087                                InstrStage<1, [A9_DRegsN],   0, Required>,
1088                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1089                                InstrStage<2, [A9_NPipe], 0>,
1090                                InstrStage<2, [A9_LSUnit]>],
1091                               [2, 1, 1, 1, 1, 1, 2]>,
1092   //
1093   // VST1x4u
1094   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1095                                InstrStage<1, [A9_MUX0], 0>,
1096                                InstrStage<1, [A9_DRegsN],   0, Required>,
1097                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1098                                InstrStage<2, [A9_NPipe], 0>,
1099                                InstrStage<2, [A9_LSUnit]>],
1100                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1101   //
1102   // VST1ln
1103   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1104                                InstrStage<1, [A9_MUX0], 0>,
1105                                InstrStage<1, [A9_DRegsN],   0, Required>,
1106                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1107                                InstrStage<1, [A9_NPipe], 0>,
1108                                InstrStage<1, [A9_LSUnit]>],
1109                               [1, 1, 1]>,
1110   //
1111   // VST1lnu
1112   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1113                                InstrStage<1, [A9_MUX0], 0>,
1114                                InstrStage<1, [A9_DRegsN],   0, Required>,
1115                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1116                                InstrStage<1, [A9_NPipe], 0>,
1117                                InstrStage<1, [A9_LSUnit]>],
1118                               [2, 1, 1, 1, 1]>,
1119   //
1120   // VST2
1121   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1122                                InstrStage<1, [A9_MUX0], 0>,
1123                                InstrStage<1, [A9_DRegsN],   0, Required>,
1124                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1125                                InstrStage<1, [A9_NPipe], 0>,
1126                                InstrStage<1, [A9_LSUnit]>],
1127                               [1, 1, 1, 1]>,
1128   //
1129   // VST2x2
1130   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1131                                InstrStage<1, [A9_MUX0], 0>,
1132                                InstrStage<1, [A9_DRegsN],   0, Required>,
1133                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1134                                InstrStage<3, [A9_NPipe], 0>,
1135                                InstrStage<3, [A9_LSUnit]>],
1136                               [1, 1, 1, 1, 2, 2]>,
1137   //
1138   // VST2u
1139   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1140                                InstrStage<1, [A9_MUX0], 0>,
1141                                InstrStage<1, [A9_DRegsN],   0, Required>,
1142                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1143                                InstrStage<1, [A9_NPipe], 0>,
1144                                InstrStage<1, [A9_LSUnit]>],
1145                               [2, 1, 1, 1, 1, 1]>,
1146   //
1147   // VST2x2u
1148   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1149                                InstrStage<1, [A9_MUX0], 0>,
1150                                InstrStage<1, [A9_DRegsN],   0, Required>,
1151                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1152                                InstrStage<3, [A9_NPipe], 0>,
1153                                InstrStage<3, [A9_LSUnit]>],
1154                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1155   //
1156   // VST2ln
1157   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1158                                InstrStage<1, [A9_MUX0], 0>,
1159                                InstrStage<1, [A9_DRegsN],   0, Required>,
1160                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1161                                InstrStage<1, [A9_NPipe], 0>,
1162                                InstrStage<1, [A9_LSUnit]>],
1163                               [1, 1, 1, 1]>,
1164   //
1165   // VST2lnu
1166   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1167                                InstrStage<1, [A9_MUX0], 0>,
1168                                InstrStage<1, [A9_DRegsN],   0, Required>,
1169                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1170                                InstrStage<1, [A9_NPipe], 0>,
1171                                InstrStage<1, [A9_LSUnit]>],
1172                               [2, 1, 1, 1, 1, 1]>,
1173   //
1174   // VST3
1175   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1176                                InstrStage<1, [A9_MUX0], 0>,
1177                                InstrStage<1, [A9_DRegsN],   0, Required>,
1178                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1179                                InstrStage<2, [A9_NPipe], 0>,
1180                                InstrStage<2, [A9_LSUnit]>],
1181                               [1, 1, 1, 1, 2]>,
1182   //
1183   // VST3u
1184   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1185                                InstrStage<1, [A9_MUX0], 0>,
1186                                InstrStage<1, [A9_DRegsN],   0, Required>,
1187                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1188                                InstrStage<2, [A9_NPipe], 0>,
1189                                InstrStage<2, [A9_LSUnit]>],
1190                               [2, 1, 1, 1, 1, 1, 2]>,
1191   //
1192   // VST3ln
1193   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1194                                InstrStage<1, [A9_MUX0], 0>,
1195                                InstrStage<1, [A9_DRegsN],   0, Required>,
1196                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1197                                InstrStage<3, [A9_NPipe], 0>,
1198                                InstrStage<3, [A9_LSUnit]>],
1199                               [1, 1, 1, 1, 2]>,
1200   //
1201   // VST3lnu
1202   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1203                                InstrStage<1, [A9_MUX0], 0>,
1204                                InstrStage<1, [A9_DRegsN],   0, Required>,
1205                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1206                                InstrStage<3, [A9_NPipe], 0>,
1207                                InstrStage<3, [A9_LSUnit]>],
1208                               [2, 1, 1, 1, 1, 1, 2]>,
1209   //
1210   // VST4
1211   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1212                                InstrStage<1, [A9_MUX0], 0>,
1213                                InstrStage<1, [A9_DRegsN],   0, Required>,
1214                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1215                                InstrStage<2, [A9_NPipe], 0>,
1216                                InstrStage<2, [A9_LSUnit]>],
1217                               [1, 1, 1, 1, 2, 2]>,
1218   //
1219   // VST4u
1220   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1221                                InstrStage<1, [A9_MUX0], 0>,
1222                                InstrStage<1, [A9_DRegsN],   0, Required>,
1223                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1224                                InstrStage<2, [A9_NPipe], 0>,
1225                                InstrStage<2, [A9_LSUnit]>],
1226                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1227   //
1228   // VST4ln
1229   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1230                                InstrStage<1, [A9_MUX0], 0>,
1231                                InstrStage<1, [A9_DRegsN],   0, Required>,
1232                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1233                                InstrStage<2, [A9_NPipe], 0>,
1234                                InstrStage<2, [A9_LSUnit]>],
1235                               [1, 1, 1, 1, 2, 2]>,
1236   //
1237   // VST4lnu
1238   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1239                                InstrStage<1, [A9_MUX0], 0>,
1240                                InstrStage<1, [A9_DRegsN],   0, Required>,
1241                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1242                                InstrStage<2, [A9_NPipe], 0>,
1243                                InstrStage<2, [A9_LSUnit]>],
1244                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1245
1246   //
1247   // Double-register Integer Unary
1248   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1249                                InstrStage<1, [A9_MUX0], 0>,
1250                                InstrStage<1, [A9_DRegsN],   0, Required>,
1251                                // Extra latency cycles since wbck is 6 cycles
1252                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1253                                InstrStage<1, [A9_NPipe]>],
1254                               [4, 2]>,
1255   //
1256   // Quad-register Integer Unary
1257   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1258                                InstrStage<1, [A9_MUX0], 0>,
1259                                InstrStage<1, [A9_DRegsN],   0, Required>,
1260                                // Extra latency cycles since wbck is 6 cycles
1261                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1262                                InstrStage<1, [A9_NPipe]>],
1263                               [4, 2]>,
1264   //
1265   // Double-register Integer Q-Unary
1266   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1267                                InstrStage<1, [A9_MUX0], 0>,
1268                                InstrStage<1, [A9_DRegsN],   0, Required>,
1269                                // Extra latency cycles since wbck is 6 cycles
1270                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1271                                InstrStage<1, [A9_NPipe]>],
1272                               [4, 1]>,
1273   //
1274   // Quad-register Integer Q-Unary
1275   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1276                                InstrStage<1, [A9_MUX0], 0>,
1277                                InstrStage<1, [A9_DRegsN],   0, Required>,
1278                                // Extra latency cycles since wbck is 6 cycles
1279                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1280                                InstrStage<1, [A9_NPipe]>],
1281                               [4, 1]>,
1282   //
1283   // Double-register Integer Binary
1284   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1285                                InstrStage<1, [A9_MUX0], 0>,
1286                                InstrStage<1, [A9_DRegsN],   0, Required>,
1287                                // Extra latency cycles since wbck is 6 cycles
1288                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1289                                InstrStage<1, [A9_NPipe]>],
1290                               [3, 2, 2]>,
1291   //
1292   // Quad-register Integer Binary
1293   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1294                                InstrStage<1, [A9_MUX0], 0>,
1295                                InstrStage<1, [A9_DRegsN],   0, Required>,
1296                                // Extra latency cycles since wbck is 6 cycles
1297                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1298                                InstrStage<1, [A9_NPipe]>],
1299                               [3, 2, 2]>,
1300   //
1301   // Double-register Integer Subtract
1302   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1303                                InstrStage<1, [A9_MUX0], 0>,
1304                                InstrStage<1, [A9_DRegsN],   0, Required>,
1305                                // Extra latency cycles since wbck is 6 cycles
1306                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1307                                InstrStage<1, [A9_NPipe]>],
1308                               [3, 2, 1]>,
1309   //
1310   // Quad-register Integer Subtract
1311   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1312                                InstrStage<1, [A9_MUX0], 0>,
1313                                InstrStage<1, [A9_DRegsN],   0, Required>,
1314                                // Extra latency cycles since wbck is 6 cycles
1315                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1316                                InstrStage<1, [A9_NPipe]>],
1317                               [3, 2, 1]>,
1318   //
1319   // Double-register Integer Shift
1320   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1321                                InstrStage<1, [A9_MUX0], 0>,
1322                                InstrStage<1, [A9_DRegsN],   0, Required>,
1323                                // Extra latency cycles since wbck is 6 cycles
1324                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1325                                InstrStage<1, [A9_NPipe]>],
1326                               [3, 1, 1]>,
1327   //
1328   // Quad-register Integer Shift
1329   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1330                                InstrStage<1, [A9_MUX0], 0>,
1331                                InstrStage<1, [A9_DRegsN],   0, Required>,
1332                                // Extra latency cycles since wbck is 6 cycles
1333                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1334                                InstrStage<1, [A9_NPipe]>],
1335                               [3, 1, 1]>,
1336   //
1337   // Double-register Integer Shift (4 cycle)
1338   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1339                                InstrStage<1, [A9_MUX0], 0>,
1340                                InstrStage<1, [A9_DRegsN],   0, Required>,
1341                                // Extra latency cycles since wbck is 6 cycles
1342                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1343                                InstrStage<1, [A9_NPipe]>],
1344                               [4, 1, 1]>,
1345   //
1346   // Quad-register Integer Shift (4 cycle)
1347   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1348                                InstrStage<1, [A9_MUX0], 0>,
1349                                InstrStage<1, [A9_DRegsN],   0, Required>,
1350                                // Extra latency cycles since wbck is 6 cycles
1351                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1352                                InstrStage<1, [A9_NPipe]>],
1353                               [4, 1, 1]>,
1354   //
1355   // Double-register Integer Binary (4 cycle)
1356   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1357                                InstrStage<1, [A9_MUX0], 0>,
1358                                InstrStage<1, [A9_DRegsN],   0, Required>,
1359                                // Extra latency cycles since wbck is 6 cycles
1360                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1361                                InstrStage<1, [A9_NPipe]>],
1362                               [4, 2, 2]>,
1363   //
1364   // Quad-register Integer Binary (4 cycle)
1365   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1366                                InstrStage<1, [A9_MUX0], 0>,
1367                                InstrStage<1, [A9_DRegsN],   0, Required>,
1368                                // Extra latency cycles since wbck is 6 cycles
1369                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1370                                InstrStage<1, [A9_NPipe]>],
1371                               [4, 2, 2]>,
1372   //
1373   // Double-register Integer Subtract (4 cycle)
1374   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1375                                InstrStage<1, [A9_MUX0], 0>,
1376                                InstrStage<1, [A9_DRegsN],   0, Required>,
1377                                // Extra latency cycles since wbck is 6 cycles
1378                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1379                                InstrStage<1, [A9_NPipe]>],
1380                               [4, 2, 1]>,
1381   //
1382   // Quad-register Integer Subtract (4 cycle)
1383   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1384                                InstrStage<1, [A9_MUX0], 0>,
1385                                InstrStage<1, [A9_DRegsN],   0, Required>,
1386                                // Extra latency cycles since wbck is 6 cycles
1387                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1388                                InstrStage<1, [A9_NPipe]>],
1389                               [4, 2, 1]>,
1390
1391   //
1392   // Double-register Integer Count
1393   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1394                                InstrStage<1, [A9_MUX0], 0>,
1395                                InstrStage<1, [A9_DRegsN],   0, Required>,
1396                                // Extra latency cycles since wbck is 6 cycles
1397                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1398                                InstrStage<1, [A9_NPipe]>],
1399                               [3, 2, 2]>,
1400   //
1401   // Quad-register Integer Count
1402   // Result is written in N3, but that is relative to the last cycle of a
1403   // multicycle instruction, so we use 4 for those cases.
1404   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1405                                InstrStage<1, [A9_MUX0], 0>,
1406                                InstrStage<1, [A9_DRegsN],   0, Required>,
1407                                // Extra latency cycles since wbck is 7 cycles
1408                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1409                                InstrStage<2, [A9_NPipe]>],
1410                               [4, 2, 2]>,
1411   //
1412   // Double-register Absolute Difference and Accumulate
1413   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1414                                InstrStage<1, [A9_MUX0], 0>,
1415                                InstrStage<1, [A9_DRegsN],   0, Required>,
1416                                // Extra latency cycles since wbck is 6 cycles
1417                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1418                                InstrStage<1, [A9_NPipe]>],
1419                               [6, 3, 2, 1]>,
1420   //
1421   // Quad-register Absolute Difference and Accumulate
1422   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1423                                InstrStage<1, [A9_MUX0], 0>,
1424                                InstrStage<1, [A9_DRegsN],   0, Required>,
1425                                // Extra latency cycles since wbck is 6 cycles
1426                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1427                                InstrStage<2, [A9_NPipe]>],
1428                               [6, 3, 2, 1]>,
1429   //
1430   // Double-register Integer Pair Add Long
1431   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1432                                InstrStage<1, [A9_MUX0], 0>,
1433                                InstrStage<1, [A9_DRegsN],   0, Required>,
1434                                // Extra latency cycles since wbck is 6 cycles
1435                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1436                                InstrStage<1, [A9_NPipe]>],
1437                               [6, 3, 1]>,
1438   //
1439   // Quad-register Integer Pair Add Long
1440   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1441                                InstrStage<1, [A9_MUX0], 0>,
1442                                InstrStage<1, [A9_DRegsN],   0, Required>,
1443                                // Extra latency cycles since wbck is 6 cycles
1444                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1445                                InstrStage<2, [A9_NPipe]>],
1446                               [6, 3, 1]>,
1447
1448   //
1449   // Double-register Integer Multiply (.8, .16)
1450   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1451                                InstrStage<1, [A9_MUX0], 0>,
1452                                InstrStage<1, [A9_DRegsN],   0, Required>,
1453                                // Extra latency cycles since wbck is 6 cycles
1454                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1455                                InstrStage<1, [A9_NPipe]>],
1456                               [6, 2, 2]>,
1457   //
1458   // Quad-register Integer Multiply (.8, .16)
1459   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1460                                InstrStage<1, [A9_MUX0], 0>,
1461                                InstrStage<1, [A9_DRegsN],   0, Required>,
1462                                // Extra latency cycles since wbck is 7 cycles
1463                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1464                                InstrStage<2, [A9_NPipe]>],
1465                               [7, 2, 2]>,
1466
1467   //
1468   // Double-register Integer Multiply (.32)
1469   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1470                                InstrStage<1, [A9_MUX0], 0>,
1471                                InstrStage<1, [A9_DRegsN],   0, Required>,
1472                                // Extra latency cycles since wbck is 7 cycles
1473                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1474                                InstrStage<2, [A9_NPipe]>],
1475                               [7, 2, 1]>,
1476   //
1477   // Quad-register Integer Multiply (.32)
1478   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1479                                InstrStage<1, [A9_MUX0], 0>,
1480                                InstrStage<1, [A9_DRegsN],   0, Required>,
1481                                // Extra latency cycles since wbck is 9 cycles
1482                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1483                                InstrStage<4, [A9_NPipe]>],
1484                               [9, 2, 1]>,
1485   //
1486   // Double-register Integer Multiply-Accumulate (.8, .16)
1487   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1488                                InstrStage<1, [A9_MUX0], 0>,
1489                                InstrStage<1, [A9_DRegsN],   0, Required>,
1490                                // Extra latency cycles since wbck is 6 cycles
1491                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1492                                InstrStage<1, [A9_NPipe]>],
1493                               [6, 3, 2, 2]>,
1494   //
1495   // Double-register Integer Multiply-Accumulate (.32)
1496   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1497                                InstrStage<1, [A9_MUX0], 0>,
1498                                InstrStage<1, [A9_DRegsN],   0, Required>,
1499                                // Extra latency cycles since wbck is 7 cycles
1500                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1501                                InstrStage<2, [A9_NPipe]>],
1502                               [7, 3, 2, 1]>,
1503   //
1504   // Quad-register Integer Multiply-Accumulate (.8, .16)
1505   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1506                                InstrStage<1, [A9_MUX0], 0>,
1507                                InstrStage<1, [A9_DRegsN],   0, Required>,
1508                                // Extra latency cycles since wbck is 7 cycles
1509                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1510                                InstrStage<2, [A9_NPipe]>],
1511                               [7, 3, 2, 2]>,
1512   //
1513   // Quad-register Integer Multiply-Accumulate (.32)
1514   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1515                                InstrStage<1, [A9_MUX0], 0>,
1516                                InstrStage<1, [A9_DRegsN],   0, Required>,
1517                                // Extra latency cycles since wbck is 9 cycles
1518                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1519                                InstrStage<4, [A9_NPipe]>],
1520                               [9, 3, 2, 1]>,
1521
1522   //
1523   // Move
1524   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1525                                InstrStage<1, [A9_MUX0], 0>,
1526                                InstrStage<1, [A9_DRegsN],   0, Required>,
1527                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1528                                InstrStage<1, [A9_NPipe]>],
1529                               [1,1]>,
1530   //
1531   // Move Immediate
1532   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1533                                InstrStage<1, [A9_MUX0], 0>,
1534                                InstrStage<1, [A9_DRegsN],   0, Required>,
1535                                // Extra latency cycles since wbck is 6 cycles
1536                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1537                                InstrStage<1, [A9_NPipe]>],
1538                               [3]>,
1539   //
1540   // Double-register Permute Move
1541   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1542                                InstrStage<1, [A9_MUX0], 0>,
1543                                InstrStage<1, [A9_DRegsN],   0, Required>,
1544                                // Extra latency cycles since wbck is 6 cycles
1545                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1546                                InstrStage<1, [A9_NPipe]>],
1547                               [2, 1]>,
1548   //
1549   // Quad-register Permute Move
1550   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1551                                InstrStage<1, [A9_MUX0], 0>,
1552                                InstrStage<1, [A9_DRegsN],   0, Required>,
1553                                // Extra latency cycles since wbck is 6 cycles
1554                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1555                                InstrStage<1, [A9_NPipe]>],
1556                               [2, 1]>,
1557   //
1558   // Integer to Single-precision Move
1559   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1560                                InstrStage<1, [A9_MUX0], 0>,
1561                                InstrStage<1, [A9_DRegsN],   0, Required>,
1562                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1563                                InstrStage<1, [A9_NPipe]>],
1564                               [1, 1]>,
1565   //
1566   // Integer to Double-precision Move
1567   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1568                                InstrStage<1, [A9_MUX0], 0>,
1569                                InstrStage<1, [A9_DRegsN],   0, Required>,
1570                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1571                                InstrStage<1, [A9_NPipe]>],
1572                               [1, 1, 1]>,
1573   //
1574   // Single-precision to Integer Move
1575   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1576                                InstrStage<1, [A9_MUX0], 0>,
1577                                InstrStage<1, [A9_DRegsN],   0, Required>,
1578                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1579                                InstrStage<1, [A9_NPipe]>],
1580                               [2, 1]>,
1581   //
1582   // Double-precision to Integer Move
1583   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1584                                InstrStage<1, [A9_MUX0], 0>,
1585                                InstrStage<1, [A9_DRegsN],   0, Required>,
1586                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1587                                InstrStage<1, [A9_NPipe]>],
1588                               [2, 2, 1]>,
1589   //
1590   // Integer to Lane Move
1591   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1592                                InstrStage<1, [A9_MUX0], 0>,
1593                                InstrStage<1, [A9_DRegsN],   0, Required>,
1594                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1595                                InstrStage<2, [A9_NPipe]>],
1596                               [3, 1, 1]>,
1597
1598   //
1599   // Vector narrow move
1600   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1601                                InstrStage<1, [A9_MUX0], 0>,
1602                                InstrStage<1, [A9_DRegsN],   0, Required>,
1603                                // Extra latency cycles since wbck is 6 cycles
1604                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1605                                InstrStage<1, [A9_NPipe]>],
1606                               [3, 1]>,
1607   //
1608   // Double-register FP Unary
1609   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1610                                InstrStage<1, [A9_MUX0], 0>,
1611                                InstrStage<1, [A9_DRegsN],   0, Required>,
1612                                // Extra latency cycles since wbck is 6 cycles
1613                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1614                                InstrStage<1, [A9_NPipe]>],
1615                               [5, 2]>,
1616   //
1617   // Quad-register FP Unary
1618   // Result is written in N5, but that is relative to the last cycle of a
1619   // multicycle instruction, so we use 6 for those cases.
1620   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1621                                InstrStage<1, [A9_MUX0], 0>,
1622                                InstrStage<1, [A9_DRegsN],   0, Required>,
1623                                // Extra latency cycles since wbck is 7 cycles
1624                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1625                                InstrStage<2, [A9_NPipe]>],
1626                               [6, 2]>,
1627   //
1628   // Double-register FP Binary
1629   // FIXME: We're using this itinerary for many instructions, and the [2, 2]
1630   // operand cycles here are too optimistic.
1631   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1632                                InstrStage<1, [A9_MUX0], 0>,
1633                                InstrStage<1, [A9_DRegsN],   0, Required>,
1634                                // Extra latency cycles since wbck is 6 cycles
1635                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1636                                InstrStage<1, [A9_NPipe]>],
1637                               [5, 2, 2]>,
1638
1639   //
1640   // VPADD, etc.
1641   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1642                                InstrStage<1, [A9_MUX0], 0>,
1643                                InstrStage<1, [A9_DRegsN],   0, Required>,
1644                                // Extra latency cycles since wbck is 6 cycles
1645                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1646                                InstrStage<1, [A9_NPipe]>],
1647                               [5, 1, 1]>,
1648   //
1649   // Double-register FP VMUL
1650   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1651                                InstrStage<1, [A9_MUX0], 0>,
1652                                InstrStage<1, [A9_DRegsN],   0, Required>,
1653                                // Extra latency cycles since wbck is 6 cycles
1654                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1655                                InstrStage<1, [A9_NPipe]>],
1656                               [5, 2, 1]>,
1657   //
1658   // Quad-register FP Binary
1659   // Result is written in N5, but that is relative to the last cycle of a
1660   // multicycle instruction, so we use 6 for those cases.
1661   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1662   // optimistic.
1663   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1664                                InstrStage<1, [A9_MUX0], 0>,
1665                                InstrStage<1, [A9_DRegsN],   0, Required>,
1666                                // Extra latency cycles since wbck is 7 cycles
1667                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1668                                InstrStage<2, [A9_NPipe]>],
1669                               [6, 2, 2]>,
1670   //
1671   // Quad-register FP VMUL
1672   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1673                                InstrStage<1, [A9_MUX0], 0>,
1674                                InstrStage<1, [A9_DRegsN],   0, Required>,
1675                                // Extra latency cycles since wbck is 7 cycles
1676                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1677                                InstrStage<1, [A9_NPipe]>],
1678                               [6, 2, 1]>,
1679   //
1680   // Double-register FP Multiply-Accumulate
1681   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1682                                InstrStage<1, [A9_MUX0], 0>,
1683                                InstrStage<1, [A9_DRegsN],   0, Required>,
1684                                // Extra latency cycles since wbck is 7 cycles
1685                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1686                                InstrStage<2, [A9_NPipe]>],
1687                               [6, 3, 2, 1]>,
1688   //
1689   // Quad-register FP Multiply-Accumulate
1690   // Result written in N9, but that is relative to the last cycle of multicycle,
1691   // so we use 10 for those cases
1692   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1693                                InstrStage<1, [A9_MUX0], 0>,
1694                                InstrStage<1, [A9_DRegsN],   0, Required>,
1695                                // Extra latency cycles since wbck is 9 cycles
1696                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1697                                InstrStage<4, [A9_NPipe]>],
1698                               [8, 4, 2, 1]>,
1699   //
1700   // Double-register Reciprocal Step
1701   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1702                                InstrStage<1, [A9_MUX0], 0>,
1703                                InstrStage<1, [A9_DRegsN],   0, Required>,
1704                                // Extra latency cycles since wbck is 10 cycles
1705                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1706                                InstrStage<1, [A9_NPipe]>],
1707                               [9, 2, 2]>,
1708   //
1709   // Quad-register Reciprocal Step
1710   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1711                                InstrStage<1, [A9_MUX0], 0>,
1712                                InstrStage<1, [A9_DRegsN],   0, Required>,
1713                                // Extra latency cycles since wbck is 11 cycles
1714                                InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1715                                InstrStage<2, [A9_NPipe]>],
1716                               [10, 2, 2]>,
1717   //
1718   // Double-register Permute
1719   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1720                                InstrStage<1, [A9_MUX0], 0>,
1721                                InstrStage<1, [A9_DRegsN],   0, Required>,
1722                                // Extra latency cycles since wbck is 6 cycles
1723                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1724                                InstrStage<1, [A9_NPipe]>],
1725                               [2, 2, 1, 1]>,
1726   //
1727   // Quad-register Permute
1728   // Result written in N2, but that is relative to the last cycle of multicycle,
1729   // so we use 3 for those cases
1730   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1731                                InstrStage<1, [A9_MUX0], 0>,
1732                                InstrStage<1, [A9_DRegsN],   0, Required>,
1733                                // Extra latency cycles since wbck is 7 cycles
1734                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1735                                InstrStage<2, [A9_NPipe]>],
1736                               [3, 3, 1, 1]>,
1737   //
1738   // Quad-register Permute (3 cycle issue)
1739   // Result written in N2, but that is relative to the last cycle of multicycle,
1740   // so we use 4 for those cases
1741   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1742                                InstrStage<1, [A9_MUX0], 0>,
1743                                InstrStage<1, [A9_DRegsN],   0, Required>,
1744                                // Extra latency cycles since wbck is 8 cycles
1745                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1746                                InstrStage<3, [A9_NPipe]>],
1747                               [4, 4, 1, 1]>,
1748
1749   //
1750   // Double-register VEXT
1751   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1752                                InstrStage<1, [A9_MUX0], 0>,
1753                                InstrStage<1, [A9_DRegsN],   0, Required>,
1754                                // Extra latency cycles since wbck is 6 cycles
1755                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1756                                InstrStage<1, [A9_NPipe]>],
1757                               [2, 1, 1]>,
1758   //
1759   // Quad-register VEXT
1760   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1761                                InstrStage<1, [A9_MUX0], 0>,
1762                                InstrStage<1, [A9_DRegsN],   0, Required>,
1763                                // Extra latency cycles since wbck is 7 cycles
1764                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1765                                InstrStage<2, [A9_NPipe]>],
1766                               [3, 1, 2]>,
1767   //
1768   // VTB
1769   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1770                                InstrStage<1, [A9_MUX0], 0>,
1771                                InstrStage<1, [A9_DRegsN],   0, Required>,
1772                                // Extra latency cycles since wbck is 7 cycles
1773                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1774                                InstrStage<2, [A9_NPipe]>],
1775                               [3, 2, 1]>,
1776   InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1777                                InstrStage<1, [A9_MUX0], 0>,
1778                                InstrStage<2, [A9_DRegsN],   0, Required>,
1779                                // Extra latency cycles since wbck is 7 cycles
1780                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1781                                InstrStage<2, [A9_NPipe]>],
1782                               [3, 2, 2, 1]>,
1783   InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1784                                InstrStage<1, [A9_MUX0], 0>,
1785                                InstrStage<2, [A9_DRegsN],   0, Required>,
1786                                // Extra latency cycles since wbck is 8 cycles
1787                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1788                                InstrStage<3, [A9_NPipe]>],
1789                               [4, 2, 2, 3, 1]>,
1790   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1791                                InstrStage<1, [A9_MUX0], 0>,
1792                                InstrStage<1, [A9_DRegsN],   0, Required>,
1793                                // Extra latency cycles since wbck is 8 cycles
1794                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1795                                InstrStage<3, [A9_NPipe]>],
1796                               [4, 2, 2, 3, 3, 1]>,
1797   //
1798   // VTBX
1799   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1800                                InstrStage<1, [A9_MUX0], 0>,
1801                                InstrStage<1, [A9_DRegsN],   0, Required>,
1802                                // Extra latency cycles since wbck is 7 cycles
1803                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1804                                InstrStage<2, [A9_NPipe]>],
1805                               [3, 1, 2, 1]>,
1806   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1807                                InstrStage<1, [A9_MUX0], 0>,
1808                                InstrStage<1, [A9_DRegsN],   0, Required>,
1809                                // Extra latency cycles since wbck is 7 cycles
1810                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1811                                InstrStage<2, [A9_NPipe]>],
1812                               [3, 1, 2, 2, 1]>,
1813   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1814                                InstrStage<1, [A9_MUX0], 0>,
1815                                InstrStage<1, [A9_DRegsN],   0, Required>,
1816                                // Extra latency cycles since wbck is 8 cycles
1817                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1818                                InstrStage<3, [A9_NPipe]>],
1819                               [4, 1, 2, 2, 3, 1]>,
1820   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1821                                InstrStage<1, [A9_MUX0], 0>,
1822                                InstrStage<1, [A9_DRegsN],   0, Required>,
1823                                // Extra latency cycles since wbck is 8 cycles
1824                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1825                                InstrStage<2, [A9_NPipe]>],
1826                               [4, 1, 2, 2, 3, 3, 1]>
1827 ]>;