[WebAssembly] Factor out a TypeToString function, since we need it in multiple places.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 // ===---------------------------------------------------------------------===//
15 // This section contains legacy support for itineraries. This is
16 // required until SD and PostRA schedulers are replaced by MachineScheduler.
17
18 //
19 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
20 // Technical Reference Manual".
21 //
22 // Functional units
23 def A9_Issue0  : FuncUnit; // Instruction issue unit 0
24 def A9_Issue1  : FuncUnit; // Instruction issue unit 1
25 def A9_Branch  : FuncUnit; // Branch unit
26 def A9_ALU0    : FuncUnit; // ALU pipeline 0; integer multiplies use only this one
27 def A9_ALU1    : FuncUnit; // ALU pipeline 1
28 def A9_AGU     : FuncUnit; // Address generation unit for loads / stores
29 def A9_NPipe   : FuncUnit; // NEON pipeline; VFP instructions execute here too
30 def A9_MUX0    : FuncUnit; // AGU + NEON/FPU multiplexer
31 def A9_LSUnit  : FuncUnit; // Load/store unit
32 def A9_DRegsVFP: FuncUnit; // Artificial unit: shared FP register file, VFP side
33 def A9_DRegsN  : FuncUnit; // Artificial unit: shared FP register file, NEON side
34
35 // Bypasses
36 def A9_LdBypass : Bypass; // Named in the bypass lists of load and ALU/compare itineraries
37
38 def CortexA9Itineraries : ProcessorItineraries<
39   [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
40    A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
41   [A9_LdBypass], [
42   // Two fully-pipelined integer ALU pipelines
43
44   //
45   // Move instructions, unconditional
46   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
47                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
48   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
49                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
50   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
51                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
52   InstrItinData<IIC_iMOVsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
53                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
54   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
55                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
56                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
57   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
58                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
59                                   InstrStage<1, [A9_ALU0, A9_ALU1]>,
60                                   InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
61   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
62                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
63                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
64                                InstrStage<1, [A9_MUX0], 0>,
65                                InstrStage<1, [A9_AGU], 0>,
66                                InstrStage<1, [A9_LSUnit]>], [5]>,
67   //
68   // MVN instructions
69   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
70                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
71                               [1]>,
72   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
73                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
74                               [1, 1], [NoBypass, A9_LdBypass]>,
75   InstrItinData<IIC_iMVNsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
76                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
77                               [2, 1]>,
78   InstrItinData<IIC_iMVNsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
79                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
80                               [3, 1, 1]>,
81   //
82   // No operand cycles
83   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84                                InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
85   //
86   // Binary Instructions that produce a result
87   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
88                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
89                             [1, 1], [NoBypass, A9_LdBypass]>,
90   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
91                              InstrStage<1, [A9_ALU0, A9_ALU1]>],
92                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
93   InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
94                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
95                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
96   InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97                              InstrStage<2, [A9_ALU0, A9_ALU1]>],
98                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
99   InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
100                              InstrStage<3, [A9_ALU0, A9_ALU1]>],
101                             [3, 1, 1, 1],
102                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
103   //
104   // Bitwise Instructions that produce a result
105   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
106                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
107   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
108                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
109   InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
110                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
111   InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
112                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
113   //
114   // Unary Instructions that produce a result
115
116   // CLZ, RBIT, etc.
117   InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
119
120   // BFC, BFI, UBFX, SBFX
121   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
122                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
123
124   //
125   // Zero and sign extension instructions
126   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
127                              InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
128   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
129                              InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
130   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
131                              InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
132   //
133   // Compare instructions
134   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
136                                [1], [A9_LdBypass]>,
137   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
138                                InstrStage<1, [A9_ALU0, A9_ALU1]>],
139                                [1, 1], [A9_LdBypass, A9_LdBypass]>,
140   InstrItinData<IIC_iCMPsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141                                InstrStage<2, [A9_ALU0, A9_ALU1]>],
142                                 [1, 1], [A9_LdBypass, NoBypass]>,
143   InstrItinData<IIC_iCMPsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
144                                InstrStage<3, [A9_ALU0, A9_ALU1]>],
145                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
146   //
147   // Test instructions
148   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
149                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
150   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
151                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
152   InstrItinData<IIC_iTSTsi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
153                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
154   InstrItinData<IIC_iTSTsr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
155                                InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
156   //
157   // Move instructions, conditional
158   // FIXME: Correctly model the extra input dep on the destination.
159   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
160                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
161   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
163   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
165   InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
166                                InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
167   InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
168                                InstrStage<1, [A9_ALU0, A9_ALU1]>,
169                                InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
170                                InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
171
172   // Integer multiply pipeline
173   //
174   InstrItinData<IIC_iMUL16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
175                                InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
176   InstrItinData<IIC_iMAC16  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
177                                InstrStage<2, [A9_ALU0]>],
178                               [3, 1, 1, 1]>,
179   InstrItinData<IIC_iMUL32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
180                                InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
181   InstrItinData<IIC_iMAC32  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
182                                InstrStage<2, [A9_ALU0]>],
183                               [4, 1, 1, 1]>,
184   InstrItinData<IIC_iMUL64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
185                                InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
186   InstrItinData<IIC_iMAC64  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
187                                InstrStage<3, [A9_ALU0]>],
188                               [4, 5, 1, 1]>,
189   // Integer load pipeline
190   // FIXME: The timings are rough approximations.
191   //
192   // Immediate offset
193   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
194                                  InstrStage<1, [A9_MUX0], 0>,
195                                  InstrStage<1, [A9_AGU], 0>,
196                                  InstrStage<1, [A9_LSUnit]>],
197                                 [3, 1], [A9_LdBypass]>,
198   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
199                                  InstrStage<1, [A9_MUX0], 0>,
200                                  InstrStage<2, [A9_AGU], 0>,
201                                  InstrStage<1, [A9_LSUnit]>],
202                                 [4, 1], [A9_LdBypass]>,
203   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
204   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
205                                  InstrStage<1, [A9_MUX0], 0>,
206                                  InstrStage<2, [A9_AGU], 0>,
207                                  InstrStage<1, [A9_LSUnit]>],
208                                 [3, 3, 1], [A9_LdBypass]>,
209   //
210   // Register offset
211   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
212                                  InstrStage<1, [A9_MUX0], 0>,
213                                  InstrStage<1, [A9_AGU], 0>,
214                                  InstrStage<1, [A9_LSUnit]>],
215                                 [3, 1, 1], [A9_LdBypass]>,
216   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
217                                  InstrStage<1, [A9_MUX0], 0>,
218                                  InstrStage<2, [A9_AGU], 0>,
219                                  InstrStage<1, [A9_LSUnit]>],
220                                 [4, 1, 1], [A9_LdBypass]>,
221   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
222                                  InstrStage<1, [A9_MUX0], 0>,
223                                  InstrStage<2, [A9_AGU], 0>,
224                                  InstrStage<1, [A9_LSUnit]>],
225                                 [3, 3, 1, 1], [A9_LdBypass]>,
226   //
227   // Scaled register offset
228   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
229                                  InstrStage<1, [A9_MUX0], 0>,
230                                  InstrStage<1, [A9_AGU], 0>,
231                                  InstrStage<1, [A9_LSUnit], 0>],
232                                 [4, 1, 1], [A9_LdBypass]>,
233   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
234                                  InstrStage<1, [A9_MUX0], 0>,
235                                  InstrStage<2, [A9_AGU], 0>,
236                                  InstrStage<1, [A9_LSUnit]>],
237                                 [5, 1, 1], [A9_LdBypass]>,
238   //
239   // Immediate offset with update
240   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
241                                  InstrStage<1, [A9_MUX0], 0>,
242                                  InstrStage<1, [A9_AGU], 0>,
243                                  InstrStage<1, [A9_LSUnit]>],
244                                 [3, 2, 1], [A9_LdBypass]>,
245   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
246                                  InstrStage<1, [A9_MUX0], 0>,
247                                  InstrStage<2, [A9_AGU], 0>,
248                                  InstrStage<1, [A9_LSUnit]>],
249                                 [4, 3, 1], [A9_LdBypass]>,
250   //
251   // Register offset with update
252   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
253                                  InstrStage<1, [A9_MUX0], 0>,
254                                  InstrStage<1, [A9_AGU], 0>,
255                                  InstrStage<1, [A9_LSUnit]>],
256                                 [3, 2, 1, 1], [A9_LdBypass]>,
257   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
258                                  InstrStage<1, [A9_MUX0], 0>,
259                                  InstrStage<2, [A9_AGU], 0>,
260                                  InstrStage<1, [A9_LSUnit]>],
261                                 [4, 3, 1, 1], [A9_LdBypass]>,
262   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
263                                  InstrStage<1, [A9_MUX0], 0>,
264                                  InstrStage<2, [A9_AGU], 0>,
265                                  InstrStage<1, [A9_LSUnit]>],
266                                 [3, 3, 1, 1], [A9_LdBypass]>,
267   //
268   // Scaled register offset with update
269   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
270                                  InstrStage<1, [A9_MUX0], 0>,
271                                  InstrStage<1, [A9_AGU], 0>,
272                                  InstrStage<1, [A9_LSUnit]>],
273                                 [4, 3, 1, 1], [A9_LdBypass]>,
274   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
275                                   InstrStage<1, [A9_MUX0], 0>,
276                                   InstrStage<2, [A9_AGU], 0>,
277                                   InstrStage<1, [A9_LSUnit]>],
278                                  [5, 4, 1, 1], [A9_LdBypass]>,
279   //
280   // Load multiple, def is the 5th operand.
281   // FIXME: This assumes 3 to 4 registers.
282   InstrItinData<IIC_iLoad_m  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
283                                 InstrStage<1, [A9_MUX0], 0>,
284                                 InstrStage<2, [A9_AGU], 1>,
285                                 InstrStage<2, [A9_LSUnit]>],
286                                [1, 1, 1, 1, 3],
287                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
288                          -1>, // dynamic uops
289   //
290   // Load multiple + update, defs are the 1st and 5th operands.
291   InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
292                                 InstrStage<1, [A9_MUX0], 0>,
293                                 InstrStage<2, [A9_AGU], 1>,
294                                 InstrStage<2, [A9_LSUnit]>],
295                                [2, 1, 1, 1, 3],
296                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
297                          -1>, // dynamic uops
298   //
299   // Load multiple plus branch
300   InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
301                                 InstrStage<1, [A9_MUX0], 0>,
302                                 InstrStage<1, [A9_AGU], 1>,
303                                 InstrStage<2, [A9_LSUnit]>,
304                                 InstrStage<1, [A9_Branch]>],
305                                [1, 2, 1, 1, 3],
306                          [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
307                          -1>, // dynamic uops
308   //
309   // Pop, def is the 3rd operand.
310   InstrItinData<IIC_iPop  ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
311                                 InstrStage<1, [A9_MUX0], 0>,
312                                 InstrStage<2, [A9_AGU], 1>,
313                                 InstrStage<2, [A9_LSUnit]>],
314                                [1, 1, 3],
315                                [NoBypass, NoBypass, A9_LdBypass],
316                                -1>, // dynamic uops
317   //
318   // Pop + branch, def is the 3rd operand.
319   InstrItinData<IIC_iPop_Br,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
320                                 InstrStage<1, [A9_MUX0], 0>,
321                                 InstrStage<2, [A9_AGU], 1>,
322                                 InstrStage<2, [A9_LSUnit]>,
323                                 InstrStage<1, [A9_Branch]>],
324                                [1, 1, 3],
325                                [NoBypass, NoBypass, A9_LdBypass],
326                                -1>, // dynamic uops
327   //
328   // iLoadi + iALUr for t2LDRpci_pic.
329   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
330                                 InstrStage<1, [A9_MUX0], 0>,
331                                 InstrStage<1, [A9_AGU], 0>,
332                                 InstrStage<1, [A9_LSUnit]>,
333                                 InstrStage<1, [A9_ALU0, A9_ALU1]>],
334                                [2, 1]>,
335
336   // Integer store pipeline
337   //
338   // Immediate offset
339   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
340                                  InstrStage<1, [A9_MUX0], 0>,
341                                  InstrStage<1, [A9_AGU], 0>,
342                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
343   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
344                                  InstrStage<1, [A9_MUX0], 0>,
345                                  InstrStage<2, [A9_AGU], 1>,
346                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
347   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
348   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
349                                  InstrStage<1, [A9_MUX0], 0>,
350                                  InstrStage<2, [A9_AGU], 1>,
351                                  InstrStage<1, [A9_LSUnit]>], [1, 1]>,
352   //
353   // Register offset
354   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
355                                  InstrStage<1, [A9_MUX0], 0>,
356                                  InstrStage<1, [A9_AGU], 0>,
357                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
358   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
359                                  InstrStage<1, [A9_MUX0], 0>,
360                                  InstrStage<2, [A9_AGU], 1>,
361                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
362   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
363                                  InstrStage<1, [A9_MUX0], 0>,
364                                  InstrStage<2, [A9_AGU], 1>,
365                                  InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
366   //
367   // Scaled register offset
368   InstrItinData<IIC_iStore_si ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
369                                   InstrStage<1, [A9_MUX0], 0>,
370                                   InstrStage<1, [A9_AGU], 0>,
371                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
372   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
373                                   InstrStage<1, [A9_MUX0], 0>,
374                                   InstrStage<2, [A9_AGU], 1>,
375                                   InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
376   //
377   // Immediate offset with update
378   InstrItinData<IIC_iStore_iu ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
379                                   InstrStage<1, [A9_MUX0], 0>,
380                                   InstrStage<1, [A9_AGU], 0>,
381                                   InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
382   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
383                                   InstrStage<1, [A9_MUX0], 0>,
384                                   InstrStage<2, [A9_AGU], 1>,
385                                   InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
386   //
387   // Register offset with update
388   InstrItinData<IIC_iStore_ru ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
389                                   InstrStage<1, [A9_MUX0], 0>,
390                                   InstrStage<1, [A9_AGU], 0>,
391                                   InstrStage<1, [A9_LSUnit]>],
392                                  [2, 1, 1, 1]>,
393   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
394                                   InstrStage<1, [A9_MUX0], 0>,
395                                   InstrStage<2, [A9_AGU], 1>,
396                                   InstrStage<1, [A9_LSUnit]>],
397                                  [3, 1, 1, 1]>,
398   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
399                                   InstrStage<1, [A9_MUX0], 0>,
400                                   InstrStage<2, [A9_AGU], 1>,
401                                   InstrStage<1, [A9_LSUnit]>],
402                                  [3, 1, 1, 1]>,
403   //
404   // Scaled register offset with update
405   InstrItinData<IIC_iStore_siu,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
406                                     InstrStage<1, [A9_MUX0], 0>,
407                                     InstrStage<1, [A9_AGU], 0>,
408                                     InstrStage<1, [A9_LSUnit]>],
409                                    [2, 1, 1, 1]>,
410   InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
411                                     InstrStage<1, [A9_MUX0], 0>,
412                                     InstrStage<2, [A9_AGU], 1>,
413                                     InstrStage<1, [A9_LSUnit]>],
414                                    [3, 1, 1, 1]>,
415   //
416   // Store multiple
417   InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
418                                 InstrStage<1, [A9_MUX0], 0>,
419                                 InstrStage<1, [A9_AGU], 0>,
420                                 InstrStage<2, [A9_LSUnit]>],
421                 [], [], -1>, // dynamic uops
422   //
423   // Store multiple + update
424   InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
425                                 InstrStage<1, [A9_MUX0], 0>,
426                                 InstrStage<1, [A9_AGU], 0>,
427                                 InstrStage<2, [A9_LSUnit]>],
428                 [2], [], -1>, // dynamic uops
429   //
430   // Preload
431   InstrItinData<IIC_Preload,   [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
432
433   // Branch
434   //
435   // no delay slots, so the latency of a branch is unimportant
436   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Issue0], 0>,
437                                 InstrStage<1, [A9_Issue1], 0>,
438                                 InstrStage<1, [A9_Branch]>]>,
439
440   // VFP and NEON share the same register file. This means that every VFP
441   // instruction should wait for full completion of the consecutive NEON
442   // instruction and vice-versa. We model this behavior with two artificial FUs:
443   // DRegsVFP and DRegsN.
444   //
445   // Every VFP instruction:
446   //  - Acquires DRegsVFP resource for 1 cycle
447   //  - Reserves DRegsN resource for the whole duration (including time to
448   //    register file writeback!).
449   // Every NEON instruction does the same but with FUs swapped.
450   //
451   // Since the reserved FU cannot be acquired, this models precisely
452   // "cross-domain" stalls.
453
454   // VFP
455   // Issue through integer pipeline, and execute in NEON unit.
456
457   // FP Special Register to Integer Register File Move
458   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
459                               InstrStage<1, [A9_MUX0], 0>,
460                               InstrStage<1, [A9_DRegsVFP], 0, Required>,
461                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
462                               InstrStage<1, [A9_NPipe]>],
463                              [1]>,
464   //
465   // Single-precision FP Unary
466   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
467                                InstrStage<1, [A9_MUX0], 0>,
468                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
469                                // Extra latency cycles since wbck is 2 cycles
470                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
471                                InstrStage<1, [A9_NPipe]>],
472                               [1, 1]>,
473   //
474   // Double-precision FP Unary
475   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
476                                InstrStage<1, [A9_MUX0], 0>,
477                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
478                                // Extra latency cycles since wbck is 2 cycles
479                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
480                                InstrStage<1, [A9_NPipe]>],
481                               [1, 1]>,
482
483   //
484   // Single-precision FP Compare
485   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486                                InstrStage<1, [A9_MUX0], 0>,
487                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
488                                // Extra latency cycles since wbck is 4 cycles
489                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
490                                InstrStage<1, [A9_NPipe]>],
491                               [1, 1]>,
492   //
493   // Double-precision FP Compare
494   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495                                InstrStage<1, [A9_MUX0], 0>,
496                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
497                                // Extra latency cycles since wbck is 4 cycles
498                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
499                                InstrStage<1, [A9_NPipe]>],
500                               [1, 1]>,
501   //
502   // Single to Double FP Convert
503   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
504                                InstrStage<1, [A9_MUX0], 0>,
505                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
506                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
507                                InstrStage<1, [A9_NPipe]>],
508                               [4, 1]>,
509   //
510   // Double to Single FP Convert
511   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512                                InstrStage<1, [A9_MUX0], 0>,
513                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
514                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
515                                InstrStage<1, [A9_NPipe]>],
516                               [4, 1]>,
517
518   //
519   // Single to Half FP Convert
520   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
521                                InstrStage<1, [A9_MUX0], 0>,
522                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
523                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
524                                InstrStage<1, [A9_NPipe]>],
525                               [4, 1]>,
526   //
527   // Half to Single FP Convert
528   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
529                                InstrStage<1, [A9_MUX0], 0>,
530                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
531                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
532                                InstrStage<1, [A9_NPipe]>],
533                               [2, 1]>,
534
535   //
536   // Single-Precision FP to Integer Convert
537   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
538                                InstrStage<1, [A9_MUX0], 0>,
539                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
540                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
541                                InstrStage<1, [A9_NPipe]>],
542                               [4, 1]>,
543   //
544   // Double-Precision FP to Integer Convert
545   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
546                                InstrStage<1, [A9_MUX0], 0>,
547                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
548                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
549                                InstrStage<1, [A9_NPipe]>],
550                               [4, 1]>,
551   //
552   // Integer to Single-Precision FP Convert
553   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
554                                InstrStage<1, [A9_MUX0], 0>,
555                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
556                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
557                                InstrStage<1, [A9_NPipe]>],
558                               [4, 1]>,
559   //
560   // Integer to Double-Precision FP Convert
561   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
562                                InstrStage<1, [A9_MUX0], 0>,
563                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
564                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
565                                InstrStage<1, [A9_NPipe]>],
566                               [4, 1]>,
567   //
568   // Single-precision FP ALU
569   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
570                                InstrStage<1, [A9_MUX0], 0>,
571                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
572                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
573                                InstrStage<1, [A9_NPipe]>],
574                               [4, 1, 1]>,
575   //
576   // Double-precision FP ALU
577   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
578                                InstrStage<1, [A9_MUX0], 0>,
579                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
580                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
581                                InstrStage<1, [A9_NPipe]>],
582                               [4, 1, 1]>,
583   //
584   // Single-precision FP Multiply
585   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
586                                InstrStage<1, [A9_MUX0], 0>,
587                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
588                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
589                                InstrStage<1, [A9_NPipe]>],
590                               [5, 1, 1]>,
591   //
592   // Double-precision FP Multiply
593   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
594                                InstrStage<1, [A9_MUX0], 0>,
595                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
596                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
597                                InstrStage<2, [A9_NPipe]>],
598                               [6, 1, 1]>,
599   //
600   // Single-precision FP MAC
601   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
602                                InstrStage<1, [A9_MUX0], 0>,
603                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
604                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
605                                InstrStage<1, [A9_NPipe]>],
606                               [8, 1, 1, 1]>,
607   //
608   // Double-precision FP MAC
609   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
610                                InstrStage<1,  [A9_MUX0], 0>,
611                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
612                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
613                                InstrStage<2,  [A9_NPipe]>],
614                               [9, 1, 1, 1]>,
615   //
616   // Single-precision Fused FP MAC
617   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
618                                InstrStage<1, [A9_MUX0], 0>,
619                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
620                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
621                                InstrStage<1, [A9_NPipe]>],
622                               [8, 1, 1, 1]>,
623   //
624   // Double-precision Fused FP MAC
625   InstrItinData<IIC_fpFMAC64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
626                                InstrStage<1,  [A9_MUX0], 0>,
627                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
628                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
629                                InstrStage<2,  [A9_NPipe]>],
630                               [9, 1, 1, 1]>,
631   //
632   // Single-precision FP DIV
633   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
634                                InstrStage<1,  [A9_MUX0], 0>,
635                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
636                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
637                                InstrStage<10, [A9_NPipe]>],
638                               [15, 1, 1]>,
639   //
640   // Double-precision FP DIV
641   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
642                                InstrStage<1,  [A9_MUX0], 0>,
643                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
644                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
645                                InstrStage<20, [A9_NPipe]>],
646                               [25, 1, 1]>,
647   //
648   // Single-precision FP SQRT
649   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
650                                InstrStage<1,  [A9_MUX0], 0>,
651                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
652                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
653                                InstrStage<13, [A9_NPipe]>],
654                               [17, 1]>,
655   //
656   // Double-precision FP SQRT
657   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_Issue0, A9_Issue1], 0>,
658                                InstrStage<1,  [A9_MUX0], 0>,
659                                InstrStage<1,  [A9_DRegsVFP], 0, Required>,
660                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
661                                InstrStage<28, [A9_NPipe]>],
662                               [32, 1]>,
663
664   //
665   // Integer to Single-precision Move
666   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
667                                InstrStage<1, [A9_MUX0], 0>,
668                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
669                                // One extra latency cycle, since writeback (wbck) is 2 cycles
670                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
671                                InstrStage<1, [A9_NPipe]>],
672                               [1, 1]>,
673   //
674   // Integer to Double-precision Move
675   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676                                InstrStage<1, [A9_MUX0], 0>,
677                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
678                                // One extra latency cycle, since writeback (wbck) is 2 cycles
679                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
680                                InstrStage<1, [A9_NPipe]>],
681                               [1, 1, 1]>,
682   //
683   // Single-precision to Integer Move
684   //
685   // On A9 move-from-VFP is free to issue with no stall if other VFP
686   // operations are in flight. I assume it still can't dual-issue though.
687   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
688                                InstrStage<1, [A9_MUX0], 0>],
689                               [2, 1]>,
690   //
691   // Double-precision to Integer Move
692   //
693   // On A9 move-from-VFP is free to issue with no stall if other VFP
694   // operations are in flight. I assume it still can't dual-issue though.
695   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
696                                InstrStage<1, [A9_MUX0], 0>],
697                               [2, 1, 1]>,
698   //
699   // Single-precision FP Load
700   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
701                                InstrStage<1, [A9_MUX0], 0>,
702                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
703                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
704                                InstrStage<1, [A9_NPipe], 0>,
705                                InstrStage<1, [A9_LSUnit]>],
706                               [1, 1]>,
707   //
708   // Double-precision FP Load
709   // FIXME: Result latency is 1 if address is 64-bit aligned.
710   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
711                                InstrStage<1, [A9_MUX0], 0>,
712                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
713                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
714                                InstrStage<1, [A9_NPipe], 0>,
715                                InstrStage<1, [A9_LSUnit]>],
716                               [2, 1]>,
717   //
718   // FP Load Multiple
719   // FIXME: assumes 2 doubles, which require 2 LS cycles (hence the 2-cycle
      // A9_LSUnit stage).
      // The trailing -1 marks the micro-op count as dynamic rather than fixed.
      // NOTE(review): presumably the ARM backend then computes the count per
      // instruction -- confirm.
720   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
721                                InstrStage<1, [A9_MUX0], 0>,
722                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
723                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
724                                InstrStage<1, [A9_NPipe], 0>,
725                                InstrStage<2, [A9_LSUnit]>],
726                 [1, 1, 1, 1], [], -1>, // dynamic uops
727   //
728   // FP Load Multiple + update
729   // FIXME: assumes 2 doubles, which require 2 LS cycles (hence the 2-cycle
      // A9_LSUnit stage).
      // The trailing -1 marks the micro-op count as dynamic rather than fixed.
      // NOTE(review): presumably the ARM backend then computes the count per
      // instruction -- confirm.
730   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
731                                InstrStage<1, [A9_MUX0], 0>,
732                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
733                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
734                                InstrStage<1, [A9_NPipe], 0>,
735                                InstrStage<2, [A9_LSUnit]>],
736                 [2, 1, 1, 1], [], -1>, // dynamic uops
737   //
738   // Single-precision FP Store
739   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
740                                InstrStage<1, [A9_MUX0], 0>,
741                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
742                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
743                                InstrStage<1, [A9_NPipe], 0>,
744                                InstrStage<1, [A9_LSUnit]>],
745                               [1, 1]>,
746   //
747   // Double-precision FP Store
748   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
749                                InstrStage<1, [A9_MUX0], 0>,
750                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
751                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
752                                InstrStage<1, [A9_NPipe], 0>,
753                                InstrStage<1, [A9_LSUnit]>],
754                               [1, 1]>,
755   //
756   // FP Store Multiple
757   // FIXME: assumes 2 doubles, which require 2 LS cycles (hence the 2-cycle
      // A9_LSUnit stage).
      // The trailing -1 marks the micro-op count as dynamic rather than fixed.
      // NOTE(review): presumably the ARM backend then computes the count per
      // instruction -- confirm.
758   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
759                                InstrStage<1, [A9_MUX0], 0>,
760                                InstrStage<1, [A9_DRegsVFP], 0, Required>,
761                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
762                                InstrStage<1, [A9_NPipe], 0>,
763                                InstrStage<2, [A9_LSUnit]>],
764                 [1, 1, 1, 1], [], -1>, // dynamic uops
765   //
766   // FP Store Multiple + update
767   // FIXME: assumes 2 doubles, which require 2 LS cycles (hence the 2-cycle
      // A9_LSUnit stage).
      // The trailing -1 marks the micro-op count as dynamic rather than fixed.
      // NOTE(review): presumably the ARM backend then computes the count per
      // instruction -- confirm.
768   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
769                                 InstrStage<1, [A9_MUX0], 0>,
770                                 InstrStage<1, [A9_DRegsVFP], 0, Required>,
771                                 InstrStage<2, [A9_DRegsN],   0, Reserved>,
772                                 InstrStage<1, [A9_NPipe], 0>,
773                                 InstrStage<2, [A9_LSUnit]>],
774                 [2, 1, 1, 1], [], -1>, // dynamic uops
775   // NEON
      //
      // NEON entries list the NEON-side FP register set (A9_DRegsN) as Required
      // and the VFP-side set (A9_DRegsVFP) as Reserved -- the reverse of the
      // VFP entries earlier in this list.
      //
776   // VLD1
777   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
778                                InstrStage<1, [A9_MUX0], 0>,
779                                InstrStage<1, [A9_DRegsN],   0, Required>,
780                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
781                                InstrStage<1, [A9_NPipe], 0>,
782                                InstrStage<1, [A9_LSUnit]>],
783                               [1, 1]>,
784   // VLD1x2
785   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
786                                InstrStage<1, [A9_MUX0], 0>,
787                                InstrStage<1, [A9_DRegsN],   0, Required>,
788                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
789                                InstrStage<1, [A9_NPipe], 0>,
790                                InstrStage<1, [A9_LSUnit]>],
791                               [1, 1, 1]>,
792   // VLD1x3
793   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
794                                InstrStage<1, [A9_MUX0], 0>,
795                                InstrStage<1, [A9_DRegsN],   0, Required>,
796                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
797                                InstrStage<2, [A9_NPipe], 0>,
798                                InstrStage<2, [A9_LSUnit]>],
799                               [1, 1, 2, 1]>,
800   // VLD1x4
801   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
802                                InstrStage<1, [A9_MUX0], 0>,
803                                InstrStage<1, [A9_DRegsN],   0, Required>,
804                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
805                                InstrStage<2, [A9_NPipe], 0>,
806                                InstrStage<2, [A9_LSUnit]>],
807                               [1, 1, 2, 2, 1]>,
808   // VLD1u
809   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
810                                InstrStage<1, [A9_MUX0], 0>,
811                                InstrStage<1, [A9_DRegsN],   0, Required>,
812                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
813                                InstrStage<1, [A9_NPipe], 0>,
814                                InstrStage<1, [A9_LSUnit]>],
815                               [1, 2, 1]>,
816   // VLD1x2u
817   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
818                                InstrStage<1, [A9_MUX0], 0>,
819                                InstrStage<1, [A9_DRegsN],   0, Required>,
820                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
821                                InstrStage<1, [A9_NPipe], 0>,
822                                InstrStage<1, [A9_LSUnit]>],
823                               [1, 1, 2, 1]>,
824   // VLD1x3u
825   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
826                                InstrStage<1, [A9_MUX0], 0>,
827                                InstrStage<1, [A9_DRegsN],   0, Required>,
828                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
829                                InstrStage<2, [A9_NPipe], 0>,
830                                InstrStage<2, [A9_LSUnit]>],
831                               [1, 1, 2, 2, 1]>,
832   // VLD1x4u
833   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
834                                InstrStage<1, [A9_MUX0], 0>,
835                                InstrStage<1, [A9_DRegsN],   0, Required>,
836                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
837                                InstrStage<2, [A9_NPipe], 0>,
838                                InstrStage<2, [A9_LSUnit]>],
839                               [1, 1, 2, 2, 2, 1]>,
840   //
841   // VLD1ln
842   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
843                                InstrStage<1, [A9_MUX0], 0>,
844                                InstrStage<1, [A9_DRegsN],   0, Required>,
845                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
846                                InstrStage<2, [A9_NPipe], 0>,
847                                InstrStage<2, [A9_LSUnit]>],
848                               [3, 1, 1, 1]>,
849   //
850   // VLD1lnu
851   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
852                                InstrStage<1, [A9_MUX0], 0>,
853                                InstrStage<1, [A9_DRegsN],   0, Required>,
854                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
855                                InstrStage<2, [A9_NPipe], 0>,
856                                InstrStage<2, [A9_LSUnit]>],
857                               [3, 2, 1, 1, 1, 1]>,
858   //
859   // VLD1dup
860   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
861                                InstrStage<1, [A9_MUX0], 0>,
862                                InstrStage<1, [A9_DRegsN],   0, Required>,
863                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
864                                InstrStage<1, [A9_NPipe], 0>,
865                                InstrStage<1, [A9_LSUnit]>],
866                               [2, 1]>,
867   //
868   // VLD1dupu
869   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
870                                InstrStage<1, [A9_MUX0], 0>,
871                                InstrStage<1, [A9_DRegsN],   0, Required>,
872                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
873                                InstrStage<1, [A9_NPipe], 0>,
874                                InstrStage<1, [A9_LSUnit]>],
875                               [2, 2, 1, 1]>,
876   //
877   // VLD2
878   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
879                                InstrStage<1, [A9_MUX0], 0>,
880                                InstrStage<1, [A9_DRegsN],   0, Required>,
881                                // Extra latency cycles, since writeback (wbck) is 7 cycles
882                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
883                                InstrStage<1, [A9_NPipe], 0>,
884                                InstrStage<1, [A9_LSUnit]>],
885                               [2, 2, 1]>,
886   //
887   // VLD2x2
888   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
889                                InstrStage<1, [A9_MUX0], 0>,
890                                InstrStage<1, [A9_DRegsN],   0, Required>,
891                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
892                                InstrStage<2, [A9_NPipe], 0>,
893                                InstrStage<2, [A9_LSUnit]>],
894                               [2, 3, 2, 3, 1]>,
895   //
896   // VLD2ln
897   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
898                                InstrStage<1, [A9_MUX0], 0>,
899                                InstrStage<1, [A9_DRegsN],   0, Required>,
900                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
901                                InstrStage<2, [A9_NPipe], 0>,
902                                InstrStage<2, [A9_LSUnit]>],
903                               [3, 3, 1, 1, 1, 1]>,
904   //
905   // VLD2u
906   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
907                                InstrStage<1, [A9_MUX0], 0>,
908                                InstrStage<1, [A9_DRegsN],   0, Required>,
909                                // Extra latency cycles, since writeback (wbck) is 7 cycles
910                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
911                                InstrStage<1, [A9_NPipe], 0>,
912                                InstrStage<1, [A9_LSUnit]>],
913                               [2, 2, 2, 1, 1, 1]>,
914   //
915   // VLD2x2u
916   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
917                                InstrStage<1, [A9_MUX0], 0>,
918                                InstrStage<1, [A9_DRegsN],   0, Required>,
919                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
920                                InstrStage<2, [A9_NPipe], 0>,
921                                InstrStage<2, [A9_LSUnit]>],
922                               [2, 3, 2, 3, 2, 1]>,
923   //
924   // VLD2lnu
925   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
926                                InstrStage<1, [A9_MUX0], 0>,
927                                InstrStage<1, [A9_DRegsN],   0, Required>,
928                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
929                                InstrStage<2, [A9_NPipe], 0>,
930                                InstrStage<2, [A9_LSUnit]>],
931                               [3, 3, 2, 1, 1, 1, 1, 1]>,
932   //
933   // VLD2dup
934   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
935                                InstrStage<1, [A9_MUX0], 0>,
936                                InstrStage<1, [A9_DRegsN],   0, Required>,
937                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
938                                InstrStage<1, [A9_NPipe], 0>,
939                                InstrStage<1, [A9_LSUnit]>],
940                               [2, 2, 1]>,
941   //
942   // VLD2dupu
943   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
944                                InstrStage<1, [A9_MUX0], 0>,
945                                InstrStage<1, [A9_DRegsN],   0, Required>,
946                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
947                                InstrStage<1, [A9_NPipe], 0>,
948                                InstrStage<1, [A9_LSUnit]>],
949                               [2, 2, 2, 1, 1]>,
950   //
951   // VLD3
952   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
953                                InstrStage<1, [A9_MUX0], 0>,
954                                InstrStage<1, [A9_DRegsN],   0, Required>,
955                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
956                                InstrStage<3, [A9_NPipe], 0>,
957                                InstrStage<3, [A9_LSUnit]>],
958                               [3, 3, 4, 1]>,
959   //
960   // VLD3ln
961   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
962                                InstrStage<1, [A9_MUX0], 0>,
963                                InstrStage<1, [A9_DRegsN],   0, Required>,
964                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
965                                InstrStage<5, [A9_NPipe], 0>,
966                                InstrStage<5, [A9_LSUnit]>],
967                               [5, 5, 6, 1, 1, 1, 1, 2]>,
968   //
969   // VLD3u
970   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
971                                InstrStage<1, [A9_MUX0], 0>,
972                                InstrStage<1, [A9_DRegsN],   0, Required>,
973                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
974                                InstrStage<3, [A9_NPipe], 0>,
975                                InstrStage<3, [A9_LSUnit]>],
976                               [3, 3, 4, 2, 1]>,
977   //
978   // VLD3lnu
979   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
980                                InstrStage<1, [A9_MUX0], 0>,
981                                InstrStage<1, [A9_DRegsN],   0, Required>,
982                                InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
983                                InstrStage<5, [A9_NPipe], 0>,
984                                InstrStage<5, [A9_LSUnit]>],
985                               [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
986   //
987   // VLD3dup
988   InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
989                                InstrStage<1, [A9_MUX0], 0>,
990                                InstrStage<1, [A9_DRegsN],   0, Required>,
991                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
992                                InstrStage<3, [A9_NPipe], 0>,
993                                InstrStage<3, [A9_LSUnit]>],
994                               [3, 3, 4, 1]>,
995   //
996   // VLD3dupu
997   InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
998                                InstrStage<1, [A9_MUX0], 0>,
999                                InstrStage<1, [A9_DRegsN],   0, Required>,
1000                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1001                                InstrStage<3, [A9_NPipe], 0>,
1002                                InstrStage<3, [A9_LSUnit]>],
1003                               [3, 3, 4, 2, 1, 1]>,
1004   //
1005   // VLD4
1006   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1007                                InstrStage<1, [A9_MUX0], 0>,
1008                                InstrStage<1, [A9_DRegsN],   0, Required>,
1009                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1010                                InstrStage<3, [A9_NPipe], 0>,
1011                                InstrStage<3, [A9_LSUnit]>],
1012                               [3, 3, 4, 4, 1]>,
1013   //
1014   // VLD4ln
1015   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1016                                InstrStage<1, [A9_MUX0], 0>,
1017                                InstrStage<1, [A9_DRegsN],   0, Required>,
1018                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1019                                InstrStage<4, [A9_NPipe], 0>,
1020                                InstrStage<4, [A9_LSUnit]>],
1021                               [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
1022   //
1023   // VLD4u
1024   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1025                                InstrStage<1, [A9_MUX0], 0>,
1026                                InstrStage<1, [A9_DRegsN],   0, Required>,
1027                                InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
1028                                InstrStage<3, [A9_NPipe], 0>,
1029                                InstrStage<3, [A9_LSUnit]>],
1030                               [3, 3, 4, 4, 2, 1]>,
1031   //
1032   // VLD4lnu
1033   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1034                                InstrStage<1, [A9_MUX0], 0>,
1035                                InstrStage<1, [A9_DRegsN],   0, Required>,
1036                                InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
1037                                InstrStage<4, [A9_NPipe], 0>,
1038                                InstrStage<4, [A9_LSUnit]>],
1039                               [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
1040   //
1041   // VLD4dup
1042   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1043                                InstrStage<1, [A9_MUX0], 0>,
1044                                InstrStage<1, [A9_DRegsN],   0, Required>,
1045                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1046                                InstrStage<2, [A9_NPipe], 0>,
1047                                InstrStage<2, [A9_LSUnit]>],
1048                               [2, 2, 3, 3, 1]>,
1049   //
1050   // VLD4dupu
1051   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1052                                InstrStage<1, [A9_MUX0], 0>,
1053                                InstrStage<1, [A9_DRegsN],   0, Required>,
1054                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1055                                InstrStage<2, [A9_NPipe], 0>,
1056                                InstrStage<2, [A9_LSUnit]>],
1057                               [2, 2, 3, 3, 2, 1, 1]>,
1058   //
1059   // VST1
1060   InstrItinData<IIC_VST1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1061                                InstrStage<1, [A9_MUX0], 0>,
1062                                InstrStage<1, [A9_DRegsN],   0, Required>,
1063                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1064                                InstrStage<1, [A9_NPipe], 0>,
1065                                InstrStage<1, [A9_LSUnit]>],
1066                               [1, 1, 1]>,
1067   //
1068   // VST1x2
1069   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1070                                InstrStage<1, [A9_MUX0], 0>,
1071                                InstrStage<1, [A9_DRegsN],   0, Required>,
1072                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1073                                InstrStage<1, [A9_NPipe], 0>,
1074                                InstrStage<1, [A9_LSUnit]>],
1075                               [1, 1, 1, 1]>,
1076   //
1077   // VST1x3
1078   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1079                                InstrStage<1, [A9_MUX0], 0>,
1080                                InstrStage<1, [A9_DRegsN],   0, Required>,
1081                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1082                                InstrStage<2, [A9_NPipe], 0>,
1083                                InstrStage<2, [A9_LSUnit]>],
1084                               [1, 1, 1, 1, 2]>,
1085   //
1086   // VST1x4
1087   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1088                                InstrStage<1, [A9_MUX0], 0>,
1089                                InstrStage<1, [A9_DRegsN],   0, Required>,
1090                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1091                                InstrStage<2, [A9_NPipe], 0>,
1092                                InstrStage<2, [A9_LSUnit]>],
1093                               [1, 1, 1, 1, 2, 2]>,
1094   //
1095   // VST1u
1096   InstrItinData<IIC_VST1u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1097                                InstrStage<1, [A9_MUX0], 0>,
1098                                InstrStage<1, [A9_DRegsN],   0, Required>,
1099                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1100                                InstrStage<1, [A9_NPipe], 0>,
1101                                InstrStage<1, [A9_LSUnit]>],
1102                               [2, 1, 1, 1, 1]>,
1103   //
1104   // VST1x2u
1105   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1106                                InstrStage<1, [A9_MUX0], 0>,
1107                                InstrStage<1, [A9_DRegsN],   0, Required>,
1108                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1109                                InstrStage<1, [A9_NPipe], 0>,
1110                                InstrStage<1, [A9_LSUnit]>],
1111                               [2, 1, 1, 1, 1, 1]>,
1112   //
1113   // VST1x3u
1114   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1115                                InstrStage<1, [A9_MUX0], 0>,
1116                                InstrStage<1, [A9_DRegsN],   0, Required>,
1117                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1118                                InstrStage<2, [A9_NPipe], 0>,
1119                                InstrStage<2, [A9_LSUnit]>],
1120                               [2, 1, 1, 1, 1, 1, 2]>,
1121   //
1122   // VST1x4u
1123   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1124                                InstrStage<1, [A9_MUX0], 0>,
1125                                InstrStage<1, [A9_DRegsN],   0, Required>,
1126                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1127                                InstrStage<2, [A9_NPipe], 0>,
1128                                InstrStage<2, [A9_LSUnit]>],
1129                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1130   //
1131   // VST1ln
1132   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1133                                InstrStage<1, [A9_MUX0], 0>,
1134                                InstrStage<1, [A9_DRegsN],   0, Required>,
1135                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1136                                InstrStage<1, [A9_NPipe], 0>,
1137                                InstrStage<1, [A9_LSUnit]>],
1138                               [1, 1, 1]>,
1139   //
1140   // VST1lnu
1141   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1142                                InstrStage<1, [A9_MUX0], 0>,
1143                                InstrStage<1, [A9_DRegsN],   0, Required>,
1144                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1145                                InstrStage<1, [A9_NPipe], 0>,
1146                                InstrStage<1, [A9_LSUnit]>],
1147                               [2, 1, 1, 1, 1]>,
1148   //
1149   // VST2
1150   InstrItinData<IIC_VST2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1151                                InstrStage<1, [A9_MUX0], 0>,
1152                                InstrStage<1, [A9_DRegsN],   0, Required>,
1153                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1154                                InstrStage<1, [A9_NPipe], 0>,
1155                                InstrStage<1, [A9_LSUnit]>],
1156                               [1, 1, 1, 1]>,
1157   //
1158   // VST2x2
1159   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1160                                InstrStage<1, [A9_MUX0], 0>,
1161                                InstrStage<1, [A9_DRegsN],   0, Required>,
1162                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1163                                InstrStage<3, [A9_NPipe], 0>,
1164                                InstrStage<3, [A9_LSUnit]>],
1165                               [1, 1, 1, 1, 2, 2]>,
1166   //
1167   // VST2u
1168   InstrItinData<IIC_VST2u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1169                                InstrStage<1, [A9_MUX0], 0>,
1170                                InstrStage<1, [A9_DRegsN],   0, Required>,
1171                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1172                                InstrStage<1, [A9_NPipe], 0>,
1173                                InstrStage<1, [A9_LSUnit]>],
1174                               [2, 1, 1, 1, 1, 1]>,
1175   //
1176   // VST2x2u
1177   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1178                                InstrStage<1, [A9_MUX0], 0>,
1179                                InstrStage<1, [A9_DRegsN],   0, Required>,
1180                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1181                                InstrStage<3, [A9_NPipe], 0>,
1182                                InstrStage<3, [A9_LSUnit]>],
1183                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1184   //
1185   // VST2ln
1186   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1187                                InstrStage<1, [A9_MUX0], 0>,
1188                                InstrStage<1, [A9_DRegsN],   0, Required>,
1189                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1190                                InstrStage<1, [A9_NPipe], 0>,
1191                                InstrStage<1, [A9_LSUnit]>],
1192                               [1, 1, 1, 1]>,
1193   //
1194   // VST2lnu
1195   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1196                                InstrStage<1, [A9_MUX0], 0>,
1197                                InstrStage<1, [A9_DRegsN],   0, Required>,
1198                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1199                                InstrStage<1, [A9_NPipe], 0>,
1200                                InstrStage<1, [A9_LSUnit]>],
1201                               [2, 1, 1, 1, 1, 1]>,
1202   //
1203   // VST3
1204   InstrItinData<IIC_VST3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1205                                InstrStage<1, [A9_MUX0], 0>,
1206                                InstrStage<1, [A9_DRegsN],   0, Required>,
1207                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1208                                InstrStage<2, [A9_NPipe], 0>,
1209                                InstrStage<2, [A9_LSUnit]>],
1210                               [1, 1, 1, 1, 2]>,
1211   //
1212   // VST3u
1213   InstrItinData<IIC_VST3u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1214                                InstrStage<1, [A9_MUX0], 0>,
1215                                InstrStage<1, [A9_DRegsN],   0, Required>,
1216                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1217                                InstrStage<2, [A9_NPipe], 0>,
1218                                InstrStage<2, [A9_LSUnit]>],
1219                               [2, 1, 1, 1, 1, 1, 2]>,
1220   //
1221   // VST3ln
1222   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1223                                InstrStage<1, [A9_MUX0], 0>,
1224                                InstrStage<1, [A9_DRegsN],   0, Required>,
1225                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1226                                InstrStage<3, [A9_NPipe], 0>,
1227                                InstrStage<3, [A9_LSUnit]>],
1228                               [1, 1, 1, 1, 2]>,
1229   //
1230   // VST3lnu
1231   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1232                                InstrStage<1, [A9_MUX0], 0>,
1233                                InstrStage<1, [A9_DRegsN],   0, Required>,
1234                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1235                                InstrStage<3, [A9_NPipe], 0>,
1236                                InstrStage<3, [A9_LSUnit]>],
1237                               [2, 1, 1, 1, 1, 1, 2]>,
1238   //
1239   // VST4
1240   InstrItinData<IIC_VST4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1241                                InstrStage<1, [A9_MUX0], 0>,
1242                                InstrStage<1, [A9_DRegsN],   0, Required>,
1243                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1244                                InstrStage<2, [A9_NPipe], 0>,
1245                                InstrStage<2, [A9_LSUnit]>],
1246                               [1, 1, 1, 1, 2, 2]>,
1247   //
1248   // VST4u
1249   InstrItinData<IIC_VST4u,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1250                                InstrStage<1, [A9_MUX0], 0>,
1251                                InstrStage<1, [A9_DRegsN],   0, Required>,
1252                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1253                                InstrStage<2, [A9_NPipe], 0>,
1254                                InstrStage<2, [A9_LSUnit]>],
1255                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1256   //
1257   // VST4ln
1258   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1259                                InstrStage<1, [A9_MUX0], 0>,
1260                                InstrStage<1, [A9_DRegsN],   0, Required>,
1261                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1262                                InstrStage<2, [A9_NPipe], 0>,
1263                                InstrStage<2, [A9_LSUnit]>],
1264                               [1, 1, 1, 1, 2, 2]>,
1265   //
1266   // VST4lnu
1267   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1268                                InstrStage<1, [A9_MUX0], 0>,
1269                                InstrStage<1, [A9_DRegsN],   0, Required>,
1270                                InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
1271                                InstrStage<2, [A9_NPipe], 0>,
1272                                InstrStage<2, [A9_LSUnit]>],
1273                               [2, 1, 1, 1, 1, 1, 2, 2]>,
1274
1275   //
1276   // Double-register Integer Unary
1277   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1278                                InstrStage<1, [A9_MUX0], 0>,
1279                                InstrStage<1, [A9_DRegsN],   0, Required>,
1280                                // Extra latency cycles since wbck is 6 cycles
1281                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1282                                InstrStage<1, [A9_NPipe]>],
1283                               [4, 2]>,
1284   //
1285   // Quad-register Integer Unary
1286   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1287                                InstrStage<1, [A9_MUX0], 0>,
1288                                InstrStage<1, [A9_DRegsN],   0, Required>,
1289                                // Extra latency cycles since wbck is 6 cycles
1290                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1291                                InstrStage<1, [A9_NPipe]>],
1292                               [4, 2]>,
1293   //
1294   // Double-register Integer Q-Unary
1295   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1296                                InstrStage<1, [A9_MUX0], 0>,
1297                                InstrStage<1, [A9_DRegsN],   0, Required>,
1298                                // Extra latency cycles since wbck is 6 cycles
1299                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1300                                InstrStage<1, [A9_NPipe]>],
1301                               [4, 1]>,
1302   //
1303   // Quad-register Integer Q-Unary
1304   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1305                                InstrStage<1, [A9_MUX0], 0>,
1306                                InstrStage<1, [A9_DRegsN],   0, Required>,
1307                                // Extra latency cycles since wbck is 6 cycles
1308                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1309                                InstrStage<1, [A9_NPipe]>],
1310                               [4, 1]>,
1311   //
1312   // Double-register Integer Binary
1313   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1314                                InstrStage<1, [A9_MUX0], 0>,
1315                                InstrStage<1, [A9_DRegsN],   0, Required>,
1316                                // Extra latency cycles since wbck is 6 cycles
1317                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1318                                InstrStage<1, [A9_NPipe]>],
1319                               [3, 2, 2]>,
1320   //
1321   // Quad-register Integer Binary
1322   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1323                                InstrStage<1, [A9_MUX0], 0>,
1324                                InstrStage<1, [A9_DRegsN],   0, Required>,
1325                                // Extra latency cycles since wbck is 6 cycles
1326                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1327                                InstrStage<1, [A9_NPipe]>],
1328                               [3, 2, 2]>,
1329   //
1330   // Double-register Integer Subtract
1331   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1332                                InstrStage<1, [A9_MUX0], 0>,
1333                                InstrStage<1, [A9_DRegsN],   0, Required>,
1334                                // Extra latency cycles since wbck is 6 cycles
1335                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1336                                InstrStage<1, [A9_NPipe]>],
1337                               [3, 2, 1]>,
1338   //
1339   // Quad-register Integer Subtract
1340   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1341                                InstrStage<1, [A9_MUX0], 0>,
1342                                InstrStage<1, [A9_DRegsN],   0, Required>,
1343                                // Extra latency cycles since wbck is 6 cycles
1344                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1345                                InstrStage<1, [A9_NPipe]>],
1346                               [3, 2, 1]>,
1347   //
1348   // Double-register Integer Shift
1349   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1350                                InstrStage<1, [A9_MUX0], 0>,
1351                                InstrStage<1, [A9_DRegsN],   0, Required>,
1352                                // Extra latency cycles since wbck is 6 cycles
1353                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1354                                InstrStage<1, [A9_NPipe]>],
1355                               [3, 1, 1]>,
1356   //
1357   // Quad-register Integer Shift
1358   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1359                                InstrStage<1, [A9_MUX0], 0>,
1360                                InstrStage<1, [A9_DRegsN],   0, Required>,
1361                                // Extra latency cycles since wbck is 6 cycles
1362                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1363                                InstrStage<1, [A9_NPipe]>],
1364                               [3, 1, 1]>,
1365   //
1366   // Double-register Integer Shift (4 cycle)
1367   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1368                                InstrStage<1, [A9_MUX0], 0>,
1369                                InstrStage<1, [A9_DRegsN],   0, Required>,
1370                                // Extra latency cycles since wbck is 6 cycles
1371                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1372                                InstrStage<1, [A9_NPipe]>],
1373                               [4, 1, 1]>,
1374   //
1375   // Quad-register Integer Shift (4 cycle)
1376   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1377                                InstrStage<1, [A9_MUX0], 0>,
1378                                InstrStage<1, [A9_DRegsN],   0, Required>,
1379                                // Extra latency cycles since wbck is 6 cycles
1380                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1381                                InstrStage<1, [A9_NPipe]>],
1382                               [4, 1, 1]>,
1383   //
1384   // Double-register Integer Binary (4 cycle)
1385   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1386                                InstrStage<1, [A9_MUX0], 0>,
1387                                InstrStage<1, [A9_DRegsN],   0, Required>,
1388                                // Extra latency cycles since wbck is 6 cycles
1389                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1390                                InstrStage<1, [A9_NPipe]>],
1391                               [4, 2, 2]>,
1392   //
1393   // Quad-register Integer Binary (4 cycle)
1394   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1395                                InstrStage<1, [A9_MUX0], 0>,
1396                                InstrStage<1, [A9_DRegsN],   0, Required>,
1397                                // Extra latency cycles since wbck is 6 cycles
1398                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1399                                InstrStage<1, [A9_NPipe]>],
1400                               [4, 2, 2]>,
1401   //
1402   // Double-register Integer Subtract (4 cycle)
1403   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1404                                InstrStage<1, [A9_MUX0], 0>,
1405                                InstrStage<1, [A9_DRegsN],   0, Required>,
1406                                // Extra latency cycles since wbck is 6 cycles
1407                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1408                                InstrStage<1, [A9_NPipe]>],
1409                               [4, 2, 1]>,
1410   //
1411   // Quad-register Integer Subtract (4 cycle)
1412   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1413                                InstrStage<1, [A9_MUX0], 0>,
1414                                InstrStage<1, [A9_DRegsN],   0, Required>,
1415                                // Extra latency cycles since wbck is 6 cycles
1416                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1417                                InstrStage<1, [A9_NPipe]>],
1418                               [4, 2, 1]>,
1419
1420   //
1421   // Double-register Integer Count
1422   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1423                                InstrStage<1, [A9_MUX0], 0>,
1424                                InstrStage<1, [A9_DRegsN],   0, Required>,
1425                                // Extra latency cycles since wbck is 6 cycles
1426                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1427                                InstrStage<1, [A9_NPipe]>],
1428                               [3, 2, 2]>,
1429   //
1430   // Quad-register Integer Count
1431   // Result is written in N3, but that is relative to the last cycle of a
1432   // multicycle instruction, so we use 4 for those cases.
1433   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1434                                InstrStage<1, [A9_MUX0], 0>,
1435                                InstrStage<1, [A9_DRegsN],   0, Required>,
1436                                // Extra latency cycles since wbck is 7 cycles
1437                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1438                                InstrStage<2, [A9_NPipe]>],
1439                               [4, 2, 2]>,
1440   //
1441   // Double-register Absolute Difference and Accumulate
1442   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1443                                InstrStage<1, [A9_MUX0], 0>,
1444                                InstrStage<1, [A9_DRegsN],   0, Required>,
1445                                // Extra latency cycles since wbck is 6 cycles
1446                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1447                                InstrStage<1, [A9_NPipe]>],
1448                               [6, 3, 2, 1]>,
1449   //
1450   // Quad-register Absolute Difference and Accumulate
1451   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1452                                InstrStage<1, [A9_MUX0], 0>,
1453                                InstrStage<1, [A9_DRegsN],   0, Required>,
1454                                // Extra latency cycles since wbck is 6 cycles
1455                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1456                                InstrStage<2, [A9_NPipe]>],
1457                               [6, 3, 2, 1]>,
1458   //
1459   // Double-register Integer Pair Add Long
1460   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1461                                InstrStage<1, [A9_MUX0], 0>,
1462                                InstrStage<1, [A9_DRegsN],   0, Required>,
1463                                // Extra latency cycles since wbck is 6 cycles
1464                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1465                                InstrStage<1, [A9_NPipe]>],
1466                               [6, 3, 1]>,
1467   //
1468   // Quad-register Integer Pair Add Long
1469   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1470                                InstrStage<1, [A9_MUX0], 0>,
1471                                InstrStage<1, [A9_DRegsN],   0, Required>,
1472                                // Extra latency cycles since wbck is 6 cycles
1473                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1474                                InstrStage<2, [A9_NPipe]>],
1475                               [6, 3, 1]>,
1476
1477   //
1478   // Double-register Integer Multiply (.8, .16)
1479   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1480                                InstrStage<1, [A9_MUX0], 0>,
1481                                InstrStage<1, [A9_DRegsN],   0, Required>,
1482                                // Extra latency cycles since wbck is 6 cycles
1483                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1484                                InstrStage<1, [A9_NPipe]>],
1485                               [6, 2, 2]>,
1486   //
1487   // Quad-register Integer Multiply (.8, .16)
1488   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1489                                InstrStage<1, [A9_MUX0], 0>,
1490                                InstrStage<1, [A9_DRegsN],   0, Required>,
1491                                // Extra latency cycles since wbck is 7 cycles
1492                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1493                                InstrStage<2, [A9_NPipe]>],
1494                               [7, 2, 2]>,
1495
1496   //
1497   // Double-register Integer Multiply (.32)
1498   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1499                                InstrStage<1, [A9_MUX0], 0>,
1500                                InstrStage<1, [A9_DRegsN],   0, Required>,
1501                                // Extra latency cycles since wbck is 7 cycles
1502                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1503                                InstrStage<2, [A9_NPipe]>],
1504                               [7, 2, 1]>,
1505   //
1506   // Quad-register Integer Multiply (.32)
1507   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1508                                InstrStage<1, [A9_MUX0], 0>,
1509                                InstrStage<1, [A9_DRegsN],   0, Required>,
1510                                // Extra latency cycles since wbck is 9 cycles
1511                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1512                                InstrStage<4, [A9_NPipe]>],
1513                               [9, 2, 1]>,
1514   //
1515   // Double-register Integer Multiply-Accumulate (.8, .16)
1516   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1517                                InstrStage<1, [A9_MUX0], 0>,
1518                                InstrStage<1, [A9_DRegsN],   0, Required>,
1519                                // Extra latency cycles since wbck is 6 cycles
1520                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1521                                InstrStage<1, [A9_NPipe]>],
1522                               [6, 3, 2, 2]>,
1523   //
1524   // Double-register Integer Multiply-Accumulate (.32)
1525   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1526                                InstrStage<1, [A9_MUX0], 0>,
1527                                InstrStage<1, [A9_DRegsN],   0, Required>,
1528                                // Extra latency cycles since wbck is 7 cycles
1529                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1530                                InstrStage<2, [A9_NPipe]>],
1531                               [7, 3, 2, 1]>,
1532   //
1533   // Quad-register Integer Multiply-Accumulate (.8, .16)
1534   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1535                                InstrStage<1, [A9_MUX0], 0>,
1536                                InstrStage<1, [A9_DRegsN],   0, Required>,
1537                                // Extra latency cycles since wbck is 7 cycles
1538                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1539                                InstrStage<2, [A9_NPipe]>],
1540                               [7, 3, 2, 2]>,
1541   //
1542   // Quad-register Integer Multiply-Accumulate (.32)
1543   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1544                                InstrStage<1, [A9_MUX0], 0>,
1545                                InstrStage<1, [A9_DRegsN],   0, Required>,
1546                                // Extra latency cycles since wbck is 9 cycles
1547                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1548                                InstrStage<4, [A9_NPipe]>],
1549                               [9, 3, 2, 1]>,
1550
1551   //
1552   // Move
1553   InstrItinData<IIC_VMOV,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1554                                InstrStage<1, [A9_MUX0], 0>,
1555                                InstrStage<1, [A9_DRegsN],   0, Required>,
1556                                InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1557                                InstrStage<1, [A9_NPipe]>],
1558                               [1,1]>,
1559   //
1560   // Move Immediate
1561   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1562                                InstrStage<1, [A9_MUX0], 0>,
1563                                InstrStage<1, [A9_DRegsN],   0, Required>,
1564                                // Extra latency cycles since wbck is 6 cycles
1565                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1566                                InstrStage<1, [A9_NPipe]>],
1567                               [3]>,
1568   //
1569   // Double-register Permute Move
1570   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1571                                InstrStage<1, [A9_MUX0], 0>,
1572                                InstrStage<1, [A9_DRegsN],   0, Required>,
1573                                // Extra latency cycles since wbck is 6 cycles
1574                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1575                                InstrStage<1, [A9_NPipe]>],
1576                               [2, 1]>,
1577   //
1578   // Quad-register Permute Move
1579   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1580                                InstrStage<1, [A9_MUX0], 0>,
1581                                InstrStage<1, [A9_DRegsN],   0, Required>,
1582                                // Extra latency cycles since wbck is 6 cycles
1583                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1584                                InstrStage<1, [A9_NPipe]>],
1585                               [2, 1]>,
1586   //
1587   // Integer to Single-precision Move
1588   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1589                                InstrStage<1, [A9_MUX0], 0>,
1590                                InstrStage<1, [A9_DRegsN],   0, Required>,
1591                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1592                                InstrStage<1, [A9_NPipe]>],
1593                               [1, 1]>,
1594   //
1595   // Integer to Double-precision Move
1596   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1597                                InstrStage<1, [A9_MUX0], 0>,
1598                                InstrStage<1, [A9_DRegsN],   0, Required>,
1599                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1600                                InstrStage<1, [A9_NPipe]>],
1601                               [1, 1, 1]>,
1602   //
1603   // Single-precision to Integer Move
1604   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1605                                InstrStage<1, [A9_MUX0], 0>,
1606                                InstrStage<1, [A9_DRegsN],   0, Required>,
1607                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1608                                InstrStage<1, [A9_NPipe]>],
1609                               [2, 1]>,
1610   //
1611   // Double-precision to Integer Move
1612   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1613                                InstrStage<1, [A9_MUX0], 0>,
1614                                InstrStage<1, [A9_DRegsN],   0, Required>,
1615                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1616                                InstrStage<1, [A9_NPipe]>],
1617                               [2, 2, 1]>,
1618   //
1619   // Integer to Lane Move
1620   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1621                                InstrStage<1, [A9_MUX0], 0>,
1622                                InstrStage<1, [A9_DRegsN],   0, Required>,
1623                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1624                                InstrStage<2, [A9_NPipe]>],
1625                               [3, 1, 1]>,
1626
1627   //
1628   // Vector narrow move
1629   InstrItinData<IIC_VMOVN,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1630                                InstrStage<1, [A9_MUX0], 0>,
1631                                InstrStage<1, [A9_DRegsN],   0, Required>,
1632                                // Extra latency cycles since wbck is 6 cycles
1633                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1634                                InstrStage<1, [A9_NPipe]>],
1635                               [3, 1]>,
1636   //
1637   // Double-register FP Unary
1638   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1639                                InstrStage<1, [A9_MUX0], 0>,
1640                                InstrStage<1, [A9_DRegsN],   0, Required>,
1641                                // Extra latency cycles since wbck is 6 cycles
1642                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1643                                InstrStage<1, [A9_NPipe]>],
1644                               [5, 2]>,
1645   //
1646   // Quad-register FP Unary
1647   // Result is written in N5, but that is relative to the last cycle of a
1648   // multicycle instruction, so we use 6 for those cases.
1649   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1650                                InstrStage<1, [A9_MUX0], 0>,
1651                                InstrStage<1, [A9_DRegsN],   0, Required>,
1652                                // Extra latency cycles since wbck is 7 cycles
1653                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1654                                InstrStage<2, [A9_NPipe]>],
1655                               [6, 2]>,
1656   //
1657   // Double-register FP Binary
1658   // FIXME: This itinerary is shared by many instructions, and the [2, 2]
1659   // operand cycles used here are too optimistic.
1660   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1661                                InstrStage<1, [A9_MUX0], 0>,
1662                                InstrStage<1, [A9_DRegsN],   0, Required>,
1663                                // Extra latency cycles since wbck is 6 cycles
1664                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1665                                InstrStage<1, [A9_NPipe]>],
1666                               [5, 2, 2]>,
1667
1668   //
1669   // VPADD, etc.
1670   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1671                                InstrStage<1, [A9_MUX0], 0>,
1672                                InstrStage<1, [A9_DRegsN],   0, Required>,
1673                                // Extra latency cycles since wbck is 6 cycles
1674                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1675                                InstrStage<1, [A9_NPipe]>],
1676                               [5, 1, 1]>,
1677   //
1678   // Double-register FP VMUL
1679   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1680                                InstrStage<1, [A9_MUX0], 0>,
1681                                InstrStage<1, [A9_DRegsN],   0, Required>,
1682                                // Extra latency cycles since wbck is 6 cycles
1683                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1684                                InstrStage<1, [A9_NPipe]>],
1685                               [5, 2, 1]>,
1686   //
1687   // Quad-register FP Binary
1688   // Result written in N5, but that is relative to the last cycle of multicycle,
1689   // so we use 6 for those cases
1690   // FIXME: We're using this itin for many instructions and [2, 2] here is too
1691   // optimistic.
1692   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1693                                InstrStage<1, [A9_MUX0], 0>,
1694                                InstrStage<1, [A9_DRegsN],   0, Required>,
1695                                // Extra latency cycles since wbck is 7 cycles
1696                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1697                                InstrStage<2, [A9_NPipe]>],
1698                               [6, 2, 2]>,
1699   //
1700   // Quad-register FP VMUL
1701   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1702                                InstrStage<1, [A9_MUX0], 0>,
1703                                InstrStage<1, [A9_DRegsN],   0, Required>,
1704                                // Extra latency cycles since wbck is 7 cycles
1705                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1706                                InstrStage<1, [A9_NPipe]>],
1707                               [6, 2, 1]>,
1708   //
1709   // Double-register FP Multiple-Accumulate
1710   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1711                                InstrStage<1, [A9_MUX0], 0>,
1712                                InstrStage<1, [A9_DRegsN],   0, Required>,
1713                                // Extra latency cycles since wbck is 7 cycles
1714                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1715                                InstrStage<2, [A9_NPipe]>],
1716                               [6, 3, 2, 1]>,
1717   //
1718   // Quad-register FP Multiple-Accumulate
1719   // Result written in N9, but that is relative to the last cycle of multicycle,
1720   // so we use 10 for those cases
1721   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1722                                InstrStage<1, [A9_MUX0], 0>,
1723                                InstrStage<1, [A9_DRegsN],   0, Required>,
1724                                // Extra latency cycles since wbck is 9 cycles
1725                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1726                                InstrStage<4, [A9_NPipe]>],
1727                               [8, 4, 2, 1]>,
1728   //
1729   // Double-register Fused FP Multiple-Accumulate
1730   InstrItinData<IIC_VFMACD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1731                                InstrStage<1, [A9_MUX0], 0>,
1732                                InstrStage<1, [A9_DRegsN],   0, Required>,
1733                                // Extra latency cycles since wbck is 7 cycles
1734                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1735                                InstrStage<2, [A9_NPipe]>],
1736                               [6, 3, 2, 1]>,
1737   //
1738   // Quad-register Fused FP Multiple-Accumulate
1739   // Result written in N9, but that is relative to the last cycle of multicycle,
1740   // so we use 10 for those cases
1741   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1742                                InstrStage<1, [A9_MUX0], 0>,
1743                                InstrStage<1, [A9_DRegsN],   0, Required>,
1744                                // Extra latency cycles since wbck is 9 cycles
1745                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1746                                InstrStage<4, [A9_NPipe]>],
1747                               [8, 4, 2, 1]>,
1748   //
1749   // Double-register Reciprocal Step
1750   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1751                                InstrStage<1, [A9_MUX0], 0>,
1752                                InstrStage<1, [A9_DRegsN],   0, Required>,
1753                                // Extra latency cycles since wbck is 10 cycles
1754                                InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
1755                                InstrStage<1, [A9_NPipe]>],
1756                               [9, 2, 2]>,
1757   //
1758   // Quad-register Reciprocal Step
1759   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1760                                InstrStage<1, [A9_MUX0], 0>,
1761                                InstrStage<1, [A9_DRegsN],   0, Required>,
1762                                // Extra latency cycles since wbck is 11 cycles
1763                                InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
1764                                InstrStage<2, [A9_NPipe]>],
1765                               [10, 2, 2]>,
1766   //
1767   // Double-register Permute
1768   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1769                                InstrStage<1, [A9_MUX0], 0>,
1770                                InstrStage<1, [A9_DRegsN],   0, Required>,
1771                                // Extra latency cycles since wbck is 6 cycles
1772                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1773                                InstrStage<1, [A9_NPipe]>],
1774                               [2, 2, 1, 1]>,
1775   //
1776   // Quad-register Permute
1777   // Result written in N2, but that is relative to the last cycle of multicycle,
1778   // so we use 3 for those cases
1779   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1780                                InstrStage<1, [A9_MUX0], 0>,
1781                                InstrStage<1, [A9_DRegsN],   0, Required>,
1782                                // Extra latency cycles since wbck is 7 cycles
1783                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1784                                InstrStage<2, [A9_NPipe]>],
1785                               [3, 3, 1, 1]>,
1786   //
1787   // Quad-register Permute (3 cycle issue)
1788   // Result written in N2, but that is relative to the last cycle of multicycle,
1789   // so we use 4 for those cases
1790   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1791                                InstrStage<1, [A9_MUX0], 0>,
1792                                InstrStage<1, [A9_DRegsN],   0, Required>,
1793                                // Extra latency cycles since wbck is 8 cycles
1794                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1795                                InstrStage<3, [A9_NPipe]>],
1796                               [4, 4, 1, 1]>,
1797
1798   //
1799   // Double-register VEXT
1800   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1801                                InstrStage<1, [A9_MUX0], 0>,
1802                                InstrStage<1, [A9_DRegsN],   0, Required>,
1803                                // Extra latency cycles since wbck is 6 cycles
1804                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1805                                InstrStage<1, [A9_NPipe]>],
1806                               [2, 1, 1]>,
1807   //
1808   // Quad-register VEXT
1809   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1810                                InstrStage<1, [A9_MUX0], 0>,
1811                                InstrStage<1, [A9_DRegsN],   0, Required>,
1812                                // Extra latency cycles since wbck is 7 cycles
1813                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1814                                InstrStage<2, [A9_NPipe]>],
1815                               [3, 1, 2]>,
1816   //
1817   // VTB
1818   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1819                                InstrStage<1, [A9_MUX0], 0>,
1820                                InstrStage<1, [A9_DRegsN],   0, Required>,
1821                                // Extra latency cycles since wbck is 7 cycles
1822                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1823                                InstrStage<2, [A9_NPipe]>],
1824                               [3, 2, 1]>,
1825   InstrItinData<IIC_VTB2,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1826                                InstrStage<1, [A9_MUX0], 0>,
1827                                InstrStage<2, [A9_DRegsN],   0, Required>,
1828                                // Extra latency cycles since wbck is 7 cycles
1829                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1830                                InstrStage<2, [A9_NPipe]>],
1831                               [3, 2, 2, 1]>,
1832   InstrItinData<IIC_VTB3,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1833                                InstrStage<1, [A9_MUX0], 0>,
1834                                InstrStage<2, [A9_DRegsN],   0, Required>,
1835                                // Extra latency cycles since wbck is 8 cycles
1836                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1837                                InstrStage<3, [A9_NPipe]>],
1838                               [4, 2, 2, 3, 1]>,
1839   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1840                                InstrStage<1, [A9_MUX0], 0>,
1841                                InstrStage<1, [A9_DRegsN],   0, Required>,
1842                                // Extra latency cycles since wbck is 8 cycles
1843                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1844                                InstrStage<3, [A9_NPipe]>],
1845                               [4, 2, 2, 3, 3, 1]>,
1846   //
1847   // VTBX
1848   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1849                                InstrStage<1, [A9_MUX0], 0>,
1850                                InstrStage<1, [A9_DRegsN],   0, Required>,
1851                                // Extra latency cycles since wbck is 7 cycles
1852                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1853                                InstrStage<2, [A9_NPipe]>],
1854                               [3, 1, 2, 1]>,
1855   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1856                                InstrStage<1, [A9_MUX0], 0>,
1857                                InstrStage<1, [A9_DRegsN],   0, Required>,
1858                                // Extra latency cycles since wbck is 7 cycles
1859                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1860                                InstrStage<2, [A9_NPipe]>],
1861                               [3, 1, 2, 2, 1]>,
1862   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1863                                InstrStage<1, [A9_MUX0], 0>,
1864                                InstrStage<1, [A9_DRegsN],   0, Required>,
1865                                // Extra latency cycles since wbck is 8 cycles
1866                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1867                                InstrStage<3, [A9_NPipe]>],
1868                               [4, 1, 2, 2, 3, 1]>,
1869   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1870                                InstrStage<1, [A9_MUX0], 0>,
1871                                InstrStage<1, [A9_DRegsN],   0, Required>,
1872                                // Extra latency cycles since wbck is 8 cycles
1873                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1874                                InstrStage<2, [A9_NPipe]>],
1875                               [4, 1, 2, 2, 3, 3, 1]>
1876 ]>;
1877
1878 // ===---------------------------------------------------------------------===//
1879 // The following definitions describe the simpler per-operand machine model.
1880 // This works with MachineScheduler and will eventually replace itineraries.
1881
// Record type that wraps an ordered list of WriteSequence records in Writes.
// The SchedWriteVariant definitions later in this file select sub-ranges of
// that list by index (for example Writes[0-3]).
// SchedModel is declared but left unset (?) here; presumably it exists so an
// enclosing "let SchedModel = ... in" can apply to instances -- TODO confirm.
1882 class A9WriteLMOpsListType<list<WriteSequence> writes> {
1883   list <WriteSequence> Writes = writes;
1884   SchedMachineModel SchedModel = ?;
1885 }
1886
1887 // Cortex-A9 machine model for scheduling and other instruction cost heuristics.
1888 def CortexA9Model : SchedMachineModel {
1889   let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
1890   let MicroOpBufferSize = 56; // Based on available renamed registers.
1891   let LoadLatency = 2; // Optimistic load latency assuming bypass.
1892                        // This is overridden by OperandCycles if the
1893                        // Itineraries are queried instead.
1894   let MispredictPenalty = 8; // Based on estimate of pipeline depth.
1895
1896   let Itineraries = CortexA9Itineraries; // Legacy itinerary tables defined earlier in this file.
1897
1898   // FIXME: Many vector operations were never given an itinerary. We
1899   // haven't mapped these to the new model either, so the model is
1900   let CompleteModel = 0; // explicitly flagged as incomplete.
1901 }
1902
1903 //===----------------------------------------------------------------------===//
1904 // Define each kind of processor resource and number available.
1905 //
1906 // The AGU unit has BufferSize=1 so that the latency between operations
1907 // that use it are considered to stall other operations.
1908 //
1909 // The FP unit has BufferSize=0 so that it is a hard dispatch
1910 // hazard. No instruction may be dispatched while the unit is reserved.
1911
1912 let SchedModel = CortexA9Model in {
1913
// Processor resources used by the SchedWriteRes definitions in this file.
// The ProcResource template argument is the number of units available.
1914 def A9UnitALU : ProcResource<2>; // Two ALU units.
1915 def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } // Multiplier; Super ties it to A9UnitALU.
1916 def A9UnitAGU : ProcResource<1> { let BufferSize = 1; } // Address generation; BufferSize=1 (see note above).
1917 def A9UnitLS  : ProcResource<1>; // Load/store unit.
1918 def A9UnitFP  : ProcResource<1> { let BufferSize = 0; } // FP unit; BufferSize=0 makes it a hard dispatch hazard.
1919 def A9UnitB   : ProcResource<1>; // Consumed by A9WriteB (branches).
1920
1921 //===----------------------------------------------------------------------===//
1922 // Define scheduler read/write types with their resources and latency on A9.
1923
1924 // Consume an issue slot, but no processor resources (the resource list is
1925 // empty and Latency is 0). This is useful when all other writes associated
// with the operand have NumMicroOps = 0; A9WriteLfp1-8 use it that way.
1926 def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; }
1927
// Integer writes. All of them use A9UnitALU and differ only in Latency; the
// ones without a "let Latency" keep whatever default SchedWriteRes provides.
1928 // Write an integer register.
1929 def A9WriteI : SchedWriteRes<[A9UnitALU]>;
1930 // Write an integer shifted-by register (latency 2).
1931 def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
1932
1933 // Basic ALU.
1934 def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
1935 // ALU with operand shifted by immediate (latency 2). This is an anonymous
// def that attaches resources to the existing WriteALUsi type instead of
// introducing an A9-specific write name.
1936 def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
1937 // ALU with operand shifted by register (latency 3).
1938 def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
1939
1940 // Multiplication
// A9WriteM lists A9UnitMul twice; A9WriteM16 lists it once. The *Hi writes
// have NumMicroOps = 0 and one more cycle of latency than their companion
// write. The ItinRW mappings in this file pair them as
// [A9WriteM, A9WriteMHi] and [A9WriteM16, A9WriteM16Hi].
1941 def A9WriteM   : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
1942 def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
1943                                               let NumMicroOps = 0; }
1944 def A9WriteM16   : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
1945 def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
1946                                                 let NumMicroOps = 0; }
1947
1948 // Floating-point
1949 // Only one FP or AGU instruction may issue per cycle. We model this
1950 // by having FP instructions consume the AGU resource.
// Every write below therefore lists both A9UnitFP and A9UnitAGU; the
// definitions differ only in Latency.
1951 def A9WriteF      : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
1952 def A9WriteFMov   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
1953 def A9WriteFMulS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
1954 def A9WriteFMulD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
1955 def A9WriteFMAS   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
1956 def A9WriteFMAD   : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
1957 def A9WriteFDivS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
1958 def A9WriteFDivD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
1959 def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
1960 def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
1961
1962 // NEON has an odd mix of latencies. Simply name the write types by latency.
// A9WriteV<N> has Latency = N and uses the same [A9UnitFP, A9UnitAGU]
// resource list as the floating-point writes. Note there is no A9WriteV8.
1963 def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
1964 def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
1965 def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
1966 def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
1967 def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
1968 def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
1969 def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
1970 def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
1971 def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
1972
1973 // Reserve A9UnitFP for 2 consecutive cycles.
// ResourceCycles entries pair positionally with the resource list, so both
// are spelled out: A9UnitFP is held for 2 cycles and A9UnitAGU for 1. The
// three writes differ only in Latency (4, 7 and 9).
1974 def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
1975   let Latency = 4;
1976   let ResourceCycles = [2, 1]; // [A9UnitFP, A9UnitAGU]
1977 }
1978 def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
1979   let Latency = 7;
1980   let ResourceCycles = [2, 1]; // [A9UnitFP, A9UnitAGU]
1981 }
1982 def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
1983   let Latency = 9;
1984   let ResourceCycles = [2, 1]; // [A9UnitFP, A9UnitAGU]
1985 }
1986
1987 // Branches don't have a def operand but still consume resources.
1988 def A9WriteB : SchedWriteRes<[A9UnitB]>;
1989
1990 // Address generation. Uses A9UnitAGU but contributes no micro-ops; the
// A9WriteAdr1-8 sequences defined later in this file are built from it.
1991 def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
1992
1993 // Load Integer (A9UnitLS, latency 3).
1994 def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
1995 // Load the upper 32-bits using the same micro-op: no resources and no
// micro-ops of its own, same latency as A9WriteL.
1996 def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
1997                                      let NumMicroOps = 0; }
1998 // Offset shifted by register.
1999 def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
2000 // Load (and zero extend) a byte. The "si" form below adds one more cycle,
// mirroring the A9WriteL / A9WriteLsi pair.
2001 def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
2002 def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }
2003
2004 // Load or Store Float, aligned. Uses both the LS and the FP unit.
2005 def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }
2006
2007 // Store Integer.
2008 def A9WriteS : SchedWriteRes<[A9UnitLS]>;
2009
2010 //===----------------------------------------------------------------------===//
2011 // Define resources dynamically for load multiple variants.
2012
2013 // Define helpers for extra latency without consuming resources.
// A9WriteCycle1 contributes one cycle of latency with an empty resource list
// and NumMicroOps = 0. A9WriteCycle2-8 are WriteSequences built from it, with
// NumCycles passed as the second WriteSequence argument.
2014 def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
2015 foreach NumCycles = 2-8 in {
2016 def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
2017 } // foreach NumCycles
2018
2019 // Define address generation sequences and predicates for 8 flavors of LDMs.
2020 foreach NumAddr = 1-8 in {
2021
2022 // Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
2023 // latency for instructions that generate multiple loads or stores.
2024 def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
2025
2026 // Define a predicate to select the LDM based on number of memory addresses.
// The predicate text compares (getNumLDMAddresses(MI)+1)/2 against NumAddr,
// so (assuming integer division) address counts 2N-1 and 2N both select
// variant N.
2027 def A9LMAdr#NumAddr#Pred :
2028   SchedPredicate<"(TII->getNumLDMAddresses(MI)+1)/2 == "#NumAddr>;
2029
2030 } // foreach NumAddr
2031
2032 // Fall-back for unknown LDMs: matches when getNumLDMAddresses(MI) is 0.
2033 def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(MI) == 0">;
2034
2035 // LDM/VLDM/VLDn address generation latency & resources.
2036 // Dynamically select the A9WriteAdrN sequence using a predicate.
// Variant N pairs A9LMAdrNPred with A9WriteAdrN for N = 1..8.
2037 def A9WriteLMAdr : SchedWriteVariant<[
2038   SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
2039   SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
2040   SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
2041   SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
2042   SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
2043   SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
2044   SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
2045   SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
2046   // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
2047   SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>;
2048
2049 // Define LDM Resources.
2050 // These take no issue resource, so they can be combined with other
2051 // writes like WriteB.
2052 // A9WriteLMLo takes a single LS resource and 2 cycles.
2053 def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2;
2054                                               let NumMicroOps = 0; }
2055 // Assuming aligned access, the upper half of each pair is free with
2056 // the same latency.
2057 def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2;
2058                                       let NumMicroOps = 0; }
2059 // Each A9WriteL#N variant adds N cycles of latency without consuming
2060 // additional resources. The extra latency comes from the A9WriteCycleN
// helper, which !cast looks up by its pasted name.
2061 foreach NumAddr = 1-8 in {
2062 def A9WriteL#NumAddr : WriteSequence<
2063   [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2064 def A9WriteL#NumAddr#Hi : WriteSequence<
2065   [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2066 }
2067
2068 //===----------------------------------------------------------------------===//
2069 // LDM: Load multiple into 32-bit integer registers.
2070
// Ordered list of (A9WriteLN, A9WriteLNHi) pairs for N = 1..8. Pair N sits
// at indices 2N-2 and 2N-1; A9WriteLM slices this list by index, so the
// order must not change.
2071 def A9WriteLMOpsList : A9WriteLMOpsListType<
2072                  [A9WriteL1, A9WriteL1Hi,
2073                   A9WriteL2, A9WriteL2Hi,
2074                   A9WriteL3, A9WriteL3Hi,
2075                   A9WriteL4, A9WriteL4Hi,
2076                   A9WriteL5, A9WriteL5Hi,
2077                   A9WriteL6, A9WriteL6Hi,
2078                   A9WriteL7, A9WriteL7Hi,
2079                   A9WriteL8, A9WriteL8Hi]>;
2080
2081 // A9WriteLM variants expand into a pair of writes for each 64-bit
2082 // value loaded. When the number of registers is odd, the last
2083 // A9WriteLnHi is naturally ignored because the instruction has no
2084 // following def operands.  These variants take no issue resource, so
2085 // they may need to be part of a WriteSequence that includes A9WriteIssue.
// Variant N selects the first 2N entries of A9WriteLMOpsList.Writes, i.e.
// the pairs A9WriteL1/A9WriteL1Hi through A9WriteLN/A9WriteLNHi.
2086 def A9WriteLM : SchedWriteVariant<[
2087   SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
2088   SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
2089   SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
2090   SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
2091   SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
2092   SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
2093   SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
2094   SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
2095   // For unknown LDMs, define the maximum number of writes, but only
2096   // make the first two consume resources. From the third pair on, both
   // slots use the resource-free Hi form.
2097   SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
2098                              A9WriteL2, A9WriteL2Hi,
2099                              A9WriteL3Hi, A9WriteL3Hi,
2100                              A9WriteL4Hi, A9WriteL4Hi,
2101                              A9WriteL5Hi, A9WriteL5Hi,
2102                              A9WriteL6Hi, A9WriteL6Hi,
2103                              A9WriteL7Hi, A9WriteL7Hi,
2104                              A9WriteL8Hi, A9WriteL8Hi]>]> {
2105   let Variadic = 1;
2106 }
2107
2108 //===----------------------------------------------------------------------===//
2109 // VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
2110
2111 // A9WriteLfpOp is the same as A9WriteLSfp (LS + FP resources, latency 1)
2112 // but takes no issue resources (NumMicroOps = 0), so it can be used in
2113 // WriteSequences in single-issue instructions that encapsulate multiple loads.
2114 def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
2115   let Latency = 1;
2116   let NumMicroOps = 0;
2117 }
2118
// For each N in 1..8 define three sequences: A9WriteLfpNSeq (N fp load ops),
// A9WriteLfpN (issue slot + Seq) and A9WriteLfpNMov (A9WriteF + Seq).
2119 foreach NumAddr = 1-8 in {
2120
2121 // Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
2122 def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
2123
2124 // A9WriteLfp1-8 definitions are statically expanded into a sequence of
2125 // A9WriteLfpOps with additive latency that takes a single issue slot.
2126 // Used directly to describe NEON VLDn.
2127 def A9WriteLfp#NumAddr : WriteSequence<
2128   [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
2129
2130 // A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
2131 // permuting loaded values.
2132 def A9WriteLfp#NumAddr#Mov : WriteSequence<
2133   [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
2134
2135 } // foreach NumAddr
2136
2137 // Define VLDM/VSTM PreRA resources.
2138 // A9WriteLMfpPreRA are dynamically expanded into the correct
2139 // A9WriteLfp1-8 sequence based on a predicate. This supports the
2140 // preRA VLDM variants in which all 64-bit loads are written to the
2141 // same tuple of either single or double precision registers.
// Variant N pairs A9LMAdrNPred with the single write A9WriteLfpN.
2142 def A9WriteLMfpPreRA : SchedWriteVariant<[
2143   SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
2144   SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
2145   SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
2146   SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
2147   SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
2148   SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
2149   SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
2150   SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
2151   // For unknown VLDM/VSTM PreRA, assume 2xS registers.
2152   SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
2153
2154 // Define VLDM/VSTM PostRA Resources.
2155 // A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
2156 def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; }
2157
2158 foreach NumAddr = 1-8 in {
2159
2160 // Each A9WriteLMfp#N variant adds N cycles of latency (via A9WriteCycleN)
2161 // on top of A9WriteLMfpLo without consuming additional resources.
2162 def A9WriteLMfp#NumAddr : WriteSequence<
2163   [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2164
2165 // Assuming aligned access, the upper half of each pair is free with
2166 // the same latency. This reuses the resource-free A9WriteLMHi write.
2167 def A9WriteLMfp#NumAddr#Hi : WriteSequence<
2168   [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
2169
2170 } // foreach NumAddr
2171
2172 // VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
2173 // pair of writes for each 64-bit data loaded. When the number of
2174 // registers is odd, the last WriteLMfpnHi is naturally ignored because
2175 // the instruction has no following def operands.
2176
// Layout: indices 0-7 hold A9WriteLMfp1-8; indices 8-22 hold the Hi writes
// (one A9WriteLMfp1Hi, then two each of A9WriteLMfp2Hi-8Hi). The trailing
// comments give the index range of each row; A9WriteLMfpPostRA slices by
// these indices, so the order must not change.
2177 def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
2178                  [A9WriteLMfp1, A9WriteLMfp2,       // 0-1
2179                   A9WriteLMfp3, A9WriteLMfp4,       // 2-3
2180                   A9WriteLMfp5, A9WriteLMfp6,       // 4-5
2181                   A9WriteLMfp7, A9WriteLMfp8,       // 6-7
2182                   A9WriteLMfp1Hi,                   // 8-8
2183                   A9WriteLMfp2Hi, A9WriteLMfp2Hi,   // 9-10
2184                   A9WriteLMfp3Hi, A9WriteLMfp3Hi,   // 11-12
2185                   A9WriteLMfp4Hi, A9WriteLMfp4Hi,   // 13-14
2186                   A9WriteLMfp5Hi, A9WriteLMfp5Hi,   // 15-16
2187                   A9WriteLMfp6Hi, A9WriteLMfp6Hi,   // 17-18
2188                   A9WriteLMfp7Hi, A9WriteLMfp7Hi,   // 19-20
2189                   A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
2190
// Variant N takes two slices of A9WriteLMfpPostRAOpsList.Writes: the first N
// entries (A9WriteLMfp1..N) followed by N entries from the Hi region, so it
// expands to 2N writes in total.
2191 def A9WriteLMfpPostRA : SchedWriteVariant<[
2192   SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
2193   SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
2194   SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
2195   SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
2196   SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
2197   SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
2198   SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
2199   SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
2200   // For unknown LDMs, define the maximum number of writes, but only
2201   // make the first two consume resources. We are optimizing for the case
2202   // where the operands are DPRs, and this determines the first eight
2203   // types. The remaining eight types are filled to cover the case
2204   // where the operands are SPRs.
2205   SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
2206                              A9WriteLMfp3Hi, A9WriteLMfp4Hi,
2207                              A9WriteLMfp5Hi, A9WriteLMfp6Hi,
2208                              A9WriteLMfp7Hi, A9WriteLMfp8Hi,
2209                              A9WriteLMfp5Hi, A9WriteLMfp5Hi,
2210                              A9WriteLMfp6Hi, A9WriteLMfp6Hi,
2211                              A9WriteLMfp7Hi, A9WriteLMfp7Hi,
2212                              A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
2213   let Variadic = 1;
2214 }
2215
2216 // Distinguish between our multiple MI-level forms of the same
2217 // VLDM/VSTM instructions. Both predicates inspect the register in operand 0:
// A9PreRA matches when it is a virtual register, A9PostRA when it is a
// physical register.
2218 def A9PreRA : SchedPredicate<
2219   "TargetRegisterInfo::isVirtualRegister(MI->getOperand(0).getReg())">;
2220 def A9PostRA : SchedPredicate<
2221   "TargetRegisterInfo::isPhysicalRegister(MI->getOperand(0).getReg())">;
2222
2223 // VLDM represents all destination registers as a single register
2224 // tuple, unlike LDM. So the number of write operands is not variadic.
// This top-level variant only dispatches on A9PreRA / A9PostRA; the selected
// write is itself a variant keyed on the number of addresses.
2225 def A9WriteLMfp : SchedWriteVariant<[
2226   SchedVar<A9PreRA, [A9WriteLMfpPreRA]>,
2227   SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
2228
2229 //===----------------------------------------------------------------------===//
2230 // Resources for other (non-LDM/VLDM) Variants.
2231
2232 // These mov immediate writers are unconditionally expanded with
2233 // additive latency. Each starts with two A9WriteI writes; the "pc" form
// appends WriteALU and the "ld" form appends A9WriteL.
2234 def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
2235 def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
2236 def A9WriteI2ld  : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
2237
2238 // Some ALU operations can read loaded integer values one cycle early.
// The advance of 1 cycle applies only to the integer load writes listed
// here: the single-load writes and the LDM writes A9WriteL1-8 / A9WriteL1Hi-8Hi.
2239 def A9ReadALU : SchedReadAdvance<1,
2240   [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
2241    A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
2242    A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
2243    A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
2244    A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
2245
2246 // Read types for operands that are unconditionally read in cycle N
2247 // after the instruction issues, decreases producer latency by N-1.
// Hence A9Read<N> is declared as SchedReadAdvance<N-1>, with no write list.
2248 def A9Read2 : SchedReadAdvance<1>;
2249 def A9Read3 : SchedReadAdvance<2>;
2250 def A9Read4 : SchedReadAdvance<3>;
2251
2252 //===----------------------------------------------------------------------===//
2253 // Map itinerary classes to scheduler read/write resources per operand.
2254 //
2255 // For ARM, we piggyback scheduler resources on the Itinerary classes
2256 // to avoid perturbing the existing instruction definitions.
2257
2258 // This table follows the ARM Cortex-A9 Technical Reference Manuals,
2259 // mostly in order.
2260
2261 def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
2262                          IIC_iMVNi,IIC_iMVNsi,
2263                          IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
2264 def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>;
2265 def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
2266
2267 def :ItinRW<[A9WriteI2],   [IIC_iMOVix2,IIC_iCMOVix2]>;
2268 def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
2269 def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
2270
2271 def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
2272 def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
2273 def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
2274 def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
2275 def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
2276 def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
2277 def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
2278 def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
2279
2280 // A9WriteMHi ignored for MUL32.
2281 def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
2282                                      IIC_iMUL64,IIC_iMAC64]>;
2283 // FIXME: SMLALxx needs itin classes
2284 def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>;
2285
2286 // TODO: For floating-point ops, we model the pipeline forwarding
2287 // latencies here. WAW latencies are sometimes longer.
2288
2289 def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
2290                             IIC_fpUNA32, IIC_fpUNA64,
2291                             IIC_fpCMP32, IIC_fpCMP64]>;
2292 def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>;
2293 def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
2294                          IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
2295                          IIC_fpALU32, IIC_fpALU64]>;
2296 def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>;
2297 def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
2298 def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
2299 def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
2300 def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
2301 def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
2302 def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
2303 def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
2304
2305 def :ItinRW<[A9WriteB], [IIC_Br]>;
2306
2307 // A9 PLD is processed in a dedicated unit.
2308 def :ItinRW<[], [IIC_Preload]>;
2309
2310 // Note: We must assume that loads are aligned, since the machine
2311 // model cannot know this statically and A9 ignores alignment hints.
2312
2313 // A9WriteAdr consumes AGU regardless of address writeback. But its
2314 // latency is only relevant for users of an updated address.
2315 def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r,
2316                                      IIC_iLoad_iu,IIC_iLoad_ru]>;
2317 def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
2318 def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r,
2319                                        IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
2320 def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
2321 def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r,
2322                                             IIC_iLoad_d_ru]>;
2323 // Store either has no def operands, or the one def for address writeback.
2324 def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r,
2325                                      IIC_iStore_iu, IIC_iStore_ru,
2326                                      IIC_iStore_d_i, IIC_iStore_d_r,
2327                                      IIC_iStore_d_ru]>;
2328 def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
2329                                       IIC_iStore_bh_i, IIC_iStore_bh_r,
2330                                       IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
2331 def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
2332
2333 // A9WriteLM will be expanded into a separate write for each def
2334 // operand. Address generation consumes resources, but A9WriteLMAdr
2335 // is listed after all def operands, so has no effective latency.
2336 //
2337 // Note: A9WriteLM expands into an even number of def operands. The
2338 // actual number of def operands may be less by one.
2339 def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>;
2340
2341 // Load multiple with address writeback has an extra def operand in
2342 // front of the loaded registers.
2343 //
2344 // Reuse the load-multiple variants for store-multiple because the
2345 // resources are identical. For stores, only the address writeback
2346 // has a def operand so the WriteL latencies are unused.
2347 def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
2348                                                       IIC_iStore_m,
2349                                                       IIC_iStore_mu]>;
2350 def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
2351 def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
2352
2353 def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
2354
2355 def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>;
2356 def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>;
2357 def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64,
2358                                         IIC_fpStore_m, IIC_fpStore_mu]>;
2359
2360 // Note: Unlike VLDM, VLD1 expects the writeback operand after the
2361 // normal writes.
2362 def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
2363                                          IIC_VLD1x2, IIC_VLD1x2u]>;
2364 def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
2365                                          IIC_VLD1x4, IIC_VLD1x4u,
2366                                          IIC_VLD4dup, IIC_VLD4dupu]>;
2367 def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
2368                                             IIC_VLD2, IIC_VLD2u,
2369                                             IIC_VLD2dup, IIC_VLD2dupu]>;
2370 def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu,
2371                                             IIC_VLD2x2, IIC_VLD2x2u,
2372                                             IIC_VLD2ln, IIC_VLD2lnu]>;
2373 def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
2374                                             IIC_VLD3dup, IIC_VLD3dupu]>;
2375 def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
2376                                             IIC_VLD4ln, IIC_VLD4lnu]>;
2377 def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
2378
2379 // Vector stores use similar resources to vector loads, so use the
2380 // same write types. The address write must be first for stores with
2381 // address writeback.
2382 def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u,
2383                                          IIC_VST1x2, IIC_VST1x2u,
2384                                          IIC_VST1ln, IIC_VST1lnu,
2385                                          IIC_VST2, IIC_VST2u,
2386                                          IIC_VST2x2, IIC_VST2x2u,
2387                                          IIC_VST2ln, IIC_VST2lnu]>;
2388 def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u,
2389                                          IIC_VST1x4, IIC_VST1x4u,
2390                                          IIC_VST3, IIC_VST3u,
2391                                          IIC_VST3ln, IIC_VST3lnu,
2392                                          IIC_VST4, IIC_VST4u,
2393                                          IIC_VST4ln, IIC_VST4lnu]>;
2394
2395 // NEON moves.
2396 def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
2397 def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
2398 def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;
2399
2400 // NEON integer arithmetic
2401 //
2402 // VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
2403 def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
2404 // VSUB/VMVN/VCLSD/VCLZD/VCNTD
2405 def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
2406 // VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
2407 // ...
2408 // VHADD/VRHADD/VQADD/VTST/VADH/VRADH
2409 def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
2410
2411 // VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
2412 def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
2413 // VQNEG/VQABS
2414 def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
2415 // VABS
2416 def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
2417 // VPADD/VPADDL are mapped later under IIC_SHLi.
2418 // ...
2419 // VCLSQ/VCLZQ/VCNTQ, takes two cycles.
2420 def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
2421 // VMOVimm/VMVNimm/VORRimm/VBICimm
2422 def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
2423 def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>;
2424 def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;
2425
2426 // NEON integer multiply
2427 //
2428 // Note: these don't quite match the timing docs, but they do match
2429 // the original A9 itinerary.
2430 def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
2431 def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
2432 def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>;
2433 def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
2434 def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>;
2435 def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
2436 def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
2437 def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;
2438
2439 // NEON integer shift
2440 // TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
2441 def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
2442 def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
2443
2444 // NEON permute
2445 def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>;
2446 def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2],
2447             [IIC_VPERMQ3, IIC_VEXTQ]>;
2448 def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>;
2449 def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
2450 def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
2451 def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
2452 def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>;
2453 def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
2454 def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
2455 def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
2456             [IIC_VTBX4]>;
2457
2458 // NEON floating-point
2459 def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>;
2460 def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
2461 def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
2462 def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
2463 def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
2464 def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
2465 def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
2466 def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
2467
2468 // Map SchedRWs that are identical for cortexa9 to existing resources.
2469 def : SchedAlias<WriteALU, A9WriteALU>;
2470 def : SchedAlias<WriteALUsr, A9WriteALUsr>;
2471 def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
2472 def : SchedAlias<ReadALU, A9ReadALU>;
2473 def : SchedAlias<ReadALUsr, A9ReadALU>;
2474 def : InstRW< [WriteALU],
2475       (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
2476                  "BICrr")>;
2477 def : InstRW< [WriteALUsi], (instregex "ANDrsi", "ORRrsi", "EORrsi", "BICrsi")>;
2478 def : InstRW< [WriteALUsr], (instregex "ANDrsr", "ORRrsr", "EORrsr", "BICrsr")>;
2479
2480
2481 def : SchedAlias<WriteCMP, A9WriteALU>;
2482 def : SchedAlias<WriteCMPsi, A9WriteALU>;
2483 def : SchedAlias<WriteCMPsr, A9WriteALU>;
2484
2485 def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi",
2486                                        "MOVCCsr")>;
2487 def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>;
2488 def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm",
2489                                       "MOV_ga_dyn")>;
2490 def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel$")>; // Anchored: excludes MOV_ga_pcrel_ldr.
2491 def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
2492
2493 def : InstRW< [WriteALU], (instregex "SEL")>;
2494
2495 def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;
2496
2497 def : InstRW< [A9WriteM],
2498       (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS",
2499       "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
2500 def : InstRW< [A9WriteM, A9WriteMHi],
2501       (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
2502       "UMAAL", "SMLALv5", "UMLALv5", "UMAALv5", "SMLALBB", "SMLALBT", "SMLALTB",
2503       "SMLALTT")>;
2504 // FIXME: These instructions used to have NoItinerary. Just copied the one from above.
2505 def : InstRW< [A9WriteM, A9WriteMHi],
2506       (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
2507       "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
2508
2509 def : InstRW<[A9WriteM16, A9WriteM16Hi],
2510       (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
2511 def : InstRW<[A9WriteM16, A9WriteM16Hi],
2512       (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;
2513
2514 def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>;
2515 def : InstRW<[A9WriteLsi], (instregex "LDRrs")>;
2516 def : InstRW<[A9WriteLb],
2517       (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
2518       "LDRH", "LDRSH", "LDRSB")>;
2519 def : InstRW<[A9WriteLbsi], (instregex "LDRBrs")>; // LDRrs is mapped to A9WriteLsi above.
2520
2521 def : WriteRes<WriteDiv, []> { let Latency = 0; }
2522
2523 def : WriteRes<WriteBr, [A9UnitB]>;
2524 def : WriteRes<WriteBrL, [A9UnitB]>;
2525 def : WriteRes<WriteBrTbl, [A9UnitB]>;
2526 def : WriteRes<WritePreLd, []>;
2527 def : SchedAlias<WriteCvtFP, A9WriteF>;
2528 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
2529 } // SchedModel = CortexA9Model