ARM instruction itinerary fixes:
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units referenced by the itinerary stages below
19 def A9_Pipe0   : FuncUnit; // integer pipeline 0 (dual issue with Pipe1)
20 def A9_Pipe1   : FuncUnit; // integer pipeline 1 (dual issue with Pipe0)
21 def A9_AGU     : FuncUnit; // LS (load/store) pipe
22 def A9_NPipe   : FuncUnit; // NEON ALU/MUL pipe
23 def A9_DRegsVFP: FuncUnit; // artificial FU: FP register set, VFP side
24 def A9_DRegsN  : FuncUnit; // artificial FU: FP register set, NEON side
25
26 // Bypasses (named in the bypass lists of the itineraries below)
27 def A9_LdBypass : Bypass;
28
29 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
30 //
31 def CortexA9Itineraries : ProcessorItineraries<
32   [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_AGU, A9_Pipe0, A9_Pipe1],
33   [A9_LdBypass], [
34   // Two fully-pipelined integer ALU pipelines
35
36   //
37   // Move instructions, unconditional
38   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
39   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
40   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
41   InstrItinData<IIC_iMOVsr  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
42   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
43                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
44   //
45   // MVN instructions
46   InstrItinData<IIC_iMVNi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
47                               [1]>,
48   InstrItinData<IIC_iMVNr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
49                               [1, 1], [NoBypass, A9_LdBypass]>,
50   InstrItinData<IIC_iMVNsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
51                               [2, 1]>,
52   InstrItinData<IIC_iMVNsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
53                               [3, 1, 1]>,
54   //
55   // No operand cycles
56   InstrItinData<IIC_iALUx   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
57   //
58   // Binary Instructions that produce a result
59   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
60                             [1, 1], [NoBypass, A9_LdBypass]>,
61   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
62                             [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
63   InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
64                             [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
65   InstrItinData<IIC_iALUsir,[InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
66                             [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
67   InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
68                             [3, 1, 1, 1],
69                             [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
70   //
71   // Bitwise Instructions that produce a result
72   InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
73   InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1, 1]>,
74   InstrItinData<IIC_iBITsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
75   InstrItinData<IIC_iBITsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [3, 1, 1, 1]>,
76   //
77   // Unary Instructions that produce a result
78
79   // CLZ, RBIT, etc.
80   InstrItinData<IIC_iUNAr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
81
82   // BFC, BFI, UBFX, SBFX
83   InstrItinData<IIC_iUNAsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
84
85   //
86   // Zero and sign extension instructions
87   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
88   InstrItinData<IIC_iEXTAr, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [3, 1, 1]>,
89   InstrItinData<IIC_iEXTAsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>],[3, 1, 1, 1]>,
90   //
91   // Compare instructions
92   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
93                               [1], [A9_LdBypass]>,
94   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
95                               [1, 1], [A9_LdBypass, A9_LdBypass]>,
96   InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
97                               [1, 1], [A9_LdBypass, NoBypass]>,
98   InstrItinData<IIC_iCMPsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
99                               [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
100   //
101   // Test instructions
102   InstrItinData<IIC_iTSTi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
103   InstrItinData<IIC_iTSTr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
104   InstrItinData<IIC_iTSTsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
105   InstrItinData<IIC_iTSTsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [1, 1, 1]>,
106   //
107   // Move instructions, conditional
108   // FIXME: Correctly model the extra input dep on the destination.
109   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
110   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
111   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
112   InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
113
114   // Integer multiply pipeline
115   //
116   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A9_Pipe1], 0>,
117                                 InstrStage<2, [A9_Pipe0]>], [3, 1, 1]>,
118   InstrItinData<IIC_iMAC16   , [InstrStage<1, [A9_Pipe1], 0>,
119                                 InstrStage<2, [A9_Pipe0]>], [3, 1, 1, 1]>,
120   InstrItinData<IIC_iMUL32   , [InstrStage<1, [A9_Pipe1], 0>,
121                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
122   InstrItinData<IIC_iMAC32   , [InstrStage<1, [A9_Pipe1], 0>,
123                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 1]>,
124   InstrItinData<IIC_iMUL64   , [InstrStage<1, [A9_Pipe1], 0>,
125                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
126   InstrItinData<IIC_iMAC64   , [InstrStage<1, [A9_Pipe1], 0>,
127                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
128   // Integer load pipeline
129   // FIXME: The timings are some rough approximations
130   //
131   // Immediate offset
132   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A9_Pipe1]>,
133                                  InstrStage<1, [A9_AGU]>],
134                                 [3, 1], [A9_LdBypass]>,
135   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Pipe1]>,
136                                  InstrStage<2, [A9_AGU]>],
137                                 [4, 1], [A9_LdBypass]>,
138   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
139   InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Pipe1]>,
140                                  InstrStage<2, [A9_AGU]>],
141                                 [3, 3, 1], [A9_LdBypass]>,
142   //
143   // Register offset
144   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A9_Pipe1]>,
145                                  InstrStage<1, [A9_AGU]>],
146                                 [3, 1, 1], [A9_LdBypass]>,
147   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Pipe1]>,
148                                  InstrStage<2, [A9_AGU]>],
149                                 [4, 1, 1], [A9_LdBypass]>,
150   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Pipe1]>,
151                                  InstrStage<2, [A9_AGU]>],
152                                 [3, 3, 1, 1], [A9_LdBypass]>,
153   //
154   // Scaled register offset
155   InstrItinData<IIC_iLoad_si  , [InstrStage<1, [A9_Pipe1]>,
156                                  InstrStage<1, [A9_AGU]>],
157                                 [4, 1, 1], [A9_LdBypass]>,
158   InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Pipe1]>,
159                                  InstrStage<2, [A9_AGU]>],
160                                 [5, 1, 1], [A9_LdBypass]>,
161   //
162   // Immediate offset with update
163   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A9_Pipe1]>,
164                                  InstrStage<1, [A9_AGU]>],
165                                 [3, 2, 1], [A9_LdBypass]>,
166   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Pipe1]>,
167                                  InstrStage<2, [A9_AGU]>],
168                                 [4, 3, 1], [A9_LdBypass]>,
169   //
170   // Register offset with update
171   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A9_Pipe1]>,
172                                  InstrStage<1, [A9_AGU]>],
173                                 [3, 2, 1, 1], [A9_LdBypass]>,
174   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Pipe1]>,
175                                  InstrStage<2, [A9_AGU]>],
176                                 [4, 3, 1, 1], [A9_LdBypass]>,
177   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Pipe1]>,
178                                  InstrStage<2, [A9_AGU]>],
179                                 [3, 3, 1, 1], [A9_LdBypass]>,
180   //
181   // Scaled register offset with update
182   InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Pipe1]>,
183                                  InstrStage<1, [A9_AGU]>],
184                                 [4, 3, 1, 1], [A9_LdBypass]>,
185   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Pipe1]>,
186                                  InstrStage<2, [A9_AGU]>],
187                                 [5, 4, 1, 1], [A9_LdBypass]>,
188   //
189   // Load multiple
190   InstrItinData<IIC_iLoadm   , [InstrStage<1, [A9_Pipe1]>,
191                                 InstrStage<2, [A9_AGU]>],
192                                [3], [A9_LdBypass]>,
193
194   //
195   // Load multiple plus branch
196   InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
197                                 InstrStage<1, [A9_AGU]>,
198                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
199
200   //
201   // iLoadi + iALUr for t2LDRpci_pic.
202   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
203                                 InstrStage<1, [A9_AGU]>,
204                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
205                                [2, 1]>,
206
207   // Integer store pipeline
208   //
209   // Immediate offset
210   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A9_Pipe1]>,
211                                  InstrStage<1, [A9_AGU]>], [1, 1]>,
212   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Pipe1]>,
213                                  InstrStage<2, [A9_AGU]>], [1, 1]>,
214   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
215   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Pipe1]>,
216                                  InstrStage<2, [A9_AGU]>], [1, 1]>,
217   //
218   // Register offset
219   InstrItinData<IIC_iStore_r  , [InstrStage<1, [ A9_Pipe1]>,
220                                  InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
221   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [ A9_Pipe1]>,
222                                  InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
223   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [ A9_Pipe1]>,
224                                  InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
225   //
226   // Scaled register offset
227   InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Pipe1]>,
228                                  InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
229   InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Pipe1]>,
230                                  InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
231   //
232   // Immediate offset with update
233   InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Pipe1]>,
234                                  InstrStage<1, [A9_AGU]>], [2, 1, 1]>,
235   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Pipe1]>,
236                                  InstrStage<2, [A9_AGU]>], [3, 1, 1]>,
237   //
238   // Register offset with update
239   InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Pipe1]>,
240                                  InstrStage<1, [A9_AGU]>], [2, 1, 1, 1]>,
241   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Pipe1]>,
242                                  InstrStage<2, [A9_AGU]>], [3, 1, 1, 1]>,
243   InstrItinData<IIC_iStore_d_ru,[InstrStage<1, [A9_Pipe1]>,
244                                  InstrStage<2, [A9_AGU]>], [3, 1, 1, 1]>,
245   //
246   // Scaled register offset with update
247   InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Pipe1]>,
248                                  InstrStage<1, [A9_AGU]>], [2, 1, 1, 1]>,
249   InstrItinData<IIC_iStore_bh_siu,[InstrStage<1, [A9_Pipe1]>,
250                                  InstrStage<2, [A9_AGU]>], [3, 1, 1, 1]>,
251   //
252   // Store multiple
253   InstrItinData<IIC_iStorem  , [InstrStage<1, [A9_Pipe1]>,
254                                 InstrStage<1, [A9_AGU]>]>,
255   // Branch
256   //
257   // no delay slots, so the latency of a branch is unimportant
258   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
259
260   // VFP and NEON share the same register file. This means that every VFP
261   // instruction should wait for full completion of the preceding NEON
262   // instruction and vice-versa. We model this behavior with two artificial FUs:
263   // DRegsVFP and DRegsN.
264   //
265   // Every VFP instruction:
266   //  - Acquires DRegsVFP resource for 1 cycle
267   //  - Reserves DRegsN resource for the whole duration (including time to
268   //    register file writeback!).
269   // Every NEON instruction does the same but with FUs swapped.
270   //
271   // Since a reserved FU cannot be acquired, this precisely models
272   // "cross-domain" stalls.
273
274   // VFP
275   // Issue through integer pipeline, and execute in NEON unit.
276
277   // FP Special Register to Integer Register File Move
278   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
279                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
280                               InstrStage<1, [A9_Pipe1]>,
281                               InstrStage<1, [A9_NPipe]>]>,
282   //
283   // Single-precision FP Unary
284   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
285                                // Extra latency cycles since wbck is 2 cycles
286                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
287                                InstrStage<1, [A9_Pipe1]>,
288                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
289   //
290   // Double-precision FP Unary
291   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
292                                // Extra latency cycles since wbck is 2 cycles
293                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
294                                InstrStage<1, [A9_Pipe1]>,
295                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
296
297   //
298   // Single-precision FP Compare
299   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
300                                // Extra latency cycles since wbck is 4 cycles
301                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
302                                InstrStage<1, [A9_Pipe1]>,
303                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
304   //
305   // Double-precision FP Compare
306   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
307                                // Extra latency cycles since wbck is 4 cycles
308                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
309                                InstrStage<1, [A9_Pipe1]>,
310                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
311   //
312   // Single to Double FP Convert
313   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
314                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
315                                InstrStage<1, [A9_Pipe1]>,
316                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
317   //
318   // Double to Single FP Convert
319   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
320                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
321                                InstrStage<1, [A9_Pipe1]>,
322                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
323
324   //
325   // Single to Half FP Convert
326   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
327                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
328                                InstrStage<1, [A9_Pipe1]>,
329                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
330   //
331   // Half to Single FP Convert
332   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
333                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
334                                InstrStage<1, [A9_Pipe1]>,
335                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
336
337   //
338   // Single-Precision FP to Integer Convert
339   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
340                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
341                                InstrStage<1, [A9_Pipe1]>,
342                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
343   //
344   // Double-Precision FP to Integer Convert
345   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
346                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
347                                InstrStage<1, [A9_Pipe1]>,
348                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
349   //
350   // Integer to Single-Precision FP Convert
351   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
352                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
353                                InstrStage<1, [A9_Pipe1]>,
354                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
355   //
356   // Integer to Double-Precision FP Convert
357   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
358                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
359                                InstrStage<1, [A9_Pipe1]>,
360                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
361   //
362   // Single-precision FP ALU
363   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
364                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
365                                InstrStage<1, [A9_Pipe1]>,
366                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
367   //
368   // Double-precision FP ALU
369   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
370                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
371                                InstrStage<1, [A9_Pipe1]>,
372                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
373   //
374   // Single-precision FP Multiply
375   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
376                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
377                                InstrStage<1, [A9_Pipe1]>,
378                                InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
379   //
380   // Double-precision FP Multiply
381   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
382                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
383                                InstrStage<1, [A9_Pipe1]>,
384                                InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
385   //
386   // Single-precision FP MAC
387   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
388                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
389                                InstrStage<1, [A9_Pipe1]>,
390                                InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
391   //
392   // Double-precision FP MAC
393   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
394                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
395                                InstrStage<1,  [A9_Pipe1]>,
396                                InstrStage<2,  [A9_NPipe]>], [9, 0, 1, 1]>,
397   //
398   // Single-precision FP DIV
399   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
400                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
401                                InstrStage<1,  [A9_Pipe1]>,
402                                InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
403   //
404   // Double-precision FP DIV
405   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
406                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
407                                InstrStage<1,  [A9_Pipe1]>,
408                                InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
409   //
410   // Single-precision FP SQRT
411   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
412                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
413                                InstrStage<1,  [A9_Pipe1]>,
414                                InstrStage<13, [A9_NPipe]>], [17, 1]>,
415   //
416   // Double-precision FP SQRT
417   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
418                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
419                                InstrStage<1,  [A9_Pipe1]>,
420                                InstrStage<28, [A9_NPipe]>], [32, 1]>,
421
422   //
423   // Integer to Single-precision Move
424   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
425                                // Extra 1 latency cycle since wbck is 2 cycles
426                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
427                                InstrStage<1, [A9_Pipe1]>,
428                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
429   //
430   // Integer to Double-precision Move
431   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
432                                // Extra 1 latency cycle since wbck is 2 cycles
433                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
434                                InstrStage<1, [A9_Pipe1]>,
435                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
436   //
437   // Single-precision to Integer Move
438   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
439                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
440                                InstrStage<1, [A9_Pipe1]>,
441                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
442   //
443   // Double-precision to Integer Move
444   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
445                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
446                                InstrStage<1, [A9_Pipe1]>,
447                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
448   //
449   // Single-precision FP Load
450   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
451                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
452                                InstrStage<1, [A9_Pipe1], 0>,
453                                InstrStage<1, [A9_AGU]>,
454                                InstrStage<1, [A9_NPipe]>]>,
455   //
456   // Double-precision FP Load
457   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
458                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
459                                InstrStage<1, [A9_Pipe1], 0>,
460                                InstrStage<1, [A9_AGU]>,
461                                InstrStage<1, [A9_NPipe]>]>,
462   //
463   // FP Load Multiple
464   InstrItinData<IIC_fpLoadm,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
465                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
466                                InstrStage<1, [A9_Pipe1], 0>,
467                                InstrStage<1, [A9_AGU]>,
468                                InstrStage<1, [A9_NPipe]>]>,
469   //
470   // Single-precision FP Store
471   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
472                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
473                                InstrStage<1, [A9_Pipe1], 0>,
474                                InstrStage<1, [A9_AGU]>,
475                                InstrStage<1, [A9_NPipe]>]>,
476   //
477   // Double-precision FP Store
478   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
479                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
480                                InstrStage<1, [A9_Pipe1], 0>,
481                                InstrStage<1, [A9_AGU]>,
482                                InstrStage<1, [A9_NPipe]>]>,
483   //
484   // FP Store Multiple
485   InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
486                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
487                                InstrStage<1, [A9_Pipe1], 0>,
488                                InstrStage<1, [A9_AGU]>,
489                                InstrStage<1, [A9_NPipe]>]>,
490   // NEON
491   // Issue through integer pipeline, and execute in NEON unit.
492   // FIXME: Neon pipeline and LdSt unit are multiplexed.
493   //        Add some syntactic sugar to model this!
494   // VLD1
495   // FIXME: We don't model this instruction properly
496   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
497                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
498                                InstrStage<1, [A9_Pipe1], 0>,
499                                InstrStage<1, [A9_AGU]>,
500                                InstrStage<1, [A9_NPipe]>]>,
501   //
502   // VLD2
503   // FIXME: We don't model this instruction properly
504   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
505                                // Extra latency cycles since wbck is 6 cycles
506                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
507                                InstrStage<1, [A9_Pipe1], 0>,
508                                InstrStage<1, [A9_AGU]>,
509                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
510   //
511   // VLD3
512   // FIXME: We don't model this instruction properly
513   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
514                                // Extra latency cycles since wbck is 6 cycles
515                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
516                                InstrStage<1, [A9_Pipe1], 0>,
517                                InstrStage<1, [A9_AGU]>,
518                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
519   //
520   // VLD4
521   // FIXME: We don't model this instruction properly
522   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
523                                // Extra latency cycles since wbck is 6 cycles
524                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
525                                InstrStage<1, [A9_Pipe1], 0>,
526                                InstrStage<1, [A9_AGU]>,
527                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
528   //
529   // VST
530   // FIXME: We don't model this instruction properly
531   InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
532                                // Extra latency cycles since wbck is 6 cycles
533                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
534                                InstrStage<1, [A9_Pipe1], 0>,
535                                InstrStage<1, [A9_AGU]>,
536                                InstrStage<1, [A9_NPipe]>]>,
537   //
538   // Double-register Integer Unary
539   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
540                                // Extra latency cycles since wbck is 6 cycles
541                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
542                                InstrStage<1, [A9_Pipe1]>,
543                                InstrStage<1, [A9_NPipe]>], [4, 2]>,
544   //
545   // Quad-register Integer Unary
546   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
547                                // Extra latency cycles since wbck is 6 cycles
548                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
549                                InstrStage<1, [A9_Pipe1]>,
550                                InstrStage<1, [A9_NPipe]>], [4, 2]>,
551   //
552   // Double-register Integer Q-Unary
553   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
554                                // Extra latency cycles since wbck is 6 cycles
555                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
556                                InstrStage<1, [A9_Pipe1]>,
557                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
558   //
559   // Quad-register Integer Q-Unary
560   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
561                                // Extra latency cycles since wbck is 6 cycles
562                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
563                                InstrStage<1, [A9_Pipe1]>,
564                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
565   //
566   // Double-register Integer Binary
567   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
568                                // Extra latency cycles since wbck is 6 cycles
569                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
570                                InstrStage<1, [A9_Pipe1]>,
571                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
572   //
573   // Quad-register Integer Binary
574   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
575                                // Extra latency cycles since wbck is 6 cycles
576                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
577                                InstrStage<1, [A9_Pipe1]>,
578                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
579   //
580   // Double-register Integer Subtract
581   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
582                                // Extra latency cycles since wbck is 6 cycles
583                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
584                                InstrStage<1, [A9_Pipe1]>,
585                                InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
586   //
587   // Quad-register Integer Subtract
588   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
589                                // Extra latency cycles since wbck is 6 cycles
590                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
591                                InstrStage<1, [A9_Pipe1]>,
592                                InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
593   //
594   // Double-register Integer Shift
595   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
596                                // Extra latency cycles since wbck is 6 cycles
597                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
598                                InstrStage<1, [A9_Pipe1]>,
599                                InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
600   //
601   // Quad-register Integer Shift
602   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
603                                // Extra latency cycles since wbck is 6 cycles
604                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
605                                InstrStage<1, [A9_Pipe1]>,
606                                InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
607   //
608   // Double-register Integer Shift (4 cycle)
609   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
610                                // Extra latency cycles since wbck is 6 cycles
611                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
612                                InstrStage<1, [A9_Pipe1]>,
613                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
614   //
615   // Quad-register Integer Shift (4 cycle)
616   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
617                                // Extra latency cycles since wbck is 6 cycles
618                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
619                                InstrStage<1, [A9_Pipe1]>,
620                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
621   //
622   // Double-register Integer Binary (4 cycle)
623   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
624                                // Extra latency cycles since wbck is 6 cycles
625                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
626                                InstrStage<1, [A9_Pipe1]>,
627                                InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
628   //
629   // Quad-register Integer Binary (4 cycle)
630   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
631                                // Extra latency cycles since wbck is 6 cycles
632                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
633                                InstrStage<1, [A9_Pipe1]>,
634                                InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
635   //
636   // Double-register Integer Subtract (4 cycle); result cycle 4 vs. 3 for VSUBiD
637   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
638                                // Extra latency cycles since wbck is 6 cycles
639                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
640                                InstrStage<1, [A9_Pipe1]>,
641                                InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
642   //
643   // Quad-register Integer Subtract (4 cycle); result cycle 4 vs. 3 for VSUBiQ
644   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
645                                // Extra latency cycles since wbck is 6 cycles
646                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
647                                InstrStage<1, [A9_Pipe1]>,
648                                InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
649
650   //
651   // Double-register Integer Count
652   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
653                                // Extra latency cycles since wbck is 6 cycles
654                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
655                                InstrStage<1, [A9_Pipe1]>,
656                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
657   //
658   // Quad-register Integer Count
659   // Result written in N3, but that is relative to the last cycle of multicycle,
660   // so we use 4 for those cases
661   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
662                                // Extra latency cycles since wbck is 7 cycles
663                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
664                                InstrStage<1, [A9_Pipe1]>,
665                                InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
666   //
667   // Double-register Absolute Difference and Accumulate
668   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
669                                // Extra latency cycles since wbck is 6 cycles
670                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
671                                InstrStage<1, [A9_Pipe1]>,
672                                InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
673   //
674   // Quad-register Absolute Difference and Accumulate
675   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
676                                // Extra latency cycles since wbck is 6 cycles
677                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
678                                InstrStage<1, [A9_Pipe1]>,
679                                InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
680   //
681   // Double-register Integer Pair Add Long
682   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
683                                // Extra latency cycles since wbck is 6 cycles
684                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
685                                InstrStage<1, [A9_Pipe1]>,
686                                InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
687   //
688   // Quad-register Integer Pair Add Long
689   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
690                                // Extra latency cycles since wbck is 6 cycles
691                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
692                                InstrStage<1, [A9_Pipe1]>,
693                                InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
694
695   //
696   // Double-register Integer Multiply (.8, .16)
697   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
698                                // Extra latency cycles since wbck is 6 cycles
699                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
700                                InstrStage<1, [A9_Pipe1]>,
701                                InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
702   //
703   // Quad-register Integer Multiply (.8, .16)
704   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
705                                // Extra latency cycles since wbck is 7 cycles
706                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
707                                InstrStage<1, [A9_Pipe1]>,
708                                InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
709
710   //
711   // Double-register Integer Multiply (.32)
712   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
713                                // Extra latency cycles since wbck is 7 cycles
714                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
715                                InstrStage<1, [A9_Pipe1]>,
716                                InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
717   //
718   // Quad-register Integer Multiply (.32)
719   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
720                                // Extra latency cycles since wbck is 9 cycles
721                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
722                                InstrStage<1, [A9_Pipe1]>,
723                                InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
724   //
725   // Double-register Integer Multiply-Accumulate (.8, .16)
726   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
727                                // Extra latency cycles since wbck is 6 cycles
728                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
729                                InstrStage<1, [A9_Pipe1]>,
730                                InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
731   //
732   // Double-register Integer Multiply-Accumulate (.32)
733   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
734                                // Extra latency cycles since wbck is 7 cycles
735                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
736                                InstrStage<1, [A9_Pipe1]>,
737                                InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
738   //
739   // Quad-register Integer Multiply-Accumulate (.8, .16)
740   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
741                                // Extra latency cycles since wbck is 7 cycles
742                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
743                                InstrStage<1, [A9_Pipe1]>,
744                                InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
745   //
746   // Quad-register Integer Multiply-Accumulate (.32)
747   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
748                                // Extra latency cycles since wbck is 9 cycles
749                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
750                                InstrStage<1, [A9_Pipe1]>,
751                                InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
752   //
753   // Move Immediate
754   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
755                                // Extra latency cycles since wbck is 6 cycles
756                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
757                                InstrStage<1, [A9_Pipe1]>,
758                                InstrStage<1, [A9_NPipe]>], [3]>,
759   //
760   // Double-register Permute Move
761   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
762   // FIXME: all latencies are arbitrary, no information is available
763                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
764                                InstrStage<1, [A9_Pipe1]>,
765                                InstrStage<1, [A9_AGU]>], [2, 1]>,
766   //
767   // Quad-register Permute Move
768   // Result written in N2, but that is relative to the last cycle of multicycle,
769   // so we use 3 for those cases
770   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
771   // FIXME: all latencies are arbitrary, no information is available
772                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
773                                InstrStage<1, [A9_Pipe1]>,
774                                InstrStage<2, [A9_NPipe]>], [3, 1]>,
775   //
776   // Integer to Single-precision Move
777   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
778   // FIXME: all latencies are arbitrary, no information is available
779                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
780                                InstrStage<1, [A9_Pipe1]>,
781                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
782   //
783   // Integer to Double-precision Move
784   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
785   // FIXME: all latencies are arbitrary, no information is available
786                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
787                                InstrStage<1, [A9_Pipe1]>,
788                                InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
789   //
790   // Single-precision to Integer Move
791   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
792   // FIXME: all latencies are arbitrary, no information is available
793                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
794                                InstrStage<1, [A9_Pipe1]>,
795                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
796   //
797   // Double-precision to Integer Move
798   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
799   // FIXME: all latencies are arbitrary, no information is available
800                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
801                                InstrStage<1, [A9_Pipe1]>,
802                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
803   //
804   // Integer to Lane Move
805   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
806   // FIXME: all latencies are arbitrary, no information is available
807                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
808                                InstrStage<1, [A9_Pipe1]>,
809                                InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
810
811   //
812   // Double-register FP Unary
813   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
814                                // Extra latency cycles since wbck is 6 cycles
815                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
816                                InstrStage<1, [A9_Pipe1]>,
817                                InstrStage<1, [A9_NPipe]>], [5, 2]>,
818   //
819   // Quad-register FP Unary
820   // Result written in N5, but that is relative to the last cycle of multicycle,
821   // so we use 6 for those cases
822   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
823                                // Extra latency cycles since wbck is 7 cycles
824                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
825                                InstrStage<1, [A9_Pipe1]>,
826                                InstrStage<2, [A9_NPipe]>], [6, 2]>,
827   //
828   // Double-register FP Binary
829   // FIXME: We're using this itin for many instructions and [2, 2] here is too
830   // optimistic.
831   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
832                                // Extra latency cycles since wbck is 6 cycles
833                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
834                                InstrStage<1, [A9_Pipe1]>,
835                                InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
836   //
837   // Quad-register FP Binary
838   // Result written in N5, but that is relative to the last cycle of multicycle,
839   // so we use 6 for those cases
840   // FIXME: We're using this itin for many instructions and [2, 2] here is too
841   // optimistic.
842   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
843                                // Extra latency cycles since wbck is 7 cycles
844                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
845                                InstrStage<1, [A9_Pipe1]>,
846                                InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
847   //
848   // Double-register FP Multiply-Accumulate
849   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
850                                // Extra latency cycles since wbck is 7 cycles
851                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
852                                InstrStage<1, [A9_Pipe1]>,
853                                InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
854   //
855   // Quad-register FP Multiply-Accumulate
856   // Result written in N9, but that is relative to the last cycle of multicycle,
857   // so we use 10 for those cases
858   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
859                                // Extra latency cycles since wbck is 9 cycles
860                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
861                                InstrStage<1, [A9_Pipe1]>,
862                                InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
863   //
864   // Double-register Reciprocal Step
865   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
866                                // Extra latency cycles since wbck is 7 cycles
867                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
868                                InstrStage<1, [A9_Pipe1]>,
869                                InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
870   //
871   // Quad-register Reciprocal Step
872   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
873                                // Extra latency cycles since wbck is 9 cycles
874                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
875                                InstrStage<1, [A9_Pipe1]>,
876                                InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
877   //
878   // Double-register Permute
879   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
880                                // Extra latency cycles since wbck is 6 cycles
881                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
882                                InstrStage<1, [A9_Pipe1]>,
883                                InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
884   //
885   // Quad-register Permute
886   // Result written in N2, but that is relative to the last cycle of multicycle,
887   // so we use 3 for those cases
888   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
889                                // Extra latency cycles since wbck is 7 cycles
890                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
891                                InstrStage<1, [A9_Pipe1]>,
892                                InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
893   //
894   // Quad-register Permute (3 cycle issue)
895   // Result written in N2, but that is relative to the last cycle of multicycle,
896   // so we use 4 for those cases
897   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
898                                // Extra latency cycles since wbck is 8 cycles
899                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
900                                InstrStage<1, [A9_Pipe1]>,
901                                InstrStage<3, [A9_AGU]>], [4, 4, 1, 1]>,
902
903   //
904   // Double-register VEXT
905   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
906                                // Extra latency cycles since wbck is 6 cycles
907                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
908                                InstrStage<1, [A9_Pipe1]>,
909                                InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
910   //
911   // Quad-register VEXT
912   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
913                                // Extra latency cycles since wbck is 7 cycles
914                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
915                                InstrStage<1, [A9_Pipe1]>,
916                                InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
917   //
918   // VTB
919   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
920                                // Extra latency cycles since wbck is 7 cycles
921                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
922                                InstrStage<1, [A9_Pipe1]>,
923                                InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
924   InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
925                                // Extra latency cycles since wbck is 7 cycles
926                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
927                                InstrStage<1, [A9_Pipe1]>,
928                                InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
929   InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
930                                // Extra latency cycles since wbck is 8 cycles
931                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
932                                InstrStage<1, [A9_Pipe1]>,
933                                InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
934   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
935                                // Extra latency cycles since wbck is 8 cycles
936                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
937                                InstrStage<1, [A9_Pipe1]>,
938                                InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
939   //
940   // VTBX
941   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
942                                // Extra latency cycles since wbck is 7 cycles
943                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
944                                InstrStage<1, [A9_Pipe1]>,
945                                InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
946   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
947                                // Extra latency cycles since wbck is 7 cycles
948                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
949                                InstrStage<1, [A9_Pipe1]>,
950                                InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
951   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
952                                // Extra latency cycles since wbck is 8 cycles
953                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
954                                InstrStage<1, [A9_Pipe1]>,
955                                InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
956   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
957                                // Extra latency cycles since wbck is 8 cycles
958                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
959                                InstrStage<1, [A9_Pipe1]>,
960                               InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
961 ]>;