Fix zero and sign extension instructions scheduling itineraries.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA9.td
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
16 // Technical Reference Manual".
17 //
18 // Functional units
19 def A9_Pipe0   : FuncUnit; // integer ALU pipeline 0 (dual issue with A9_Pipe1)
20 def A9_Pipe1   : FuncUnit; // integer ALU pipeline 1 (dual issue with A9_Pipe0)
21 def A9_LSPipe  : FuncUnit; // load/store (LS) pipe
22 def A9_NPipe   : FuncUnit; // NEON ALU/MUL pipe (VFP itineraries below use it too)
23 def A9_DRegsVFP: FuncUnit; // artificial FU: shared FP register set, VFP side
24 def A9_DRegsN  : FuncUnit; // artificial FU: shared FP register set, NEON side
25
26 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
27 //
28 def CortexA9Itineraries : ProcessorItineraries<
29   [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
30   // Two fully-pipelined integer ALU pipelines
31   // FIXME: There are no operand latencies for these instructions at all!
32   //
33   // Move instructions, unconditional
34   InstrItinData<IIC_iMOVi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
35   InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
36                                InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
37   InstrItinData<IIC_iMOVr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
38   InstrItinData<IIC_iMOVsi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
39   InstrItinData<IIC_iMOVsr  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
40   //
41   // No operand cycles
42   InstrItinData<IIC_iALUx    , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
43   //
44   // Binary Instructions that produce a result
45   InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
46   InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
47   InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
48   InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
49   //
50   // Unary Instructions that produce a result
51   InstrItinData<IIC_iUNAr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
52   InstrItinData<IIC_iUNAsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
53   InstrItinData<IIC_iUNAsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
54   //
55   // Zero and sign extension instructions
56   InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
57   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [3, 1, 1]>,
58   //
59   // Compare instructions
60   InstrItinData<IIC_iCMPi   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
61   InstrItinData<IIC_iCMPr   , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
62   InstrItinData<IIC_iCMPsi  , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
63   InstrItinData<IIC_iCMPsr  , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
64   //
65   // Move instructions, conditional
66   InstrItinData<IIC_iCMOVi  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
67   InstrItinData<IIC_iCMOVr  , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
68   InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
69   InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
70
71   // Integer multiply pipeline
72   //
73   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A9_Pipe1], 0>,
74                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
75   InstrItinData<IIC_iMAC16   , [InstrStage<1, [A9_Pipe1], 0>,
76                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
77   InstrItinData<IIC_iMUL32   , [InstrStage<1, [A9_Pipe1], 0>,
78                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
79   InstrItinData<IIC_iMAC32   , [InstrStage<1, [A9_Pipe1], 0>,
80                                 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
81   InstrItinData<IIC_iMUL64   , [InstrStage<2, [A9_Pipe1], 0>,
82                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
83   InstrItinData<IIC_iMAC64   , [InstrStage<2, [A9_Pipe1], 0>,
84                                 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
85   // Integer load pipeline
86   // FIXME: The timings are some rough approximations
87   //
88   // Immediate offset
89   InstrItinData<IIC_iLoadi   , [InstrStage<1, [A9_Pipe1]>,
90                                 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
91   //
92   // Register offset
93   InstrItinData<IIC_iLoadr   , [InstrStage<1, [A9_Pipe1]>,
94                                 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
95   //
96   // Scaled register offset
97   InstrItinData<IIC_iLoadsi  , [InstrStage<1, [A9_Pipe1]>,
98                                 InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
99   //
100   // Immediate offset with update
101   InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A9_Pipe1]>,
102                                 InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
103   //
104   // Register offset with update
105   InstrItinData<IIC_iLoadru  , [InstrStage<1, [A9_Pipe1]>,
106                                 InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
107   //
108   // Scaled register offset with update
109   InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
110                                 InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
111   //
112   // Load multiple
113   InstrItinData<IIC_iLoadm   , [InstrStage<1, [A9_Pipe1]>,
114                                 InstrStage<1, [A9_LSPipe]>]>,
115
116   //
117   // Load multiple plus branch
118   InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
119                                 InstrStage<1, [A9_LSPipe]>,
120                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
121
122   //
123   // iLoadi + iALUr for t2LDRpci_pic.
124   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
125                                 InstrStage<1, [A9_LSPipe]>,
126                                 InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [4, 1]>,
127
128   // Integer store pipeline
129   //
130   // Immediate offset
131   InstrItinData<IIC_iStorei  , [InstrStage<1, [A9_Pipe1]>,
132                                 InstrStage<1, [A9_LSPipe]>], [3, 1]>,
133   //
134   // Register offset
135   InstrItinData<IIC_iStorer  , [InstrStage<1, [ A9_Pipe1]>,
136                                 InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
137   //
138   // Scaled register offset
139   InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
140                                 InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
141   //
142   // Immediate offset with update
143   InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
144                                 InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
145   //
146   // Register offset with update
147   InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
148                                 InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
149   //
150   // Scaled register offset with update
151   InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
152                                 InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
153   //
154   // Store multiple
155   InstrItinData<IIC_iStorem  , [InstrStage<1, [A9_Pipe1]>,
156                                 InstrStage<1, [A9_LSPipe]>]>,
157   // Branch
158   //
159   // no delay slots, so the latency of a branch is unimportant
160   InstrItinData<IIC_Br       , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
161
162   // VFP and NEON share the same register file. This means that every VFP
163   // instruction should wait for full completion of the preceding NEON
164   // instruction and vice-versa. We model this behavior with two artificial FUs:
165   // DRegsVFP and DRegsN.
166   //
167   // Every VFP instruction:
168   //  - Acquires DRegsVFP resource for 1 cycle
169   //  - Reserves DRegsN resource for the whole duration (including time to
170   //    register file writeback!).
171   // Every NEON instruction does the same but with FUs swapped.
172   //
173   // Since the reserved FU cannot be acquired, this models precisely
174   // "cross-domain" stalls.
175
176   // VFP
177   // Issue through integer pipeline, and execute in NEON unit.
178
179   // FP Special Register to Integer Register File Move
180   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
181                               InstrStage<2, [A9_DRegsN],   0, Reserved>,
182                               InstrStage<1, [A9_Pipe1]>,
183                               InstrStage<1, [A9_NPipe]>]>,
184   //
185   // Single-precision FP Unary
186   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
187                                // Extra latency cycles since wbck is 2 cycles
188                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
189                                InstrStage<1, [A9_Pipe1]>,
190                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
191   //
192   // Double-precision FP Unary
193   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
194                                // Extra latency cycles since wbck is 2 cycles
195                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
196                                InstrStage<1, [A9_Pipe1]>,
197                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
198
199   //
200   // Single-precision FP Compare
201   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
202                                // Extra latency cycles since wbck is 4 cycles
203                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
204                                InstrStage<1, [A9_Pipe1]>,
205                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
206   //
207   // Double-precision FP Compare
208   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
209                                // Extra latency cycles since wbck is 4 cycles
210                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
211                                InstrStage<1, [A9_Pipe1]>,
212                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
213   //
214   // Single to Double FP Convert
215   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
216                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
217                                InstrStage<1, [A9_Pipe1]>,
218                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
219   //
220   // Double to Single FP Convert
221   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
222                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
223                                InstrStage<1, [A9_Pipe1]>,
224                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
225
226   //
227   // Single to Half FP Convert
228   InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
229                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
230                                InstrStage<1, [A9_Pipe1]>,
231                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
232   //
233   // Half to Single FP Convert
234   InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
235                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
236                                InstrStage<1, [A9_Pipe1]>,
237                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
238
239   //
240   // Single-Precision FP to Integer Convert
241   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
242                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
243                                InstrStage<1, [A9_Pipe1]>,
244                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
245   //
246   // Double-Precision FP to Integer Convert
247   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
248                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
249                                InstrStage<1, [A9_Pipe1]>,
250                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
251   //
252   // Integer to Single-Precision FP Convert
253   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
254                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
255                                InstrStage<1, [A9_Pipe1]>,
256                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
257   //
258   // Integer to Double-Precision FP Convert
259   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
260                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
261                                InstrStage<1, [A9_Pipe1]>,
262                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
263   //
264   // Single-precision FP ALU
265   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
266                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
267                                InstrStage<1, [A9_Pipe1]>,
268                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
269   //
270   // Double-precision FP ALU
271   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
272                                InstrStage<5, [A9_DRegsN],   0, Reserved>,
273                                InstrStage<1, [A9_Pipe1]>,
274                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
275   //
276   // Single-precision FP Multiply
277   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
278                                InstrStage<6, [A9_DRegsN],   0, Reserved>,
279                                InstrStage<1, [A9_Pipe1]>,
280                                InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
281   //
282   // Double-precision FP Multiply
283   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
284                                InstrStage<7, [A9_DRegsN],   0, Reserved>,
285                                InstrStage<1, [A9_Pipe1]>,
286                                InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
287   //
288   // Single-precision FP MAC
289   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
290                                InstrStage<9, [A9_DRegsN],   0, Reserved>,
291                                InstrStage<1, [A9_Pipe1]>,
292                                InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
293   //
294   // Double-precision FP MAC
295   InstrItinData<IIC_fpMAC64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
296                                InstrStage<10, [A9_DRegsN],  0, Reserved>,
297                                InstrStage<1,  [A9_Pipe1]>,
298                                InstrStage<2,  [A9_NPipe]>], [9, 0, 1, 1]>,
299   //
300   // Single-precision FP DIV
301   InstrItinData<IIC_fpDIV32 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
302                                InstrStage<16, [A9_DRegsN],  0, Reserved>,
303                                InstrStage<1,  [A9_Pipe1]>,
304                                InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
305   //
306   // Double-precision FP DIV
307   InstrItinData<IIC_fpDIV64 , [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
308                                InstrStage<26, [A9_DRegsN],  0, Reserved>,
309                                InstrStage<1,  [A9_Pipe1]>,
310                                InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
311   //
312   // Single-precision FP SQRT
313   InstrItinData<IIC_fpSQRT32, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
314                                InstrStage<18, [A9_DRegsN],   0, Reserved>,
315                                InstrStage<1,  [A9_Pipe1]>,
316                                InstrStage<13, [A9_NPipe]>], [17, 1]>,
317   //
318   // Double-precision FP SQRT
319   InstrItinData<IIC_fpSQRT64, [InstrStage<1,  [A9_DRegsVFP], 0, Required>,
320                                InstrStage<33, [A9_DRegsN],   0, Reserved>,
321                                InstrStage<1,  [A9_Pipe1]>,
322                                InstrStage<28, [A9_NPipe]>], [32, 1]>,
323
324   //
325   // Integer to Single-precision Move
326   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
327                                // Extra 1 latency cycle since wbck is 2 cycles
328                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
329                                InstrStage<1, [A9_Pipe1]>,
330                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
331   //
332   // Integer to Double-precision Move
333   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
334                                // Extra 1 latency cycle since wbck is 2 cycles
335                                InstrStage<3, [A9_DRegsN],   0, Reserved>,
336                                InstrStage<1, [A9_Pipe1]>,
337                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
338   //
339   // Single-precision to Integer Move
340   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
341                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
342                                InstrStage<1, [A9_Pipe1]>,
343                                InstrStage<1, [A9_NPipe]>], [1, 1]>,
344   //
345   // Double-precision to Integer Move
346   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
347                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
348                                InstrStage<1, [A9_Pipe1]>,
349                                InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
350   //
351   // Single-precision FP Load
352   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
353                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
354                                InstrStage<1, [A9_Pipe1], 0>,
355                                InstrStage<1, [A9_LSPipe]>,
356                                InstrStage<1, [A9_NPipe]>]>,
357   //
358   // Double-precision FP Load
359   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
360                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
361                                InstrStage<1, [A9_Pipe1], 0>,
362                                InstrStage<1, [A9_LSPipe]>,
363                                InstrStage<1, [A9_NPipe]>]>,
364   //
365   // FP Load Multiple
366   InstrItinData<IIC_fpLoadm,  [InstrStage<1, [A9_DRegsVFP], 0, Required>,
367                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
368                                InstrStage<1, [A9_Pipe1], 0>,
369                                InstrStage<1, [A9_LSPipe]>,
370                                InstrStage<1, [A9_NPipe]>]>,
371   //
372   // Single-precision FP Store
373   InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
374                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
375                                InstrStage<1, [A9_Pipe1], 0>,
376                                InstrStage<1, [A9_LSPipe]>,
377                                InstrStage<1, [A9_NPipe]>]>,
378   //
379   // Double-precision FP Store
380   InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
381                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
382                                InstrStage<1, [A9_Pipe1], 0>,
383                                InstrStage<1, [A9_LSPipe]>,
384                                InstrStage<1, [A9_NPipe]>]>,
385   //
386   // FP Store Multiple
387   InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
388                                InstrStage<2, [A9_DRegsN],   0, Reserved>,
389                                InstrStage<1, [A9_Pipe1], 0>,
390                                InstrStage<1, [A9_LSPipe]>,
391                                InstrStage<1, [A9_NPipe]>]>,
392   // NEON
393   // Issue through integer pipeline, and execute in NEON unit.
394   // FIXME: Neon pipeline and LdSt unit are multiplexed.
395   //        Add some syntactic sugar to model this!
396   // VLD1
397   // FIXME: We don't model this instruction properly
398   InstrItinData<IIC_VLD1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
399                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
400                                InstrStage<1, [A9_Pipe1], 0>,
401                                InstrStage<1, [A9_LSPipe]>,
402                                InstrStage<1, [A9_NPipe]>]>,
403   //
404   // VLD2
405   // FIXME: We don't model this instruction properly
406   InstrItinData<IIC_VLD2,     [InstrStage<1, [A9_DRegsN],   0, Required>,
407                                // Extra latency cycles since wbck is 6 cycles
408                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
409                                InstrStage<1, [A9_Pipe1], 0>,
410                                InstrStage<1, [A9_LSPipe]>,
411                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
412   //
413   // VLD3
414   // FIXME: We don't model this instruction properly
415   InstrItinData<IIC_VLD3,     [InstrStage<1, [A9_DRegsN],   0, Required>,
416                                // Extra latency cycles since wbck is 6 cycles
417                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
418                                InstrStage<1, [A9_Pipe1], 0>,
419                                InstrStage<1, [A9_LSPipe]>,
420                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
421   //
422   // VLD4
423   // FIXME: We don't model this instruction properly
424   InstrItinData<IIC_VLD4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
425                                // Extra latency cycles since wbck is 6 cycles
426                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
427                                InstrStage<1, [A9_Pipe1], 0>,
428                                InstrStage<1, [A9_LSPipe]>,
429                                InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
430   //
431   // VST
432   // FIXME: We don't model this instruction properly
433   InstrItinData<IIC_VST,      [InstrStage<1, [A9_DRegsN],   0, Required>,
434                                // Extra latency cycles since wbck is 6 cycles
435                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
436                                InstrStage<1, [A9_Pipe1], 0>,
437                                InstrStage<1, [A9_LSPipe]>,
438                                InstrStage<1, [A9_NPipe]>]>,
439   //
440   // Double-register Integer Unary
441   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
442                                // Extra latency cycles since wbck is 6 cycles
443                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
444                                InstrStage<1, [A9_Pipe1]>,
445                                InstrStage<1, [A9_NPipe]>], [4, 2]>,
446   //
447   // Quad-register Integer Unary
448   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
449                                // Extra latency cycles since wbck is 6 cycles
450                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
451                                InstrStage<1, [A9_Pipe1]>,
452                                InstrStage<1, [A9_NPipe]>], [4, 2]>,
453   //
454   // Double-register Integer Q-Unary
455   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A9_DRegsN],   0, Required>,
456                                // Extra latency cycles since wbck is 6 cycles
457                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
458                                InstrStage<1, [A9_Pipe1]>,
459                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
460   //
461   // Quad-register Integer Q-Unary
462   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
463                                // Extra latency cycles since wbck is 6 cycles
464                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
465                                InstrStage<1, [A9_Pipe1]>,
466                                InstrStage<1, [A9_NPipe]>], [4, 1]>,
467   //
468   // Double-register Integer Binary
469   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
470                                // Extra latency cycles since wbck is 6 cycles
471                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
472                                InstrStage<1, [A9_Pipe1]>,
473                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
474   //
475   // Quad-register Integer Binary
476   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
477                                // Extra latency cycles since wbck is 6 cycles
478                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
479                                InstrStage<1, [A9_Pipe1]>,
480                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
481   //
482   // Double-register Integer Subtract
483   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
484                                // Extra latency cycles since wbck is 6 cycles
485                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
486                                InstrStage<1, [A9_Pipe1]>,
487                                InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
488   //
489   // Quad-register Integer Subtract
490   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
491                                // Extra latency cycles since wbck is 6 cycles
492                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
493                                InstrStage<1, [A9_Pipe1]>,
494                                InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
495   //
496   // Double-register Integer Shift
497   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
498                                // Extra latency cycles since wbck is 6 cycles
499                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
500                                InstrStage<1, [A9_Pipe1]>,
501                                InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
502   //
503   // Quad-register Integer Shift
504   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
505                                // Extra latency cycles since wbck is 6 cycles
506                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
507                                InstrStage<1, [A9_Pipe1]>,
508                                InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
509   //
510   // Double-register Integer Shift (4 cycle)
511   InstrItinData<IIC_VSHLi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
512                                // Extra latency cycles since wbck is 6 cycles
513                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
514                                InstrStage<1, [A9_Pipe1]>,
515                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
516   //
517   // Quad-register Integer Shift (4 cycle)
518   InstrItinData<IIC_VSHLi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
519                                // Extra latency cycles since wbck is 6 cycles
520                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
521                                InstrStage<1, [A9_Pipe1]>,
522                                InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
523   //
524   // Double-register Integer Binary (4 cycle)
525   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
526                                // Extra latency cycles since wbck is 6 cycles
527                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
528                                InstrStage<1, [A9_Pipe1]>,
529                                InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
530   //
531   // Quad-register Integer Binary (4 cycle)
532   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
533                                // Extra latency cycles since wbck is 6 cycles
534                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
535                                InstrStage<1, [A9_Pipe1]>,
536                                InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
537   //
538   // Double-register Integer Subtract (4 cycle); separate class from IIC_VSUBiD
539   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A9_DRegsN],   0, Required>,
540                                // Extra latency cycles since wbck is 6 cycles
541                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
542                                InstrStage<1, [A9_Pipe1]>,
543                                InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
544   //
545   // Quad-register Integer Subtract (4 cycle); separate class from IIC_VSUBiQ
546   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A9_DRegsN],   0, Required>,
547                                // Extra latency cycles since wbck is 6 cycles
548                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
549                                InstrStage<1, [A9_Pipe1]>,
550                                InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
551
552   //
553   // Double-register Integer Count
554   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
555                                // Extra latency cycles since wbck is 6 cycles
556                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
557                                InstrStage<1, [A9_Pipe1]>,
558                                InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
559   //
560   // Quad-register Integer Count
561   // Result written in N3, but that is relative to the last cycle of multicycle,
562   // so we use 4 for those cases
563   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
564                                // Extra latency cycles since wbck is 7 cycles
565                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
566                                InstrStage<1, [A9_Pipe1]>,
567                                InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
568   //
569   // Double-register Absolute Difference and Accumulate
570   InstrItinData<IIC_VABAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
571                                // Extra latency cycles since wbck is 6 cycles
572                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
573                                InstrStage<1, [A9_Pipe1]>,
574                                InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
575   //
576   // Quad-register Absolute Difference and Accumulate
577   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
578                                // Extra latency cycles since wbck is 6 cycles
579                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
580                                InstrStage<1, [A9_Pipe1]>,
581                                InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
582   //
583   // Double-register Integer Pair Add Long
584   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
585                                // Extra latency cycles since wbck is 6 cycles
586                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
587                                InstrStage<1, [A9_Pipe1]>,
588                                InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
589   //
590   // Quad-register Integer Pair Add Long
591   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
592                                // Extra latency cycles since wbck is 6 cycles
593                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
594                                InstrStage<1, [A9_Pipe1]>,
595                                InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
596
597   //
598   // Double-register Integer Multiply (.8, .16)
599   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
600                                // Extra latency cycles since wbck is 6 cycles
601                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
602                                InstrStage<1, [A9_Pipe1]>,
603                                InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
604   //
605   // Quad-register Integer Multiply (.8, .16)
606   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
607                                // Extra latency cycles since wbck is 7 cycles
608                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
609                                InstrStage<1, [A9_Pipe1]>,
610                                InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
611
612   //
613   // Double-register Integer Multiply (.32)
614   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
615                                // Extra latency cycles since wbck is 7 cycles
616                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
617                                InstrStage<1, [A9_Pipe1]>,
618                                InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
619   //
620   // Quad-register Integer Multiply (.32)
621   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
622                                // Extra latency cycles since wbck is 9 cycles
623                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
624                                InstrStage<1, [A9_Pipe1]>,
625                                InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
626   //
627   // Double-register Integer Multiply-Accumulate (.8, .16)
628   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN],   0, Required>,
629                                // Extra latency cycles since wbck is 6 cycles
630                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
631                                InstrStage<1, [A9_Pipe1]>,
632                                InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
633   //
634   // Double-register Integer Multiply-Accumulate (.32)
635   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN],   0, Required>,
636                                // Extra latency cycles since wbck is 7 cycles
637                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
638                                InstrStage<1, [A9_Pipe1]>,
639                                InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
640   //
641   // Quad-register Integer Multiply-Accumulate (.8, .16)
642   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
643                                // Extra latency cycles since wbck is 7 cycles
644                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
645                                InstrStage<1, [A9_Pipe1]>,
646                                InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
647   //
648   // Quad-register Integer Multiply-Accumulate (.32)
649   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN],   0, Required>,
650                                // Extra latency cycles since wbck is 9 cycles
651                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
652                                InstrStage<1, [A9_Pipe1]>,
653                                InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
654   //
655   // Move Immediate
656   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A9_DRegsN],   0, Required>,
657                                // Extra latency cycles since wbck is 6 cycles
658                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
659                                InstrStage<1, [A9_Pipe1]>,
660                                InstrStage<1, [A9_NPipe]>], [3]>,
661   //
662   // Double-register Permute Move
663   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
664   // FIXME: all latencies are arbitrary, no information is available
665                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
666                                InstrStage<1, [A9_Pipe1]>,
667                                InstrStage<1, [A9_LSPipe]>], [2, 1]>,
668   //
669   // Quad-register Permute Move
670   // Result written in N2, but that is relative to the last cycle of multicycle,
671   // so we use 3 for those cases
672   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
673   // FIXME: all latencies are arbitrary, no information is available
674                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
675                                InstrStage<1, [A9_Pipe1]>,
676                                InstrStage<2, [A9_NPipe]>], [3, 1]>,
677   //
678   // Integer to Single-precision Move
679   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
680   // FIXME: all latencies are arbitrary, no information is available
681                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
682                                InstrStage<1, [A9_Pipe1]>,
683                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
684   //
685   // Integer to Double-precision Move
686   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
687   // FIXME: all latencies are arbitrary, no information is available
688                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
689                                InstrStage<1, [A9_Pipe1]>,
690                                InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
691   //
692   // Single-precision to Integer Move
693   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
694   // FIXME: all latencies are arbitrary, no information is available
695                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
696                                InstrStage<1, [A9_Pipe1]>,
697                                InstrStage<1, [A9_NPipe]>], [2, 1]>,
698   //
699   // Double-precision to Integer Move
700   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A9_DRegsN],   0, Required>,
701   // FIXME: all latencies are arbitrary, no information is available
702                                InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
703                                InstrStage<1, [A9_Pipe1]>,
704                                InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
705   //
706   // Integer to Lane Move
707   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN],   0, Required>,
708   // FIXME: all latencies are arbitrary, no information is available
709                                InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
710                                InstrStage<1, [A9_Pipe1]>,
711                                InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
712
713   //
714   // Double-register FP Unary
715   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
716                                // Extra latency cycles since wbck is 6 cycles
717                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
718                                InstrStage<1, [A9_Pipe1]>,
719                                InstrStage<1, [A9_NPipe]>], [5, 2]>,
720   //
721   // Quad-register FP Unary
722   // Result written in N5, but that is relative to the last cycle of multicycle,
723   // so we use 6 for those cases
724   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
725                                // Extra latency cycles since wbck is 7 cycles
726                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
727                                InstrStage<1, [A9_Pipe1]>,
728                                InstrStage<2, [A9_NPipe]>], [6, 2]>,
729   //
730   // Double-register FP Binary
731   // FIXME: We're using this itin for many instructions and the [2, 2] source
732   // operand cycles here are too optimistic.
733   InstrItinData<IIC_VBIND,    [InstrStage<1, [A9_DRegsN],   0, Required>,
734                                // Extra latency cycles since wbck is 6 cycles
735                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
736                                InstrStage<1, [A9_Pipe1]>,
737                                InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
738   //
739   // Quad-register FP Binary
740   // Result written in N5, but that is relative to the last cycle of multicycle,
741   // so we use 6 for those cases
742   // FIXME: We're using this itin for many instructions and the [2, 2] source
743   // operand cycles here are too optimistic.
744   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
745                                // Extra latency cycles since wbck is 7 cycles
746                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
747                                InstrStage<1, [A9_Pipe1]>,
748                                InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
749   //
750   // Double-register FP Multiply-Accumulate
751   InstrItinData<IIC_VMACD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
752                                // Extra latency cycles since wbck is 7 cycles
753                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
754                                InstrStage<1, [A9_Pipe1]>,
755                                InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
756   //
757   // Quad-register FP Multiply-Accumulate
758   // Result written in N9, but that is relative to the last cycle of multicycle,
759   // so we use 10 for those cases
760   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
761                                // Extra latency cycles since wbck is 9 cycles
762                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
763                                InstrStage<1, [A9_Pipe1]>,
764                                InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
765   //
766   // Double-register Reciprocal Step
767   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
768                                // Extra latency cycles since wbck is 7 cycles
769                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
770                                InstrStage<1, [A9_Pipe1]>,
771                                InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
772   //
773   // Quad-register Reciprocal Step
774   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
775                                // Extra latency cycles since wbck is 9 cycles
776                                InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
777                                InstrStage<1, [A9_Pipe1]>,
778                                InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
779   //
780   // Double-register Permute
781   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A9_DRegsN],   0, Required>,
782                                // Extra latency cycles since wbck is 6 cycles
783                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
784                                InstrStage<1, [A9_Pipe1]>,
785                                InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
786   //
787   // Quad-register Permute
788   // Result written in N2, but that is relative to the last cycle of multicycle,
789   // so we use 3 for those cases
790   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A9_DRegsN],   0, Required>,
791                                // Extra latency cycles since wbck is 7 cycles
792                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
793                                InstrStage<1, [A9_Pipe1]>,
794                                InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
795   //
796   // Quad-register Permute (3 cycle issue)
797   // Result written in N2, but that is relative to the last cycle of multicycle,
798   // so we use 4 for those cases
799   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A9_DRegsN],   0, Required>,
800                                // Extra latency cycles since wbck is 8 cycles
801                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
802                                InstrStage<1, [A9_Pipe1]>,
803                                InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
804
805   //
806   // Double-register VEXT
807   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A9_DRegsN],   0, Required>,
808                                // Extra latency cycles since wbck is 6 cycles
809                                InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
810                                InstrStage<1, [A9_Pipe1]>,
811                                InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
812   //
813   // Quad-register VEXT
814   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A9_DRegsN],   0, Required>,
815                                // Extra latency cycles since wbck is 7 cycles
816                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
817                                InstrStage<1, [A9_Pipe1]>,
818                                InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
819   //
820   // VTB
821   InstrItinData<IIC_VTB1,     [InstrStage<1, [A9_DRegsN],   0, Required>,
822                                // Extra latency cycles since wbck is 7 cycles
823                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
824                                InstrStage<1, [A9_Pipe1]>,
825                                InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
826   InstrItinData<IIC_VTB2,     [InstrStage<2, [A9_DRegsN],   0, Required>,
827                                // Extra latency cycles since wbck is 7 cycles
828                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
829                                InstrStage<1, [A9_Pipe1]>,
830                                InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
831   InstrItinData<IIC_VTB3,     [InstrStage<2, [A9_DRegsN],   0, Required>,
832                                // Extra latency cycles since wbck is 8 cycles
833                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
834                                InstrStage<1, [A9_Pipe1]>,
835                                InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
836   InstrItinData<IIC_VTB4,     [InstrStage<1, [A9_DRegsN],   0, Required>,
837                                // Extra latency cycles since wbck is 8 cycles
838                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
839                                InstrStage<1, [A9_Pipe1]>,
840                                InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
841   //
842   // VTBX
843   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A9_DRegsN],   0, Required>,
844                                // Extra latency cycles since wbck is 7 cycles
845                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
846                                InstrStage<1, [A9_Pipe1]>,
847                                InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
848   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A9_DRegsN],   0, Required>,
849                                // Extra latency cycles since wbck is 7 cycles
850                                InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
851                                InstrStage<1, [A9_Pipe1]>,
852                                InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
853   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A9_DRegsN],   0, Required>,
854                                // Extra latency cycles since wbck is 8 cycles
855                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
856                                InstrStage<1, [A9_Pipe1]>,
857                                InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
858   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A9_DRegsN],   0, Required>,
859                                // Extra latency cycles since wbck is 8 cycles
860                                InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
861                                InstrStage<1, [A9_Pipe1]>,
862                               InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
863 ]>;