Model Cortex-a9 load to SUB, RSB, ADD, ADC, SBC, RSC, CMN, MVN, or CMP
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA8.td
1 //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A8 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Scheduling information derived from "Cortex-A8 Technical Reference Manual".
16 // Functional Units.
// Functional units reserved by the InstrStage entries of the Cortex-A8
// itineraries defined in this file.
17 def A8_Issue   : FuncUnit; // issue; load/store itineraries reserve it to enforce the 1 load/store per cycle limit
18 def A8_Pipe0   : FuncUnit; // pipeline 0
19 def A8_Pipe1   : FuncUnit; // pipeline 1
20 def A8_LdSt0   : FuncUnit; // pipeline 0 load/store
21 def A8_LdSt1   : FuncUnit; // pipeline 1 load/store (listed in the unit set, but no stage in this file reserves it)
22 def A8_NPipe   : FuncUnit; // NEON ALU/MUL pipe
23 def A8_NLSPipe : FuncUnit; // NEON LS pipe
24 //
25 // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
26 //
// Itinerary table for the Cortex-A8. Each InstrItinData entry pairs an
// itinerary class (IIC_*) with the functional-unit stages it reserves and,
// where given, a trailing list of operand cycles.
// NOTE(review): the optional trailing 0 on some InstrStage entries is
// presumably the "cycles until the next stage starts" field, i.e. the next
// stage begins in the same cycle -- confirm against TargetItinerary.td.
27 def CortexA8Itineraries : ProcessorItineraries<
28   [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe],
29   [], [
30   // Two fully-pipelined integer ALU pipelines
31   //
32   // No operand cycles
33   InstrItinData<IIC_iALUx    , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
34   //
35   // Binary Instructions that produce a result
36   InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
37   InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
38   InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
39   InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
40   InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
41   //
42   // Bitwise Instructions that produce a result
43   InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
44   InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
45   InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
46   InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
47   //
48   // Unary Instructions that produce a result
49   InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
50   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
51   //
52   // Zero and sign extension instructions
53   InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
54   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
55   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
56   //
57   // Compare instructions
58   InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
59   InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
60   InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
61   InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
62   //
63   // Test instructions
64   InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
65   InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
66   InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
67   InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
68   //
69   // Move instructions, unconditional
70   InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
71   InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
72   InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
73   InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
74   InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
75                              InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
76   //
77   // Move instructions, conditional
78   InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
79   InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
80   InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
81   InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
82   //
83   // MVN instructions
84   InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
85   InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
86   InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
87   InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
88
89   // Integer multiply pipeline
90   // Result written in E5, but that is relative to the last cycle of multicycle,
91   // so we use 6 for those cases
92   //
93   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
94   InstrItinData<IIC_iMAC16   , [InstrStage<1, [A8_Pipe1], 0>,
95                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
96   InstrItinData<IIC_iMUL32   , [InstrStage<1, [A8_Pipe1], 0>,
97                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
98   InstrItinData<IIC_iMAC32   , [InstrStage<1, [A8_Pipe1], 0>,
99                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
100   InstrItinData<IIC_iMUL64   , [InstrStage<2, [A8_Pipe1], 0>,
101                                 InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
102   InstrItinData<IIC_iMAC64   , [InstrStage<2, [A8_Pipe1], 0>,
103                                 InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
104
105   // Integer load pipeline
106   //
107   // loads have an extra cycle of latency, but are fully pipelined
108   // use A8_Issue to enforce the 1 load/store per cycle limit
109   //
110   // Immediate offset
111   InstrItinData<IIC_iLoadi   , [InstrStage<1, [A8_Issue], 0>,
112                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
113                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
114   //
115   // Register offset
116   InstrItinData<IIC_iLoadr   , [InstrStage<1, [A8_Issue], 0>,
117                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
118                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
119   //
120   // Scaled register offset, issues over 2 cycles
121   InstrItinData<IIC_iLoadsi  , [InstrStage<2, [A8_Issue], 0>,
122                                 InstrStage<1, [A8_Pipe0], 0>,
123                                 InstrStage<1, [A8_Pipe1]>,
124                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
125                                 InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
126   //
127   // Immediate offset with update
128   InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A8_Issue], 0>,
129                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
130                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
131   //
132   // Register offset with update
133   InstrItinData<IIC_iLoadru  , [InstrStage<1, [A8_Issue], 0>,
134                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
135                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
136   //
137   // Scaled register offset with update, issues over 2 cycles
138   InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
139                                 InstrStage<1, [A8_Pipe0], 0>,
140                                 InstrStage<1, [A8_Pipe1]>,
141                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
142                                 InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
143   //
144   // Load multiple
145   InstrItinData<IIC_iLoadm   , [InstrStage<2, [A8_Issue], 0>,
146                                 InstrStage<2, [A8_Pipe0], 0>,
147                                 InstrStage<2, [A8_Pipe1]>,
148                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
149                                 InstrStage<1, [A8_LdSt0]>]>,
150
151   //
152   // Load multiple plus branch
153   InstrItinData<IIC_iLoadmBr , [InstrStage<2, [A8_Issue], 0>,
154                                 InstrStage<2, [A8_Pipe0], 0>,
155                                 InstrStage<2, [A8_Pipe1]>,
156                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
157                                 InstrStage<1, [A8_LdSt0]>,
158                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
159
160   //
161   // iLoadi + iALUr for t2LDRpci_pic.
162   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Issue], 0>,
163                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
164                                 InstrStage<1, [A8_LdSt0]>,
165                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
166
167
168   // Integer store pipeline
169   //
170   // use A8_Issue to enforce the 1 load/store per cycle limit
171   //
172   // Immediate offset
173   InstrItinData<IIC_iStorei  , [InstrStage<1, [A8_Issue], 0>,
174                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
175                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
176   //
177   // Register offset
178   InstrItinData<IIC_iStorer  , [InstrStage<1, [A8_Issue], 0>,
179                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
180                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
181   //
182   // Scaled register offset, issues over 2 cycles
183   InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
184                                 InstrStage<1, [A8_Pipe0], 0>,
185                                 InstrStage<1, [A8_Pipe1]>,
186                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
187                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
188   //
189   // Immediate offset with update
190   InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
191                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
192                                 InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
193   //
194   // Register offset with update
195   InstrItinData<IIC_iStoreru  , [InstrStage<1, [A8_Issue], 0>,
196                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
197                                 InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
198   //
199   // Scaled register offset with update, issues over 2 cycles
200   InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
201                                 InstrStage<1, [A8_Pipe0], 0>,
202                                 InstrStage<1, [A8_Pipe1]>,
203                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
204                                 InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
205   //
206   // Store multiple
207   InstrItinData<IIC_iStorem  , [InstrStage<2, [A8_Issue], 0>,
208                                 InstrStage<2, [A8_Pipe0], 0>,
209                                 InstrStage<2, [A8_Pipe1]>,
210                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
211                                 InstrStage<1, [A8_LdSt0]>]>,
212
213   // Branch
214   //
215   // no delay slots, so the latency of a branch is unimportant
216   InstrItinData<IIC_Br      , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
217
218   // VFP
219   // Issue through integer pipeline, and execute in NEON unit. We assume
220   // RunFast mode so that NFP pipeline is used for single-precision when
221   // possible.
222   //
223   // FP Special Register to Integer Register File Move
224   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
225                               InstrStage<1, [A8_NLSPipe]>]>,
226   //
227   // Single-precision FP Unary
228   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
229                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
230   //
231   // Double-precision FP Unary
232   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
233                                InstrStage<4, [A8_NPipe], 0>,
234                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
235   //
236   // Single-precision FP Compare
237   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
238                                InstrStage<1, [A8_NPipe]>], [1, 1]>,
239   //
240   // Double-precision FP Compare
241   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
242                                InstrStage<4, [A8_NPipe], 0>,
243                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
244   //
245   // Single to Double FP Convert
246   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
247                                InstrStage<7, [A8_NPipe], 0>,
248                                InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
249   //
250   // Double to Single FP Convert
251   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
252                                InstrStage<5, [A8_NPipe], 0>,
253                                InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
254   //
255   // Single-Precision FP to Integer Convert
256   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
257                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
258   //
259   // Double-Precision FP to Integer Convert
260   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
261                                InstrStage<8, [A8_NPipe], 0>,
262                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
263   //
264   // Integer to Single-Precision FP Convert
265   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
266                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
267   //
268   // Integer to Double-Precision FP Convert
269   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
270                                InstrStage<8, [A8_NPipe], 0>,
271                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
272   //
273   // Single-precision FP ALU
274   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
275                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
276   //
277   // Double-precision FP ALU
278   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
279                                InstrStage<9, [A8_NPipe], 0>,
280                                InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
281   //
282   // Single-precision FP Multiply
283   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
284                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
285   //
286   // Double-precision FP Multiply
287   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
288                                InstrStage<11, [A8_NPipe], 0>,
289                                InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
290   //
291   // Single-precision FP MAC
292   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
293                                InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
294   //
295   // Double-precision FP MAC
296   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
297                                InstrStage<19, [A8_NPipe], 0>,
298                                InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
299   //
300   // Single-precision FP DIV
301   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
302                                InstrStage<20, [A8_NPipe], 0>,
303                                InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
304   //
305   // Double-precision FP DIV
306   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
307                                InstrStage<29, [A8_NPipe], 0>,
308                                InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
309   //
310   // Single-precision FP SQRT
311   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
312                                InstrStage<19, [A8_NPipe], 0>,
313                                InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
314   //
315   // Double-precision FP SQRT
316   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
317                                InstrStage<29, [A8_NPipe], 0>,
318                                InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
319   //
320   // Single-precision FP Load
321   // use A8_Issue to enforce the 1 load/store per cycle limit
322   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
323                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
324                                InstrStage<1, [A8_LdSt0], 0>,
325                                InstrStage<1, [A8_NLSPipe]>]>,
326   //
327   // Double-precision FP Load
328   // use A8_Issue to enforce the 1 load/store per cycle limit
329   InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
330                                InstrStage<1, [A8_Pipe0], 0>,
331                                InstrStage<1, [A8_Pipe1]>,
332                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
333                                InstrStage<1, [A8_LdSt0], 0>,
334                                InstrStage<1, [A8_NLSPipe]>]>,
335   //
336   // FP Load Multiple
337   // use A8_Issue to enforce the 1 load/store per cycle limit
338   InstrItinData<IIC_fpLoadm,  [InstrStage<3, [A8_Issue], 0>,
339                                InstrStage<2, [A8_Pipe0], 0>,
340                                InstrStage<2, [A8_Pipe1]>,
341                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
342                                InstrStage<1, [A8_LdSt0], 0>,
343                                InstrStage<1, [A8_NLSPipe]>]>,
344   //
345   // Single-precision FP Store
346   // use A8_Issue to enforce the 1 load/store per cycle limit
347   InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
348                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
349                                InstrStage<1, [A8_LdSt0], 0>,
350                                InstrStage<1, [A8_NLSPipe]>]>,
351   //
352   // Double-precision FP Store
353   // use A8_Issue to enforce the 1 load/store per cycle limit
354   InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
355                                InstrStage<1, [A8_Pipe0], 0>,
356                                InstrStage<1, [A8_Pipe1]>,
357                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
358                                InstrStage<1, [A8_LdSt0], 0>,
359                                InstrStage<1, [A8_NLSPipe]>]>,
360   //
361   // FP Store Multiple
362   // use A8_Issue to enforce the 1 load/store per cycle limit
363   InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
364                                InstrStage<2, [A8_Pipe0], 0>,
365                                InstrStage<2, [A8_Pipe1]>,
366                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
367                                InstrStage<1, [A8_LdSt0], 0>,
368                                InstrStage<1, [A8_NLSPipe]>]>,
369
370   // NEON
371   // Issue through integer pipeline, and execute in NEON unit.
372   //
373   // VLD1
374   // FIXME: We don't model this instruction properly
375   InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Issue], 0>,
376                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
377                                InstrStage<1, [A8_LdSt0], 0>,
378                                InstrStage<1, [A8_NLSPipe]>]>,
379   //
380   // VLD2
381   // FIXME: We don't model this instruction properly
382   InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Issue], 0>,
383                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
384                                InstrStage<1, [A8_LdSt0], 0>,
385                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
386   //
387   // VLD3
388   // FIXME: We don't model this instruction properly
389   InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Issue], 0>,
390                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
391                                InstrStage<1, [A8_LdSt0], 0>,
392                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
393   //
394   // VLD4
395   // FIXME: We don't model this instruction properly
396   InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Issue], 0>,
397                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
398                                InstrStage<1, [A8_LdSt0], 0>,
399                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
400   //
401   // VST
402   // FIXME: We don't model this instruction properly
403   InstrItinData<IIC_VST,      [InstrStage<1, [A8_Issue], 0>,
404                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
405                                InstrStage<1, [A8_LdSt0], 0>,
406                                InstrStage<1, [A8_NLSPipe]>]>,
407   //
408   // Double-register FP Unary
409   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
410                                InstrStage<1, [A8_NPipe]>], [5, 2]>,
411   //
412   // Quad-register FP Unary
413   // Result written in N5, but that is relative to the last cycle of multicycle,
414   // so we use 6 for those cases
415   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
416                                InstrStage<2, [A8_NPipe]>], [6, 2]>,
417   //
418   // Double-register FP Binary
419   InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
420                                InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
421   //
422   // Quad-register FP Binary
423   // Result written in N5, but that is relative to the last cycle of multicycle,
424   // so we use 6 for those cases
425   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
426                                InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
427   //
428   // Move Immediate
429   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
430                                InstrStage<1, [A8_NPipe]>], [3]>,
431   //
432   // Double-register Permute Move
433   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
434                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
435   //
436   // Quad-register Permute Move
437   // Result written in N2, but that is relative to the last cycle of multicycle,
438   // so we use 3 for those cases
439   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
440                                InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
441   //
442   // Integer to Single-precision Move
443   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
444                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
445   //
446   // Integer to Double-precision Move
447   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
448                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
449   //
450   // Single-precision to Integer Move
451   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
452                                InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
453   //
454   // Double-precision to Integer Move
455   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
456                                InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
457   //
458   // Integer to Lane Move
459   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
460                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
461   //
462   // Double-register Permute
463   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
464                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
465   //
466   // Quad-register Permute
467   // Result written in N2, but that is relative to the last cycle of multicycle,
468   // so we use 3 for those cases
469   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
470                                InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
471   //
472   // Quad-register Permute (3 cycle issue)
473   // Result written in N2, but that is relative to the last cycle of multicycle,
474   // so we use 4 for those cases
475   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
476                                InstrStage<1, [A8_NLSPipe]>,
477                                InstrStage<1, [A8_NPipe], 0>,
478                                InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
479   //
480   // Double-register FP Multiply-Accumulate
481   InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
482                                InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
483   //
484   // Quad-register FP Multiply-Accumulate
485   // Result written in N9, but that is relative to the last cycle of multicycle,
486   // so we use 10 for those cases
487   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
488                                InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
489   //
490   // Double-register Reciprocal Step
491   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
492                                InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
493   //
494   // Quad-register Reciprocal Step
495   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
496                                InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
497   //
498   // Double-register Integer Count
499   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
500                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
501   //
502   // Quad-register Integer Count
503   // Result written in N3, but that is relative to the last cycle of multicycle,
504   // so we use 4 for those cases
505   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
506                                InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
507   //
508   // Double-register Integer Unary
509   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
510                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
511   //
512   // Quad-register Integer Unary
513   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
514                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
515   //
516   // Double-register Integer Q-Unary
517   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
518                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
519   //
520   // Quad-register Integer Q-Unary
521   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
522                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
523   //
524   // Double-register Integer Binary
525   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
526                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
527   //
528   // Quad-register Integer Binary
529   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
530                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
531   //
532   // Double-register Integer Binary (4 cycle)
533   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
534                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
535   //
536   // Quad-register Integer Binary (4 cycle)
537   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
538                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
539
540   //
541   // Double-register Integer Subtract
542   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
543                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
544   //
545   // Quad-register Integer Subtract
546   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
547                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
548   //
549   // Double-register Integer Subtract (4 cycle)
550   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
551                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
552   //
553   // Quad-register Integer Subtract (4 cycle)
554   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
555                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
556   //
557   // Double-register Integer Shift
558   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
559                                InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
560   //
561   // Quad-register Integer Shift
562   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
563                                InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
564   //
565   // Double-register Integer Shift (4 cycle)
566   InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
567                                InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
568   //
569   // Quad-register Integer Shift (4 cycle)
570   InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
571                                InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
572   //
573   // Double-register Integer Pair Add Long
574   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
575                                InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
576   //
577   // Quad-register Integer Pair Add Long
578   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
579                                InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
580   //
581   // Double-register Absolute Difference and Accumulate
582   InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
583                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
584   //
585   // Quad-register Absolute Difference and Accumulate
586   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
587                                InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
588
589   //
590   // Double-register Integer Multiply (.8, .16)
591   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
592                                InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
593   //
594   // Double-register Integer Multiply (.32)
595   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
596                                InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
597   //
598   // Quad-register Integer Multiply (.8, .16)
599   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
600                                InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
601   //
602   // Quad-register Integer Multiply (.32)
603   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
604                                InstrStage<1, [A8_NPipe]>,
605                                InstrStage<2, [A8_NLSPipe], 0>,
606                                InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
607   //
608   // Double-register Integer Multiply-Accumulate (.8, .16)
609   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
610                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
611   //
612   // Double-register Integer Multiply-Accumulate (.32)
613   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
614                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
615   //
616   // Quad-register Integer Multiply-Accumulate (.8, .16)
617   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
618                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
619   //
620   // Quad-register Integer Multiply-Accumulate (.32)
621   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
622                                InstrStage<1, [A8_NPipe]>,
623                                InstrStage<2, [A8_NLSPipe], 0>,
624                                InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
625   //
626   // Double-register VEXT
627   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
628                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
629   //
630   // Quad-register VEXT
631   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
632                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
633   //
634   // VTB
635   InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
636                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
637   InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
638                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
639   InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
640                                InstrStage<1, [A8_NLSPipe]>,
641                                InstrStage<1, [A8_NPipe], 0>,
642                                InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
643   InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
644                                InstrStage<1, [A8_NLSPipe]>,
645                                InstrStage<1, [A8_NPipe], 0>,
646                                InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
647   //
648   // VTBX
649   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
650                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
651   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
652                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
653   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
654                                InstrStage<1, [A8_NLSPipe]>,
655                                InstrStage<1, [A8_NPipe], 0>,
656                                InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
657   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
658                                InstrStage<1, [A8_NLSPipe]>,
659                                InstrStage<1, [A8_NPipe], 0>,
660                             InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
661 ]>;