Fix LDM_RET schedule itinerary.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA8.td
1 //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A8 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Scheduling information derived from "Cortex-A8 Technical Reference Manual".
16 // Functional Units.
17 def A8_Issue   : FuncUnit; // issue; load/store itineraries reserve it to enforce 1 load/store per cycle
18 def A8_Pipe0   : FuncUnit; // pipeline 0 (integer ALU pipeline)
19 def A8_Pipe1   : FuncUnit; // pipeline 1 (integer ALU pipeline)
20 def A8_LdSt0   : FuncUnit; // pipeline 0 load/store
21 def A8_LdSt1   : FuncUnit; // pipeline 1 load/store (no InstrStage in CortexA8Itineraries below uses it)
22 def A8_NPipe   : FuncUnit; // NEON ALU/MUL pipe
23 def A8_NLSPipe : FuncUnit; // NEON LS pipe (also used below by the permute/move itineraries)
24 // Itinerary table for Cortex-A8: one InstrItinData entry per itinerary class.
25 // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
26 // NOTE(review): a trailing 0 on InstrStage presumably lets the next stage start in the same cycle - confirm against the InstrStage definition.
27 def CortexA8Itineraries : ProcessorItineraries<
28   [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [
29   // Two fully-pipelined integer ALU pipelines
30   //
31   // No operand cycles
32   InstrItinData<IIC_iALUx    , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
33   //
34   // Binary Instructions that produce a result
35   InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
36   InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
37   InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
38   InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
39   //
40   // Unary Instructions that produce a result
41   InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
42   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
43   InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
44   //
45   // Compare instructions
46   InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
47   InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
48   InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
49   InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
50   //
51   // Move instructions, unconditional
52   InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
53   InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
54   InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
55   InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
56   //
57   // Move instructions, conditional
58   InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
59   InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
60   InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
61   InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
62
63   // Integer multiply pipeline
64   // Result written in E5, but that is relative to the last cycle of multicycle,
65   // so we use 6 for those cases
66   //
67   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
68   InstrItinData<IIC_iMAC16   , [InstrStage<1, [A8_Pipe1], 0>,
69                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
70   InstrItinData<IIC_iMUL32   , [InstrStage<1, [A8_Pipe1], 0>,
71                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
72   InstrItinData<IIC_iMAC32   , [InstrStage<1, [A8_Pipe1], 0>,
73                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
74   InstrItinData<IIC_iMUL64   , [InstrStage<2, [A8_Pipe1], 0>,
75                                 InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
76   InstrItinData<IIC_iMAC64   , [InstrStage<2, [A8_Pipe1], 0>,
77                                 InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
78
79   // Integer load pipeline
80   //
81   // loads have an extra cycle of latency, but are fully pipelined
82   // use A8_Issue to enforce the 1 load/store per cycle limit
83   //
84   // Immediate offset
85   InstrItinData<IIC_iLoadi   , [InstrStage<1, [A8_Issue], 0>,
86                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
87                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
88   //
89   // Register offset
90   InstrItinData<IIC_iLoadr   , [InstrStage<1, [A8_Issue], 0>,
91                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
92                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
93   //
94   // Scaled register offset, issues over 2 cycles
95   InstrItinData<IIC_iLoadsi  , [InstrStage<2, [A8_Issue], 0>,
96                                 InstrStage<1, [A8_Pipe0], 0>,
97                                 InstrStage<1, [A8_Pipe1]>,
98                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
99                                 InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
100   //
101   // Immediate offset with update
102   InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A8_Issue], 0>,
103                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
104                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
105   //
106   // Register offset with update
107   InstrItinData<IIC_iLoadru  , [InstrStage<1, [A8_Issue], 0>,
108                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
109                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
110   //
111   // Scaled register offset with update, issues over 2 cycles
112   InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
113                                 InstrStage<1, [A8_Pipe0], 0>,
114                                 InstrStage<1, [A8_Pipe1]>,
115                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
116                                 InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
117   //
118   // Load multiple
119   InstrItinData<IIC_iLoadm   , [InstrStage<2, [A8_Issue], 0>,
120                                 InstrStage<2, [A8_Pipe0], 0>,
121                                 InstrStage<2, [A8_Pipe1]>,
122                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
123                                 InstrStage<1, [A8_LdSt0]>]>,
124
125   //
126   // Load multiple plus branch
127   InstrItinData<IIC_iLoadmBr , [InstrStage<2, [A8_Issue], 0>,
128                                 InstrStage<2, [A8_Pipe0], 0>,
129                                 InstrStage<2, [A8_Pipe1]>,
130                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
131                                 InstrStage<1, [A8_LdSt0]>,
132                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
133
134   // Integer store pipeline
135   //
136   // use A8_Issue to enforce the 1 load/store per cycle limit
137   //
138   // Immediate offset
139   InstrItinData<IIC_iStorei  , [InstrStage<1, [A8_Issue], 0>,
140                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
141                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
142   //
143   // Register offset
144   InstrItinData<IIC_iStorer  , [InstrStage<1, [A8_Issue], 0>,
145                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
146                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
147   //
148   // Scaled register offset, issues over 2 cycles
149   InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
150                                 InstrStage<1, [A8_Pipe0], 0>,
151                                 InstrStage<1, [A8_Pipe1]>,
152                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
153                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
154   //
155   // Immediate offset with update
156   InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
157                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
158                                 InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
159   //
160   // Register offset with update
161   InstrItinData<IIC_iStoreru  , [InstrStage<1, [A8_Issue], 0>,
162                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
163                                 InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
164   //
165   // Scaled register offset with update, issues over 2 cycles
166   InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
167                                 InstrStage<1, [A8_Pipe0], 0>,
168                                 InstrStage<1, [A8_Pipe1]>,
169                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
170                                 InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
171   //
172   // Store multiple
173   InstrItinData<IIC_iStorem  , [InstrStage<2, [A8_Issue], 0>,
174                                 InstrStage<2, [A8_Pipe0], 0>,
175                                 InstrStage<2, [A8_Pipe1]>,
176                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
177                                 InstrStage<1, [A8_LdSt0]>]>,
178
179   // Branch
180   //
181   // no delay slots, so the latency of a branch is unimportant
182   InstrItinData<IIC_Br      , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
183
184   // VFP
185   // Issue through integer pipeline, and execute in NEON unit. We assume
186   // RunFast mode so that NFP pipeline is used for single-precision when
187   // possible.
188   //
189   // FP Special Register to Integer Register File Move
190   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
191                               InstrStage<1, [A8_NLSPipe]>]>,
192   //
193   // Single-precision FP Unary
194   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
195                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
196   //
197   // Double-precision FP Unary
198   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
199                                InstrStage<4, [A8_NPipe], 0>,
200                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
201   //
202   // Single-precision FP Compare
203   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
204                                InstrStage<1, [A8_NPipe]>], [1, 1]>,
205   //
206   // Double-precision FP Compare
207   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
208                                InstrStage<4, [A8_NPipe], 0>,
209                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
210   //
211   // Single to Double FP Convert
212   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
213                                InstrStage<7, [A8_NPipe], 0>,
214                                InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
215   //
216   // Double to Single FP Convert
217   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
218                                InstrStage<5, [A8_NPipe], 0>,
219                                InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
220   //
221   // Single-Precision FP to Integer Convert
222   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
223                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
224   //
225   // Double-Precision FP to Integer Convert
226   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
227                                InstrStage<8, [A8_NPipe], 0>,
228                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
229   //
230   // Integer to Single-Precision FP Convert
231   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
232                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
233   //
234   // Integer to Double-Precision FP Convert
235   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
236                                InstrStage<8, [A8_NPipe], 0>,
237                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
238   //
239   // Single-precision FP ALU
240   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
241                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
242   //
243   // Double-precision FP ALU
244   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
245                                InstrStage<9, [A8_NPipe], 0>,
246                                InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
247   //
248   // Single-precision FP Multiply
249   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
250                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
251   //
252   // Double-precision FP Multiply
253   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
254                                InstrStage<11, [A8_NPipe], 0>,
255                                InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
256   //
257   // Single-precision FP MAC
258   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
259                                InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
260   //
261   // Double-precision FP MAC
262   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
263                                InstrStage<19, [A8_NPipe], 0>,
264                                InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
265   //
266   // Single-precision FP DIV
267   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
268                                InstrStage<20, [A8_NPipe], 0>,
269                                InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
270   //
271   // Double-precision FP DIV
272   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
273                                InstrStage<29, [A8_NPipe], 0>,
274                                InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
275   //
276   // Single-precision FP SQRT
277   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
278                                InstrStage<19, [A8_NPipe], 0>,
279                                InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
280   //
281   // Double-precision FP SQRT
282   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
283                                InstrStage<29, [A8_NPipe], 0>,
284                                InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
285   //
286   // Single-precision FP Load
287   // use A8_Issue to enforce the 1 load/store per cycle limit
288   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
289                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
290                                InstrStage<1, [A8_LdSt0], 0>,
291                                InstrStage<1, [A8_NLSPipe]>]>,
292   //
293   // Double-precision FP Load
294   // use A8_Issue to enforce the 1 load/store per cycle limit
295   InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
296                                InstrStage<1, [A8_Pipe0], 0>,
297                                InstrStage<1, [A8_Pipe1]>,
298                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
299                                InstrStage<1, [A8_LdSt0], 0>,
300                                InstrStage<1, [A8_NLSPipe]>]>,
301   //
302   // FP Load Multiple
303   // use A8_Issue to enforce the 1 load/store per cycle limit
304   InstrItinData<IIC_fpLoadm,  [InstrStage<3, [A8_Issue], 0>,
305                                InstrStage<2, [A8_Pipe0], 0>,
306                                InstrStage<2, [A8_Pipe1]>,
307                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
308                                InstrStage<1, [A8_LdSt0], 0>,
309                                InstrStage<1, [A8_NLSPipe]>]>,
310   //
311   // Single-precision FP Store
312   // use A8_Issue to enforce the 1 load/store per cycle limit
313   InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
314                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
315                                InstrStage<1, [A8_LdSt0], 0>,
316                                InstrStage<1, [A8_NLSPipe]>]>,
317   //
318   // Double-precision FP Store
319   // use A8_Issue to enforce the 1 load/store per cycle limit
320   InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
321                                InstrStage<1, [A8_Pipe0], 0>,
322                                InstrStage<1, [A8_Pipe1]>,
323                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
324                                InstrStage<1, [A8_LdSt0], 0>,
325                                InstrStage<1, [A8_NLSPipe]>]>,
326   //
327   // FP Store Multiple
328   // use A8_Issue to enforce the 1 load/store per cycle limit
329   InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
330                                InstrStage<2, [A8_Pipe0], 0>,
331                                InstrStage<2, [A8_Pipe1]>,
332                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
333                                InstrStage<1, [A8_LdSt0], 0>,
334                                InstrStage<1, [A8_NLSPipe]>]>,
335
336   // NEON
337   // Issue through integer pipeline, and execute in NEON unit.
338   //
339   // VLD1
340   // FIXME: We don't model this instruction properly
341   InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Issue], 0>,
342                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
343                                InstrStage<1, [A8_LdSt0], 0>,
344                                InstrStage<1, [A8_NLSPipe]>]>,
345   //
346   // VLD2
347   // FIXME: We don't model this instruction properly
348   InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Issue], 0>,
349                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
350                                InstrStage<1, [A8_LdSt0], 0>,
351                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
352   //
353   // VLD3
354   // FIXME: We don't model this instruction properly
355   InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Issue], 0>,
356                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
357                                InstrStage<1, [A8_LdSt0], 0>,
358                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
359   //
360   // VLD4
361   // FIXME: We don't model this instruction properly
362   InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Issue], 0>,
363                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
364                                InstrStage<1, [A8_LdSt0], 0>,
365                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
366   //
367   // VST
368   // FIXME: We don't model this instruction properly
369   InstrItinData<IIC_VST,      [InstrStage<1, [A8_Issue], 0>,
370                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
371                                InstrStage<1, [A8_LdSt0], 0>,
372                                InstrStage<1, [A8_NLSPipe]>]>,
373   //
374   // Double-register FP Unary
375   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
376                                InstrStage<1, [A8_NPipe]>], [5, 2]>,
377   //
378   // Quad-register FP Unary
379   // Result written in N5, but that is relative to the last cycle of multicycle,
380   // so we use 6 for those cases
381   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
382                                InstrStage<2, [A8_NPipe]>], [6, 2]>,
383   //
384   // Double-register FP Binary
385   InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
386                                InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
387   //
388   // Quad-register FP Binary
389   // Result written in N5, but that is relative to the last cycle of multicycle,
390   // so we use 6 for those cases
391   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
392                                InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
393   //
394   // Move Immediate
395   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
396                                InstrStage<1, [A8_NPipe]>], [3]>,
397   //
398   // Double-register Permute Move
399   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
400                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
401   //
402   // Quad-register Permute Move
403   // Result written in N2, but that is relative to the last cycle of multicycle,
404   // so we use 3 for those cases
405   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
406                                InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
407   //
408   // Integer to Single-precision Move
409   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
410                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
411   //
412   // Integer to Double-precision Move
413   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
414                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
415   //
416   // Single-precision to Integer Move
417   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
418                                InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
419   //
420   // Double-precision to Integer Move
421   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
422                                InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
423   //
424   // Integer to Lane Move
425   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
426                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
427   //
428   // Double-register Permute
429   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
430                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
431   //
432   // Quad-register Permute
433   // Result written in N2, but that is relative to the last cycle of multicycle,
434   // so we use 3 for those cases
435   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
436                                InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
437   //
438   // Quad-register Permute (3 cycle issue)
439   // Result written in N2, but that is relative to the last cycle of multicycle,
440   // so we use 4 for those cases
441   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
442                                InstrStage<1, [A8_NLSPipe]>,
443                                InstrStage<1, [A8_NPipe], 0>,
444                                InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
445   //
446   // Double-register FP Multiply-Accumulate
447   InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
448                                InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
449   //
450   // Quad-register FP Multiply-Accumulate
451   // Result written in N9, but that is relative to the last cycle of multicycle,
452   // so we use 10 for those cases
453   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
454                                InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
455   //
456   // Double-register Reciprocal Step
457   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
458                                InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
459   //
460   // Quad-register Reciprocal Step
461   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
462                                InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
463   //
464   // Double-register Integer Count
465   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
466                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
467   //
468   // Quad-register Integer Count
469   // Result written in N3, but that is relative to the last cycle of multicycle,
470   // so we use 4 for those cases
471   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
472                                InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
473   //
474   // Double-register Integer Unary
475   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
476                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
477   //
478   // Quad-register Integer Unary
479   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
480                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
481   //
482   // Double-register Integer Q-Unary
483   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
484                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
485   //
486   // Quad-register Integer Q-Unary
487   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
488                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
489   //
490   // Double-register Integer Binary
491   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
492                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
493   //
494   // Quad-register Integer Binary
495   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
496                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
497   //
498   // Double-register Integer Binary (4 cycle)
499   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
500                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
501   //
502   // Quad-register Integer Binary (4 cycle)
503   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
504                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
505
506   //
507   // Double-register Integer Subtract
508   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
509                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
510   //
511   // Quad-register Integer Subtract
512   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
513                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
514   //
515   // Double-register Integer Subtract (4 cycle)
516   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
517                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
518   //
519   // Quad-register Integer Subtract (4 cycle)
520   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
521                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
522   //
523   // Double-register Integer Shift
524   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
525                                InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
526   //
527   // Quad-register Integer Shift
528   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
529                                InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
530   //
531   // Double-register Integer Shift (4 cycle)
532   InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
533                                InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
534   //
535   // Quad-register Integer Shift (4 cycle)
536   InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
537                                InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
538   //
539   // Double-register Integer Pair Add Long
540   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
541                                InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
542   //
543   // Quad-register Integer Pair Add Long
544   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
545                                InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
546   //
547   // Double-register Absolute Difference and Accumulate
548   InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
549                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
550   //
551   // Quad-register Absolute Difference and Accumulate
552   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
553                                InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
554
555   //
556   // Double-register Integer Multiply (.8, .16)
557   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
558                                InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
559   //
560   // Double-register Integer Multiply (.32)
561   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
562                                InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
563   //
564   // Quad-register Integer Multiply (.8, .16)
565   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
566                                InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
567   //
568   // Quad-register Integer Multiply (.32)
569   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
570                                InstrStage<1, [A8_NPipe]>,
571                                InstrStage<2, [A8_NLSPipe], 0>,
572                                InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
573   //
574   // Double-register Integer Multiply-Accumulate (.8, .16)
575   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
576                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
577   //
578   // Double-register Integer Multiply-Accumulate (.32)
579   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
580                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
581   //
582   // Quad-register Integer Multiply-Accumulate (.8, .16)
583   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
584                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
585   //
586   // Quad-register Integer Multiply-Accumulate (.32)
587   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
588                                InstrStage<1, [A8_NPipe]>,
589                                InstrStage<2, [A8_NLSPipe], 0>,
590                                InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
591   //
592   // Double-register VEXT
593   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
594                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
595   //
596   // Quad-register VEXT
597   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
598                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
599   //
600   // VTB
601   InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
602                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
603   InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
604                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
605   InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
606                                InstrStage<1, [A8_NLSPipe]>,
607                                InstrStage<1, [A8_NPipe], 0>,
608                                InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
609   InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
610                                InstrStage<1, [A8_NLSPipe]>,
611                                InstrStage<1, [A8_NPipe], 0>,
612                                InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
613   //
614   // VTBX
615   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
616                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
617   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
618                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
619   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
620                                InstrStage<1, [A8_NLSPipe]>,
621                                InstrStage<1, [A8_NPipe], 0>,
622                                InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
623   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
624                                InstrStage<1, [A8_NLSPipe]>,
625                                InstrStage<1, [A8_NPipe], 0>,
626                             InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
627 ]>;