Fix scheduling itinerary for pseudo mov immediate instructions which expand into...
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA8.td
1 //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A8 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Scheduling information derived from "Cortex-A8 Technical Reference Manual".
16 // Functional Units.
17 def A8_Issue   : FuncUnit; // issue slot; load/store itineraries reserve it to enforce 1 load/store per cycle
18 def A8_Pipe0   : FuncUnit; // integer pipeline 0 (dual-issue pair with A8_Pipe1)
19 def A8_Pipe1   : FuncUnit; // integer pipeline 1 (dual-issue pair with A8_Pipe0)
20 def A8_LdSt0   : FuncUnit; // pipeline 0 load/store
21 def A8_LdSt1   : FuncUnit; // pipeline 1 load/store (in the unit list, but no stage below reserves it)
22 def A8_NPipe   : FuncUnit; // NEON ALU/MUL pipe
23 def A8_NLSPipe : FuncUnit; // NEON LS pipe (also reserved by the NEON permute/move itineraries below)
24 //
25 // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
26 //
27 def CortexA8Itineraries : ProcessorItineraries< // functional-unit list, then one InstrItinData per itinerary class
28   [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [
29   // Two fully-pipelined integer ALU pipelines
30   // NOTE(review): [A8_Pipe0, A8_Pipe1] presumably means "either pipe" -- confirm
31   // No operand cycles
32   InstrItinData<IIC_iALUx    , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
33   //
34   // Binary Instructions that produce a result
35   InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
36   InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
37   InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
38   InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
39   //
40   // Unary Instructions that produce a result
41   InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
42   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
43   InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
44   //
45   // Compare instructions
46   InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
47   InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
48   InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
49   InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
50   //
51   // Move instructions, unconditional (IIC_iMOVix2 holds an ALU pipe for two 1-cycle stages)
52   InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
53   InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
54                              InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
55   InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
56   InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
57   InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
58   //
59   // Move instructions, conditional
60   InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
61   InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
62   InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
63   InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
64
65   // Integer multiply pipeline
66   // Result written in E5, but that is relative to the last cycle of multicycle,
67   // so we use 6 for those cases
68   // NOTE(review): a trailing ", 0" presumably lets the next stage start in the same cycle -- confirm
69   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
70   InstrItinData<IIC_iMAC16   , [InstrStage<1, [A8_Pipe1], 0>,
71                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
72   InstrItinData<IIC_iMUL32   , [InstrStage<1, [A8_Pipe1], 0>,
73                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
74   InstrItinData<IIC_iMAC32   , [InstrStage<1, [A8_Pipe1], 0>,
75                                 InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
76   InstrItinData<IIC_iMUL64   , [InstrStage<2, [A8_Pipe1], 0>,
77                                 InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
78   InstrItinData<IIC_iMAC64   , [InstrStage<2, [A8_Pipe1], 0>,
79                                 InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
80
81   // Integer load pipeline
82   //
83   // loads have an extra cycle of latency, but are fully pipelined
84   // use A8_Issue to enforce the 1 load/store per cycle limit
85   //
86   // Immediate offset
87   InstrItinData<IIC_iLoadi   , [InstrStage<1, [A8_Issue], 0>,
88                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
89                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
90   //
91   // Register offset
92   InstrItinData<IIC_iLoadr   , [InstrStage<1, [A8_Issue], 0>,
93                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
94                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
95   //
96   // Scaled register offset, issues over 2 cycles
97   InstrItinData<IIC_iLoadsi  , [InstrStage<2, [A8_Issue], 0>,
98                                 InstrStage<1, [A8_Pipe0], 0>,
99                                 InstrStage<1, [A8_Pipe1]>,
100                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
101                                 InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>,
102   //
103   // Immediate offset with update
104   InstrItinData<IIC_iLoadiu  , [InstrStage<1, [A8_Issue], 0>,
105                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
106                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>,
107   //
108   // Register offset with update
109   InstrItinData<IIC_iLoadru  , [InstrStage<1, [A8_Issue], 0>,
110                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
111                                 InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>,
112   //
113   // Scaled register offset with update, issues over 2 cycles
114   InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>,
115                                 InstrStage<1, [A8_Pipe0], 0>,
116                                 InstrStage<1, [A8_Pipe1]>,
117                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
118                                 InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>,
119   //
120   // Load multiple
121   InstrItinData<IIC_iLoadm   , [InstrStage<2, [A8_Issue], 0>,
122                                 InstrStage<2, [A8_Pipe0], 0>,
123                                 InstrStage<2, [A8_Pipe1]>,
124                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
125                                 InstrStage<1, [A8_LdSt0]>]>,
126
127   //
128   // Load multiple plus branch
129   InstrItinData<IIC_iLoadmBr , [InstrStage<2, [A8_Issue], 0>,
130                                 InstrStage<2, [A8_Pipe0], 0>,
131                                 InstrStage<2, [A8_Pipe1]>,
132                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
133                                 InstrStage<1, [A8_LdSt0]>,
134                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
135
136   // Integer store pipeline
137   //
138   // use A8_Issue to enforce the 1 load/store per cycle limit
139   //
140   // Immediate offset
141   InstrItinData<IIC_iStorei  , [InstrStage<1, [A8_Issue], 0>,
142                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
143                                 InstrStage<1, [A8_LdSt0]>], [3, 1]>,
144   //
145   // Register offset
146   InstrItinData<IIC_iStorer  , [InstrStage<1, [A8_Issue], 0>,
147                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
148                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
149   //
150   // Scaled register offset, issues over 2 cycles
151   InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>,
152                                 InstrStage<1, [A8_Pipe0], 0>,
153                                 InstrStage<1, [A8_Pipe1]>,
154                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
155                                 InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>,
156   //
157   // Immediate offset with update
158   InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>,
159                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
160                                 InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>,
161   //
162   // Register offset with update
163   InstrItinData<IIC_iStoreru  , [InstrStage<1, [A8_Issue], 0>,
164                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
165                                 InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>,
166   //
167   // Scaled register offset with update, issues over 2 cycles
168   InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>,
169                                 InstrStage<1, [A8_Pipe0], 0>,
170                                 InstrStage<1, [A8_Pipe1]>,
171                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
172                                 InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>,
173   //
174   // Store multiple
175   InstrItinData<IIC_iStorem  , [InstrStage<2, [A8_Issue], 0>,
176                                 InstrStage<2, [A8_Pipe0], 0>,
177                                 InstrStage<2, [A8_Pipe1]>,
178                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
179                                 InstrStage<1, [A8_LdSt0]>]>,
180
181   // Branch
182   //
183   // no delay slots, so the latency of a branch is unimportant
184   InstrItinData<IIC_Br      , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
185
186   // VFP
187   // Issue through integer pipeline, and execute in NEON unit. We assume
188   // RunFast mode so that NFP pipeline is used for single-precision when
189   // possible.
190   //
191   // FP Special Register to Integer Register File Move
192   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
193                               InstrStage<1, [A8_NLSPipe]>]>,
194   //
195   // Single-precision FP Unary
196   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
197                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
198   //
199   // Double-precision FP Unary
200   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
201                                InstrStage<4, [A8_NPipe], 0>,
202                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
203   //
204   // Single-precision FP Compare
205   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
206                                InstrStage<1, [A8_NPipe]>], [1, 1]>,
207   //
208   // Double-precision FP Compare
209   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
210                                InstrStage<4, [A8_NPipe], 0>,
211                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
212   //
213   // Single to Double FP Convert
214   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
215                                InstrStage<7, [A8_NPipe], 0>,
216                                InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
217   //
218   // Double to Single FP Convert
219   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
220                                InstrStage<5, [A8_NPipe], 0>,
221                                InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
222   //
223   // Single-Precision FP to Integer Convert
224   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
225                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
226   //
227   // Double-Precision FP to Integer Convert
228   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
229                                InstrStage<8, [A8_NPipe], 0>,
230                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
231   //
232   // Integer to Single-Precision FP Convert
233   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
234                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
235   //
236   // Integer to Double-Precision FP Convert
237   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
238                                InstrStage<8, [A8_NPipe], 0>,
239                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
240   //
241   // Single-precision FP ALU
242   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
243                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
244   //
245   // Double-precision FP ALU
246   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
247                                InstrStage<9, [A8_NPipe], 0>,
248                                InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
249   //
250   // Single-precision FP Multiply
251   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
252                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
253   //
254   // Double-precision FP Multiply
255   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
256                                InstrStage<11, [A8_NPipe], 0>,
257                                InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
258   //
259   // Single-precision FP MAC
260   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
261                                InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
262   //
263   // Double-precision FP MAC
264   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
265                                InstrStage<19, [A8_NPipe], 0>,
266                                InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
267   //
268   // Single-precision FP DIV
269   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
270                                InstrStage<20, [A8_NPipe], 0>,
271                                InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
272   //
273   // Double-precision FP DIV
274   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
275                                InstrStage<29, [A8_NPipe], 0>,
276                                InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
277   //
278   // Single-precision FP SQRT
279   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
280                                InstrStage<19, [A8_NPipe], 0>,
281                                InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
282   //
283   // Double-precision FP SQRT
284   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
285                                InstrStage<29, [A8_NPipe], 0>,
286                                InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
287   //
288   // Single-precision FP Load
289   // use A8_Issue to enforce the 1 load/store per cycle limit
290   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
291                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
292                                InstrStage<1, [A8_LdSt0], 0>,
293                                InstrStage<1, [A8_NLSPipe]>]>,
294   //
295   // Double-precision FP Load
296   // use A8_Issue to enforce the 1 load/store per cycle limit
297   InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
298                                InstrStage<1, [A8_Pipe0], 0>,
299                                InstrStage<1, [A8_Pipe1]>,
300                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
301                                InstrStage<1, [A8_LdSt0], 0>,
302                                InstrStage<1, [A8_NLSPipe]>]>,
303   //
304   // FP Load Multiple
305   // use A8_Issue to enforce the 1 load/store per cycle limit
306   InstrItinData<IIC_fpLoadm,  [InstrStage<3, [A8_Issue], 0>,
307                                InstrStage<2, [A8_Pipe0], 0>,
308                                InstrStage<2, [A8_Pipe1]>,
309                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
310                                InstrStage<1, [A8_LdSt0], 0>,
311                                InstrStage<1, [A8_NLSPipe]>]>,
312   //
313   // Single-precision FP Store
314   // use A8_Issue to enforce the 1 load/store per cycle limit
315   InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
316                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
317                                InstrStage<1, [A8_LdSt0], 0>,
318                                InstrStage<1, [A8_NLSPipe]>]>,
319   //
320   // Double-precision FP Store
321   // use A8_Issue to enforce the 1 load/store per cycle limit
322   InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
323                                InstrStage<1, [A8_Pipe0], 0>,
324                                InstrStage<1, [A8_Pipe1]>,
325                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
326                                InstrStage<1, [A8_LdSt0], 0>,
327                                InstrStage<1, [A8_NLSPipe]>]>,
328   //
329   // FP Store Multiple
330   // use A8_Issue to enforce the 1 load/store per cycle limit
331   InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
332                                InstrStage<2, [A8_Pipe0], 0>,
333                                InstrStage<2, [A8_Pipe1]>,
334                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
335                                InstrStage<1, [A8_LdSt0], 0>,
336                                InstrStage<1, [A8_NLSPipe]>]>,
337
338   // NEON
339   // Issue through integer pipeline, and execute in NEON unit.
340   //
341   // VLD1
342   // FIXME: We don't model this instruction properly
343   InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Issue], 0>,
344                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
345                                InstrStage<1, [A8_LdSt0], 0>,
346                                InstrStage<1, [A8_NLSPipe]>]>,
347   //
348   // VLD2
349   // FIXME: We don't model this instruction properly
350   InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Issue], 0>,
351                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
352                                InstrStage<1, [A8_LdSt0], 0>,
353                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
354   //
355   // VLD3
356   // FIXME: We don't model this instruction properly
357   InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Issue], 0>,
358                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
359                                InstrStage<1, [A8_LdSt0], 0>,
360                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
361   //
362   // VLD4
363   // FIXME: We don't model this instruction properly
364   InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Issue], 0>,
365                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
366                                InstrStage<1, [A8_LdSt0], 0>,
367                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
368   //
369   // VST
370   // FIXME: We don't model this instruction properly
371   InstrItinData<IIC_VST,      [InstrStage<1, [A8_Issue], 0>,
372                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
373                                InstrStage<1, [A8_LdSt0], 0>,
374                                InstrStage<1, [A8_NLSPipe]>]>,
375   //
376   // Double-register FP Unary
377   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
378                                InstrStage<1, [A8_NPipe]>], [5, 2]>,
379   //
380   // Quad-register FP Unary
381   // Result written in N5, but that is relative to the last cycle of multicycle,
382   // so we use 6 for those cases
383   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
384                                InstrStage<2, [A8_NPipe]>], [6, 2]>,
385   //
386   // Double-register FP Binary
387   InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
388                                InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
389   //
390   // Quad-register FP Binary
391   // Result written in N5, but that is relative to the last cycle of multicycle,
392   // so we use 6 for those cases
393   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
394                                InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
395   //
396   // Move Immediate
397   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
398                                InstrStage<1, [A8_NPipe]>], [3]>,
399   //
400   // Double-register Permute Move
401   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
402                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
403   //
404   // Quad-register Permute Move
405   // Result written in N2, but that is relative to the last cycle of multicycle,
406   // so we use 3 for those cases
407   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
408                                InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
409   //
410   // Integer to Single-precision Move
411   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
412                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
413   //
414   // Integer to Double-precision Move
415   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
416                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
417   //
418   // Single-precision to Integer Move
419   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
420                                InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
421   //
422   // Double-precision to Integer Move
423   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
424                                InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
425   //
426   // Integer to Lane Move
427   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
428                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
429   //
430   // Double-register Permute
431   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
432                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
433   //
434   // Quad-register Permute
435   // Result written in N2, but that is relative to the last cycle of multicycle,
436   // so we use 3 for those cases
437   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
438                                InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
439   //
440   // Quad-register Permute (3 cycle issue)
441   // Result written in N2, but that is relative to the last cycle of multicycle,
442   // so we use 4 for those cases
443   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
444                                InstrStage<1, [A8_NLSPipe]>,
445                                InstrStage<1, [A8_NPipe], 0>,
446                                InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
447   //
448   // Double-register FP Multiply-Accumulate
449   InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
450                                InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
451   //
452   // Quad-register FP Multiply-Accumulate
453   // Result written in N9, but that is relative to the last cycle of multicycle,
454   // so we use 10 for those cases
455   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
456                                InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
457   //
458   // Double-register Reciprocal Step
459   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
460                                InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
461   //
462   // Quad-register Reciprocal Step
463   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
464                                InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
465   //
466   // Double-register Integer Count
467   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
468                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
469   //
470   // Quad-register Integer Count
471   // Result written in N3, but that is relative to the last cycle of multicycle,
472   // so we use 4 for those cases
473   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
474                                InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
475   //
476   // Double-register Integer Unary
477   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
478                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
479   //
480   // Quad-register Integer Unary
481   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
482                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
483   //
484   // Double-register Integer Q-Unary
485   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
486                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
487   //
488   // Quad-register Integer Q-Unary
489   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
490                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
491   //
492   // Double-register Integer Binary
493   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
494                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
495   //
496   // Quad-register Integer Binary
497   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
498                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
499   //
500   // Double-register Integer Binary (4 cycle)
501   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
502                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
503   //
504   // Quad-register Integer Binary (4 cycle)
505   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
506                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
507
508   //
509   // Double-register Integer Subtract
510   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
511                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
512   //
513   // Quad-register Integer Subtract
514   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
515                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
516   //
517   // Double-register Integer Subtract (4 cycle)
518   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
519                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
520   //
521   // Quad-register Integer Subtract (4 cycle)
522   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
523                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
524   //
525   // Double-register Integer Shift
526   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
527                                InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
528   //
529   // Quad-register Integer Shift
530   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
531                                InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
532   //
533   // Double-register Integer Shift (4 cycle)
534   InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
535                                InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
536   //
537   // Quad-register Integer Shift (4 cycle)
538   InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
539                                InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
540   //
541   // Double-register Integer Pair Add Long
542   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
543                                InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
544   //
545   // Quad-register Integer Pair Add Long
546   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
547                                InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
548   //
549   // Double-register Absolute Difference and Accumulate
550   InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
551                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
552   //
553   // Quad-register Absolute Difference and Accumulate
554   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
555                                InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
556
557   //
558   // Double-register Integer Multiply (.8, .16)
559   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
560                                InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
561   //
562   // Double-register Integer Multiply (.32)
563   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
564                                InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
565   //
566   // Quad-register Integer Multiply (.8, .16)
567   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
568                                InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
569   //
570   // Quad-register Integer Multiply (.32)
571   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
572                                InstrStage<1, [A8_NPipe]>,
573                                InstrStage<2, [A8_NLSPipe], 0>,
574                                InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
575   //
576   // Double-register Integer Multiply-Accumulate (.8, .16)
577   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
578                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
579   //
580   // Double-register Integer Multiply-Accumulate (.32)
581   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
582                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
583   //
584   // Quad-register Integer Multiply-Accumulate (.8, .16)
585   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
586                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
587   //
588   // Quad-register Integer Multiply-Accumulate (.32)
589   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
590                                InstrStage<1, [A8_NPipe]>,
591                                InstrStage<2, [A8_NLSPipe], 0>,
592                                InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
593   //
594   // Double-register VEXT
595   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
596                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
597   //
598   // Quad-register VEXT
599   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
600                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
601   //
602   // VTB
603   InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
604                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
605   InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
606                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
607   InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
608                                InstrStage<1, [A8_NLSPipe]>,
609                                InstrStage<1, [A8_NPipe], 0>,
610                                InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
611   InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
612                                InstrStage<1, [A8_NLSPipe]>,
613                                InstrStage<1, [A8_NPipe], 0>,
614                                InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
615   //
616   // VTBX
617   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
618                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
619   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
620                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
621   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
622                                InstrStage<1, [A8_NLSPipe]>,
623                                InstrStage<1, [A8_NPipe], 0>,
624                                InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
625   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
626                                InstrStage<1, [A8_NLSPipe]>,
627                                InstrStage<1, [A8_NPipe], 0>,
628                             InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
629 ]>;