Fix a number of problems with ARM fused multiply add/subtract instructions.
[oota-llvm.git] / lib / Target / ARM / ARMScheduleA8.td
1 //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the itinerary class data for the ARM Cortex A8 processors.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //
15 // Scheduling information derived from "Cortex-A8 Technical Reference Manual".
16 // Functional Units.
17 def A8_Pipe0   : FuncUnit; // integer ALU pipeline 0 (dual-issue with A8_Pipe1)
18 def A8_Pipe1   : FuncUnit; // integer ALU pipeline 1 (dual-issue with A8_Pipe0)
19 def A8_LSPipe  : FuncUnit; // Load / store pipeline (shared by integer, VFP and NEON memory ops)
20 def A8_NPipe   : FuncUnit; // NEON ALU/MUL pipe
21 def A8_NLSPipe : FuncUnit; // NEON load / store pipe
22 //
23 // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
24 //
25 def CortexA8Itineraries : ProcessorItineraries<
26   [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
27   [], [
28   // Two fully-pipelined integer ALU pipelines
29   //
30   // No operand cycles
31   InstrItinData<IIC_iALUx    , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
32   //
33   // Binary Instructions that produce a result
34   InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
35   InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
36   InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
37   InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
38   InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
39   //
40   // Bitwise Instructions that produce a result
41   InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
42   InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
43   InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
44   InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
45   //
46   // Unary Instructions that produce a result
47   InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
48   InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
49   //
50   // Zero and sign extension instructions
51   InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
52   InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
53   InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
54   //
55   // Compare instructions
56   InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
57   InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
58   InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
59   InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
60   //
61   // Test instructions
62   InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
63   InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
64   InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
65   InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
66   //
67   // Move instructions, unconditional
68   InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
69   InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
70   InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
71   InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
72   InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
73                              InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
74   InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
75                                   InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
76                                   InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
77   InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
78                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
79                                InstrStage<1, [A8_LSPipe]>], [5]>,
80   //
81   // Move instructions, conditional
82   InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
83   InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
84   InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
85   InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
86   InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
87                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
88   //
89   // MVN instructions
90   InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
91   InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
92   InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
93   InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
94
95   // Integer multiply pipeline
96   // Result is written in E5, but that is relative to the last cycle of a
97   // multi-cycle instruction, so we use 6 for those cases.
98   //
99   InstrItinData<IIC_iMUL16   , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
100   InstrItinData<IIC_iMAC16   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
101   InstrItinData<IIC_iMUL32   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
102   InstrItinData<IIC_iMAC32   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
103   InstrItinData<IIC_iMUL64   , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
104   InstrItinData<IIC_iMAC64   , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
105
106   // Integer load pipeline
107   //
108   // Immediate offset
109   InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
110                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
111   InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
112                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
113   InstrItinData<IIC_iLoad_d_i,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
114                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
115   //
116   // Register offset
117   InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
118                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
119   InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
120                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
121   InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
122                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
123   //
124   // Scaled register offset, issues over 2 cycles
125   // FIXME: lsl by 2 takes 1 cycle.
126   InstrItinData<IIC_iLoad_si  , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
127                                  InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
128   InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
129                                  InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
130   //
131   // Immediate offset with update
132   InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
133                                  InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
134   InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
135                                  InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
136   //
137   // Register offset with update
138   InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
139                                  InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
140   InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
141                                  InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
142   InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
143                                  InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
144   //
145   // Scaled register offset with update, issues over 2 cycles
146   InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
147                                  InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
148   InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
149                                   InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
150   //
151   // Load multiple, def is the 5th operand. Pipeline 0 only.
152   // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
153   InstrItinData<IIC_iLoad_m  , [InstrStage<2, [A8_Pipe0], 0>,
154                                 InstrStage<2, [A8_LSPipe]>], [1, 1, 1, 1, 3]>,
155   //
156   // Load multiple + update, defs are the 1st and 5th operands.
157   InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
158                                 InstrStage<3, [A8_LSPipe]>], [2, 1, 1, 1, 3]>,
159   //
160   // Load multiple plus branch
161   InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
162                                 InstrStage<3, [A8_LSPipe]>,
163                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
164                                [1, 2, 1, 1, 3]>,
165   //
166   // Pop, def is the 3rd operand.
167   InstrItinData<IIC_iPop  ,    [InstrStage<3, [A8_Pipe0], 0>,
168                                 InstrStage<3, [A8_LSPipe]>], [1, 1, 3]>,
169   //
170   // Pop plus branch, def is the 3rd operand.
171   InstrItinData<IIC_iPop_Br,   [InstrStage<3, [A8_Pipe0], 0>,
172                                 InstrStage<3, [A8_LSPipe]>,
173                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
174                                [1, 1, 3]>,
175
176   //
177   // iLoadi + iALUr for t2LDRpci_pic.
178   InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
179                                 InstrStage<1, [A8_LSPipe]>,
180                                 InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
181
182
183   // Integer store pipeline
184   //
185   // Immediate offset
186   InstrItinData<IIC_iStore_i  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
187                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
188   InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
189                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
190   InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
191                                  InstrStage<1, [A8_LSPipe]>], [3, 1]>,
192   //
193   // Register offset
194   InstrItinData<IIC_iStore_r  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
195                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
196   InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
197                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
198   InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
199                                  InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
200   //
201   // Scaled register offset, issues over 2 cycles
202   InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
203                                  InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
204   InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
205                                   InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
206   //
207   // Immediate offset with update
208   InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
209                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
210   InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
211                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
212   //
213   // Register offset with update
214   InstrItinData<IIC_iStore_ru  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
215                                   InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
216   InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
217                                   InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
218   InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
219                                   InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
220   //
221   // Scaled register offset with update, issues over 2 cycles
222   InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
223                                  InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
224   InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
225                                    InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
226   //
227   // Store multiple. Pipeline 0 only.
228   // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
229   InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
230                                 InstrStage<2, [A8_LSPipe]>]>,
231   //
232   // Store multiple + update
233   InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
234                                 InstrStage<2, [A8_LSPipe]>], [2]>,
235
236   //
237   // Preload
238   InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
239
240   // Branch
241   //
242   // no delay slots, so the latency of a branch is unimportant
243   InstrItinData<IIC_Br      , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
244
245   // VFP
246   // Issue through integer pipeline, and execute in NEON unit. We assume
247   // RunFast mode so that NFP pipeline is used for single-precision when
248   // possible.
249   //
250   // FP Special Register to Integer Register File Move
251   InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
252                               InstrStage<1, [A8_NLSPipe]>], [20]>,
253   //
254   // Single-precision FP Unary
255   InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
256                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
257   //
258   // Double-precision FP Unary
259   InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
260                                InstrStage<4, [A8_NPipe], 0>,
261                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
262   //
263   // Single-precision FP Compare
264   InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
265                                InstrStage<1, [A8_NPipe]>], [1, 1]>,
266   //
267   // Double-precision FP Compare
268   InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
269                                InstrStage<4, [A8_NPipe], 0>,
270                                InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
271   //
272   // Single to Double FP Convert
273   InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
274                                InstrStage<7, [A8_NPipe], 0>,
275                                InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
276   //
277   // Double to Single FP Convert
278   InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
279                                InstrStage<5, [A8_NPipe], 0>,
280                                InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
281   //
282   // Single-Precision FP to Integer Convert
283   InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
284                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
285   //
286   // Double-Precision FP to Integer Convert
287   InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
288                                InstrStage<8, [A8_NPipe], 0>,
289                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
290   //
291   // Integer to Single-Precision FP Convert
292   InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
293                                InstrStage<1, [A8_NPipe]>], [7, 1]>,
294   //
295   // Integer to Double-Precision FP Convert
296   InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
297                                InstrStage<8, [A8_NPipe], 0>,
298                                InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
299   //
300   // Single-precision FP ALU
301   InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
302                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
303   //
304   // Double-precision FP ALU
305   InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
306                                InstrStage<9, [A8_NPipe], 0>,
307                                InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
308   //
309   // Single-precision FP Multiply
310   InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
311                                InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
312   //
313   // Double-precision FP Multiply
314   InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
315                                InstrStage<11, [A8_NPipe], 0>,
316                                InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
317   //
318   // Single-precision FP MAC
319   InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
320                                InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
321   //
322   // Double-precision FP MAC
323   InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
324                                InstrStage<19, [A8_NPipe], 0>,
325                                InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
326   //
327   // Single-precision Fused FP MAC
328   InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
329                                InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
330   //
331   // Double-precision Fused FP MAC
332   InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
333                                InstrStage<19, [A8_NPipe], 0>,
334                                InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
335   //
336   // Single-precision FP DIV
337   InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
338                                InstrStage<20, [A8_NPipe], 0>,
339                                InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
340   //
341   // Double-precision FP DIV
342   InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
343                                InstrStage<29, [A8_NPipe], 0>,
344                                InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
345   //
346   // Single-precision FP SQRT
347   InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
348                                InstrStage<19, [A8_NPipe], 0>,
349                                InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
350   //
351   // Double-precision FP SQRT
352   InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
353                                InstrStage<29, [A8_NPipe], 0>,
354                                InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
355
356   //
357   // Integer to Single-precision Move
358   InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
359                                InstrStage<1, [A8_NPipe]>],
360                               [2, 1]>,
361   //
362   // Integer to Double-precision Move
363   InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
364                                InstrStage<1, [A8_NPipe]>],
365                               [2, 1, 1]>,
366   //
367   // Single-precision to Integer Move
368   InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
369                                InstrStage<1, [A8_NPipe]>],
370                               [20, 1]>,
371   //
372   // Double-precision to Integer Move
373   InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
374                                InstrStage<1, [A8_NPipe]>],
375                               [20, 20, 1]>,
376
377   //
378   // Single-precision FP Load
379   InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
380                                InstrStage<1, [A8_NLSPipe], 0>,
381                                InstrStage<1, [A8_LSPipe]>],
382                               [2, 1]>,
383   //
384   // Double-precision FP Load
385   InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
386                                InstrStage<1, [A8_NLSPipe], 0>,
387                                InstrStage<1, [A8_LSPipe]>],
388                               [2, 1]>,
389   //
390   // FP Load Multiple
391   // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
392   InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
393                                InstrStage<1, [A8_NLSPipe], 0>,
394                                InstrStage<1, [A8_LSPipe]>,
395                                InstrStage<1, [A8_NLSPipe], 0>,
396                                InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 2]>,
397   //
398   // FP Load Multiple + update
399   InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
400                                InstrStage<1, [A8_NLSPipe], 0>,
401                                InstrStage<1, [A8_LSPipe]>,
402                                InstrStage<1, [A8_NLSPipe], 0>,
403                                InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 2]>,
404   //
405   // Single-precision FP Store
406   InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
407                                InstrStage<1, [A8_NLSPipe], 0>,
408                                InstrStage<1, [A8_LSPipe]>],
409                               [1, 1]>,
410   //
411   // Double-precision FP Store
412   InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
413                                InstrStage<1, [A8_NLSPipe], 0>,
414                                InstrStage<1, [A8_LSPipe]>],
415                               [1, 1]>,
416   //
417   // FP Store Multiple
418   InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
419                                InstrStage<1, [A8_NLSPipe], 0>,
420                                InstrStage<1, [A8_LSPipe]>,
421                                InstrStage<1, [A8_NLSPipe], 0>,
422                                InstrStage<1, [A8_LSPipe]>], [1, 1, 1, 1]>,
423   //
424   // FP Store Multiple + update
425   InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
426                                 InstrStage<1, [A8_NLSPipe], 0>,
427                                 InstrStage<1, [A8_LSPipe]>,
428                                 InstrStage<1, [A8_NLSPipe], 0>,
429                                 InstrStage<1, [A8_LSPipe]>], [2, 1, 1, 1, 1]>,
430
431   // NEON
432   // Issue through integer pipeline, and execute in NEON unit.
433   //
434   // VLD1
435   InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
436                                InstrStage<2, [A8_NLSPipe], 0>,
437                                InstrStage<2, [A8_LSPipe]>],
438                               [2, 1]>,
439   // VLD1x2
440   InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
441                                InstrStage<2, [A8_NLSPipe], 0>,
442                                InstrStage<2, [A8_LSPipe]>],
443                               [2, 2, 1]>,
444   //
445   // VLD1x3
446   InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
447                                InstrStage<3, [A8_NLSPipe], 0>,
448                                InstrStage<3, [A8_LSPipe]>],
449                               [2, 2, 3, 1]>,
450   //
451   // VLD1x4
452   InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
453                                InstrStage<3, [A8_NLSPipe], 0>,
454                                InstrStage<3, [A8_LSPipe]>],
455                               [2, 2, 3, 3, 1]>,
456   //
457   // VLD1u
458   InstrItinData<IIC_VLD1u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
459                                InstrStage<2, [A8_NLSPipe], 0>,
460                                InstrStage<2, [A8_LSPipe]>],
461                               [2, 2, 1]>,
462   //
463   // VLD1x2u
464   InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
465                                InstrStage<2, [A8_NLSPipe], 0>,
466                                InstrStage<2, [A8_LSPipe]>],
467                               [2, 2, 2, 1]>,
468   //
469   // VLD1x3u
470   InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
471                                InstrStage<3, [A8_NLSPipe], 0>,
472                                InstrStage<3, [A8_LSPipe]>],
473                               [2, 2, 3, 2, 1]>,
474   //
475   // VLD1x4u
476   InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
477                                InstrStage<3, [A8_NLSPipe], 0>,
478                                InstrStage<3, [A8_LSPipe]>],
479                               [2, 2, 3, 3, 2, 1]>,
480   //
481   // VLD1ln
482   InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
483                                InstrStage<3, [A8_NLSPipe], 0>,
484                                InstrStage<3, [A8_LSPipe]>],
485                               [3, 1, 1, 1]>,
486   //
487   // VLD1lnu
488   InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
489                                InstrStage<3, [A8_NLSPipe], 0>,
490                                InstrStage<3, [A8_LSPipe]>],
491                               [3, 2, 1, 1, 1, 1]>,
492   //
493   // VLD1dup
494   InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
495                                InstrStage<2, [A8_NLSPipe], 0>,
496                                InstrStage<2, [A8_LSPipe]>],
497                               [2, 1]>,
498   //
499   // VLD1dupu
500   InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
501                                InstrStage<2, [A8_NLSPipe], 0>,
502                                InstrStage<2, [A8_LSPipe]>],
503                               [2, 2, 1, 1]>,
504   //
505   // VLD2
506   InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
507                                InstrStage<2, [A8_NLSPipe], 0>,
508                                InstrStage<2, [A8_LSPipe]>],
509                               [2, 2, 1]>,
510   //
511   // VLD2x2
512   InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
513                                InstrStage<3, [A8_NLSPipe], 0>,
514                                InstrStage<3, [A8_LSPipe]>],
515                               [2, 2, 3, 3, 1]>,
516   //
517   // VLD2ln
518   InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
519                                InstrStage<3, [A8_NLSPipe], 0>,
520                                InstrStage<3, [A8_LSPipe]>],
521                               [3, 3, 1, 1, 1, 1]>,
522   //
523   // VLD2u
524   InstrItinData<IIC_VLD2u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
525                                InstrStage<2, [A8_NLSPipe], 0>,
526                                InstrStage<2, [A8_LSPipe]>],
527                               [2, 2, 2, 1, 1, 1]>,
528   //
529   // VLD2x2u
530   InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
531                                InstrStage<3, [A8_NLSPipe], 0>,
532                                InstrStage<3, [A8_LSPipe]>],
533                               [2, 2, 3, 3, 2, 1]>,
534   //
535   // VLD2lnu
536   InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
537                                InstrStage<3, [A8_NLSPipe], 0>,
538                                InstrStage<3, [A8_LSPipe]>],
539                               [3, 3, 2, 1, 1, 1, 1, 1]>,
540   //
541   // VLD2dup
542   InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
543                                InstrStage<2, [A8_NLSPipe], 0>,
544                                InstrStage<2, [A8_LSPipe]>],
545                               [2, 2, 1]>,
546   //
547   // VLD2dupu
548   InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
549                                InstrStage<2, [A8_NLSPipe], 0>,
550                                InstrStage<2, [A8_LSPipe]>],
551                               [2, 2, 2, 1, 1]>,
552   //
553   // VLD3
554   InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
555                                InstrStage<4, [A8_NLSPipe], 0>,
556                                InstrStage<4, [A8_LSPipe]>],
557                               [3, 3, 4, 1]>,
558   //
559   // VLD3ln
560   InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
561                                InstrStage<5, [A8_NLSPipe], 0>,
562                                InstrStage<5, [A8_LSPipe]>],
563                               [4, 4, 5, 1, 1, 1, 1, 2]>,
564   //
565   // VLD3u
566   InstrItinData<IIC_VLD3u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
567                                InstrStage<4, [A8_NLSPipe], 0>,
568                                InstrStage<4, [A8_LSPipe]>],
569                               [3, 3, 4, 2, 1]>,
570   //
571   // VLD3lnu
572   InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
573                                InstrStage<5, [A8_NLSPipe], 0>,
574                                InstrStage<5, [A8_LSPipe]>],
575                               [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
576   //
577   // VLD3dup
578   InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
579                                InstrStage<3, [A8_NLSPipe], 0>,
580                                InstrStage<3, [A8_LSPipe]>],
581                               [2, 2, 3, 1]>,
582   //
583   // VLD3dupu
584   InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
585                                InstrStage<3, [A8_NLSPipe], 0>,
586                                InstrStage<3, [A8_LSPipe]>],
587                               [2, 2, 3, 2, 1, 1]>,
588   //
589   // VLD4
590   InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
591                                InstrStage<4, [A8_NLSPipe], 0>,
592                                InstrStage<4, [A8_LSPipe]>],
593                               [3, 3, 4, 4, 1]>,
594   //
595   // VLD4ln
596   InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
597                                InstrStage<5, [A8_NLSPipe], 0>,
598                                InstrStage<5, [A8_LSPipe]>],
599                               [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
600   //
601   // VLD4u
602   InstrItinData<IIC_VLD4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
603                                InstrStage<4, [A8_NLSPipe], 0>,
604                                InstrStage<4, [A8_LSPipe]>],
605                               [3, 3, 4, 4, 2, 1]>,
606   //
607   // VLD4lnu
608   InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
609                                InstrStage<5, [A8_NLSPipe], 0>,
610                                InstrStage<5, [A8_LSPipe]>],
611                               [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
612   //
613   // VLD4dup
614   InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
615                                InstrStage<3, [A8_NLSPipe], 0>,
616                                InstrStage<3, [A8_LSPipe]>],
617                               [2, 2, 3, 3, 1]>,
618   //
619   // VLD4dupu
620   InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
621                                InstrStage<3, [A8_NLSPipe], 0>,
622                                InstrStage<3, [A8_LSPipe]>],
623                               [2, 2, 3, 3, 2, 1, 1]>,
624   //
625   // VST1
626   InstrItinData<IIC_VST1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
627                                InstrStage<2, [A8_NLSPipe], 0>,
628                                InstrStage<2, [A8_LSPipe]>],
629                               [1, 1, 1]>,
630   //
631   // VST1x2
632   InstrItinData<IIC_VST1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
633                                InstrStage<2, [A8_NLSPipe], 0>,
634                                InstrStage<2, [A8_LSPipe]>],
635                               [1, 1, 1, 1]>,
636   //
637   // VST1x3
638   InstrItinData<IIC_VST1x3,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
639                                InstrStage<3, [A8_NLSPipe], 0>,
640                                InstrStage<3, [A8_LSPipe]>],
641                               [1, 1, 1, 1, 2]>,
642   //
643   // VST1x4
644   InstrItinData<IIC_VST1x4,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
645                                InstrStage<3, [A8_NLSPipe], 0>,
646                                InstrStage<3, [A8_LSPipe]>],
647                               [1, 1, 1, 1, 2, 2]>,
648   //
649   // VST1u
650   InstrItinData<IIC_VST1u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
651                                InstrStage<2, [A8_NLSPipe], 0>,
652                                InstrStage<2, [A8_LSPipe]>],
653                               [2, 1, 1, 1, 1]>,
654   //
655   // VST1x2u
656   InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
657                                InstrStage<2, [A8_NLSPipe], 0>,
658                                InstrStage<2, [A8_LSPipe]>],
659                               [2, 1, 1, 1, 1, 1]>,
660   //
661   // VST1x3u
662   InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
663                                InstrStage<3, [A8_NLSPipe], 0>,
664                                InstrStage<3, [A8_LSPipe]>],
665                               [2, 1, 1, 1, 1, 1, 2]>,
666   //
667   // VST1x4u
668   InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
669                                InstrStage<3, [A8_NLSPipe], 0>,
670                                InstrStage<3, [A8_LSPipe]>],
671                               [2, 1, 1, 1, 1, 1, 2, 2]>,
672   //
673   // VST1ln
674   InstrItinData<IIC_VST1ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
675                                InstrStage<2, [A8_NLSPipe], 0>,
676                                InstrStage<2, [A8_LSPipe]>],
677                               [1, 1, 1]>,
678   //
679   // VST1lnu
680   InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
681                                InstrStage<2, [A8_NLSPipe], 0>,
682                                InstrStage<2, [A8_LSPipe]>],
683                               [2, 1, 1, 1, 1]>,
684   //
685   // VST2
686   InstrItinData<IIC_VST2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
687                                InstrStage<2, [A8_NLSPipe], 0>,
688                                InstrStage<2, [A8_LSPipe]>],
689                               [1, 1, 1, 1]>,
690   //
691   // VST2x2
692   InstrItinData<IIC_VST2x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
693                                InstrStage<4, [A8_NLSPipe], 0>,
694                                InstrStage<4, [A8_LSPipe]>],
695                               [1, 1, 1, 1, 2, 2]>,
696   //
697   // VST2u
698   InstrItinData<IIC_VST2u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
699                                InstrStage<2, [A8_NLSPipe], 0>,
700                                InstrStage<2, [A8_LSPipe]>],
701                               [2, 1, 1, 1, 1, 1]>,
702   //
703   // VST2x2u
704   InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
705                                InstrStage<4, [A8_NLSPipe], 0>,
706                                InstrStage<4, [A8_LSPipe]>],
707                               [2, 1, 1, 1, 1, 1, 2, 2]>,
708   //
709   // VST2ln
710   InstrItinData<IIC_VST2ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
711                                InstrStage<2, [A8_NLSPipe], 0>,
712                                InstrStage<2, [A8_LSPipe]>],
713                               [1, 1, 1, 1]>,
714   //
715   // VST2lnu
716   InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
717                                InstrStage<2, [A8_NLSPipe], 0>,
718                                InstrStage<2, [A8_LSPipe]>],
719                               [2, 1, 1, 1, 1, 1]>,
720   //
721   // VST3
722   InstrItinData<IIC_VST3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
723                                InstrStage<3, [A8_NLSPipe], 0>,
724                                InstrStage<3, [A8_LSPipe]>],
725                               [1, 1, 1, 1, 2]>,
726   //
727   // VST3u
728   InstrItinData<IIC_VST3u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
729                                InstrStage<3, [A8_NLSPipe], 0>,
730                                InstrStage<3, [A8_LSPipe]>],
731                               [2, 1, 1, 1, 1, 1, 2]>,
732   //
733   // VST3ln
734   InstrItinData<IIC_VST3ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
735                                InstrStage<3, [A8_NLSPipe], 0>,
736                                InstrStage<3, [A8_LSPipe]>],
737                               [1, 1, 1, 1, 2]>,
738   //
739   // VST3lnu
740   InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
741                                InstrStage<3, [A8_NLSPipe], 0>,
742                                InstrStage<3, [A8_LSPipe]>],
743                               [2, 1, 1, 1, 1, 1, 2]>,
744   //
745   // VST4
746   InstrItinData<IIC_VST4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
747                                InstrStage<4, [A8_NLSPipe], 0>,
748                                InstrStage<4, [A8_LSPipe]>],
749                               [1, 1, 1, 1, 2, 2]>,
750   //
751   // VST4u
752   InstrItinData<IIC_VST4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
753                                InstrStage<4, [A8_NLSPipe], 0>,
754                                InstrStage<4, [A8_LSPipe]>],
755                               [2, 1, 1, 1, 1, 1, 2, 2]>,
756   //
757   // VST4ln
758   InstrItinData<IIC_VST4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
759                                InstrStage<4, [A8_NLSPipe], 0>,
760                                InstrStage<4, [A8_LSPipe]>],
761                               [1, 1, 1, 1, 2, 2]>,
762   //
763   // VST4lnu
764   InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
765                                InstrStage<4, [A8_NLSPipe], 0>,
766                                InstrStage<4, [A8_LSPipe]>],
767                               [2, 1, 1, 1, 1, 1, 2, 2]>,
768   //
769   // Double-register FP Unary
770   InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
771                                InstrStage<1, [A8_NPipe]>], [5, 2]>,
772   //
773   // Quad-register FP Unary
774   // Result written in N5, but that is relative to the last cycle of multicycle,
775   // so we use 6 for those cases
776   InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
777                                InstrStage<2, [A8_NPipe]>], [6, 2]>,
778   //
779   // Double-register FP Binary
780   InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
781                                InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
782   //
783   // VPADD, etc.
784   InstrItinData<IIC_VPBIND,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
785                                InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
786   //
787   // Double-register FP VMUL
788   InstrItinData<IIC_VFMULD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
789                                InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
790
791   //
792   // Quad-register FP Binary
793   // Result written in N5, but that is relative to the last cycle of multicycle,
794   // so we use 6 for those cases
795   InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
796                                InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
797   //
798   // Quad-register FP VMUL
799   InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
800                                InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
801   //
802   // Move
803   InstrItinData<IIC_VMOV,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
804                                InstrStage<1, [A8_NPipe]>], [1, 1]>,
805   //
806   // Move Immediate
807   InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
808                                InstrStage<1, [A8_NPipe]>], [3]>,
809   //
810   // Double-register Permute Move
811   InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
812                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
813   //
814   // Quad-register Permute Move
815   // Result written in N2, but that is relative to the last cycle of multicycle,
816   // so we use 3 for those cases
817   InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
818                                InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
819   //
820   // Integer to Single-precision Move
821   InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
822                                InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
823   //
824   // Integer to Double-precision Move
825   InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
826                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
827   //
828   // Single-precision to Integer Move
829   InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
830                                InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
831   //
832   // Double-precision to Integer Move
833   InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
834                                InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
835   //
836   // Integer to Lane Move
837   InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
838                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
839   //
840   // Vector narrow move
841   InstrItinData<IIC_VMOVN   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
842                                InstrStage<1, [A8_NPipe]>], [2, 1]>,
843   //
844   // Double-register Permute
845   InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
846                                InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
847   //
848   // Quad-register Permute
849   // Result written in N2, but that is relative to the last cycle of multicycle,
850   // so we use 3 for those cases
851   InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
852                                InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
853   //
854   // Quad-register Permute (3 cycle issue)
855   // Result written in N2, but that is relative to the last cycle of multicycle,
856   // so we use 4 for those cases
857   InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
858                                InstrStage<1, [A8_NLSPipe]>,
859                                InstrStage<1, [A8_NPipe], 0>,
860                                InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
861   //
862   // Double-register FP Multiply-Accumulate
863   InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
864                                InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
865   //
866   // Quad-register FP Multiply-Accumulate
867   // Result written in N9, but that is relative to the last cycle of multicycle,
868   // so we use 10 for those cases
869   InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
870                                InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
871   //
872   // Double-register Fused FP Multiply-Accumulate
873   InstrItinData<IIC_VFMACD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
874                                InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
875   //
876   // Quad-register Fused FP Multiply-Accumulate
877   // Result written in N9, but that is relative to the last cycle of multicycle,
878   // so we use 10 for those cases
879   InstrItinData<IIC_VFMACQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
880                                InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
881   //
882   // Double-register Reciprocal Step
883   InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
884                                InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
885   //
886   // Quad-register Reciprocal Step
887   InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
888                                InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
889   //
890   // Double-register Integer Count
891   InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
892                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
893   //
894   // Quad-register Integer Count
895   // Result written in N3, but that is relative to the last cycle of multicycle,
896   // so we use 4 for those cases
897   InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
898                                InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
899   //
900   // Double-register Integer Unary
901   InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
902                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
903   //
904   // Quad-register Integer Unary
905   InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
906                                InstrStage<1, [A8_NPipe]>], [4, 2]>,
907   //
908   // Double-register Integer Q-Unary
909   InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
910                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
911   //
912   // Quad-register Integer Q-Unary
913   InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
914                                InstrStage<1, [A8_NPipe]>], [4, 1]>,
915   //
916   // Double-register Integer Binary
917   InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
918                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
919   //
920   // Quad-register Integer Binary
921   InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
922                                InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
923   //
924   // Double-register Integer Binary (4 cycle)
925   InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
926                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
927   //
928   // Quad-register Integer Binary (4 cycle)
929   InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
930                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
931
932   //
933   // Double-register Integer Subtract
934   InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
935                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
936   //
937   // Quad-register Integer Subtract
938   InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
939                                InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
940   //
941   // Double-register Integer Subtract (4 cycle)
942   InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
943                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
944   //
945   // Quad-register Integer Subtract (4 cycle)
946   InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
947                                InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
948   //
949   // Double-register Integer Shift
950   InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
951                                InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
952   //
953   // Quad-register Integer Shift
954   InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
955                                InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
956   //
957   // Double-register Integer Shift (4 cycle)
958   InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
959                                InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
960   //
961   // Quad-register Integer Shift (4 cycle)
962   InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
963                                InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
964   //
965   // Double-register Integer Pair Add Long
966   InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
967                                InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
968   //
969   // Quad-register Integer Pair Add Long
970   InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
971                                InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
972   //
973   // Double-register Absolute Difference and Accumulate
974   InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
975                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
976   //
977   // Quad-register Absolute Difference and Accumulate
978   InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
979                                InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
980
981   //
982   // Double-register Integer Multiply (.8, .16)
983   InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
984                                InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
985   //
986   // Double-register Integer Multiply (.32)
987   InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
988                                InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
989   //
990   // Quad-register Integer Multiply (.8, .16)
991   InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
992                                InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
993   //
994   // Quad-register Integer Multiply (.32)
995   InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
996                                InstrStage<1, [A8_NPipe]>,
997                                InstrStage<2, [A8_NLSPipe], 0>,
998                                InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
999   //
1000   // Double-register Integer Multiply-Accumulate (.8, .16)
1001   InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1002                                InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
1003   //
1004   // Double-register Integer Multiply-Accumulate (.32)
1005   InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1006                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
1007   //
1008   // Quad-register Integer Multiply-Accumulate (.8, .16)
1009   InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1010                                InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
1011   //
1012   // Quad-register Integer Multiply-Accumulate (.32)
1013   InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1014                                InstrStage<1, [A8_NPipe]>,
1015                                InstrStage<2, [A8_NLSPipe], 0>,
1016                                InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
1017   //
1018   // Double-register VEXT
1019   InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1020                                InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
1021   //
1022   // Quad-register VEXT
1023   InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1024                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
1025   //
1026   // VTB
1027   InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1028                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
1029   InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1030                                InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
1031   InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1032                                InstrStage<1, [A8_NLSPipe]>,
1033                                InstrStage<1, [A8_NPipe], 0>,
1034                                InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
1035   InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1036                                InstrStage<1, [A8_NLSPipe]>,
1037                                InstrStage<1, [A8_NPipe], 0>,
1038                                InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
1039   //
1040   // VTBX
1041   InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1042                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
1043   InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1044                                InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
1045   InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1046                                InstrStage<1, [A8_NLSPipe]>,
1047                                InstrStage<1, [A8_NPipe], 0>,
1048                                InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
1049   InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1050                                InstrStage<1, [A8_NLSPipe]>,
1051                                InstrStage<1, [A8_NPipe], 0>,
1052                             InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
1053 ]>;