1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
// Functional units referenced by the Cortex-A9 itinerary entries in this file.
// A9_Pipe0 / A9_Pipe1 together represent the dual-issue integer pipelines.
// A9_DRegsVFP / A9_DRegsN are artificial units: they do not correspond to an
// execution pipeline but are used to model VFP <-> NEON cross-domain stalls.
19 def A9_Pipe0 : FuncUnit; // Integer pipeline 0
// The load/store, VFP and NEON entries below also occupy A9_Pipe1.
20 def A9_Pipe1 : FuncUnit; // Integer pipeline 1
21 def A9_AGU : FuncUnit; // Address generation unit for ld / st
// The VFP entries below use A9_NPipe as well as the NEON ones.
22 def A9_NPipe : FuncUnit; // NEON ALU/MUL pipeline
23 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial unit)
24 def A9_DRegsN : FuncUnit; // FP register set, NEON side (artificial unit)
25 def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
// Bypass referenced in the operand-bypass lists of the itinerary entries below
// (for example the integer ALU and integer load entries).
28 def A9_LdBypass : Bypass;
30 // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
32 def CortexA9Itineraries : ProcessorItineraries<
33 [A9_Pipe0, A9_Pipe1, A9_AGU, A9_NPipe, A9_DRegsVFP, A9_DRegsN, A9_MUX0],
35 // Two fully-pipelined integer ALU pipelines
38 // Move instructions, unconditional
39 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
40 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
41 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
42 InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
43 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
44 InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
47 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
49 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
50 [1, 1], [NoBypass, A9_LdBypass]>,
51 InstrItinData<IIC_iMVNsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
53 InstrItinData<IIC_iMVNsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
57 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
59 // Binary Instructions that produce a result
60 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
61 [1, 1], [NoBypass, A9_LdBypass]>,
62 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
63 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
64 InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
65 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
66 InstrItinData<IIC_iALUsir,[InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
67 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
68 InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
70 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
72 // Bitwise Instructions that produce a result
73 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
74 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1, 1]>,
75 InstrItinData<IIC_iBITsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
76 InstrItinData<IIC_iBITsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [3, 1, 1, 1]>,
78 // Unary Instructions that produce a result
81 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
83 // BFC, BFI, UBFX, SBFX
84 InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
87 // Zero and sign extension instructions
88 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
89 InstrItinData<IIC_iEXTAr, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [3, 1, 1]>,
90 InstrItinData<IIC_iEXTAsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>],[3, 1, 1, 1]>,
92 // Compare instructions
93 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
95 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
96 [1, 1], [A9_LdBypass, A9_LdBypass]>,
97 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
98 [1, 1], [A9_LdBypass, NoBypass]>,
99 InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
100 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
103 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
104 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
105 InstrItinData<IIC_iTSTsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
106 InstrItinData<IIC_iTSTsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [1, 1, 1]>,
108 // Move instructions, conditional
109 // FIXME: Correctly model the extra input dep on the destination.
110 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
111 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
112 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
113 InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
115 // Integer multiply pipeline
117 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
118 InstrStage<2, [A9_Pipe0]>], [3, 1, 1]>,
119 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
120 InstrStage<2, [A9_Pipe0]>], [3, 1, 1, 1]>,
121 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
122 InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
123 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
124 InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 1]>,
125 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Pipe1], 0>,
126 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
127 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Pipe1], 0>,
128 InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
129 // Integer load pipeline
130 // FIXME: The timings are some rough approximations
133 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Pipe1]>,
134 InstrStage<1, [A9_MUX0], 0>,
135 InstrStage<1, [A9_AGU]>],
136 [3, 1], [A9_LdBypass]>,
137 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Pipe1]>,
138 InstrStage<1, [A9_MUX0], 0>,
139 InstrStage<2, [A9_AGU]>],
140 [4, 1], [A9_LdBypass]>,
141 // FIXME: If address is 64-bit aligned, AGU cycles is 1.
142 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Pipe1]>,
143 InstrStage<1, [A9_MUX0], 0>,
144 InstrStage<2, [A9_AGU]>],
145 [3, 3, 1], [A9_LdBypass]>,
148 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Pipe1]>,
149 InstrStage<1, [A9_MUX0], 0>,
150 InstrStage<1, [A9_AGU]>],
151 [3, 1, 1], [A9_LdBypass]>,
152 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Pipe1]>,
153 InstrStage<1, [A9_MUX0], 0>,
154 InstrStage<2, [A9_AGU]>],
155 [4, 1, 1], [A9_LdBypass]>,
156 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Pipe1]>,
157 InstrStage<1, [A9_MUX0], 0>,
158 InstrStage<2, [A9_AGU]>],
159 [3, 3, 1, 1], [A9_LdBypass]>,
161 // Scaled register offset
162 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Pipe1]>,
163 InstrStage<1, [A9_MUX0], 0>,
164 InstrStage<1, [A9_AGU]>],
165 [4, 1, 1], [A9_LdBypass]>,
166 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Pipe1]>,
167 InstrStage<1, [A9_MUX0], 0>,
168 InstrStage<2, [A9_AGU]>],
169 [5, 1, 1], [A9_LdBypass]>,
171 // Immediate offset with update
172 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Pipe1]>,
173 InstrStage<1, [A9_MUX0], 0>,
174 InstrStage<1, [A9_AGU]>],
175 [3, 2, 1], [A9_LdBypass]>,
176 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Pipe1]>,
177 InstrStage<1, [A9_MUX0], 0>,
178 InstrStage<2, [A9_AGU]>],
179 [4, 3, 1], [A9_LdBypass]>,
181 // Register offset with update
182 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Pipe1]>,
183 InstrStage<1, [A9_MUX0], 0>,
184 InstrStage<1, [A9_AGU]>],
185 [3, 2, 1, 1], [A9_LdBypass]>,
186 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Pipe1]>,
187 InstrStage<1, [A9_MUX0], 0>,
188 InstrStage<2, [A9_AGU]>],
189 [4, 3, 1, 1], [A9_LdBypass]>,
190 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Pipe1]>,
191 InstrStage<1, [A9_MUX0], 0>,
192 InstrStage<2, [A9_AGU]>],
193 [3, 3, 1, 1], [A9_LdBypass]>,
195 // Scaled register offset with update
196 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Pipe1]>,
197 InstrStage<1, [A9_MUX0], 0>,
198 InstrStage<1, [A9_AGU]>],
199 [4, 3, 1, 1], [A9_LdBypass]>,
200 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Pipe1]>,
201 InstrStage<1, [A9_MUX0], 0>,
202 InstrStage<2, [A9_AGU]>],
203 [5, 4, 1, 1], [A9_LdBypass]>,
206 InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
207 InstrStage<1, [A9_MUX0], 0>,
208 InstrStage<2, [A9_AGU]>],
212 // Load multiple plus branch
213 InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Pipe1]>,
214 InstrStage<1, [A9_MUX0], 0>,
215 InstrStage<1, [A9_AGU]>,
216 InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
219 // iLoadi + iALUr for t2LDRpci_pic.
220 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
221 InstrStage<1, [A9_MUX0], 0>,
222 InstrStage<1, [A9_AGU]>,
223 InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
226 // Integer store pipeline
229 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Pipe1]>,
230 InstrStage<1, [A9_MUX0], 0>,
231 InstrStage<1, [A9_AGU]>], [1, 1]>,
232 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Pipe1]>,
233 InstrStage<1, [A9_MUX0], 0>,
234 InstrStage<2, [A9_AGU]>], [1, 1]>,
235 // FIXME: If address is 64-bit aligned, AGU cycles is 1.
236 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Pipe1]>,
237 InstrStage<1, [A9_MUX0], 0>,
238 InstrStage<2, [A9_AGU]>], [1, 1]>,
241 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Pipe1]>,
242 InstrStage<1, [A9_MUX0], 0>,
243 InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
244 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Pipe1]>,
245 InstrStage<1, [A9_MUX0], 0>,
246 InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
247 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Pipe1]>,
248 InstrStage<1, [A9_MUX0], 0>,
249 InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
251 // Scaled register offset
252 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Pipe1]>,
253 InstrStage<1, [A9_MUX0], 0>,
254 InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
255 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Pipe1]>,
256 InstrStage<1, [A9_MUX0], 0>,
257 InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
259 // Immediate offset with update
260 InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Pipe1]>,
261 InstrStage<1, [A9_MUX0], 0>,
262 InstrStage<1, [A9_AGU]>], [2, 1, 1]>,
263 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Pipe1]>,
264 InstrStage<1, [A9_MUX0], 0>,
265 InstrStage<2, [A9_AGU]>], [3, 1, 1]>,
267 // Register offset with update
268 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Pipe1]>,
269 InstrStage<1, [A9_MUX0], 0>,
270 InstrStage<1, [A9_AGU]>],
272 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Pipe1]>,
273 InstrStage<1, [A9_MUX0], 0>,
274 InstrStage<2, [A9_AGU]>],
276 InstrItinData<IIC_iStore_d_ru,[InstrStage<1, [A9_Pipe1]>,
277 InstrStage<1, [A9_MUX0], 0>,
278 InstrStage<2, [A9_AGU]>],
281 // Scaled register offset with update
282 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Pipe1]>,
283 InstrStage<1, [A9_MUX0], 0>,
284 InstrStage<1, [A9_AGU]>],
286 InstrItinData<IIC_iStore_bh_siu,[InstrStage<1, [A9_Pipe1]>,
287 InstrStage<1, [A9_MUX0], 0>,
288 InstrStage<2, [A9_AGU]>],
292 InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
293 InstrStage<1, [A9_MUX0], 0>,
294 InstrStage<1, [A9_AGU]>]>,
297 // no delay slots, so the latency of a branch is unimportant
298 InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
300 // VFP and NEON share the same register file. This means that every VFP
301 // instruction should wait for full completion of the preceding NEON
302 // instruction and vice-versa. We model this behavior with two artificial FUs:
303 // DRegsVFP and DRegsN.
305 // Every VFP instruction:
306 // - Acquires DRegsVFP resource for 1 cycle
307 // - Reserves DRegsN resource for the whole duration (including time to
308 // register file writeback!).
309 // Every NEON instruction does the same but with FUs swapped.
311 // Since the reserved FU cannot be acquired, this models precisely
312 // "cross-domain" stalls.
315 // Issue through integer pipeline, and execute in NEON unit.
317 // FP Special Register to Integer Register File Move
318 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
319 InstrStage<2, [A9_DRegsN], 0, Reserved>,
320 InstrStage<1, [A9_Pipe1]>,
321 InstrStage<1, [A9_MUX0], 0>,
322 InstrStage<1, [A9_NPipe]>]>,
324 // Single-precision FP Unary
325 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
326 // Extra latency cycles since wbck is 2 cycles
327 InstrStage<3, [A9_DRegsN], 0, Reserved>,
328 InstrStage<1, [A9_Pipe1]>,
329 InstrStage<1, [A9_MUX0], 0>,
330 InstrStage<1, [A9_NPipe]>],
333 // Double-precision FP Unary
334 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
335 // Extra latency cycles since wbck is 2 cycles
336 InstrStage<3, [A9_DRegsN], 0, Reserved>,
337 InstrStage<1, [A9_Pipe1]>,
338 InstrStage<1, [A9_MUX0], 0>,
339 InstrStage<1, [A9_NPipe]>],
343 // Single-precision FP Compare
344 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
345 // Extra latency cycles since wbck is 4 cycles
346 InstrStage<5, [A9_DRegsN], 0, Reserved>,
347 InstrStage<1, [A9_Pipe1]>,
348 InstrStage<1, [A9_MUX0], 0>,
349 InstrStage<1, [A9_NPipe]>],
352 // Double-precision FP Compare
353 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
354 // Extra latency cycles since wbck is 4 cycles
355 InstrStage<5, [A9_DRegsN], 0, Reserved>,
356 InstrStage<1, [A9_Pipe1]>,
357 InstrStage<1, [A9_MUX0], 0>,
358 InstrStage<1, [A9_NPipe]>],
361 // Single to Double FP Convert
362 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
363 InstrStage<5, [A9_DRegsN], 0, Reserved>,
364 InstrStage<1, [A9_Pipe1]>,
365 InstrStage<1, [A9_MUX0], 0>,
366 InstrStage<1, [A9_NPipe]>],
369 // Double to Single FP Convert
370 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
371 InstrStage<5, [A9_DRegsN], 0, Reserved>,
372 InstrStage<1, [A9_Pipe1]>,
373 InstrStage<1, [A9_MUX0], 0>,
374 InstrStage<1, [A9_NPipe]>],
378 // Single to Half FP Convert
379 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
380 InstrStage<5, [A9_DRegsN], 0, Reserved>,
381 InstrStage<1, [A9_Pipe1]>,
382 InstrStage<1, [A9_MUX0], 0>,
383 InstrStage<1, [A9_NPipe]>],
386 // Half to Single FP Convert
387 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
388 InstrStage<3, [A9_DRegsN], 0, Reserved>,
389 InstrStage<1, [A9_Pipe1]>,
390 InstrStage<1, [A9_MUX0], 0>,
391 InstrStage<1, [A9_NPipe]>],
395 // Single-Precision FP to Integer Convert
396 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
397 InstrStage<5, [A9_DRegsN], 0, Reserved>,
398 InstrStage<1, [A9_Pipe1]>,
399 InstrStage<1, [A9_MUX0], 0>,
400 InstrStage<1, [A9_NPipe]>],
403 // Double-Precision FP to Integer Convert
404 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
405 InstrStage<5, [A9_DRegsN], 0, Reserved>,
406 InstrStage<1, [A9_Pipe1]>,
407 InstrStage<1, [A9_MUX0], 0>,
408 InstrStage<1, [A9_NPipe]>],
411 // Integer to Single-Precision FP Convert
412 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
413 InstrStage<5, [A9_DRegsN], 0, Reserved>,
414 InstrStage<1, [A9_Pipe1]>,
415 InstrStage<1, [A9_MUX0], 0>,
416 InstrStage<1, [A9_NPipe]>],
419 // Integer to Double-Precision FP Convert
420 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
421 InstrStage<5, [A9_DRegsN], 0, Reserved>,
422 InstrStage<1, [A9_Pipe1]>,
423 InstrStage<1, [A9_MUX0], 0>,
424 InstrStage<1, [A9_NPipe]>],
427 // Single-precision FP ALU
428 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
429 InstrStage<5, [A9_DRegsN], 0, Reserved>,
430 InstrStage<1, [A9_Pipe1]>,
431 InstrStage<1, [A9_MUX0], 0>,
432 InstrStage<1, [A9_NPipe]>],
435 // Double-precision FP ALU
436 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
437 InstrStage<5, [A9_DRegsN], 0, Reserved>,
438 InstrStage<1, [A9_Pipe1]>,
439 InstrStage<1, [A9_MUX0], 0>,
440 InstrStage<1, [A9_NPipe]>],
443 // Single-precision FP Multiply
444 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
445 InstrStage<6, [A9_DRegsN], 0, Reserved>,
446 InstrStage<1, [A9_Pipe1]>,
447 InstrStage<1, [A9_MUX0], 0>,
448 InstrStage<1, [A9_NPipe]>],
451 // Double-precision FP Multiply
452 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
453 InstrStage<7, [A9_DRegsN], 0, Reserved>,
454 InstrStage<1, [A9_Pipe1]>,
455 InstrStage<1, [A9_MUX0], 0>,
456 InstrStage<2, [A9_NPipe]>],
459 // Single-precision FP MAC
460 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
461 InstrStage<9, [A9_DRegsN], 0, Reserved>,
462 InstrStage<1, [A9_Pipe1]>,
463 InstrStage<1, [A9_MUX0], 0>,
464 InstrStage<1, [A9_NPipe]>],
467 // Double-precision FP MAC
468 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
469 InstrStage<10, [A9_DRegsN], 0, Reserved>,
470 InstrStage<1, [A9_Pipe1]>,
471 InstrStage<1, [A9_MUX0], 0>,
472 InstrStage<2, [A9_NPipe]>],
475 // Single-precision FP DIV
476 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
477 InstrStage<16, [A9_DRegsN], 0, Reserved>,
478 InstrStage<1, [A9_Pipe1]>,
479 InstrStage<1, [A9_MUX0], 0>,
480 InstrStage<10, [A9_NPipe]>],
483 // Double-precision FP DIV
484 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
485 InstrStage<26, [A9_DRegsN], 0, Reserved>,
486 InstrStage<1, [A9_Pipe1]>,
487 InstrStage<1, [A9_MUX0], 0>,
488 InstrStage<20, [A9_NPipe]>],
491 // Single-precision FP SQRT
492 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
493 InstrStage<18, [A9_DRegsN], 0, Reserved>,
494 InstrStage<1, [A9_Pipe1]>,
495 InstrStage<1, [A9_MUX0], 0>,
496 InstrStage<13, [A9_NPipe]>],
499 // Double-precision FP SQRT
500 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
501 InstrStage<33, [A9_DRegsN], 0, Reserved>,
502 InstrStage<1, [A9_Pipe1]>,
503 InstrStage<1, [A9_MUX0], 0>,
504 InstrStage<28, [A9_NPipe]>],
508 // Integer to Single-precision Move
509 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
510 // Extra 1 latency cycle since wbck is 2 cycles
511 InstrStage<3, [A9_DRegsN], 0, Reserved>,
512 InstrStage<1, [A9_Pipe1]>,
513 InstrStage<1, [A9_MUX0], 0>,
514 InstrStage<1, [A9_NPipe]>],
517 // Integer to Double-precision Move
518 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
519 // Extra 1 latency cycle since wbck is 2 cycles
520 InstrStage<3, [A9_DRegsN], 0, Reserved>,
521 InstrStage<1, [A9_Pipe1]>,
522 InstrStage<1, [A9_MUX0], 0>,
523 InstrStage<1, [A9_NPipe]>],
526 // Single-precision to Integer Move
527 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
528 InstrStage<2, [A9_DRegsN], 0, Reserved>,
529 InstrStage<1, [A9_Pipe1]>,
530 InstrStage<1, [A9_MUX0], 0>,
531 InstrStage<1, [A9_NPipe]>],
534 // Double-precision to Integer Move
535 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
536 InstrStage<2, [A9_DRegsN], 0, Reserved>,
537 InstrStage<1, [A9_Pipe1]>,
538 InstrStage<1, [A9_MUX0], 0>,
539 InstrStage<1, [A9_NPipe]>],
542 // Single-precision FP Load
543 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
544 InstrStage<2, [A9_DRegsN], 0, Reserved>,
545 InstrStage<1, [A9_Pipe1], 0>,
546 InstrStage<1, [A9_MUX0], 0>,
547 InstrStage<1, [A9_NPipe]>],
550 // Double-precision FP Load
551 // FIXME: Result latency is 1 if address is 64-bit aligned.
552 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
553 InstrStage<2, [A9_DRegsN], 0, Reserved>,
554 InstrStage<1, [A9_Pipe1], 0>,
555 InstrStage<1, [A9_MUX0], 0>,
556 InstrStage<1, [A9_NPipe]>],
560 InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
561 InstrStage<2, [A9_DRegsN], 0, Reserved>,
562 InstrStage<1, [A9_Pipe1], 0>,
563 InstrStage<1, [A9_MUX0], 0>,
564 InstrStage<1, [A9_NPipe]>]>,
566 // Single-precision FP Store
567 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
568 InstrStage<2, [A9_DRegsN], 0, Reserved>,
569 InstrStage<1, [A9_Pipe1], 0>,
570 InstrStage<1, [A9_MUX0], 0>,
571 InstrStage<1, [A9_NPipe]>],
574 // Double-precision FP Store
575 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
576 InstrStage<2, [A9_DRegsN], 0, Reserved>,
577 InstrStage<1, [A9_Pipe1], 0>,
578 InstrStage<1, [A9_MUX0], 0>,
579 InstrStage<1, [A9_NPipe]>],
583 InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
584 InstrStage<2, [A9_DRegsN], 0, Reserved>,
585 InstrStage<1, [A9_Pipe1], 0>,
586 InstrStage<1, [A9_MUX0], 0>,
587 InstrStage<1, [A9_NPipe]>]>,
589 // Issue through integer pipeline, and execute in NEON unit.
591 // FIXME: We don't model this instruction properly
592 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
593 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
594 InstrStage<1, [A9_Pipe1], 0>,
595 InstrStage<1, [A9_MUX0], 0>,
596 InstrStage<1, [A9_NPipe]>]>,
599 // FIXME: We don't model this instruction properly
600 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
601 // Extra latency cycles since wbck is 6 cycles
602 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
603 InstrStage<1, [A9_Pipe1], 0>,
604 InstrStage<1, [A9_MUX0], 0>,
605 InstrStage<1, [A9_NPipe]>],
609 // FIXME: We don't model this instruction properly
610 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
611 // Extra latency cycles since wbck is 6 cycles
612 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
613 InstrStage<1, [A9_Pipe1], 0>,
614 InstrStage<1, [A9_MUX0], 0>,
615 InstrStage<1, [A9_NPipe]>],
619 // FIXME: We don't model this instruction properly
620 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
621 // Extra latency cycles since wbck is 6 cycles
622 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
623 InstrStage<1, [A9_Pipe1], 0>,
624 InstrStage<1, [A9_MUX0], 0>,
625 InstrStage<1, [A9_NPipe]>],
629 // FIXME: We don't model this instruction properly
630 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
631 // Extra latency cycles since wbck is 6 cycles
632 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
633 InstrStage<1, [A9_Pipe1], 0>,
634 InstrStage<1, [A9_MUX0], 0>,
635 InstrStage<1, [A9_NPipe]>]>,
637 // Double-register Integer Unary
638 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
639 // Extra latency cycles since wbck is 6 cycles
640 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
641 InstrStage<1, [A9_Pipe1]>,
642 InstrStage<1, [A9_MUX0], 0>,
643 InstrStage<1, [A9_NPipe]>],
646 // Quad-register Integer Unary
647 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
648 // Extra latency cycles since wbck is 6 cycles
649 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
650 InstrStage<1, [A9_Pipe1]>,
651 InstrStage<1, [A9_MUX0], 0>,
652 InstrStage<1, [A9_NPipe]>],
655 // Double-register Integer Q-Unary
656 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
657 // Extra latency cycles since wbck is 6 cycles
658 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
659 InstrStage<1, [A9_Pipe1]>,
660 InstrStage<1, [A9_MUX0], 0>,
661 InstrStage<1, [A9_NPipe]>],
664 // Quad-register Integer Q-Unary
665 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
666 // Extra latency cycles since wbck is 6 cycles
667 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
668 InstrStage<1, [A9_Pipe1]>,
669 InstrStage<1, [A9_MUX0], 0>,
670 InstrStage<1, [A9_NPipe]>],
673 // Double-register Integer Binary
674 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
675 // Extra latency cycles since wbck is 6 cycles
676 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
677 InstrStage<1, [A9_Pipe1]>,
678 InstrStage<1, [A9_MUX0], 0>,
679 InstrStage<1, [A9_NPipe]>],
682 // Quad-register Integer Binary
683 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
684 // Extra latency cycles since wbck is 6 cycles
685 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
686 InstrStage<1, [A9_Pipe1]>,
687 InstrStage<1, [A9_MUX0], 0>,
688 InstrStage<1, [A9_NPipe]>],
691 // Double-register Integer Subtract
692 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
693 // Extra latency cycles since wbck is 6 cycles
694 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
695 InstrStage<1, [A9_Pipe1]>,
696 InstrStage<1, [A9_MUX0], 0>,
697 InstrStage<1, [A9_NPipe]>],
700 // Quad-register Integer Subtract
701 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
702 // Extra latency cycles since wbck is 6 cycles
703 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
704 InstrStage<1, [A9_Pipe1]>,
705 InstrStage<1, [A9_MUX0], 0>,
706 InstrStage<1, [A9_NPipe]>],
709 // Double-register Integer Shift
710 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
711 // Extra latency cycles since wbck is 6 cycles
712 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
713 InstrStage<1, [A9_Pipe1]>,
714 InstrStage<1, [A9_MUX0], 0>,
715 InstrStage<1, [A9_NPipe]>],
718 // Quad-register Integer Shift
719 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
720 // Extra latency cycles since wbck is 6 cycles
721 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
722 InstrStage<1, [A9_Pipe1]>,
723 InstrStage<1, [A9_MUX0], 0>,
724 InstrStage<1, [A9_NPipe]>],
727 // Double-register Integer Shift (4 cycle)
728 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
729 // Extra latency cycles since wbck is 6 cycles
730 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
731 InstrStage<1, [A9_Pipe1]>,
732 InstrStage<1, [A9_MUX0], 0>,
733 InstrStage<1, [A9_NPipe]>],
736 // Quad-register Integer Shift (4 cycle)
737 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
738 // Extra latency cycles since wbck is 6 cycles
739 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
740 InstrStage<1, [A9_Pipe1]>,
741 InstrStage<1, [A9_MUX0], 0>,
742 InstrStage<1, [A9_NPipe]>],
745 // Double-register Integer Binary (4 cycle)
746 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
747 // Extra latency cycles since wbck is 6 cycles
748 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
749 InstrStage<1, [A9_Pipe1]>,
750 InstrStage<1, [A9_MUX0], 0>,
751 InstrStage<1, [A9_NPipe]>],
754 // Quad-register Integer Binary (4 cycle)
755 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
756 // Extra latency cycles since wbck is 6 cycles
757 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
758 InstrStage<1, [A9_Pipe1]>,
759 InstrStage<1, [A9_MUX0], 0>,
760 InstrStage<1, [A9_NPipe]>],
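763 // Double-register Integer Subtract (4 cycle)
// NOTE(review): the entry below reuses IIC_VSUBiD, which already has an entry
// earlier in this list; a distinct 4-cycle class (presumably IIC_VSUBi4D, by
// analogy with IIC_VSHLi4D / IIC_VBINi4D) was likely intended - confirm.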
764 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
765 // Extra latency cycles since wbck is 6 cycles
766 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
767 InstrStage<1, [A9_Pipe1]>,
768 InstrStage<1, [A9_MUX0], 0>,
769 InstrStage<1, [A9_NPipe]>],
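772 // Quad-register Integer Subtract (4 cycle)
// NOTE(review): the entry below reuses IIC_VSUBiQ, which already has an entry
// earlier in this list; a distinct 4-cycle class (presumably IIC_VSUBi4Q, by
// analogy with IIC_VSHLi4Q / IIC_VBINi4Q) was likely intended - confirm.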
773 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
774 // Extra latency cycles since wbck is 6 cycles
775 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
776 InstrStage<1, [A9_Pipe1]>,
777 InstrStage<1, [A9_MUX0], 0>,
778 InstrStage<1, [A9_NPipe]>],
782 // Double-register Integer Count
783 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
784 // Extra latency cycles since wbck is 6 cycles
785 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
786 InstrStage<1, [A9_Pipe1]>,
787 InstrStage<1, [A9_MUX0], 0>,
788 InstrStage<1, [A9_NPipe]>],
791 // Quad-register Integer Count
792 // Result written in N3, but that is relative to the last cycle of multicycle,
793 // so we use 4 for those cases
794 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
795 // Extra latency cycles since wbck is 7 cycles
796 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
797 InstrStage<1, [A9_Pipe1]>,
798 InstrStage<1, [A9_MUX0], 0>,
799 InstrStage<2, [A9_NPipe]>],
802 // Double-register Absolute Difference and Accumulate
803 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
804 // Extra latency cycles since wbck is 6 cycles
805 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
806 InstrStage<1, [A9_Pipe1]>,
807 InstrStage<1, [A9_MUX0], 0>,
808 InstrStage<1, [A9_NPipe]>],
811 // Quad-register Absolute Difference and Accumulate
812 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
813 // Extra latency cycles since wbck is 6 cycles
814 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
815 InstrStage<1, [A9_Pipe1]>,
816 InstrStage<1, [A9_MUX0], 0>,
817 InstrStage<2, [A9_NPipe]>],
820 // Double-register Integer Pair Add Long
821 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
822 // Extra latency cycles since wbck is 6 cycles
823 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
824 InstrStage<1, [A9_Pipe1]>,
825 InstrStage<1, [A9_MUX0], 0>,
826 InstrStage<1, [A9_NPipe]>],
829 // Quad-register Integer Pair Add Long
830 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
831 // Extra latency cycles since wbck is 6 cycles
832 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
833 InstrStage<1, [A9_Pipe1]>,
834 InstrStage<1, [A9_MUX0], 0>,
835 InstrStage<2, [A9_NPipe]>],
839 // Double-register Integer Multiply (.8, .16)
840 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
841 // Extra latency cycles since wbck is 6 cycles
842 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
843 InstrStage<1, [A9_Pipe1]>,
844 InstrStage<1, [A9_MUX0], 0>,
845 InstrStage<1, [A9_NPipe]>],
848 // Quad-register Integer Multiply (.8, .16)
849 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
850 // Extra latency cycles since wbck is 7 cycles
851 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
852 InstrStage<1, [A9_Pipe1]>,
853 InstrStage<1, [A9_MUX0], 0>,
854 InstrStage<2, [A9_NPipe]>],
858 // Double-register Integer Multiply (.32)
859 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
860 // Extra latency cycles since wbck is 7 cycles
861 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
862 InstrStage<1, [A9_Pipe1]>,
863 InstrStage<1, [A9_MUX0], 0>,
864 InstrStage<2, [A9_NPipe]>],
867 // Quad-register Integer Multiply (.32)
868 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
869 // Extra latency cycles since wbck is 9 cycles
870 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
871 InstrStage<1, [A9_Pipe1]>,
872 InstrStage<1, [A9_MUX0], 0>,
873 InstrStage<4, [A9_NPipe]>],
876 // Double-register Integer Multiply-Accumulate (.8, .16)
877 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
878 // Extra latency cycles since wbck is 6 cycles
879 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
880 InstrStage<1, [A9_Pipe1]>,
881 InstrStage<1, [A9_MUX0], 0>,
882 InstrStage<1, [A9_NPipe]>],
885 // Double-register Integer Multiply-Accumulate (.32)
886 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
887 // Extra latency cycles since wbck is 7 cycles
888 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
889 InstrStage<1, [A9_Pipe1]>,
890 InstrStage<1, [A9_MUX0], 0>,
891 InstrStage<2, [A9_NPipe]>],
894 // Quad-register Integer Multiply-Accumulate (.8, .16)
895 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
896 // Extra latency cycles since wbck is 7 cycles
897 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
898 InstrStage<1, [A9_Pipe1]>,
899 InstrStage<1, [A9_MUX0], 0>,
900 InstrStage<2, [A9_NPipe]>],
903 // Quad-register Integer Multiply-Accumulate (.32)
904 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
905 // Extra latency cycles since wbck is 9 cycles
906 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
907 InstrStage<1, [A9_Pipe1]>,
908 InstrStage<1, [A9_MUX0], 0>,
909 InstrStage<4, [A9_NPipe]>],
914 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_DRegsN], 0, Required>,
915 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
916 InstrStage<1, [A9_Pipe1]>,
917 InstrStage<1, [A9_MUX0], 0>,
918 InstrStage<1, [A9_NPipe]>],
922 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
923 // Extra latency cycles since wbck is 6 cycles
924 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
925 InstrStage<1, [A9_Pipe1]>,
926 InstrStage<1, [A9_MUX0], 0>,
927 InstrStage<1, [A9_NPipe]>],
930 // Double-register Permute Move
931 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
932 // FIXME: all latencies are arbitrary, no information is available
933 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
934 InstrStage<1, [A9_Pipe1]>,
935 InstrStage<1, [A9_MUX0], 0>,
936 InstrStage<1, [A9_NPipe]>],
939 // Quad-register Permute Move
940 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
941 // FIXME: all latencies are arbitrary, no information is available
942 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
943 InstrStage<1, [A9_Pipe1]>,
944 InstrStage<1, [A9_MUX0], 0>,
945 InstrStage<1, [A9_NPipe]>],
948 // Integer to Single-precision Move
949 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
950 // FIXME: all latencies are arbitrary, no information is available
951 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
952 InstrStage<1, [A9_Pipe1]>,
953 InstrStage<1, [A9_MUX0], 0>,
954 InstrStage<1, [A9_NPipe]>],
957 // Integer to Double-precision Move
958 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
959 // FIXME: all latencies are arbitrary, no information is available
960 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
961 InstrStage<1, [A9_Pipe1]>,
962 InstrStage<1, [A9_MUX0], 0>,
963 InstrStage<1, [A9_NPipe]>],
966 // Single-precision to Integer Move
967 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
968 // FIXME: all latencies are arbitrary, no information is available
969 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
970 InstrStage<1, [A9_Pipe1]>,
971 InstrStage<1, [A9_MUX0], 0>,
972 InstrStage<1, [A9_NPipe]>],
975 // Double-precision to Integer Move
976 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
977 // FIXME: all latencies are arbitrary, no information is available
978 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
979 InstrStage<1, [A9_Pipe1]>,
980 InstrStage<1, [A9_MUX0], 0>,
981 InstrStage<1, [A9_NPipe]>],
984 // Integer to Lane Move
985 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
986 // FIXME: all latencies are arbitrary, no information is available
987 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
988 InstrStage<1, [A9_Pipe1]>,
989 InstrStage<1, [A9_MUX0], 0>,
990 InstrStage<2, [A9_NPipe]>],
994 // Vector narrow move
995 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_DRegsN], 0, Required>,
996 // Extra latency cycles since wbck is 6 cycles
997 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
998 InstrStage<1, [A9_Pipe1]>,
999 InstrStage<1, [A9_MUX0], 0>,
1000 InstrStage<1, [A9_NPipe]>],
1003 // Double-register FP Unary
1004 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1005 // Extra latency cycles since wbck is 6 cycles
1006 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1007 InstrStage<1, [A9_Pipe1]>,
1008 InstrStage<1, [A9_MUX0], 0>,
1009 InstrStage<1, [A9_NPipe]>],
1012 // Quad-register FP Unary
1013 // Result written in N5, but that is relative to the last cycle of multicycle,
1014 // so we use 6 for those cases
1015 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1016 // Extra latency cycles since wbck is 7 cycles
1017 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1018 InstrStage<1, [A9_Pipe1]>,
1019 InstrStage<1, [A9_MUX0], 0>,
1020 InstrStage<2, [A9_NPipe]>],
1023 // Double-register FP Binary
1024 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
1026 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
1027 // Extra latency cycles since wbck is 6 cycles
1028 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1029 InstrStage<1, [A9_Pipe1]>,
1030 InstrStage<1, [A9_MUX0], 0>,
1031 InstrStage<1, [A9_NPipe]>],
1034 // Quad-register FP Binary
1035 // Result written in N5, but that is relative to the last cycle of multicycle,
1036 // so we use 6 for those cases
1037 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
1039 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1040 // Extra latency cycles since wbck is 7 cycles
1041 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1042 InstrStage<1, [A9_Pipe1]>,
1043 InstrStage<1, [A9_MUX0], 0>,
1044 InstrStage<2, [A9_NPipe]>],
1047 // Double-register FP Multiply-Accumulate
1048 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1049 // Extra latency cycles since wbck is 7 cycles
1050 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1051 InstrStage<1, [A9_Pipe1]>,
1052 InstrStage<1, [A9_MUX0], 0>,
1053 InstrStage<2, [A9_NPipe]>],
1056 // Quad-register FP Multiply-Accumulate
1057 // Result written in N9, but that is relative to the last cycle of multicycle,
1058 // so we use 10 for those cases
1059 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1060 // Extra latency cycles since wbck is 9 cycles
1061 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1062 InstrStage<1, [A9_Pipe1]>,
1063 InstrStage<1, [A9_MUX0], 0>,
1064 InstrStage<4, [A9_NPipe]>],
1067 // Double-register Reciprocal Step
1068 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1069 // Extra latency cycles since wbck is 7 cycles
1070 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1071 InstrStage<1, [A9_Pipe1]>,
1072 InstrStage<1, [A9_MUX0], 0>,
1073 InstrStage<2, [A9_NPipe]>],
1076 // Quad-register Reciprocal Step
1077 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1078 // Extra latency cycles since wbck is 9 cycles
1079 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1080 InstrStage<1, [A9_Pipe1]>,
1081 InstrStage<1, [A9_MUX0], 0>,
1082 InstrStage<4, [A9_NPipe]>],
1085 // Double-register Permute
1086 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1087 // Extra latency cycles since wbck is 6 cycles
1088 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1089 InstrStage<1, [A9_Pipe1]>,
1090 InstrStage<1, [A9_MUX0], 0>,
1091 InstrStage<1, [A9_NPipe]>],
1094 // Quad-register Permute
1095 // Result written in N2, but that is relative to the last cycle of multicycle,
1096 // so we use 3 for those cases
1097 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1098 // Extra latency cycles since wbck is 7 cycles
1099 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1100 InstrStage<1, [A9_Pipe1]>,
1101 InstrStage<1, [A9_MUX0], 0>,
1102 InstrStage<2, [A9_NPipe]>],
1105 // Quad-register Permute (3 cycle issue)
1106 // Result written in N2, but that is relative to the last cycle of multicycle,
1107 // so we use 4 for those cases
1108 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
1109 // Extra latency cycles since wbck is 8 cycles
1110 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1111 InstrStage<1, [A9_Pipe1]>,
1112 InstrStage<1, [A9_MUX0], 0>,
1113 InstrStage<3, [A9_NPipe]>],
1117 // Double-register VEXT
1118 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1119 // Extra latency cycles since wbck is 6 cycles
1120 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1121 InstrStage<1, [A9_Pipe1]>,
1122 InstrStage<1, [A9_MUX0], 0>,
1123 InstrStage<1, [A9_NPipe]>],
1126 // Quad-register VEXT
1127 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1128 // Extra latency cycles since wbck is 7 cycles
1129 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1130 InstrStage<1, [A9_Pipe1]>,
1131 InstrStage<1, [A9_MUX0], 0>,
1132 InstrStage<2, [A9_NPipe]>],
1136 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
1137 // Extra latency cycles since wbck is 7 cycles
1138 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1139 InstrStage<1, [A9_Pipe1]>,
1140 InstrStage<1, [A9_MUX0], 0>,
1141 InstrStage<2, [A9_NPipe]>],
1143 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
1144 // Extra latency cycles since wbck is 7 cycles
1145 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1146 InstrStage<1, [A9_Pipe1]>,
1147 InstrStage<1, [A9_MUX0], 0>,
1148 InstrStage<2, [A9_NPipe]>],
1150 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
1151 // Extra latency cycles since wbck is 8 cycles
1152 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1153 InstrStage<1, [A9_Pipe1]>,
1154 InstrStage<1, [A9_MUX0], 0>,
1155 InstrStage<3, [A9_NPipe]>],
// Itinerary for the IIC_VTB4 class.
// Stages: A9_DRegsN is Required for 1 cycle, A9_DRegsVFP is Reserved for
// 9 cycles, then 1 cycle each on A9_Pipe1 and A9_MUX0, and 3 cycles on
// A9_NPipe. Stages written with a third argument of 0 presumably do not
// advance time before the next stage starts (InstrStage's time-increment
// parameter) -- confirm against TargetItinerary.td.
// The trailing list [4, 2, 2, 3, 3, 1] holds the per-operand cycle numbers.
// NOTE(review): A9_DRegsN is held for 1 cycle here but for 2 cycles in
// IIC_VTB2 and IIC_VTB3 -- confirm this difference is intended.
1157 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
1158 // Extra latency cycles since wbck is 8 cycles
1159 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1160 InstrStage<1, [A9_Pipe1]>,
1161 InstrStage<1, [A9_MUX0], 0>,
1162 InstrStage<3, [A9_NPipe]>],
1163 [4, 2, 2, 3, 3, 1]>,
1166 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
1167 // Extra latency cycles since wbck is 7 cycles
1168 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1169 InstrStage<1, [A9_Pipe1]>,
1170 InstrStage<1, [A9_MUX0], 0>,
1171 InstrStage<2, [A9_NPipe]>],
1173 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
1174 // Extra latency cycles since wbck is 7 cycles
1175 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1176 InstrStage<1, [A9_Pipe1]>,
1177 InstrStage<1, [A9_MUX0], 0>,
1178 InstrStage<2, [A9_NPipe]>],
// Itinerary for the IIC_VTBX3 class.
// Stages: A9_DRegsN is Required for 1 cycle, A9_DRegsVFP is Reserved for
// 9 cycles, then 1 cycle each on A9_Pipe1 and A9_MUX0, and 3 cycles on
// A9_NPipe. Stages written with a third argument of 0 presumably do not
// advance time before the next stage starts (InstrStage's time-increment
// parameter) -- confirm against TargetItinerary.td.
// The trailing list [4, 1, 2, 2, 3, 1] holds the per-operand cycle numbers.
1180 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
1181 // Extra latency cycles since wbck is 8 cycles
1182 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1183 InstrStage<1, [A9_Pipe1]>,
1184 InstrStage<1, [A9_MUX0], 0>,
1185 InstrStage<3, [A9_NPipe]>],
1186 [4, 1, 2, 2, 3, 1]>,
// Itinerary for the IIC_VTBX4 class.
// Stages: A9_DRegsN is Required for 1 cycle, A9_DRegsVFP is Reserved for
// 9 cycles, then 1 cycle each on A9_Pipe1 and A9_MUX0, and 2 cycles on
// A9_NPipe. Stages written with a third argument of 0 presumably do not
// advance time before the next stage starts (InstrStage's time-increment
// parameter) -- confirm against TargetItinerary.td.
// The trailing list [4, 1, 2, 2, 3, 3, 1] holds the per-operand cycle numbers.
// NOTE(review): A9_NPipe is held for 2 cycles here, whereas IIC_VTB4 and
// IIC_VTBX3 hold it for 3 cycles with the same 9-cycle A9_DRegsVFP
// reservation -- confirm that 2 is intended.
1187 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
1188 // Extra latency cycles since wbck is 8 cycles
1189 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1190 InstrStage<1, [A9_Pipe1]>,
1191 InstrStage<1, [A9_MUX0], 0>,
1192 InstrStage<2, [A9_NPipe]>],
1193 [4, 1, 2, 2, 3, 3, 1]>