1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
// Functional units and bypass used by the Cortex-A9 itinerary model. Every
// FuncUnit below is listed in the CortexA9Itineraries unit list and is
// referenced by the InstrStage entries of the individual itineraries.
19 def A9_Issue0 : FuncUnit; // Issue slot 0 (itineraries accept Issue0 or Issue1)
20 def A9_Issue1 : FuncUnit; // Issue slot 1
21 def A9_Branch : FuncUnit; // Branch unit
22 def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 (sole unit of the multiply itineraries)
23 def A9_ALU1 : FuncUnit; // ALU pipeline 1
24 def A9_AGU : FuncUnit; // Address generation unit for ld / st
25 def A9_NPipe : FuncUnit; // NEON pipeline (also the execute stage of the VFP itineraries)
26 def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial unit for cross-domain stalls)
28 def A9_DRegsN : FuncUnit; // FP register set, NEON side (artificial unit for cross-domain stalls)
// Bypass named in the bypass lists of the integer load and ALU itineraries.
31 def A9_LdBypass : Bypass;
33 def CortexA9Itineraries : ProcessorItineraries<
34 [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
35 A9_DRegsVFP, A9_DRegsN],
37 // Two fully-pipelined integer ALU pipelines
40 // Move instructions, unconditional
41 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
42 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
43 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
44 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
45 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
46 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
47 InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
48 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
49 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
50 InstrStage<1, [A9_ALU0, A9_ALU1]>,
51 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
54 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
55 InstrStage<1, [A9_ALU0, A9_ALU1]>],
57 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
58 InstrStage<1, [A9_ALU0, A9_ALU1]>],
59 [1, 1], [NoBypass, A9_LdBypass]>,
60 InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
61 InstrStage<2, [A9_ALU0, A9_ALU1]>],
63 InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
64 InstrStage<3, [A9_ALU0, A9_ALU1]>],
68 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
69 InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
71 // Binary Instructions that produce a result
72 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
73 InstrStage<1, [A9_ALU0, A9_ALU1]>],
74 [1, 1], [NoBypass, A9_LdBypass]>,
75 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
76 InstrStage<1, [A9_ALU0, A9_ALU1]>],
77 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
78 InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
79 InstrStage<2, [A9_ALU0, A9_ALU1]>],
80 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
81 InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
82 InstrStage<2, [A9_ALU0, A9_ALU1]>],
83 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
84 InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
85 InstrStage<3, [A9_ALU0, A9_ALU1]>],
87 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
89 // Bitwise Instructions that produce a result
90 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
91 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
92 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
94 InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
95 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
96 InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
99 // Unary Instructions that produce a result
102 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
103 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
105 // BFC, BFI, UBFX, SBFX
106 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
107 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
110 // Zero and sign extension instructions
111 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
112 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
113 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114 InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
115 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
116 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
118 // Compare instructions
119 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
120 InstrStage<1, [A9_ALU0, A9_ALU1]>],
122 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
123 InstrStage<1, [A9_ALU0, A9_ALU1]>],
124 [1, 1], [A9_LdBypass, A9_LdBypass]>,
125 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_ALU0, A9_ALU1]>],
126 [1, 1], [A9_LdBypass, NoBypass]>,
127 InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
128 InstrStage<3, [A9_ALU0, A9_ALU1]>],
129 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
132 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
133 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
134 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
136 InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137 InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
138 InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
139 InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
141 // Move instructions, conditional
142 // FIXME: Correctly model the extra input dep on the destination.
143 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
144 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
145 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
146 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
147 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
148 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
149 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
150 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
152 // Integer multiply pipeline
154 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
155 InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
156 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
157 InstrStage<2, [A9_ALU0]>],
159 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
160 InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
161 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162 InstrStage<2, [A9_ALU0]>],
164 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
165 InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
166 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
167 InstrStage<3, [A9_ALU0]>],
169 // Integer load pipeline
170 // FIXME: The timings are some rough approximations
173 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
174 InstrStage<1, [A9_MUX0], 0>,
175 InstrStage<1, [A9_AGU]>],
176 [3, 1], [A9_LdBypass]>,
177 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
178 InstrStage<1, [A9_MUX0], 0>,
179 InstrStage<2, [A9_AGU]>],
180 [4, 1], [A9_LdBypass]>,
181 // FIXME: If the address is 64-bit aligned, the AGU needs only 1 cycle.
182 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
183 InstrStage<1, [A9_MUX0], 0>,
184 InstrStage<2, [A9_AGU]>],
185 [3, 3, 1], [A9_LdBypass]>,
188 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
189 InstrStage<1, [A9_MUX0], 0>,
190 InstrStage<1, [A9_AGU]>],
191 [3, 1, 1], [A9_LdBypass]>,
192 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
193 InstrStage<1, [A9_MUX0], 0>,
194 InstrStage<2, [A9_AGU]>],
195 [4, 1, 1], [A9_LdBypass]>,
196 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
197 InstrStage<1, [A9_MUX0], 0>,
198 InstrStage<2, [A9_AGU]>],
199 [3, 3, 1, 1], [A9_LdBypass]>,
201 // Scaled register offset
202 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
203 InstrStage<1, [A9_MUX0], 0>,
204 InstrStage<1, [A9_AGU]>],
205 [4, 1, 1], [A9_LdBypass]>,
206 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
207 InstrStage<1, [A9_MUX0], 0>,
208 InstrStage<2, [A9_AGU]>],
209 [5, 1, 1], [A9_LdBypass]>,
211 // Immediate offset with update
212 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
213 InstrStage<1, [A9_MUX0], 0>,
214 InstrStage<1, [A9_AGU]>],
215 [3, 2, 1], [A9_LdBypass]>,
216 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
217 InstrStage<1, [A9_MUX0], 0>,
218 InstrStage<2, [A9_AGU]>],
219 [4, 3, 1], [A9_LdBypass]>,
221 // Register offset with update
222 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
223 InstrStage<1, [A9_MUX0], 0>,
224 InstrStage<1, [A9_AGU]>],
225 [3, 2, 1, 1], [A9_LdBypass]>,
226 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
227 InstrStage<1, [A9_MUX0], 0>,
228 InstrStage<2, [A9_AGU]>],
229 [4, 3, 1, 1], [A9_LdBypass]>,
230 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
231 InstrStage<1, [A9_MUX0], 0>,
232 InstrStage<2, [A9_AGU]>],
233 [3, 3, 1, 1], [A9_LdBypass]>,
235 // Scaled register offset with update
236 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
237 InstrStage<1, [A9_MUX0], 0>,
238 InstrStage<1, [A9_AGU]>],
239 [4, 3, 1, 1], [A9_LdBypass]>,
240 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
241 InstrStage<1, [A9_MUX0], 0>,
242 InstrStage<2, [A9_AGU]>],
243 [5, 4, 1, 1], [A9_LdBypass]>,
246 InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
247 InstrStage<1, [A9_MUX0], 0>,
248 InstrStage<2, [A9_AGU]>],
252 // Load multiple plus branch
253 InstrItinData<IIC_iLoadmBr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
254 InstrStage<1, [A9_MUX0], 0>,
255 InstrStage<1, [A9_AGU]>,
256 InstrStage<1, [A9_Branch]>]>,
259 // iLoadi + iALUr for t2LDRpci_pic.
260 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
261 InstrStage<1, [A9_MUX0], 0>,
262 InstrStage<1, [A9_AGU]>,
263 InstrStage<1, [A9_ALU0, A9_ALU1]>],
266 // Integer store pipeline
269 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
270 InstrStage<1, [A9_MUX0], 0>,
271 InstrStage<1, [A9_AGU]>], [1, 1]>,
272 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
273 InstrStage<1, [A9_MUX0], 0>,
274 InstrStage<2, [A9_AGU]>], [1, 1]>,
275 // FIXME: If the address is 64-bit aligned, the AGU needs only 1 cycle.
276 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
277 InstrStage<1, [A9_MUX0], 0>,
278 InstrStage<2, [A9_AGU]>], [1, 1]>,
281 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
282 InstrStage<1, [A9_MUX0], 0>,
283 InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
284 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
285 InstrStage<1, [A9_MUX0], 0>,
286 InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
287 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
288 InstrStage<1, [A9_MUX0], 0>,
289 InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
291 // Scaled register offset
292 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
293 InstrStage<1, [A9_MUX0], 0>,
294 InstrStage<1, [A9_AGU]>], [1, 1, 1]>,
295 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
296 InstrStage<1, [A9_MUX0], 0>,
297 InstrStage<2, [A9_AGU]>], [1, 1, 1]>,
299 // Immediate offset with update
300 InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
301 InstrStage<1, [A9_MUX0], 0>,
302 InstrStage<1, [A9_AGU]>], [2, 1, 1]>,
303 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
304 InstrStage<1, [A9_MUX0], 0>,
305 InstrStage<2, [A9_AGU]>], [3, 1, 1]>,
307 // Register offset with update
308 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
309 InstrStage<1, [A9_MUX0], 0>,
310 InstrStage<1, [A9_AGU]>],
312 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
313 InstrStage<1, [A9_MUX0], 0>,
314 InstrStage<2, [A9_AGU]>],
316 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
317 InstrStage<1, [A9_MUX0], 0>,
318 InstrStage<2, [A9_AGU]>],
321 // Scaled register offset with update
322 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
323 InstrStage<1, [A9_MUX0], 0>,
324 InstrStage<1, [A9_AGU]>],
326 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
327 InstrStage<1, [A9_MUX0], 0>,
328 InstrStage<2, [A9_AGU]>],
332 InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
333 InstrStage<1, [A9_MUX0], 0>,
334 InstrStage<1, [A9_AGU]>]>,
337 // no delay slots, so the latency of a branch is unimportant
338 InstrItinData<IIC_Br , [InstrStage<1, [A9_Branch]>]>,
340 // VFP and NEON share the same register file. This means that every VFP
341 // instruction should wait for full completion of the preceding NEON
342 // instruction and vice-versa. We model this behavior with two artificial FUs:
343 // DRegsVFP and DRegsN.
345 // Every VFP instruction:
346 // - Acquires DRegsVFP resource for 1 cycle
347 // - Reserves DRegsN resource for the whole duration (including time to
348 // register file writeback!).
349 // Every NEON instruction does the same but with FUs swapped.
351 // Since a reserved FU cannot be acquired, this precisely models
352 // "cross-domain" stalls.
355 // Issue through integer pipeline, and execute in NEON unit.
357 // FP Special Register to Integer Register File Move
358 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
359 InstrStage<2, [A9_DRegsN], 0, Reserved>,
360 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361 InstrStage<1, [A9_MUX0], 0>,
362 InstrStage<1, [A9_NPipe]>]>,
364 // Single-precision FP Unary
365 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
366 // Extra latency cycles since wbck is 2 cycles
367 InstrStage<3, [A9_DRegsN], 0, Reserved>,
368 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
369 InstrStage<1, [A9_MUX0], 0>,
370 InstrStage<1, [A9_NPipe]>],
373 // Double-precision FP Unary
374 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
375 // Extra latency cycles since wbck is 2 cycles
376 InstrStage<3, [A9_DRegsN], 0, Reserved>,
377 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
378 InstrStage<1, [A9_MUX0], 0>,
379 InstrStage<1, [A9_NPipe]>],
383 // Single-precision FP Compare
384 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
385 // Extra latency cycles since wbck is 4 cycles
386 InstrStage<5, [A9_DRegsN], 0, Reserved>,
387 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
388 InstrStage<1, [A9_MUX0], 0>,
389 InstrStage<1, [A9_NPipe]>],
392 // Double-precision FP Compare
393 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
394 // Extra latency cycles since wbck is 4 cycles
395 InstrStage<5, [A9_DRegsN], 0, Reserved>,
396 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
397 InstrStage<1, [A9_MUX0], 0>,
398 InstrStage<1, [A9_NPipe]>],
401 // Single to Double FP Convert
402 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
403 InstrStage<5, [A9_DRegsN], 0, Reserved>,
404 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
405 InstrStage<1, [A9_MUX0], 0>,
406 InstrStage<1, [A9_NPipe]>],
409 // Double to Single FP Convert
410 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
411 InstrStage<5, [A9_DRegsN], 0, Reserved>,
412 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
413 InstrStage<1, [A9_MUX0], 0>,
414 InstrStage<1, [A9_NPipe]>],
418 // Single to Half FP Convert
419 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
420 InstrStage<5, [A9_DRegsN], 0, Reserved>,
421 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
422 InstrStage<1, [A9_MUX0], 0>,
423 InstrStage<1, [A9_NPipe]>],
426 // Half to Single FP Convert
427 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
428 InstrStage<3, [A9_DRegsN], 0, Reserved>,
429 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
430 InstrStage<1, [A9_MUX0], 0>,
431 InstrStage<1, [A9_NPipe]>],
435 // Single-Precision FP to Integer Convert
436 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
437 InstrStage<5, [A9_DRegsN], 0, Reserved>,
438 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
439 InstrStage<1, [A9_MUX0], 0>,
440 InstrStage<1, [A9_NPipe]>],
443 // Double-Precision FP to Integer Convert
444 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
445 InstrStage<5, [A9_DRegsN], 0, Reserved>,
446 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
447 InstrStage<1, [A9_MUX0], 0>,
448 InstrStage<1, [A9_NPipe]>],
451 // Integer to Single-Precision FP Convert
452 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
453 InstrStage<5, [A9_DRegsN], 0, Reserved>,
454 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
455 InstrStage<1, [A9_MUX0], 0>,
456 InstrStage<1, [A9_NPipe]>],
459 // Integer to Double-Precision FP Convert
460 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
461 InstrStage<5, [A9_DRegsN], 0, Reserved>,
462 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
463 InstrStage<1, [A9_MUX0], 0>,
464 InstrStage<1, [A9_NPipe]>],
467 // Single-precision FP ALU
468 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
469 InstrStage<5, [A9_DRegsN], 0, Reserved>,
470 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
471 InstrStage<1, [A9_MUX0], 0>,
472 InstrStage<1, [A9_NPipe]>],
475 // Double-precision FP ALU
476 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
477 InstrStage<5, [A9_DRegsN], 0, Reserved>,
478 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
479 InstrStage<1, [A9_MUX0], 0>,
480 InstrStage<1, [A9_NPipe]>],
483 // Single-precision FP Multiply
484 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
485 InstrStage<6, [A9_DRegsN], 0, Reserved>,
486 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
487 InstrStage<1, [A9_MUX0], 0>,
488 InstrStage<1, [A9_NPipe]>],
491 // Double-precision FP Multiply
492 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
493 InstrStage<7, [A9_DRegsN], 0, Reserved>,
494 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495 InstrStage<1, [A9_MUX0], 0>,
496 InstrStage<2, [A9_NPipe]>],
499 // Single-precision FP MAC
500 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
501 InstrStage<9, [A9_DRegsN], 0, Reserved>,
502 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503 InstrStage<1, [A9_MUX0], 0>,
504 InstrStage<1, [A9_NPipe]>],
507 // Double-precision FP MAC
508 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
509 InstrStage<10, [A9_DRegsN], 0, Reserved>,
510 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
511 InstrStage<1, [A9_MUX0], 0>,
512 InstrStage<2, [A9_NPipe]>],
515 // Single-precision FP DIV
516 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
517 InstrStage<16, [A9_DRegsN], 0, Reserved>,
518 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
519 InstrStage<1, [A9_MUX0], 0>,
520 InstrStage<10, [A9_NPipe]>],
523 // Double-precision FP DIV
524 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
525 InstrStage<26, [A9_DRegsN], 0, Reserved>,
526 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
527 InstrStage<1, [A9_MUX0], 0>,
528 InstrStage<20, [A9_NPipe]>],
531 // Single-precision FP SQRT
532 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
533 InstrStage<18, [A9_DRegsN], 0, Reserved>,
534 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
535 InstrStage<1, [A9_MUX0], 0>,
536 InstrStage<13, [A9_NPipe]>],
539 // Double-precision FP SQRT
540 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
541 InstrStage<33, [A9_DRegsN], 0, Reserved>,
542 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
543 InstrStage<1, [A9_MUX0], 0>,
544 InstrStage<28, [A9_NPipe]>],
548 // Integer to Single-precision Move
549 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
550 // Extra 1 latency cycle since wbck is 2 cycles
551 InstrStage<3, [A9_DRegsN], 0, Reserved>,
552 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
553 InstrStage<1, [A9_MUX0], 0>,
554 InstrStage<1, [A9_NPipe]>],
557 // Integer to Double-precision Move
558 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
559 // Extra 1 latency cycle since wbck is 2 cycles
560 InstrStage<3, [A9_DRegsN], 0, Reserved>,
561 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
562 InstrStage<1, [A9_MUX0], 0>,
563 InstrStage<1, [A9_NPipe]>],
566 // Single-precision to Integer Move
567 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
568 InstrStage<2, [A9_DRegsN], 0, Reserved>,
569 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
570 InstrStage<1, [A9_MUX0], 0>,
571 InstrStage<1, [A9_NPipe]>],
574 // Double-precision to Integer Move
575 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
576 InstrStage<2, [A9_DRegsN], 0, Reserved>,
577 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
578 InstrStage<1, [A9_MUX0], 0>,
579 InstrStage<1, [A9_NPipe]>],
582 // Single-precision FP Load
583 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
584 InstrStage<2, [A9_DRegsN], 0, Reserved>,
585 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
586 InstrStage<1, [A9_MUX0], 0>,
587 InstrStage<1, [A9_NPipe]>],
590 // Double-precision FP Load
591 // FIXME: Result latency is 1 if address is 64-bit aligned.
592 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
593 InstrStage<2, [A9_DRegsN], 0, Reserved>,
594 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
595 InstrStage<1, [A9_MUX0], 0>,
596 InstrStage<1, [A9_NPipe]>],
600 InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
601 InstrStage<2, [A9_DRegsN], 0, Reserved>,
602 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
603 InstrStage<1, [A9_MUX0], 0>,
604 InstrStage<1, [A9_NPipe]>]>,
606 // Single-precision FP Store
607 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
608 InstrStage<2, [A9_DRegsN], 0, Reserved>,
609 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
610 InstrStage<1, [A9_MUX0], 0>,
611 InstrStage<1, [A9_NPipe]>],
614 // Double-precision FP Store
615 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
616 InstrStage<2, [A9_DRegsN], 0, Reserved>,
617 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
618 InstrStage<1, [A9_MUX0], 0>,
619 InstrStage<1, [A9_NPipe]>],
623 InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
624 InstrStage<2, [A9_DRegsN], 0, Reserved>,
625 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
626 InstrStage<1, [A9_MUX0], 0>,
627 InstrStage<1, [A9_NPipe]>]>,
629 // Issue through integer pipeline, and execute in NEON unit.
631 // FIXME: We don't model this instruction properly
632 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
633 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
634 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
635 InstrStage<1, [A9_MUX0], 0>,
636 InstrStage<1, [A9_NPipe]>]>,
639 // FIXME: We don't model this instruction properly
640 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
641 // Extra latency cycles since wbck is 6 cycles
642 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
643 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
644 InstrStage<1, [A9_MUX0], 0>,
645 InstrStage<1, [A9_NPipe]>],
649 // FIXME: We don't model this instruction properly
650 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
651 // Extra latency cycles since wbck is 6 cycles
652 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
653 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
654 InstrStage<1, [A9_MUX0], 0>,
655 InstrStage<1, [A9_NPipe]>],
659 // FIXME: We don't model this instruction properly
660 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
661 // Extra latency cycles since wbck is 6 cycles
662 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
663 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
664 InstrStage<1, [A9_MUX0], 0>,
665 InstrStage<1, [A9_NPipe]>],
669 // FIXME: We don't model this instruction properly
670 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
671 // Extra latency cycles since wbck is 6 cycles
672 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
673 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
674 InstrStage<1, [A9_MUX0], 0>,
675 InstrStage<1, [A9_NPipe]>]>,
677 // Double-register Integer Unary
678 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
679 // Extra latency cycles since wbck is 6 cycles
680 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
681 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
682 InstrStage<1, [A9_MUX0], 0>,
683 InstrStage<1, [A9_NPipe]>],
686 // Quad-register Integer Unary
687 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
688 // Extra latency cycles since wbck is 6 cycles
689 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
690 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
691 InstrStage<1, [A9_MUX0], 0>,
692 InstrStage<1, [A9_NPipe]>],
695 // Double-register Integer Q-Unary
696 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
697 // Extra latency cycles since wbck is 6 cycles
698 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
699 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
700 InstrStage<1, [A9_MUX0], 0>,
701 InstrStage<1, [A9_NPipe]>],
704 // Quad-register Integer Q-Unary
705 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
706 // Extra latency cycles since wbck is 6 cycles
707 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
708 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
709 InstrStage<1, [A9_MUX0], 0>,
710 InstrStage<1, [A9_NPipe]>],
713 // Double-register Integer Binary
714 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
715 // Extra latency cycles since wbck is 6 cycles
716 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
717 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
718 InstrStage<1, [A9_MUX0], 0>,
719 InstrStage<1, [A9_NPipe]>],
722 // Quad-register Integer Binary
723 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
724 // Extra latency cycles since wbck is 6 cycles
725 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
726 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
727 InstrStage<1, [A9_MUX0], 0>,
728 InstrStage<1, [A9_NPipe]>],
731 // Double-register Integer Subtract
732 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
733 // Extra latency cycles since wbck is 6 cycles
734 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
735 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
736 InstrStage<1, [A9_MUX0], 0>,
737 InstrStage<1, [A9_NPipe]>],
740 // Quad-register Integer Subtract
741 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
742 // Extra latency cycles since wbck is 6 cycles
743 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
744 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
745 InstrStage<1, [A9_MUX0], 0>,
746 InstrStage<1, [A9_NPipe]>],
749 // Double-register Integer Shift
750 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
751 // Extra latency cycles since wbck is 6 cycles
752 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
753 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
754 InstrStage<1, [A9_MUX0], 0>,
755 InstrStage<1, [A9_NPipe]>],
758 // Quad-register Integer Shift
759 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
760 // Extra latency cycles since wbck is 6 cycles
761 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
762 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
763 InstrStage<1, [A9_MUX0], 0>,
764 InstrStage<1, [A9_NPipe]>],
767 // Double-register Integer Shift (4 cycle)
768 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
769 // Extra latency cycles since wbck is 6 cycles
770 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
771 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
772 InstrStage<1, [A9_MUX0], 0>,
773 InstrStage<1, [A9_NPipe]>],
776 // Quad-register Integer Shift (4 cycle)
777 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
778 // Extra latency cycles since wbck is 6 cycles
779 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
780 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
781 InstrStage<1, [A9_MUX0], 0>,
782 InstrStage<1, [A9_NPipe]>],
785 // Double-register Integer Binary (4 cycle)
786 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
787 // Extra latency cycles since wbck is 6 cycles
788 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
789 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
790 InstrStage<1, [A9_MUX0], 0>,
791 InstrStage<1, [A9_NPipe]>],
794 // Quad-register Integer Binary (4 cycle)
795 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
796 // Extra latency cycles since wbck is 6 cycles
797 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
798 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
799 InstrStage<1, [A9_MUX0], 0>,
800 InstrStage<1, [A9_NPipe]>],
803 // Double-register Integer Subtract (4 cycle)
804 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
805 // Extra latency cycles since wbck is 6 cycles
806 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
807 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
808 InstrStage<1, [A9_MUX0], 0>,
809 InstrStage<1, [A9_NPipe]>],
812 // Quad-register Integer Subtract (4 cycle)
813 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
814 // Extra latency cycles since wbck is 6 cycles
815 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
816 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
817 InstrStage<1, [A9_MUX0], 0>,
818 InstrStage<1, [A9_NPipe]>],
822 // Double-register Integer Count
823 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
824 // Extra latency cycles since wbck is 6 cycles
825 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
826 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
827 InstrStage<1, [A9_MUX0], 0>,
828 InstrStage<1, [A9_NPipe]>],
831 // Quad-register Integer Count
832 // Result written in N3, but that is relative to the last cycle of multicycle,
833 // so we use 4 for those cases
834 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
835 // Extra latency cycles since wbck is 7 cycles
836 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
837 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
838 InstrStage<1, [A9_MUX0], 0>,
839 InstrStage<2, [A9_NPipe]>],
842 // Double-register Absolute Difference and Accumulate
843 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
844 // Extra latency cycles since wbck is 6 cycles
845 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
846 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
847 InstrStage<1, [A9_MUX0], 0>,
848 InstrStage<1, [A9_NPipe]>],
851 // Quad-register Absolute Difference and Accumulate
852 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
853 // Extra latency cycles since wbck is 6 cycles
854 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
855 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
856 InstrStage<1, [A9_MUX0], 0>,
857 InstrStage<2, [A9_NPipe]>],
860 // Double-register Integer Pair Add Long
861 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
862 // Extra latency cycles since wbck is 6 cycles
863 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
864 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
865 InstrStage<1, [A9_MUX0], 0>,
866 InstrStage<1, [A9_NPipe]>],
869 // Quad-register Integer Pair Add Long
870 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
871 // Extra latency cycles since wbck is 6 cycles
872 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
873 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
874 InstrStage<1, [A9_MUX0], 0>,
875 InstrStage<2, [A9_NPipe]>],
879 // Double-register Integer Multiply (.8, .16)
880 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
881 // Extra latency cycles since wbck is 6 cycles
882 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
883 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
884 InstrStage<1, [A9_MUX0], 0>,
885 InstrStage<1, [A9_NPipe]>],
888 // Quad-register Integer Multiply (.8, .16)
889 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
890 // Extra latency cycles since wbck is 7 cycles
891 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
892 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
893 InstrStage<1, [A9_MUX0], 0>,
894 InstrStage<2, [A9_NPipe]>],
898 // Double-register Integer Multiply (.32)
899 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
900 // Extra latency cycles since wbck is 7 cycles
901 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
902 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
903 InstrStage<1, [A9_MUX0], 0>,
904 InstrStage<2, [A9_NPipe]>],
907 // Quad-register Integer Multiply (.32)
908 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
909 // Extra latency cycles since wbck is 9 cycles
910 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
911 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
912 InstrStage<1, [A9_MUX0], 0>,
913 InstrStage<4, [A9_NPipe]>],
916 // Double-register Integer Multiply-Accumulate (.8, .16)
917 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
918 // Extra latency cycles since wbck is 6 cycles
919 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
920 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
921 InstrStage<1, [A9_MUX0], 0>,
922 InstrStage<1, [A9_NPipe]>],
925 // Double-register Integer Multiply-Accumulate (.32)
926 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
927 // Extra latency cycles since wbck is 7 cycles
928 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
929 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
930 InstrStage<1, [A9_MUX0], 0>,
931 InstrStage<2, [A9_NPipe]>],
934 // Quad-register Integer Multiply-Accumulate (.8, .16)
935 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
936 // Extra latency cycles since wbck is 7 cycles
937 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
938 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
939 InstrStage<1, [A9_MUX0], 0>,
940 InstrStage<2, [A9_NPipe]>],
943 // Quad-register Integer Multiply-Accumulate (.32)
944 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
945 // Extra latency cycles since wbck is 9 cycles
946 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
947 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
948 InstrStage<1, [A9_MUX0], 0>,
949 InstrStage<4, [A9_NPipe]>],
954 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_DRegsN], 0, Required>,
955 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
956 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
957 InstrStage<1, [A9_MUX0], 0>,
958 InstrStage<1, [A9_NPipe]>],
962 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
963 // Extra latency cycles since wbck is 6 cycles
964 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
965 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
966 InstrStage<1, [A9_MUX0], 0>,
967 InstrStage<1, [A9_NPipe]>],
970 // Double-register Permute Move
971 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
972 // FIXME: all latencies are arbitrary, no information is available
973 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
974 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
975 InstrStage<1, [A9_MUX0], 0>,
976 InstrStage<1, [A9_NPipe]>],
979 // Quad-register Permute Move
980 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
981 // FIXME: all latencies are arbitrary, no information is available
982 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
983 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
984 InstrStage<1, [A9_MUX0], 0>,
985 InstrStage<1, [A9_NPipe]>],
988 // Integer to Single-precision Move
989 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
990 // FIXME: all latencies are arbitrary, no information is available
991 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
992 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
993 InstrStage<1, [A9_MUX0], 0>,
994 InstrStage<1, [A9_NPipe]>],
997 // Integer to Double-precision Move
998 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
999 // FIXME: all latencies are arbitrary, no information is available
1000 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1001 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1002 InstrStage<1, [A9_MUX0], 0>,
1003 InstrStage<1, [A9_NPipe]>],
1006 // Single-precision to Integer Move
1007 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
1008 // FIXME: all latencies are arbitrary, no information is available
1009 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1010 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1011 InstrStage<1, [A9_MUX0], 0>,
1012 InstrStage<1, [A9_NPipe]>],
1015 // Double-precision to Integer Move
1016 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
1017 // FIXME: all latencies are arbitrary, no information is available
1018 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1019 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1020 InstrStage<1, [A9_MUX0], 0>,
1021 InstrStage<1, [A9_NPipe]>],
1024 // Integer to Lane Move
1025 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
1026 // FIXME: all latencies are arbitrary, no information is available
1027 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1028 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1029 InstrStage<1, [A9_MUX0], 0>,
1030 InstrStage<2, [A9_NPipe]>],
1034 // Vector narrow move
1035 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_DRegsN], 0, Required>,
1036 // Extra latency cycles since wbck is 6 cycles
1037 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1038 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1039 InstrStage<1, [A9_MUX0], 0>,
1040 InstrStage<1, [A9_NPipe]>],
1043 // Double-register FP Unary
1044 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1045 // Extra latency cycles since wbck is 6 cycles
1046 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1047 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1048 InstrStage<1, [A9_MUX0], 0>,
1049 InstrStage<1, [A9_NPipe]>],
1052 // Quad-register FP Unary
1053 // Result written in N5, but that is relative to the last cycle of multicycle,
1054 // so we use 6 for those cases
1055 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1056 // Extra latency cycles since wbck is 7 cycles
1057 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1058 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1059 InstrStage<1, [A9_MUX0], 0>,
1060 InstrStage<2, [A9_NPipe]>],
1063 // Double-register FP Binary
1064 // FIXME: We're using this itin for many instructions and [2, 2] here is too
1066 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
1067 // Extra latency cycles since wbck is 6 cycles
1068 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1069 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1070 InstrStage<1, [A9_MUX0], 0>,
1071 InstrStage<1, [A9_NPipe]>],
1074 // Quad-register FP Binary
1075 // Result written in N5, but that is relative to the last cycle of multicycle,
1076 // so we use 6 for those cases
1077 // FIXME: We're using this itin for many instructions and [2, 2] here is too
1079 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1080 // Extra latency cycles since wbck is 7 cycles
1081 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1082 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1083 InstrStage<1, [A9_MUX0], 0>,
1084 InstrStage<2, [A9_NPipe]>],
1087 // Double-register FP Multiply-Accumulate
1088 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1089 // Extra latency cycles since wbck is 7 cycles
1090 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1091 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1092 InstrStage<1, [A9_MUX0], 0>,
1093 InstrStage<2, [A9_NPipe]>],
1096 // Quad-register FP Multiply-Accumulate
1097 // Result written in N9, but that is relative to the last cycle of multicycle,
1098 // so we use 10 for those cases
1099 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1100 // Extra latency cycles since wbck is 9 cycles
1101 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1102 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1103 InstrStage<1, [A9_MUX0], 0>,
1104 InstrStage<4, [A9_NPipe]>],
1107 // Double-register Reciprocal Step
1108 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1109 // Extra latency cycles since wbck is 7 cycles
1110 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1111 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1112 InstrStage<1, [A9_MUX0], 0>,
1113 InstrStage<2, [A9_NPipe]>],
1116 // Quad-register Reciprocal Step
1117 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1118 // Extra latency cycles since wbck is 9 cycles
1119 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1120 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1121 InstrStage<1, [A9_MUX0], 0>,
1122 InstrStage<4, [A9_NPipe]>],
1125 // Double-register Permute
1126 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1127 // Extra latency cycles since wbck is 6 cycles
1128 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1129 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1130 InstrStage<1, [A9_MUX0], 0>,
1131 InstrStage<1, [A9_NPipe]>],
1134 // Quad-register Permute
1135 // Result written in N2, but that is relative to the last cycle of multicycle,
1136 // so we use 3 for those cases
1137 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1138 // Extra latency cycles since wbck is 7 cycles
1139 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1140 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1141 InstrStage<1, [A9_MUX0], 0>,
1142 InstrStage<2, [A9_NPipe]>],
1145 // Quad-register Permute (3 cycle issue)
1146 // Result written in N2, but that is relative to the last cycle of multicycle,
1147 // so we use 4 for those cases
1148 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
1149 // Extra latency cycles since wbck is 8 cycles
1150 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1151 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1152 InstrStage<1, [A9_MUX0], 0>,
1153 InstrStage<3, [A9_NPipe]>],
1157 // Double-register VEXT
1158 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1159 // Extra latency cycles since wbck is 6 cycles
1160 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1161 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1162 InstrStage<1, [A9_MUX0], 0>,
1163 InstrStage<1, [A9_NPipe]>],
1166 // Quad-register VEXT
1167 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1168 // Extra latency cycles since wbck is 7 cycles
1169 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1170 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1171 InstrStage<1, [A9_MUX0], 0>,
1172 InstrStage<2, [A9_NPipe]>],
1176 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
1177 // Extra latency cycles since wbck is 7 cycles
1178 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1179 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1180 InstrStage<1, [A9_MUX0], 0>,
1181 InstrStage<2, [A9_NPipe]>],
1183 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
1184 // Extra latency cycles since wbck is 7 cycles
1185 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1186 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1187 InstrStage<1, [A9_MUX0], 0>,
1188 InstrStage<2, [A9_NPipe]>],
1190 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
1191 // Extra latency cycles since wbck is 8 cycles
1192 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1193 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1194 InstrStage<1, [A9_MUX0], 0>,
1195 InstrStage<3, [A9_NPipe]>],
1197 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
1198 // Extra latency cycles since wbck is 8 cycles
1199 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1200 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1201 InstrStage<1, [A9_MUX0], 0>,
1202 InstrStage<3, [A9_NPipe]>],
1203 [4, 2, 2, 3, 3, 1]>,
1206 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
1207 // Extra latency cycles since wbck is 7 cycles
1208 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1209 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1210 InstrStage<1, [A9_MUX0], 0>,
1211 InstrStage<2, [A9_NPipe]>],
1213 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
1214 // Extra latency cycles since wbck is 7 cycles
1215 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1216 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1217 InstrStage<1, [A9_MUX0], 0>,
1218 InstrStage<2, [A9_NPipe]>],
1220 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
1221 // Extra latency cycles since wbck is 8 cycles
1222 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1223 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1224 InstrStage<1, [A9_MUX0], 0>,
1225 InstrStage<3, [A9_NPipe]>],
1226 [4, 1, 2, 2, 3, 1]>,
1227 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
1228 // Extra latency cycles since wbck is 8 cycles
1229 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1230 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1231 InstrStage<1, [A9_MUX0], 0>,
1232 InstrStage<2, [A9_NPipe]>],
1233 [4, 1, 2, 2, 3, 3, 1]>