1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
// Functional units referenced by the InstrStage entries of the
// CortexA9Itineraries definition. All of them are listed again in that
// definition's functional-unit list.
19 def A9_Issue0 : FuncUnit; // Issue slot 0
20 def A9_Issue1 : FuncUnit; // Issue slot 1
21 def A9_Branch : FuncUnit; // Branch unit (last stage of the branch itineraries)
22 def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 (multiply itineraries use only this ALU)
23 def A9_ALU1 : FuncUnit; // ALU pipeline 1
24 def A9_AGU : FuncUnit; // Address generation unit for ld / st
25 def A9_NPipe : FuncUnit; // NEON pipeline (final stage of the VFP and NEON itineraries)
26 def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
27 def A9_LS0 : FuncUnit; // L/S Units, 32-bit per unit. Fake FU to limit l/s.
28 def A9_LS1 : FuncUnit; // L/S Units, 32-bit per unit.
// Artificial units: a VFP itinerary requires DRegsVFP and reserves DRegsN,
// a NEON itinerary does the opposite, which models cross-domain stalls.
29 def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
30 def A9_DRegsN : FuncUnit; // FP register set, NEON side
// Bypass named in the operand bypass lists of the integer ALU and load
// itineraries; operands without it use NoBypass.
33 def A9_LdBypass : Bypass;
35 def CortexA9Itineraries : ProcessorItineraries<
36 [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
37 A9_LS0, A9_LS1, A9_DRegsVFP, A9_DRegsN],
39 // Two fully-pipelined integer ALU pipelines
42 // Move instructions, unconditional
43 InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
44 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
45 InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
46 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
47 InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
48 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
49 InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
50 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
51 InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
52 InstrStage<1, [A9_ALU0, A9_ALU1]>,
53 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
56 InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
57 InstrStage<1, [A9_ALU0, A9_ALU1]>],
59 InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
60 InstrStage<1, [A9_ALU0, A9_ALU1]>],
61 [1, 1], [NoBypass, A9_LdBypass]>,
62 InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
63 InstrStage<2, [A9_ALU0, A9_ALU1]>],
65 InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
66 InstrStage<3, [A9_ALU0, A9_ALU1]>],
70 InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
71 InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
73 // Binary Instructions that produce a result
74 InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
75 InstrStage<1, [A9_ALU0, A9_ALU1]>],
76 [1, 1], [NoBypass, A9_LdBypass]>,
77 InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
78 InstrStage<1, [A9_ALU0, A9_ALU1]>],
79 [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
80 InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
81 InstrStage<2, [A9_ALU0, A9_ALU1]>],
82 [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
83 InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
84 InstrStage<2, [A9_ALU0, A9_ALU1]>],
85 [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
86 InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
87 InstrStage<3, [A9_ALU0, A9_ALU1]>],
89 [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
91 // Bitwise Instructions that produce a result
92 InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
93 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
94 InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
95 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
96 InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
97 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
98 InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
99 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
101 // Unary Instructions that produce a result
104 InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
105 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
107 // BFC, BFI, UBFX, SBFX
108 InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
109 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
112 // Zero and sign extension instructions
113 InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
114 InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
115 InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
116 InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
117 InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
118 InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
120 // Compare instructions
121 InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
122 InstrStage<1, [A9_ALU0, A9_ALU1]>],
124 InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
125 InstrStage<1, [A9_ALU0, A9_ALU1]>],
126 [1, 1], [A9_LdBypass, A9_LdBypass]>,
127 InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_ALU0, A9_ALU1]>],
128 [1, 1], [A9_LdBypass, NoBypass]>,
129 InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
130 InstrStage<3, [A9_ALU0, A9_ALU1]>],
131 [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
134 InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
135 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
136 InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
137 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
138 InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
139 InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
140 InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
141 InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
143 // Move instructions, conditional
144 // FIXME: Correctly model the extra input dep on the destination.
145 InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
146 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
147 InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
148 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
149 InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
150 InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
151 InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
152 InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
154 // Integer multiply pipeline
156 InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
157 InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
158 InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
159 InstrStage<2, [A9_ALU0]>],
161 InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
162 InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
163 InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
164 InstrStage<2, [A9_ALU0]>],
166 InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
167 InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
168 InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
169 InstrStage<3, [A9_ALU0]>],
171 // Integer load pipeline
172 // FIXME: These timings are rough approximations.
175 InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
176 InstrStage<1, [A9_MUX0], 0>,
177 InstrStage<1, [A9_AGU]>,
178 InstrStage<1, [A9_LS0, A9_LS1]>],
179 [3, 1], [A9_LdBypass]>,
180 InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
181 InstrStage<1, [A9_MUX0], 0>,
182 InstrStage<2, [A9_AGU]>,
183 InstrStage<1, [A9_LS0, A9_LS1]>],
184 [4, 1], [A9_LdBypass]>,
185 // FIXME: If the address is 64-bit aligned, the AGU needs only 1 cycle.
186 InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
187 InstrStage<1, [A9_MUX0], 0>,
188 InstrStage<2, [A9_AGU]>,
189 InstrStage<1, [A9_LS0, A9_LS1]>],
190 [3, 3, 1], [A9_LdBypass]>,
193 InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
194 InstrStage<1, [A9_MUX0], 0>,
195 InstrStage<1, [A9_AGU]>,
196 InstrStage<1, [A9_LS0, A9_LS1]>],
197 [3, 1, 1], [A9_LdBypass]>,
198 InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
199 InstrStage<1, [A9_MUX0], 0>,
200 InstrStage<2, [A9_AGU]>,
201 InstrStage<1, [A9_LS0, A9_LS1]>],
202 [4, 1, 1], [A9_LdBypass]>,
203 InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
204 InstrStage<1, [A9_MUX0], 0>,
205 InstrStage<2, [A9_AGU]>,
206 InstrStage<1, [A9_LS0, A9_LS1]>],
207 [3, 3, 1, 1], [A9_LdBypass]>,
209 // Scaled register offset
210 InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
211 InstrStage<1, [A9_MUX0], 0>,
212 InstrStage<1, [A9_AGU]>,
213 InstrStage<1, [A9_LS0, A9_LS1]>],
214 [4, 1, 1], [A9_LdBypass]>,
215 InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
216 InstrStage<1, [A9_MUX0], 0>,
217 InstrStage<2, [A9_AGU]>,
218 InstrStage<1, [A9_LS0, A9_LS1]>],
219 [5, 1, 1], [A9_LdBypass]>,
221 // Immediate offset with update
222 InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
223 InstrStage<1, [A9_MUX0], 0>,
224 InstrStage<1, [A9_AGU]>,
225 InstrStage<1, [A9_LS0, A9_LS1]>],
226 [3, 2, 1], [A9_LdBypass]>,
227 InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
228 InstrStage<1, [A9_MUX0], 0>,
229 InstrStage<2, [A9_AGU]>,
230 InstrStage<1, [A9_LS0, A9_LS1]>],
231 [4, 3, 1], [A9_LdBypass]>,
233 // Register offset with update
234 InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
235 InstrStage<1, [A9_MUX0], 0>,
236 InstrStage<1, [A9_AGU]>,
237 InstrStage<1, [A9_LS0, A9_LS1]>],
238 [3, 2, 1, 1], [A9_LdBypass]>,
239 InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
240 InstrStage<1, [A9_MUX0], 0>,
241 InstrStage<2, [A9_AGU]>,
242 InstrStage<1, [A9_LS0, A9_LS1]>],
243 [4, 3, 1, 1], [A9_LdBypass]>,
244 InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
245 InstrStage<1, [A9_MUX0], 0>,
246 InstrStage<2, [A9_AGU]>,
247 InstrStage<1, [A9_LS0, A9_LS1]>],
248 [3, 3, 1, 1], [A9_LdBypass]>,
250 // Scaled register offset with update
251 InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
252 InstrStage<1, [A9_MUX0], 0>,
253 InstrStage<1, [A9_AGU]>,
254 InstrStage<1, [A9_LS0, A9_LS1]>],
255 [4, 3, 1, 1], [A9_LdBypass]>,
256 InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
257 InstrStage<1, [A9_MUX0], 0>,
258 InstrStage<2, [A9_AGU]>,
259 InstrStage<1, [A9_LS0, A9_LS1]>],
260 [5, 4, 1, 1], [A9_LdBypass]>,
262 // Load multiple, def is the 5th operand.
263 // FIXME: This assumes 3 to 4 registers.
264 InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
265 InstrStage<1, [A9_MUX0], 0>,
266 InstrStage<2, [A9_AGU]>,
267 InstrStage<2, [A9_LS0, A9_LS1]>],
269 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
271 // Load multiple + update, defs are the 1st and 5th operands.
272 InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
273 InstrStage<1, [A9_MUX0], 0>,
274 InstrStage<2, [A9_AGU]>,
275 InstrStage<2, [A9_LS0, A9_LS1]>],
277 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
279 // Load multiple plus branch
280 InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
281 InstrStage<1, [A9_MUX0], 0>,
282 InstrStage<1, [A9_AGU]>,
283 InstrStage<2, [A9_LS0, A9_LS1]>,
284 InstrStage<1, [A9_Branch]>],
286 [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>,
288 // Pop, def is the 3rd operand.
289 InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
290 InstrStage<1, [A9_MUX0], 0>,
291 InstrStage<2, [A9_AGU]>,
292 InstrStage<2, [A9_LS0, A9_LS1]>],
294 [NoBypass, NoBypass, A9_LdBypass]>,
296 // Pop + branch, def is the 3rd operand.
297 InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
298 InstrStage<1, [A9_MUX0], 0>,
299 InstrStage<2, [A9_AGU]>,
300 InstrStage<2, [A9_LS0, A9_LS1]>,
301 InstrStage<1, [A9_Branch]>],
303 [NoBypass, NoBypass, A9_LdBypass]>,
306 // iLoadi + iALUr for t2LDRpci_pic.
307 InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
308 InstrStage<1, [A9_MUX0], 0>,
309 InstrStage<1, [A9_AGU]>,
310 InstrStage<1, [A9_LS0, A9_LS1]>,
311 InstrStage<1, [A9_ALU0, A9_ALU1]>],
314 // Integer store pipeline
317 InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
318 InstrStage<1, [A9_MUX0], 0>,
319 InstrStage<1, [A9_AGU]>,
320 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
321 InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
322 InstrStage<1, [A9_MUX0], 0>,
323 InstrStage<2, [A9_AGU]>,
324 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
325 // FIXME: If the address is 64-bit aligned, the AGU needs only 1 cycle.
326 InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
327 InstrStage<1, [A9_MUX0], 0>,
328 InstrStage<2, [A9_AGU]>,
329 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>,
332 InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
333 InstrStage<1, [A9_MUX0], 0>,
334 InstrStage<1, [A9_AGU]>,
335 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
336 InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
337 InstrStage<1, [A9_MUX0], 0>,
338 InstrStage<2, [A9_AGU]>,
339 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
340 InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
341 InstrStage<1, [A9_MUX0], 0>,
342 InstrStage<2, [A9_AGU]>,
343 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
345 // Scaled register offset
346 InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
347 InstrStage<1, [A9_MUX0], 0>,
348 InstrStage<1, [A9_AGU]>,
349 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
350 InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
351 InstrStage<1, [A9_MUX0], 0>,
352 InstrStage<2, [A9_AGU]>,
353 InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>,
355 // Immediate offset with update
356 InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
357 InstrStage<1, [A9_MUX0], 0>,
358 InstrStage<1, [A9_AGU]>,
359 InstrStage<1, [A9_LS0, A9_LS1]>], [2, 1, 1]>,
360 InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
361 InstrStage<1, [A9_MUX0], 0>,
362 InstrStage<2, [A9_AGU]>,
363 InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1]>,
365 // Register offset with update
366 InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
367 InstrStage<1, [A9_MUX0], 0>,
368 InstrStage<1, [A9_AGU]>,
369 InstrStage<1, [A9_LS0, A9_LS1]>],
371 InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
372 InstrStage<1, [A9_MUX0], 0>,
373 InstrStage<2, [A9_AGU]>,
374 InstrStage<1, [A9_LS0, A9_LS1]>],
376 InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
377 InstrStage<1, [A9_MUX0], 0>,
378 InstrStage<2, [A9_AGU]>,
379 InstrStage<1, [A9_LS0, A9_LS1]>],
382 // Scaled register offset with update
383 InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
384 InstrStage<1, [A9_MUX0], 0>,
385 InstrStage<1, [A9_AGU]>,
386 InstrStage<1, [A9_LS0, A9_LS1]>],
388 InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
389 InstrStage<1, [A9_MUX0], 0>,
390 InstrStage<2, [A9_AGU]>,
391 InstrStage<1, [A9_LS0, A9_LS1]>],
395 InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
396 InstrStage<1, [A9_MUX0], 0>,
397 InstrStage<1, [A9_AGU]>,
398 InstrStage<2, [A9_LS0, A9_LS1]>]>,
400 // Store multiple + update
401 InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
402 InstrStage<1, [A9_MUX0], 0>,
403 InstrStage<1, [A9_AGU]>,
404 InstrStage<2, [A9_LS0, A9_LS1]>], [2]>,
408 // no delay slots, so the latency of a branch is unimportant
409 InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
410 InstrStage<1, [A9_Issue1], 0>,
411 InstrStage<1, [A9_Branch]>]>,
413 // VFP and NEON share the same register file. This means that every VFP
414 // instruction should wait for full completion of the preceding NEON
415 // instruction and vice-versa. We model this behavior with two artificial FUs:
416 // DRegsVFP and DRegsN.
418 // Every VFP instruction:
419 // - Acquires DRegsVFP resource for 1 cycle
420 // - Reserves DRegsN resource for the whole duration (including time to
421 // register file writeback!).
422 // Every NEON instruction does the same but with FUs swapped.
424 // Since the reserved FU cannot be acquired, this models precisely
425 // "cross-domain" stalls.
428 // Issue through integer pipeline, and execute in NEON unit.
430 // FP Special Register to Integer Register File Move
431 InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
432 InstrStage<2, [A9_DRegsN], 0, Reserved>,
433 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
434 InstrStage<1, [A9_MUX0], 0>,
435 InstrStage<1, [A9_NPipe]>]>,
437 // Single-precision FP Unary
438 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
439 // Extra latency cycles since wbck is 2 cycles
440 InstrStage<3, [A9_DRegsN], 0, Reserved>,
441 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
442 InstrStage<1, [A9_MUX0], 0>,
443 InstrStage<1, [A9_NPipe]>],
446 // Double-precision FP Unary
447 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
448 // Extra latency cycles since wbck is 2 cycles
449 InstrStage<3, [A9_DRegsN], 0, Reserved>,
450 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
451 InstrStage<1, [A9_MUX0], 0>,
452 InstrStage<1, [A9_NPipe]>],
456 // Single-precision FP Compare
457 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
458 // Extra latency cycles since wbck is 4 cycles
459 InstrStage<5, [A9_DRegsN], 0, Reserved>,
460 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
461 InstrStage<1, [A9_MUX0], 0>,
462 InstrStage<1, [A9_NPipe]>],
465 // Double-precision FP Compare
466 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
467 // Extra latency cycles since wbck is 4 cycles
468 InstrStage<5, [A9_DRegsN], 0, Reserved>,
469 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
470 InstrStage<1, [A9_MUX0], 0>,
471 InstrStage<1, [A9_NPipe]>],
474 // Single to Double FP Convert
475 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
476 InstrStage<5, [A9_DRegsN], 0, Reserved>,
477 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
478 InstrStage<1, [A9_MUX0], 0>,
479 InstrStage<1, [A9_NPipe]>],
482 // Double to Single FP Convert
483 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
484 InstrStage<5, [A9_DRegsN], 0, Reserved>,
485 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
486 InstrStage<1, [A9_MUX0], 0>,
487 InstrStage<1, [A9_NPipe]>],
491 // Single to Half FP Convert
492 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
493 InstrStage<5, [A9_DRegsN], 0, Reserved>,
494 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
495 InstrStage<1, [A9_MUX0], 0>,
496 InstrStage<1, [A9_NPipe]>],
499 // Half to Single FP Convert
500 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
501 InstrStage<3, [A9_DRegsN], 0, Reserved>,
502 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
503 InstrStage<1, [A9_MUX0], 0>,
504 InstrStage<1, [A9_NPipe]>],
508 // Single-Precision FP to Integer Convert
509 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
510 InstrStage<5, [A9_DRegsN], 0, Reserved>,
511 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
512 InstrStage<1, [A9_MUX0], 0>,
513 InstrStage<1, [A9_NPipe]>],
516 // Double-Precision FP to Integer Convert
517 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
518 InstrStage<5, [A9_DRegsN], 0, Reserved>,
519 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
520 InstrStage<1, [A9_MUX0], 0>,
521 InstrStage<1, [A9_NPipe]>],
524 // Integer to Single-Precision FP Convert
525 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
526 InstrStage<5, [A9_DRegsN], 0, Reserved>,
527 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
528 InstrStage<1, [A9_MUX0], 0>,
529 InstrStage<1, [A9_NPipe]>],
532 // Integer to Double-Precision FP Convert
533 InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
534 InstrStage<5, [A9_DRegsN], 0, Reserved>,
535 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
536 InstrStage<1, [A9_MUX0], 0>,
537 InstrStage<1, [A9_NPipe]>],
540 // Single-precision FP ALU
541 InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
542 InstrStage<5, [A9_DRegsN], 0, Reserved>,
543 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
544 InstrStage<1, [A9_MUX0], 0>,
545 InstrStage<1, [A9_NPipe]>],
548 // Double-precision FP ALU
549 InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
550 InstrStage<5, [A9_DRegsN], 0, Reserved>,
551 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
552 InstrStage<1, [A9_MUX0], 0>,
553 InstrStage<1, [A9_NPipe]>],
556 // Single-precision FP Multiply
557 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
558 InstrStage<6, [A9_DRegsN], 0, Reserved>,
559 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
560 InstrStage<1, [A9_MUX0], 0>,
561 InstrStage<1, [A9_NPipe]>],
564 // Double-precision FP Multiply
565 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
566 InstrStage<7, [A9_DRegsN], 0, Reserved>,
567 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
568 InstrStage<1, [A9_MUX0], 0>,
569 InstrStage<2, [A9_NPipe]>],
572 // Single-precision FP MAC
573 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
574 InstrStage<9, [A9_DRegsN], 0, Reserved>,
575 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
576 InstrStage<1, [A9_MUX0], 0>,
577 InstrStage<1, [A9_NPipe]>],
580 // Double-precision FP MAC
581 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
582 InstrStage<10, [A9_DRegsN], 0, Reserved>,
583 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
584 InstrStage<1, [A9_MUX0], 0>,
585 InstrStage<2, [A9_NPipe]>],
588 // Single-precision FP DIV
589 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
590 InstrStage<16, [A9_DRegsN], 0, Reserved>,
591 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
592 InstrStage<1, [A9_MUX0], 0>,
593 InstrStage<10, [A9_NPipe]>],
596 // Double-precision FP DIV
597 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
598 InstrStage<26, [A9_DRegsN], 0, Reserved>,
599 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
600 InstrStage<1, [A9_MUX0], 0>,
601 InstrStage<20, [A9_NPipe]>],
604 // Single-precision FP SQRT
605 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
606 InstrStage<18, [A9_DRegsN], 0, Reserved>,
607 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
608 InstrStage<1, [A9_MUX0], 0>,
609 InstrStage<13, [A9_NPipe]>],
612 // Double-precision FP SQRT
613 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
614 InstrStage<33, [A9_DRegsN], 0, Reserved>,
615 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
616 InstrStage<1, [A9_MUX0], 0>,
617 InstrStage<28, [A9_NPipe]>],
621 // Integer to Single-precision Move
622 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
623 // Extra 1 latency cycle since wbck is 2 cycles
624 InstrStage<3, [A9_DRegsN], 0, Reserved>,
625 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
626 InstrStage<1, [A9_MUX0], 0>,
627 InstrStage<1, [A9_NPipe]>],
630 // Integer to Double-precision Move
631 InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
632 // Extra 1 latency cycle since wbck is 2 cycles
633 InstrStage<3, [A9_DRegsN], 0, Reserved>,
634 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
635 InstrStage<1, [A9_MUX0], 0>,
636 InstrStage<1, [A9_NPipe]>],
639 // Single-precision to Integer Move
640 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
641 InstrStage<2, [A9_DRegsN], 0, Reserved>,
642 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
643 InstrStage<1, [A9_MUX0], 0>,
644 InstrStage<1, [A9_NPipe]>],
647 // Double-precision to Integer Move
648 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
649 InstrStage<2, [A9_DRegsN], 0, Reserved>,
650 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
651 InstrStage<1, [A9_MUX0], 0>,
652 InstrStage<1, [A9_NPipe]>],
655 // Single-precision FP Load
656 InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
657 InstrStage<2, [A9_DRegsN], 0, Reserved>,
658 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
659 InstrStage<1, [A9_MUX0], 0>,
660 InstrStage<1, [A9_NPipe]>],
663 // Double-precision FP Load
664 // FIXME: Result latency is 1 if address is 64-bit aligned.
665 InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
666 InstrStage<2, [A9_DRegsN], 0, Reserved>,
667 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
668 InstrStage<1, [A9_MUX0], 0>,
669 InstrStage<1, [A9_NPipe]>],
673 InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
674 InstrStage<2, [A9_DRegsN], 0, Reserved>,
675 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
676 InstrStage<1, [A9_MUX0], 0>,
677 InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
679 // FP Load Multiple + update
680 InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
681 InstrStage<2, [A9_DRegsN], 0, Reserved>,
682 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
683 InstrStage<1, [A9_MUX0], 0>,
684 InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
686 // Single-precision FP Store
687 InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
688 InstrStage<2, [A9_DRegsN], 0, Reserved>,
689 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
690 InstrStage<1, [A9_MUX0], 0>,
691 InstrStage<1, [A9_NPipe]>],
694 // Double-precision FP Store
695 InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
696 InstrStage<2, [A9_DRegsN], 0, Reserved>,
697 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
698 InstrStage<1, [A9_MUX0], 0>,
699 InstrStage<1, [A9_NPipe]>],
703 InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
704 InstrStage<2, [A9_DRegsN], 0, Reserved>,
705 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
706 InstrStage<1, [A9_MUX0], 0>,
707 InstrStage<1, [A9_NPipe]>], [1, 1, 1, 1]>,
709 // FP Store Multiple + update
710 InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
711 InstrStage<2, [A9_DRegsN], 0, Reserved>,
712 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
713 InstrStage<1, [A9_MUX0], 0>,
714 InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>,
717 // FIXME: Conservatively assume insufficient alignment.
718 InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
719 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
720 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
721 InstrStage<1, [A9_MUX0], 0>,
722 InstrStage<2, [A9_NPipe]>],
725 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_DRegsN], 0, Required>,
726 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
727 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
728 InstrStage<1, [A9_MUX0], 0>,
729 InstrStage<2, [A9_NPipe]>],
732 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_DRegsN], 0, Required>,
733 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
734 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
735 InstrStage<1, [A9_MUX0], 0>,
736 InstrStage<3, [A9_NPipe]>],
739 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_DRegsN], 0, Required>,
740 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
741 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
742 InstrStage<1, [A9_MUX0], 0>,
743 InstrStage<3, [A9_NPipe]>],
746 InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_DRegsN], 0, Required>,
747 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
748 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
749 InstrStage<1, [A9_MUX0], 0>,
750 InstrStage<2, [A9_NPipe]>],
753 InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_DRegsN], 0, Required>,
754 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
755 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
756 InstrStage<1, [A9_MUX0], 0>,
757 InstrStage<2, [A9_NPipe]>],
760 InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_DRegsN], 0, Required>,
761 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
762 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
763 InstrStage<1, [A9_MUX0], 0>,
764 InstrStage<3, [A9_NPipe]>],
767 InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_DRegsN], 0, Required>,
768 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
769 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
770 InstrStage<1, [A9_MUX0], 0>,
771 InstrStage<3, [A9_NPipe]>],
775 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
776 // Extra latency cycles since wbck is 7 cycles
777 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
778 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
779 InstrStage<1, [A9_MUX0], 0>,
780 InstrStage<2, [A9_NPipe]>],
784 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_DRegsN], 0, Required>,
785 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
786 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
787 InstrStage<1, [A9_MUX0], 0>,
788 InstrStage<3, [A9_NPipe]>],
792 InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_DRegsN], 0, Required>,
793 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
794 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
795 InstrStage<1, [A9_MUX0], 0>,
796 InstrStage<3, [A9_NPipe]>],
800 InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_DRegsN], 0, Required>,
801 // Extra latency cycles since wbck is 7 cycles
802 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
803 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
804 InstrStage<1, [A9_MUX0], 0>,
805 InstrStage<2, [A9_NPipe]>],
809 InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_DRegsN], 0, Required>,
810 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
811 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
812 InstrStage<1, [A9_MUX0], 0>,
813 InstrStage<3, [A9_NPipe]>],
817 InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_DRegsN], 0, Required>,
818 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
819 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
820 InstrStage<1, [A9_MUX0], 0>,
821 InstrStage<3, [A9_NPipe]>],
822 [4, 4, 2, 1, 1, 1, 1, 1]>,
825 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
826 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
827 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
828 InstrStage<1, [A9_MUX0], 0>,
829 InstrStage<4, [A9_NPipe]>],
833 InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_DRegsN], 0, Required>,
834 InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
835 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
836 InstrStage<1, [A9_MUX0], 0>,
837 InstrStage<5, [A9_NPipe]>],
838 [5, 5, 6, 1, 1, 1, 1, 2]>,
841 InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_DRegsN], 0, Required>,
842 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
843 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
844 InstrStage<1, [A9_MUX0], 0>,
845 InstrStage<4, [A9_NPipe]>],
849 InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_DRegsN], 0, Required>,
850 InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
851 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
852 InstrStage<1, [A9_MUX0], 0>,
853 InstrStage<5, [A9_NPipe]>],
854 [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
857 // FIXME: We don't model this instruction properly
858 InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
859 // Extra latency cycles since wbck is 6 cycles
860 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
861 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
862 InstrStage<1, [A9_MUX0], 0>,
863 InstrStage<1, [A9_NPipe]>],
867 // FIXME: We don't model this instruction properly
868 InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
869 // Extra latency cycles since wbck is 6 cycles
870 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
871 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
872 InstrStage<1, [A9_MUX0], 0>,
873 InstrStage<1, [A9_NPipe]>]>,
875 // Double-register Integer Unary
876 InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
877 // Extra latency cycles since wbck is 6 cycles
878 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
879 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
880 InstrStage<1, [A9_MUX0], 0>,
881 InstrStage<1, [A9_NPipe]>],
884 // Quad-register Integer Unary
885 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
886 // Extra latency cycles since wbck is 6 cycles
887 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
888 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
889 InstrStage<1, [A9_MUX0], 0>,
890 InstrStage<1, [A9_NPipe]>],
893 // Double-register Integer Q-Unary
894 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
895 // Extra latency cycles since wbck is 6 cycles
896 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
897 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
898 InstrStage<1, [A9_MUX0], 0>,
899 InstrStage<1, [A9_NPipe]>],
902 // Quad-register Integer Q-Unary
903 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
904 // Extra latency cycles since wbck is 6 cycles
905 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
906 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
907 InstrStage<1, [A9_MUX0], 0>,
908 InstrStage<1, [A9_NPipe]>],
911 // Double-register Integer Binary
912 InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
913 // Extra latency cycles since wbck is 6 cycles
914 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
915 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
916 InstrStage<1, [A9_MUX0], 0>,
917 InstrStage<1, [A9_NPipe]>],
920 // Quad-register Integer Binary
921 InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
922 // Extra latency cycles since wbck is 6 cycles
923 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
924 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
925 InstrStage<1, [A9_MUX0], 0>,
926 InstrStage<1, [A9_NPipe]>],
929 // Double-register Integer Subtract
930 InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
931 // Extra latency cycles since wbck is 6 cycles
932 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
933 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
934 InstrStage<1, [A9_MUX0], 0>,
935 InstrStage<1, [A9_NPipe]>],
938 // Quad-register Integer Subtract
939 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
940 // Extra latency cycles since wbck is 6 cycles
941 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
942 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
943 InstrStage<1, [A9_MUX0], 0>,
944 InstrStage<1, [A9_NPipe]>],
947 // Double-register Integer Shift
948 InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
949 // Extra latency cycles since wbck is 6 cycles
950 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
951 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
952 InstrStage<1, [A9_MUX0], 0>,
953 InstrStage<1, [A9_NPipe]>],
956 // Quad-register Integer Shift
957 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
958 // Extra latency cycles since wbck is 6 cycles
959 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
960 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
961 InstrStage<1, [A9_MUX0], 0>,
962 InstrStage<1, [A9_NPipe]>],
965 // Double-register Integer Shift (4 cycle)
966 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
967 // Extra latency cycles since wbck is 6 cycles
968 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
969 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
970 InstrStage<1, [A9_MUX0], 0>,
971 InstrStage<1, [A9_NPipe]>],
974 // Quad-register Integer Shift (4 cycle)
975 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
976 // Extra latency cycles since wbck is 6 cycles
977 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
978 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
979 InstrStage<1, [A9_MUX0], 0>,
980 InstrStage<1, [A9_NPipe]>],
983 // Double-register Integer Binary (4 cycle)
984 InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
985 // Extra latency cycles since wbck is 6 cycles
986 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
987 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
988 InstrStage<1, [A9_MUX0], 0>,
989 InstrStage<1, [A9_NPipe]>],
992 // Quad-register Integer Binary (4 cycle)
993 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
994 // Extra latency cycles since wbck is 6 cycles
995 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
996 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
997 InstrStage<1, [A9_MUX0], 0>,
998 InstrStage<1, [A9_NPipe]>],
1001 // Double-register Integer Subtract (4 cycle); keyed on the 4-cycle class IIC_VSUBi4D so it no longer duplicates the plain IIC_VSUBiD entry earlier in this table
1002 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1003 // Extra latency cycles since wbck is 6 cycles
1004 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1005 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1006 InstrStage<1, [A9_MUX0], 0>,
1007 InstrStage<1, [A9_NPipe]>],
1010 // Quad-register Integer Subtract (4 cycle); keyed on the 4-cycle class IIC_VSUBi4Q so it no longer duplicates the plain IIC_VSUBiQ entry earlier in this table
1011 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1012 // Extra latency cycles since wbck is 6 cycles
1013 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1014 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1015 InstrStage<1, [A9_MUX0], 0>,
1016 InstrStage<1, [A9_NPipe]>],
1020 // Double-register Integer Count
1021 InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1022 // Extra latency cycles since wbck is 6 cycles
1023 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1024 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1025 InstrStage<1, [A9_MUX0], 0>,
1026 InstrStage<1, [A9_NPipe]>],
1029 // Quad-register Integer Count
1030 // Result written in N3, but that is relative to the last cycle of multicycle,
1031 // so we use 4 for those cases
1032 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1033 // Extra latency cycles since wbck is 7 cycles
1034 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1035 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1036 InstrStage<1, [A9_MUX0], 0>,
1037 InstrStage<2, [A9_NPipe]>],
1040 // Double-register Absolute Difference and Accumulate
1041 InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1042 // Extra latency cycles since wbck is 6 cycles
1043 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1044 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1045 InstrStage<1, [A9_MUX0], 0>,
1046 InstrStage<1, [A9_NPipe]>],
1049 // Quad-register Absolute Difference and Accumulate
1050 InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1051 // Extra latency cycles since wbck is 6 cycles
1052 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1053 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1054 InstrStage<1, [A9_MUX0], 0>,
1055 InstrStage<2, [A9_NPipe]>],
1058 // Double-register Integer Pair Add Long
1059 InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1060 // Extra latency cycles since wbck is 6 cycles
1061 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1062 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1063 InstrStage<1, [A9_MUX0], 0>,
1064 InstrStage<1, [A9_NPipe]>],
1067 // Quad-register Integer Pair Add Long
1068 InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1069 // Extra latency cycles since wbck is 6 cycles
1070 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1071 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1072 InstrStage<1, [A9_MUX0], 0>,
1073 InstrStage<2, [A9_NPipe]>],
1077 // Double-register Integer Multiply (.8, .16)
1078 InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1079 // Extra latency cycles since wbck is 6 cycles
1080 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1081 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1082 InstrStage<1, [A9_MUX0], 0>,
1083 InstrStage<1, [A9_NPipe]>],
1086 // Quad-register Integer Multiply (.8, .16)
1087 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1088 // Extra latency cycles since wbck is 7 cycles
1089 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1090 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1091 InstrStage<1, [A9_MUX0], 0>,
1092 InstrStage<2, [A9_NPipe]>],
1096 // Double-register Integer Multiply (.32)
1097 InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1098 // Extra latency cycles since wbck is 7 cycles
1099 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1100 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1101 InstrStage<1, [A9_MUX0], 0>,
1102 InstrStage<2, [A9_NPipe]>],
1105 // Quad-register Integer Multiply (.32)
1106 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1107 // Extra latency cycles since wbck is 9 cycles
1108 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1109 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1110 InstrStage<1, [A9_MUX0], 0>,
1111 InstrStage<4, [A9_NPipe]>],
1114 // Double-register Integer Multiply-Accumulate (.8, .16)
1115 InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1116 // Extra latency cycles since wbck is 6 cycles
1117 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1118 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1119 InstrStage<1, [A9_MUX0], 0>,
1120 InstrStage<1, [A9_NPipe]>],
1123 // Double-register Integer Multiply-Accumulate (.32)
1124 InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
1125 // Extra latency cycles since wbck is 7 cycles
1126 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1127 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1128 InstrStage<1, [A9_MUX0], 0>,
1129 InstrStage<2, [A9_NPipe]>],
1132 // Quad-register Integer Multiply-Accumulate (.8, .16)
1133 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1134 // Extra latency cycles since wbck is 7 cycles
1135 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1136 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1137 InstrStage<1, [A9_MUX0], 0>,
1138 InstrStage<2, [A9_NPipe]>],
1141 // Quad-register Integer Multiply-Accumulate (.32)
1142 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
1143 // Extra latency cycles since wbck is 9 cycles
1144 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1145 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1146 InstrStage<1, [A9_MUX0], 0>,
1147 InstrStage<4, [A9_NPipe]>],
1152 InstrItinData<IIC_VMOV, [InstrStage<1, [A9_DRegsN], 0, Required>,
1153 InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
1154 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1155 InstrStage<1, [A9_MUX0], 0>,
1156 InstrStage<1, [A9_NPipe]>],
1160 InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
1161 // Extra latency cycles since wbck is 6 cycles
1162 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1163 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1164 InstrStage<1, [A9_MUX0], 0>,
1165 InstrStage<1, [A9_NPipe]>],
1168 // Double-register Permute Move
1169 InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1170 // FIXME: all latencies are arbitrary, no information is available
1171 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1172 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1173 InstrStage<1, [A9_MUX0], 0>,
1174 InstrStage<1, [A9_NPipe]>],
1177 // Quad-register Permute Move
1178 InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1179 // FIXME: all latencies are arbitrary, no information is available
1180 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1181 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1182 InstrStage<1, [A9_MUX0], 0>,
1183 InstrStage<1, [A9_NPipe]>],
1186 // Integer to Single-precision Move
1187 InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
1188 // FIXME: all latencies are arbitrary, no information is available
1189 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1190 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1191 InstrStage<1, [A9_MUX0], 0>,
1192 InstrStage<1, [A9_NPipe]>],
1195 // Integer to Double-precision Move
1196 InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
1197 // FIXME: all latencies are arbitrary, no information is available
1198 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1199 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1200 InstrStage<1, [A9_MUX0], 0>,
1201 InstrStage<1, [A9_NPipe]>],
1204 // Single-precision to Integer Move
1205 InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
1206 // FIXME: all latencies are arbitrary, no information is available
1207 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1208 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1209 InstrStage<1, [A9_MUX0], 0>,
1210 InstrStage<1, [A9_NPipe]>],
1213 // Double-precision to Integer Move
1214 InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
1215 // FIXME: all latencies are arbitrary, no information is available
1216 InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
1217 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1218 InstrStage<1, [A9_MUX0], 0>,
1219 InstrStage<1, [A9_NPipe]>],
1222 // Integer to Lane Move
1223 InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
1224 // FIXME: all latencies are arbitrary, no information is available
1225 InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
1226 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1227 InstrStage<1, [A9_MUX0], 0>,
1228 InstrStage<2, [A9_NPipe]>],
1232 // Vector narrow move
1233 InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_DRegsN], 0, Required>,
1234 // Extra latency cycles since wbck is 6 cycles
1235 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1236 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1237 InstrStage<1, [A9_MUX0], 0>,
1238 InstrStage<1, [A9_NPipe]>],
1241 // Double-register FP Unary
1242 InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1243 // Extra latency cycles since wbck is 6 cycles
1244 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1245 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1246 InstrStage<1, [A9_MUX0], 0>,
1247 InstrStage<1, [A9_NPipe]>],
1250 // Quad-register FP Unary
1251 // Result written in N5, but that is relative to the last cycle of multicycle,
1252 // so we use 6 for those cases
1253 InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1254 // Extra latency cycles since wbck is 7 cycles
1255 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1256 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1257 InstrStage<1, [A9_MUX0], 0>,
1258 InstrStage<2, [A9_NPipe]>],
1261 // Double-register FP Binary
1262 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
1264 InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
1265 // Extra latency cycles since wbck is 6 cycles
1266 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1267 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1268 InstrStage<1, [A9_MUX0], 0>,
1269 InstrStage<1, [A9_NPipe]>],
1272 // Quad-register FP Binary
1273 // Result written in N5, but that is relative to the last cycle of multicycle,
1274 // so we use 6 for those cases
1275 // FIXME: We're using this itin for many instructions and [2, 2] here is too optimistic.
1277 InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1278 // Extra latency cycles since wbck is 7 cycles
1279 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1280 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1281 InstrStage<1, [A9_MUX0], 0>,
1282 InstrStage<2, [A9_NPipe]>],
1285 // Double-register FP Multiply-Accumulate
1286 InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1287 // Extra latency cycles since wbck is 7 cycles
1288 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1289 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1290 InstrStage<1, [A9_MUX0], 0>,
1291 InstrStage<2, [A9_NPipe]>],
1294 // Quad-register FP Multiply-Accumulate
1295 // Result written in N9, but that is relative to the last cycle of multicycle,
1296 // so we use 10 for those cases
1297 InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1298 // Extra latency cycles since wbck is 9 cycles
1299 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1300 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1301 InstrStage<1, [A9_MUX0], 0>,
1302 InstrStage<4, [A9_NPipe]>],
1305 // Double-register Reciprocal Step
1306 InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1307 // Extra latency cycles since wbck is 7 cycles
1308 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1309 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1310 InstrStage<1, [A9_MUX0], 0>,
1311 InstrStage<2, [A9_NPipe]>],
1314 // Quad-register Reciprocal Step
1315 InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1316 // Extra latency cycles since wbck is 9 cycles
1317 InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
1318 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1319 InstrStage<1, [A9_MUX0], 0>,
1320 InstrStage<4, [A9_NPipe]>],
1323 // Double-register Permute
1324 InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1325 // Extra latency cycles since wbck is 6 cycles
1326 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1327 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1328 InstrStage<1, [A9_MUX0], 0>,
1329 InstrStage<1, [A9_NPipe]>],
1332 // Quad-register Permute
1333 // Result written in N2, but that is relative to the last cycle of multicycle,
1334 // so we use 3 for those cases
1335 InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1336 // Extra latency cycles since wbck is 7 cycles
1337 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1338 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1339 InstrStage<1, [A9_MUX0], 0>,
1340 InstrStage<2, [A9_NPipe]>],
1343 // Quad-register Permute (3 cycle issue)
1344 // Result written in N2, but that is relative to the last cycle of multicycle,
1345 // so we use 4 for those cases
1346 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
1347 // Extra latency cycles since wbck is 8 cycles
1348 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1349 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1350 InstrStage<1, [A9_MUX0], 0>,
1351 InstrStage<3, [A9_NPipe]>],
1355 // Double-register VEXT
1356 InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
1357 // Extra latency cycles since wbck is 6 cycles
1358 InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
1359 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1360 InstrStage<1, [A9_MUX0], 0>,
1361 InstrStage<1, [A9_NPipe]>],
1364 // Quad-register VEXT
1365 InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
1366 // Extra latency cycles since wbck is 7 cycles
1367 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1368 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1369 InstrStage<1, [A9_MUX0], 0>,
1370 InstrStage<2, [A9_NPipe]>],
1374 InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
1375 // Extra latency cycles since wbck is 7 cycles
1376 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1377 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1378 InstrStage<1, [A9_MUX0], 0>,
1379 InstrStage<2, [A9_NPipe]>],
// IIC_VTB2: A9_DRegsN required, A9_DRegsVFP reserved for 8 cycles, one issue
// cycle, A9_MUX0, then A9_NPipe (NEON pipeline) for 2 cycles.
// NOTE(review): A9_DRegsN is required for 2 cycles here, whereas the
// IIC_VTB1 and IIC_VTB4 entries require it for 1 -- confirm this is intended.
1381 InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
1382 // Extra latency cycles since wbck is 7 cycles
1383 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1384 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1385 InstrStage<1, [A9_MUX0], 0>,
1386 InstrStage<2, [A9_NPipe]>],
// IIC_VTB3: A9_DRegsN required, A9_DRegsVFP reserved for 9 cycles, one issue
// cycle, A9_MUX0, then A9_NPipe (NEON pipeline) for 3 cycles.
// NOTE(review): A9_DRegsN is required for 2 cycles here, whereas the
// IIC_VTB1 and IIC_VTB4 entries require it for 1 -- confirm this is intended.
1388 InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
1389 // Extra latency cycles since wbck is 8 cycles
1390 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1391 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1392 InstrStage<1, [A9_MUX0], 0>,
1393 InstrStage<3, [A9_NPipe]>],
// IIC_VTB4: A9_DRegsN (FP register set, NEON side) required for 1 cycle,
// A9_DRegsVFP (FP register set, VFP side) reserved for 9 cycles, one issue
// cycle, A9_MUX0, then A9_NPipe (NEON pipeline) for 3 cycles.
1395 InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
1396 // Extra latency cycles since wbck is 8 cycles
1397 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1398 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1399 InstrStage<1, [A9_MUX0], 0>,
1400 InstrStage<3, [A9_NPipe]>],
// Presumably the per-operand cycle list (six values) -- TODO confirm the
// operand order against the instruction definitions.
1401 [4, 2, 2, 3, 3, 1]>,
1404 InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
1405 // Extra latency cycles since wbck is 7 cycles
1406 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1407 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1408 InstrStage<1, [A9_MUX0], 0>,
1409 InstrStage<2, [A9_NPipe]>],
1411 InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
1412 // Extra latency cycles since wbck is 7 cycles
1413 InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
1414 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1415 InstrStage<1, [A9_MUX0], 0>,
1416 InstrStage<2, [A9_NPipe]>],
// IIC_VTBX3: A9_DRegsN required for 1 cycle, A9_DRegsVFP reserved for 9
// cycles, one issue cycle, A9_MUX0, then A9_NPipe (NEON pipeline) for 3
// cycles.
1418 InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
1419 // Extra latency cycles since wbck is 8 cycles
1420 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1421 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1422 InstrStage<1, [A9_MUX0], 0>,
1423 InstrStage<3, [A9_NPipe]>],
// Presumably the per-operand cycle list (six values) -- TODO confirm the
// operand order against the instruction definitions.
1424 [4, 1, 2, 2, 3, 1]>,
// IIC_VTBX4: A9_DRegsN required for 1 cycle, A9_DRegsVFP reserved for 9
// cycles, one issue cycle, A9_MUX0, then A9_NPipe (NEON pipeline) for 2
// cycles.
// NOTE(review): IIC_VTB4 and IIC_VTBX3 pair the same 9-cycle A9_DRegsVFP
// reservation with 3 cycles of A9_NPipe; confirm that 2 is intended here.
1425 InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
1426 // Extra latency cycles since wbck is 8 cycles
1427 InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
1428 InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
1429 InstrStage<1, [A9_MUX0], 0>,
1430 InstrStage<2, [A9_NPipe]>],
// Presumably the per-operand cycle list (seven values) -- TODO confirm the
// operand order against the instruction definitions.
1431 [4, 1, 2, 2, 3, 3, 1]>