1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the itinerary class data for the ARM Cortex A9 processors.
12 //===----------------------------------------------------------------------===//
15 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
18 // Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
20 def CortexA9Itineraries : ProcessorItineraries<[
21 // VFP and NEON shares the same register file. This means that every VFP
22 // instruction should wait for full completion of the consecutive NEON
23 // instruction and vice-versa. We model this behavior with two artificial FUs:
24 // DRegsVFP and DRegsN.
26 // Every VFP instruction:
27 // - Acquires DRegsVFP resource for 1 cycle
28 // - Reserves DRegsN resource for the whole duration (including time to
29 // register file writeback!).
30 // Every NEON instruction does the same but with FUs swapped.
32 // Since the reserved FU cannot be acquired this models precisely "cross-domain"
36 // Issue through integer pipeline, and execute in NEON unit.
38 // FP Special Register to Integer Register File Move
39 InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
40 InstrStage<2, [FU_DRegsN], 0, Reserved>,
41 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
42 InstrStage<1, [FU_NPipe]>]>,
44 // Single-precision FP Unary
45 InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
46 // Extra latency cycles since wbck is 2 cycles
47 InstrStage<3, [FU_DRegsN], 0, Reserved>,
48 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
49 InstrStage<1, [FU_NPipe]>], [1, 1]>,
51 // Double-precision FP Unary
52 InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
53 // Extra latency cycles since wbck is 2 cycles
54 InstrStage<3, [FU_DRegsN], 0, Reserved>,
55 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
56 InstrStage<1, [FU_NPipe]>], [1, 1]>,
59 // Single-precision FP Compare
60 InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
61 // Extra latency cycles since wbck is 4 cycles
62 InstrStage<5, [FU_DRegsN], 0, Reserved>,
63 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
64 InstrStage<1, [FU_NPipe]>], [1, 1]>,
66 // Double-precision FP Compare
67 InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
68 // Extra latency cycles since wbck is 4 cycles
69 InstrStage<5, [FU_DRegsN], 0, Reserved>,
70 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
71 InstrStage<1, [FU_NPipe]>], [1, 1]>,
73 // Single to Double FP Convert
74 InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
75 InstrStage<5, [FU_DRegsN], 0, Reserved>,
76 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
77 InstrStage<1, [FU_NPipe]>], [4, 1]>,
79 // Double to Single FP Convert
80 InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
81 InstrStage<5, [FU_DRegsN], 0, Reserved>,
82 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
83 InstrStage<1, [FU_NPipe]>], [4, 1]>,
86 // Single to Half FP Convert
87 InstrItinData<IIC_fpCVTSH , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
88 InstrStage<5, [FU_DRegsN], 0, Reserved>,
89 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
90 InstrStage<1, [FU_NPipe]>], [4, 1]>,
92 // Half to Single FP Convert
93 InstrItinData<IIC_fpCVTHS , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
94 InstrStage<3, [FU_DRegsN], 0, Reserved>,
95 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
96 InstrStage<1, [FU_NPipe]>], [2, 1]>,
99 // Single-Precision FP to Integer Convert
100 InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
101 InstrStage<5, [FU_DRegsN], 0, Reserved>,
102 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
103 InstrStage<1, [FU_NPipe]>], [4, 1]>,
105 // Double-Precision FP to Integer Convert
106 InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
107 InstrStage<5, [FU_DRegsN], 0, Reserved>,
108 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
109 InstrStage<1, [FU_NPipe]>], [4, 1]>,
111 // Integer to Single-Precision FP Convert
112 InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
113 InstrStage<5, [FU_DRegsN], 0, Reserved>,
114 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
115 InstrStage<1, [FU_NPipe]>], [4, 1]>,
117 // Integer to Double-Precision FP Convert
118 InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
119 InstrStage<5, [FU_DRegsN], 0, Reserved>,
120 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
121 InstrStage<1, [FU_NPipe]>], [4, 1]>,
123 // Single-precision FP ALU
124 InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
125 InstrStage<5, [FU_DRegsN], 0, Reserved>,
126 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
127 InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
129 // Double-precision FP ALU
130 InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
131 InstrStage<5, [FU_DRegsN], 0, Reserved>,
132 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
133 InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
135 // Single-precision FP Multiply
136 InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
137 InstrStage<6, [FU_DRegsN], 0, Reserved>,
138 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
139 InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
141 // Double-precision FP Multiply
142 InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
143 InstrStage<7, [FU_DRegsN], 0, Reserved>,
144 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
145 InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
147 // Single-precision FP MAC
148 InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
149 InstrStage<9, [FU_DRegsN], 0, Reserved>,
150 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
151 InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
153 // Double-precision FP MAC
154 InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
155 InstrStage<10, [FU_DRegsN], 0, Reserved>,
156 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
157 InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>,
159 // Single-precision FP DIV
160 InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
161 InstrStage<16, [FU_DRegsN], 0, Reserved>,
162 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
163 InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
165 // Double-precision FP DIV
166 InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_DRegsVFP], 0, Required>,
167 InstrStage<26, [FU_DRegsN], 0, Reserved>,
168 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
169 InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
171 // Single-precision FP SQRT
172 InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
173 InstrStage<18, [FU_DRegsN], 0, Reserved>,
174 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
175 InstrStage<13, [FU_NPipe]>], [17, 1]>,
177 // Double-precision FP SQRT
178 InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
179 InstrStage<33, [FU_DRegsN], 0, Reserved>,
180 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
181 InstrStage<28, [FU_NPipe]>], [32, 1]>,
184 // Integer to Single-precision Move
185 InstrItinData<IIC_fpMOVIS, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
186 // Extra 1 latency cycle since wbck is 2 cycles
187 InstrStage<3, [FU_DRegsN], 0, Reserved>,
188 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
189 InstrStage<1, [FU_NPipe]>], [1, 1]>,
191 // Integer to Double-precision Move
192 InstrItinData<IIC_fpMOVID, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
193 // Extra 1 latency cycle since wbck is 2 cycles
194 InstrStage<3, [FU_DRegsN], 0, Reserved>,
195 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
196 InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
198 // Single-precision to Integer Move
199 InstrItinData<IIC_fpMOVSI, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
200 InstrStage<2, [FU_DRegsN], 0, Reserved>,
201 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
202 InstrStage<1, [FU_NPipe]>], [1, 1]>,
204 // Double-precision to Integer Move
205 InstrItinData<IIC_fpMOVDI, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
206 InstrStage<2, [FU_DRegsN], 0, Reserved>,
207 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
208 InstrStage<1, [FU_NPipe]>], [1, 1, 1]>,
210 // Single-precision FP Load
211 // use FU_Issue to enforce the 1 load/store per cycle limit
212 InstrItinData<IIC_fpLoad32, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
213 InstrStage<2, [FU_DRegsN], 0, Reserved>,
214 InstrStage<1, [FU_Issue], 0>,
215 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
216 InstrStage<1, [FU_LdSt0], 0>,
217 InstrStage<1, [FU_NPipe]>]>,
219 // Double-precision FP Load
220 // use FU_Issue to enforce the 1 load/store per cycle limit
221 InstrItinData<IIC_fpLoad64, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
222 InstrStage<2, [FU_DRegsN], 0, Reserved>,
223 InstrStage<1, [FU_Issue], 0>,
224 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
225 InstrStage<1, [FU_LdSt0], 0>,
226 InstrStage<1, [FU_NPipe]>]>,
229 // use FU_Issue to enforce the 1 load/store per cycle limit
230 InstrItinData<IIC_fpLoadm, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
231 InstrStage<2, [FU_DRegsN], 0, Reserved>,
232 InstrStage<1, [FU_Issue], 0>,
233 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
234 InstrStage<1, [FU_LdSt0], 0>,
235 InstrStage<1, [FU_NPipe]>]>,
237 // Single-precision FP Store
238 // use FU_Issue to enforce the 1 load/store per cycle limit
239 InstrItinData<IIC_fpStore32,[InstrStage<1, [FU_DRegsVFP], 0, Required>,
240 InstrStage<2, [FU_DRegsN], 0, Reserved>,
241 InstrStage<1, [FU_Issue], 0>,
242 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
243 InstrStage<1, [FU_LdSt0], 0>,
244 InstrStage<1, [FU_NPipe]>]>,
246 // Double-precision FP Store
247 // use FU_Issue to enforce the 1 load/store per cycle limit
248 InstrItinData<IIC_fpStore64,[InstrStage<1, [FU_DRegsVFP], 0, Required>,
249 InstrStage<2, [FU_DRegsN], 0, Reserved>,
250 InstrStage<1, [FU_Issue], 0>,
251 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
252 InstrStage<1, [FU_LdSt0], 0>,
253 InstrStage<1, [FU_NPipe]>]>,
256 // use FU_Issue to enforce the 1 load/store per cycle limit
257 InstrItinData<IIC_fpStorem, [InstrStage<1, [FU_DRegsVFP], 0, Required>,
258 InstrStage<2, [FU_DRegsN], 0, Reserved>,
259 InstrStage<1, [FU_Issue], 0>,
260 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
261 InstrStage<1, [FU_LdSt0], 0>,
262 InstrStage<1, [FU_NPipe]>]>,
264 // Issue through integer pipeline, and execute in NEON unit.
265 // FIXME: Neon pipeline and LdSt unit are multiplexed.
266 // Add some syntactic sugar to model this!
268 // FIXME: We don't model this instruction properly
269 InstrItinData<IIC_VLD1, [InstrStage<1, [FU_DRegsN], 0, Required>,
270 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
271 InstrStage<1, [FU_Issue], 0>,
272 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
273 InstrStage<1, [FU_LdSt0], 0>,
274 InstrStage<1, [FU_NPipe]>]>,
277 // FIXME: We don't model this instruction properly
278 InstrItinData<IIC_VLD2, [InstrStage<1, [FU_DRegsN], 0, Required>,
279 // Extra latency cycles since wbck is 6 cycles
280 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
281 InstrStage<1, [FU_Issue], 0>,
282 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
283 InstrStage<1, [FU_LdSt0], 0>,
284 InstrStage<1, [FU_NPipe]>], [2, 2, 1]>,
287 // FIXME: We don't model this instruction properly
288 InstrItinData<IIC_VLD3, [InstrStage<1, [FU_DRegsN], 0, Required>,
289 // Extra latency cycles since wbck is 6 cycles
290 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
291 InstrStage<1, [FU_Issue], 0>,
292 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
293 InstrStage<1, [FU_LdSt0], 0>,
294 InstrStage<1, [FU_NPipe]>], [2, 2, 2, 1]>,
297 // FIXME: We don't model this instruction properly
298 InstrItinData<IIC_VLD4, [InstrStage<1, [FU_DRegsN], 0, Required>,
299 // Extra latency cycles since wbck is 6 cycles
300 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
301 InstrStage<1, [FU_Issue], 0>,
302 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
303 InstrStage<1, [FU_LdSt0], 0>,
304 InstrStage<1, [FU_NPipe]>], [2, 2, 2, 2, 1]>,
307 // FIXME: We don't model this instruction properly
308 InstrItinData<IIC_VST, [InstrStage<1, [FU_DRegsN], 0, Required>,
309 // Extra latency cycles since wbck is 6 cycles
310 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
311 InstrStage<1, [FU_Issue], 0>,
312 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
313 InstrStage<1, [FU_LdSt0], 0>,
314 InstrStage<1, [FU_NPipe]>]>,
316 // Double-register Integer Unary
317 InstrItinData<IIC_VUNAiD, [InstrStage<1, [FU_DRegsN], 0, Required>,
318 // Extra latency cycles since wbck is 6 cycles
319 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
320 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
321 InstrStage<1, [FU_NPipe]>], [4, 2]>,
323 // Quad-register Integer Unary
324 InstrItinData<IIC_VUNAiQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
325 // Extra latency cycles since wbck is 6 cycles
326 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
327 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
328 InstrStage<1, [FU_NPipe]>], [4, 2]>,
330 // Double-register Integer Q-Unary
331 InstrItinData<IIC_VQUNAiD, [InstrStage<1, [FU_DRegsN], 0, Required>,
332 // Extra latency cycles since wbck is 6 cycles
333 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
334 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
335 InstrStage<1, [FU_NPipe]>], [4, 1]>,
337 // Quad-register Integer Q-Unary
338 InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
339 // Extra latency cycles since wbck is 6 cycles
340 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
341 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
342 InstrStage<1, [FU_NPipe]>], [4, 1]>,
344 // Double-register Integer Binary
345 InstrItinData<IIC_VBINiD, [InstrStage<1, [FU_DRegsN], 0, Required>,
346 // Extra latency cycles since wbck is 6 cycles
347 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
348 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
349 InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
351 // Quad-register Integer Binary
352 InstrItinData<IIC_VBINiQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
353 // Extra latency cycles since wbck is 6 cycles
354 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
355 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
356 InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
358 // Double-register Integer Subtract
359 InstrItinData<IIC_VSUBiD, [InstrStage<1, [FU_DRegsN], 0, Required>,
360 // Extra latency cycles since wbck is 6 cycles
361 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
362 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
363 InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
365 // Quad-register Integer Subtract
366 InstrItinData<IIC_VSUBiQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
367 // Extra latency cycles since wbck is 6 cycles
368 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
369 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
370 InstrStage<1, [FU_NPipe]>], [3, 2, 1]>,
372 // Double-register Integer Shift
373 InstrItinData<IIC_VSHLiD, [InstrStage<1, [FU_DRegsN], 0, Required>,
374 // Extra latency cycles since wbck is 6 cycles
375 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
376 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
377 InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
379 // Quad-register Integer Shift
380 InstrItinData<IIC_VSHLiQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
381 // Extra latency cycles since wbck is 6 cycles
382 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
383 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
384 InstrStage<1, [FU_NPipe]>], [3, 1, 1]>,
386 // Double-register Integer Shift (4 cycle)
387 InstrItinData<IIC_VSHLi4D, [InstrStage<1, [FU_DRegsN], 0, Required>,
388 // Extra latency cycles since wbck is 6 cycles
389 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
390 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
391 InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
393 // Quad-register Integer Shift (4 cycle)
394 InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [FU_DRegsN], 0, Required>,
395 // Extra latency cycles since wbck is 6 cycles
396 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
397 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
398 InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
400 // Double-register Integer Binary (4 cycle)
401 InstrItinData<IIC_VBINi4D, [InstrStage<1, [FU_DRegsN], 0, Required>,
402 // Extra latency cycles since wbck is 6 cycles
403 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
404 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
405 InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
407 // Quad-register Integer Binary (4 cycle)
408 InstrItinData<IIC_VBINi4Q, [InstrStage<1, [FU_DRegsN], 0, Required>,
409 // Extra latency cycles since wbck is 6 cycles
410 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
411 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
412 InstrStage<1, [FU_NPipe]>], [4, 2, 2]>,
414 // Double-register Integer Subtract (4 cycle); the 3-cycle variant is IIC_VSUBiD
415 InstrItinData<IIC_VSUBi4D, [InstrStage<1, [FU_DRegsN], 0, Required>,
416 // Extra latency cycles since wbck is 6 cycles: VFP side stays reserved for 7
417 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
418 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
419 InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
421 // Quad-register Integer Subtract (4 cycle); the 3-cycle variant is IIC_VSUBiQ
422 InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [FU_DRegsN], 0, Required>,
423 // Extra latency cycles since wbck is 6 cycles: VFP side stays reserved for 7
424 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
425 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
426 InstrStage<1, [FU_NPipe]>], [4, 2, 1]>,
429 // Double-register Integer Count
430 InstrItinData<IIC_VCNTiD, [InstrStage<1, [FU_DRegsN], 0, Required>,
431 // Extra latency cycles since wbck is 6 cycles
432 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
433 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
434 InstrStage<1, [FU_NPipe]>], [3, 2, 2]>,
436 // Quad-register Integer Count
437 // Result written in N3, but that is relative to the last cycle of multicycle,
438 // so we use 4 for those cases
439 InstrItinData<IIC_VCNTiQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
440 // Extra latency cycles since wbck is 7 cycles
441 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
442 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
443 InstrStage<2, [FU_NPipe]>], [4, 2, 2]>,
445 // Double-register Absolute Difference and Accumulate
446 InstrItinData<IIC_VABAD, [InstrStage<1, [FU_DRegsN], 0, Required>,
447 // Extra latency cycles since wbck is 6 cycles
448 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
449 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
450 InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>,
452 // Quad-register Absolute Difference and Accumulate
453 InstrItinData<IIC_VABAQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
454 // Extra latency cycles since wbck is 6 cycles
455 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
456 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
457 InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
459 // Double-register Integer Pair Add Long
460 InstrItinData<IIC_VPALiD, [InstrStage<1, [FU_DRegsN], 0, Required>,
461 // Extra latency cycles since wbck is 6 cycles
462 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
463 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
464 InstrStage<1, [FU_NPipe]>], [6, 3, 1]>,
466 // Quad-register Integer Pair Add Long
467 InstrItinData<IIC_VPALiQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
468 // Extra latency cycles since wbck is 6 cycles
469 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
470 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
471 InstrStage<2, [FU_NPipe]>], [6, 3, 1]>,
474 // Double-register Integer Multiply (.8, .16)
475 InstrItinData<IIC_VMULi16D, [InstrStage<1, [FU_DRegsN], 0, Required>,
476 // Extra latency cycles since wbck is 6 cycles
477 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
478 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
479 InstrStage<1, [FU_NPipe]>], [6, 2, 2]>,
481 // Quad-register Integer Multiply (.8, .16)
482 InstrItinData<IIC_VMULi16Q, [InstrStage<1, [FU_DRegsN], 0, Required>,
483 // Extra latency cycles since wbck is 7 cycles
484 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
485 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
486 InstrStage<2, [FU_NPipe]>], [7, 2, 2]>,
489 // Double-register Integer Multiply (.32)
490 InstrItinData<IIC_VMULi32D, [InstrStage<1, [FU_DRegsN], 0, Required>,
491 // Extra latency cycles since wbck is 7 cycles
492 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
493 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
494 InstrStage<2, [FU_NPipe]>], [7, 2, 1]>,
496 // Quad-register Integer Multiply (.32)
497 InstrItinData<IIC_VMULi32Q, [InstrStage<1, [FU_DRegsN], 0, Required>,
498 // Extra latency cycles since wbck is 9 cycles
499 InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
500 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
501 InstrStage<4, [FU_NPipe]>], [9, 2, 1]>,
503 // Double-register Integer Multiply-Accumulate (.8, .16)
504 InstrItinData<IIC_VMACi16D, [InstrStage<1, [FU_DRegsN], 0, Required>,
505 // Extra latency cycles since wbck is 6 cycles
506 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
507 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
508 InstrStage<1, [FU_NPipe]>], [6, 3, 2, 2]>,
510 // Double-register Integer Multiply-Accumulate (.32)
511 InstrItinData<IIC_VMACi32D, [InstrStage<1, [FU_DRegsN], 0, Required>,
512 // Extra latency cycles since wbck is 7 cycles
513 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
514 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
515 InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>,
517 // Quad-register Integer Multiply-Accumulate (.8, .16)
518 InstrItinData<IIC_VMACi16Q, [InstrStage<1, [FU_DRegsN], 0, Required>,
519 // Extra latency cycles since wbck is 7 cycles
520 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
521 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
522 InstrStage<2, [FU_NPipe]>], [7, 3, 2, 2]>,
524 // Quad-register Integer Multiply-Accumulate (.32)
525 InstrItinData<IIC_VMACi32Q, [InstrStage<1, [FU_DRegsN], 0, Required>,
526 // Extra latency cycles since wbck is 9 cycles
527 InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
528 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
529 InstrStage<4, [FU_NPipe]>], [9, 3, 2, 1]>,
532 InstrItinData<IIC_VMOVImm, [InstrStage<1, [FU_DRegsN], 0, Required>,
533 // Extra latency cycles since wbck is 6 cycles
534 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
535 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
536 InstrStage<1, [FU_NPipe]>], [3]>,
538 // Double-register Permute Move
539 InstrItinData<IIC_VMOVD, [InstrStage<1, [FU_DRegsN], 0, Required>,
540 // FIXME: all latencies are arbitrary, no information is available
541 InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
542 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
543 InstrStage<1, [FU_NLSPipe]>], [2, 1]>,
545 // Quad-register Permute Move
546 // Result written in N2, but that is relative to the last cycle of multicycle,
547 // so we use 3 for those cases
548 InstrItinData<IIC_VMOVQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
549 // FIXME: all latencies are arbitrary, no information is available
550 InstrStage<4, [FU_DRegsVFP], 0, Reserved>,
551 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
552 InstrStage<2, [FU_NPipe]>], [3, 1]>,
554 // Integer to Single-precision Move
555 InstrItinData<IIC_VMOVIS , [InstrStage<1, [FU_DRegsN], 0, Required>,
556 // FIXME: all latencies are arbitrary, no information is available
557 InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
558 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
559 InstrStage<1, [FU_NPipe]>], [2, 1]>,
561 // Integer to Double-precision Move
562 InstrItinData<IIC_VMOVID , [InstrStage<1, [FU_DRegsN], 0, Required>,
563 // FIXME: all latencies are arbitrary, no information is available
564 InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
565 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
566 InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
568 // Single-precision to Integer Move
569 InstrItinData<IIC_VMOVSI , [InstrStage<1, [FU_DRegsN], 0, Required>,
570 // FIXME: all latencies are arbitrary, no information is available
571 InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
572 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
573 InstrStage<1, [FU_NPipe]>], [2, 1]>,
575 // Double-precision to Integer Move
576 InstrItinData<IIC_VMOVDI , [InstrStage<1, [FU_DRegsN], 0, Required>,
577 // FIXME: all latencies are arbitrary, no information is available
578 InstrStage<3, [FU_DRegsVFP], 0, Reserved>,
579 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
580 InstrStage<1, [FU_NPipe]>], [2, 2, 1]>,
582 // Integer to Lane Move
583 InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_DRegsN], 0, Required>,
584 // FIXME: all latencies are arbitrary, no information is available
585 InstrStage<4, [FU_DRegsVFP], 0, Reserved>,
586 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
587 InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
590 // Double-register FP Unary
591 InstrItinData<IIC_VUNAD, [InstrStage<1, [FU_DRegsN], 0, Required>,
592 // Extra latency cycles since wbck is 6 cycles
593 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
594 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
595 InstrStage<1, [FU_NPipe]>], [5, 2]>,
597 // Quad-register FP Unary
598 // Result written in N5, but that is relative to the last cycle of multicycle,
599 // so we use 6 for those cases
600 InstrItinData<IIC_VUNAQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
601 // Extra latency cycles since wbck is 7 cycles
602 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
603 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
604 InstrStage<2, [FU_NPipe]>], [6, 2]>,
606 // Double-register FP Binary
607 // FIXME: We're using this itin for many instructions and [2, 2] here is too
609 InstrItinData<IIC_VBIND, [InstrStage<1, [FU_DRegsN], 0, Required>,
610 // Extra latency cycles since wbck is 6 cycles
611 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
612 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
613 InstrStage<1, [FU_NPipe]>], [5, 2, 2]>,
615 // Quad-register FP Binary
616 // Result written in N5, but that is relative to the last cycle of multicycle,
617 // so we use 6 for those cases
618 // FIXME: We're using this itin for many instructions and [2, 2] here is too
620 InstrItinData<IIC_VBINQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
621 // Extra latency cycles since wbck is 7 cycles
622 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
623 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
624 InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
626 // Double-register FP Multiply-Accumulate
627 InstrItinData<IIC_VMACD, [InstrStage<1, [FU_DRegsN], 0, Required>,
628 // Extra latency cycles since wbck is 7 cycles
629 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
630 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
631 InstrStage<2, [FU_NPipe]>], [6, 3, 2, 1]>,
633 // Quad-register FP Multiply-Accumulate
634 // Result written in N9, but that is relative to the last cycle of multicycle,
635 // so we use 10 for those cases
636 InstrItinData<IIC_VMACQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
637 // Extra latency cycles since wbck is 9 cycles
638 InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
639 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
640 InstrStage<4, [FU_NPipe]>], [8, 4, 2, 1]>,
642 // Double-register Reciprocal Step
643 InstrItinData<IIC_VRECSD, [InstrStage<1, [FU_DRegsN], 0, Required>,
644 // Extra latency cycles since wbck is 7 cycles
645 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
646 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
647 InstrStage<2, [FU_NPipe]>], [6, 2, 2]>,
649 // Quad-register Reciprocal Step
650 InstrItinData<IIC_VRECSQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
651 // Extra latency cycles since wbck is 9 cycles
652 InstrStage<10, [FU_DRegsVFP], 0, Reserved>,
653 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
654 InstrStage<4, [FU_NPipe]>], [8, 2, 2]>,
656 // Double-register Permute
657 InstrItinData<IIC_VPERMD, [InstrStage<1, [FU_DRegsN], 0, Required>,
658 // Extra latency cycles since wbck is 6 cycles
659 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
660 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
661 InstrStage<1, [FU_NPipe]>], [2, 2, 1, 1]>,
663 // Quad-register Permute
664 // Result written in N2, but that is relative to the last cycle of multicycle,
665 // so we use 3 for those cases
666 InstrItinData<IIC_VPERMQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
667 // Extra latency cycles since wbck is 7 cycles
668 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
669 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
670 InstrStage<2, [FU_NPipe]>], [3, 3, 1, 1]>,
672 // Quad-register Permute (3 cycle issue)
673 // Result written in N2, but that is relative to the last cycle of multicycle,
674 // so we use 4 for those cases
675 InstrItinData<IIC_VPERMQ3, [InstrStage<1, [FU_DRegsN], 0, Required>,
676 // Extra latency cycles since wbck is 8 cycles
677 InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
678 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
679 InstrStage<3, [FU_NLSPipe]>], [4, 4, 1, 1]>,
682 // Double-register VEXT
683 InstrItinData<IIC_VEXTD, [InstrStage<1, [FU_DRegsN], 0, Required>,
684 // Extra latency cycles since wbck is 6 cycles
685 InstrStage<7, [FU_DRegsVFP], 0, Reserved>,
686 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
687 InstrStage<1, [FU_NPipe]>], [2, 1, 1]>,
689 // Quad-register VEXT
690 InstrItinData<IIC_VEXTQ, [InstrStage<1, [FU_DRegsN], 0, Required>,
691 // Extra latency cycles since wbck is 7 cycles
692 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
693 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
694 InstrStage<2, [FU_NPipe]>], [3, 1, 1]>,
697 InstrItinData<IIC_VTB1, [InstrStage<1, [FU_DRegsN], 0, Required>,
698 // Extra latency cycles since wbck is 7 cycles
699 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
700 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
701 InstrStage<2, [FU_NPipe]>], [3, 2, 1]>,
702 InstrItinData<IIC_VTB2, [InstrStage<2, [FU_DRegsN], 0, Required>,
703 // Extra latency cycles since wbck is 7 cycles
704 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
705 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
706 InstrStage<2, [FU_NPipe]>], [3, 2, 2, 1]>,
707 InstrItinData<IIC_VTB3, [InstrStage<2, [FU_DRegsN], 0, Required>,
708 // Extra latency cycles since wbck is 8 cycles
709 InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
710 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
711 InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 1]>,
712 InstrItinData<IIC_VTB4, [InstrStage<1, [FU_DRegsN], 0, Required>,
713 // Extra latency cycles since wbck is 8 cycles
714 InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
715 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
716 InstrStage<3, [FU_NPipe]>], [4, 2, 2, 3, 3, 1]>,
719 InstrItinData<IIC_VTBX1, [InstrStage<1, [FU_DRegsN], 0, Required>,
720 // Extra latency cycles since wbck is 7 cycles
721 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
722 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
723 InstrStage<2, [FU_NPipe]>], [3, 1, 2, 1]>,
724 InstrItinData<IIC_VTBX2, [InstrStage<1, [FU_DRegsN], 0, Required>,
725 // Extra latency cycles since wbck is 7 cycles
726 InstrStage<8, [FU_DRegsVFP], 0, Reserved>,
727 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
728 InstrStage<2, [FU_NPipe]>], [3, 1, 2, 2, 1]>,
729 InstrItinData<IIC_VTBX3, [InstrStage<1, [FU_DRegsN], 0, Required>,
730 // Extra latency cycles since wbck is 8 cycles
731 InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
732 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
733 InstrStage<3, [FU_NPipe]>], [4, 1, 2, 2, 3, 1]>,
734 InstrItinData<IIC_VTBX4, [InstrStage<1, [FU_DRegsN], 0, Required>,
735 // Extra latency cycles since wbck is 8 cycles
736 InstrStage<9, [FU_DRegsVFP], 0, Reserved>,
737 InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
738 InstrStage<2, [FU_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>