1 //=- AArch64SchedM1.td - Samsung Exynos-M1 Scheduling Defs ---*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the machine model for Samsung Exynos-M1 to support
11 // instruction scheduling and other instruction cost heuristics.
13 //===----------------------------------------------------------------------===//
15 //===----------------------------------------------------------------------===//
16 // The Exynos-M1 is a traditional superscalar microprocessor with a
17 // 4-wide in-order stage for decode and dispatch and a wider issue stage.
18 // The execution units and loads and stores are out-of-order.
// Top-level machine model record for the Exynos-M1. The resource groups and
// the WriteRes/ReadAdvance/InstRW records in this file are bound to it with
// `let SchedModel = ExynosM1Model`.
20 def ExynosM1Model : SchedMachineModel {
21 let IssueWidth = 4; // Up to 4 uops per cycle.
22 let MinLatency = 0; // OoO (out-of-order).
23 let MicroOpBufferSize = 96; // ROB size.
24 let LoopMicroOpBufferSize = 32; // Instruction queue size.
25 let LoadLatency = 4; // Optimistic load cases (same value as the WriteLD latency).
26 let MispredictPenalty = 14; // Minimum branch misprediction penalty.
27 let CompleteModel = 0; // Use the default model otherwise.
30 //===----------------------------------------------------------------------===//
31 // Define each kind of processor resource and number available on the Exynos-M1,
32 // which has 9 pipelines, each with its own queue with out-of-order dispatch.
// Top-level issue resources. The template argument is the number of identical
// units of that kind; the counts below add up to 9 (2 + 1 + 2 + 1 + 1 + 1 + 1).
34 def M1UnitA : ProcResource<2>; // Simple integer
35 def M1UnitC : ProcResource<1>; // Simple and complex integer
36 def M1UnitB : ProcResource<2>; // Branch
37 def M1UnitL : ProcResource<1>; // Load
38 def M1UnitS : ProcResource<1>; // Store
39 def M1PipeF0 : ProcResource<1>; // FP #0
40 def M1PipeF1 : ProcResource<1>; // FP #1
// Functional units behind FP pipe #0: every resource defined inside this `let`
// names M1PipeF0 as its Super resource.
42 let Super = M1PipeF0 in {
43 def M1UnitFMAC : ProcResource<1>; // FP multiplication
44 def M1UnitFCVT : ProcResource<1>; // FP conversion
45 def M1UnitNAL0 : ProcResource<1>; // Simple vector (pipe #0 instance).
46 def M1UnitNMISC : ProcResource<1>; // Miscellanea
47 def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
// Functional units behind FP pipe #1: every resource defined inside this `let`
// names M1PipeF1 as its Super resource.
50 let Super = M1PipeF1 in {
51 def M1UnitFADD : ProcResource<1>; // Simple FP
53 def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized)
54 def M1UnitNAL1 : ProcResource<1>; // Simple vector (pipe #1 instance).
55 def M1UnitFST : ProcResource<1>; // FP store
// Resource groups, bound to ExynosM1Model. M1UnitALU spans both integer unit
// kinds (A and C); M1UnitNALU spans the simple-vector unit of each FP pipe
// (NAL0 and NAL1).
58 let SchedModel = ExynosM1Model in {
59 def M1UnitALU : ProcResGroup<[M1UnitA,
60 M1UnitC]>; // All simple integer.
61 def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
62 M1UnitNAL1]>; // All simple vector.
65 let SchedModel = ExynosM1Model in {
67 //===----------------------------------------------------------------------===//
68 // Coarse scheduling model for the Exynos-M1.
// Each WriteRes below gives one generic SchedWrite type its default resource
// list and result latency (in cycles) for this model.

70 // Branch instructions.
71 // TODO: Non-conditional direct branches take zero cycles and units.
72 def : WriteRes<WriteBr, [M1UnitB]> { let Latency = 1; }
// Register-indirect branches are charged to the complex integer unit.
73 def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
74 // TODO: Branch and link is much different.
76 // Arithmetic and logical integer instructions.
// All of these use the M1UnitALU group with a single-cycle latency.
77 def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; }
78 // TODO: Shift over 3 and some extensions take 2 cycles.
79 def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
80 def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
81 def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; }
84 def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }
86 // Divide and multiply instructions.
// Both divides and multiplies are issued to the complex integer unit (M1UnitC).
87 // TODO: Division blocks the divider inside C.
88 def : WriteRes<WriteID32, [M1UnitC]> { let Latency = 13; }
89 def : WriteRes<WriteID64, [M1UnitC]> { let Latency = 21; }
90 // TODO: Long multiplication takes 5 cycles and also uses the ALU.
91 // TODO: Multiplication with accumulation can be advanced.
92 def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
93 // TODO: 64-bit multiplication has a throughput of 1/2.
94 def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; }
96 // Miscellaneous instructions.
// NOTE(review): M1UnitALU is listed twice for WriteExtr -- presumably to model
// two ALU operations for one extract; confirm this is intentional.
97 def : WriteRes<WriteExtr, [M1UnitALU,
98 M1UnitALU]> { let Latency = 2; }
100 // TODO: The latency for the post or pre register is 1 cycle.
// Address writeback is modelled with no resource and zero latency.
101 def : WriteRes<WriteAdr, []> { let Latency = 0; }
103 // Load instructions.
// Plain loads use the load unit; the indexed form is one cycle slower.
104 def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
105 // TODO: Extended address requires also the ALU.
106 def : WriteRes<WriteLDIdx, [M1UnitL]> { let Latency = 5; }
// NOTE(review): WriteLDHi is mapped to the ALU group rather than M1UnitL --
// confirm this is intended.
107 def : WriteRes<WriteLDHi, [M1UnitALU]> { let Latency = 4; }
109 // Store instructions.
// Every store flavour uses the store unit with a single-cycle latency.
110 def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; }
111 // TODO: Extended address requires also the ALU.
112 def : WriteRes<WriteSTIdx, [M1UnitS]> { let Latency = 1; }
113 def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; }
114 def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; }
116 // FP data instructions.
// Defaults for the generic FP write types; the InstRW records later in the
// file assign more specific writes to individual FP opcodes.
117 def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; }
118 // TODO: FCCMP is much different.
119 def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
120 // TODO: DP takes longer.
121 def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15; }
122 // TODO: MACC takes longer.
123 def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; }
125 // FP miscellaneous instructions.
126 // TODO: Conversion between register files is much different.
127 def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; }
128 def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; }
129 // TODO: Copy from FPR to GPR is much different.
// The generic FP copy is charged to the store unit.
130 def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; }
132 // FP load instructions.
133 // TODO: ASIMD loads are much different.
134 def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
136 // FP store instructions.
137 // TODO: ASIMD stores are much different.
// FP/vector stores occupy both the store unit and the FP store unit.
138 def : WriteRes<WriteVST, [M1UnitS, M1UnitFST]> { let Latency = 1; }
140 // ASIMD FP instructions.
141 // TODO: Other operations are much different.
142 def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
144 // Other miscellaneous instructions.
// System, barrier and hint writes consume no modelled resource.
145 def : WriteRes<WriteSys, []> { let Latency = 1; }
146 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
147 def : WriteRes<WriteHint, []> { let Latency = 1; }
149 //===----------------------------------------------------------------------===//
// Operand read rules. A ReadAdvance of 0 gives the operand no forwarding
// credit; ReadIMA is the only read with a non-zero advance.
152 // TODO: Add FP register forwarding rules.
154 def : ReadAdvance<ReadI, 0>;
155 def : ReadAdvance<ReadISReg, 0>;
156 def : ReadAdvance<ReadIEReg, 0>;
157 def : ReadAdvance<ReadIM, 0>;
158 // Integer multiply-accumulate.
// The 2-cycle advance applies only when the producing write is one of the
// listed multiply writes (WriteIM32 or WriteIM64).
159 // TODO: The forwarding for WriteIM64 actually saves 3 cycles.
160 def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
161 def : ReadAdvance<ReadID, 0>;
162 def : ReadAdvance<ReadExtrHi, 0>;
163 def : ReadAdvance<ReadAdrBase, 0>;
164 def : ReadAdvance<ReadVLD, 0>;
166 //===----------------------------------------------------------------------===//
167 // Finer scheduling model for the Exynos-M1.
// Named SchedWriteRes records referenced by the InstRW mappings below. Each one
// lists the resources it occupies and sets its result latency.
// Naming visible here: M1Write<Unit><N>, where a trailing number <N> is the
// latency in cycles (e.g. M1WriteNAL13 = unit NAL1, latency 3).

// Multi-resource writes used for NEON/FP operations.
169 def M1WriteNEONA : SchedWriteRes<[M1UnitNALU,
171 M1UnitFADD]> { let Latency = 9; }
172 def M1WriteNEONB : SchedWriteRes<[M1UnitNALU,
173 M1UnitFST]> { let Latency = 5; }
174 def M1WriteNEONC : SchedWriteRes<[M1UnitNALU,
175 M1UnitFST]> { let Latency = 6; }
176 def M1WriteNEOND : SchedWriteRes<[M1UnitNALU,
178 M1UnitL]> { let Latency = 10; }
179 def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT,
180 M1UnitFST]> { let Latency = 8; }
181 def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT,
183 M1UnitL]> { let Latency = 13; }
184 def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC,
185 M1UnitFST]> { let Latency = 6; }
186 def M1WriteNEONH : SchedWriteRes<[M1UnitNALU,
187 M1UnitFST]> { let Latency = 3; }
188 def M1WriteNEONI : SchedWriteRes<[M1UnitFST,
189 M1UnitL]> { let Latency = 9; }
// Integer and branch writes.
190 def M1WriteALU1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
191 def M1WriteB : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
192 // FIXME: This is the worst case, conditional branch and link.
193 def M1WriteBL : SchedWriteRes<[M1UnitB,
194 M1UnitALU]> { let Latency = 1; }
195 // FIXME: This is the worst case, when using LR.
196 def M1WriteBLR : SchedWriteRes<[M1UnitB,
198 M1UnitALU]> { let Latency = 2; }
199 def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
200 def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
// Single-unit FP writes.
201 def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; }
202 def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; }
203 def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; }
204 def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; }
205 def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; }
206 def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15; }
207 def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23; }
// Single-unit vector writes: NALU* may use either simple-vector unit, NAL1*
// is restricted to the pipe #1 unit.
208 def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; }
209 def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; }
210 def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; }
211 def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; }
212 def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; }
213 def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
214 def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; }
215 def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; }
216 def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; }
217 def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; }
218 def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; }
// Store-unit write and the test-and-branch write.
219 def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
220 def M1WriteTB : SchedWriteRes<[M1UnitC,
221 M1UnitALU]> { let Latency = 2; }
// Per-opcode mappings: each InstRW assigns the listed M1Write* record to the
// opcodes named by `instrs` (exact names) or `instregex` (name patterns).
223 // Branch instructions
224 def : InstRW<[M1WriteB ], (instrs Bcc)>;
225 def : InstRW<[M1WriteBL], (instrs BL)>;
226 def : InstRW<[M1WriteBLR], (instrs BLR)>;
// Compare-and-branch (CBZ/CBNZ) and test-and-branch (TBZ/TBNZ), W and X forms.
227 def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
228 def : InstRW<[M1WriteTB], (instregex "^TBN?Z[WX]")>;
230 // Arithmetic and logical integer instructions.
231 def : InstRW<[M1WriteALU1], (instrs COPY)>;
233 // Divide and multiply instructions.
235 // Miscellaneous instructions.
237 // Load instructions.
239 // Store instructions.
241 // FP data instructions.
// Scalar FP opcodes; the [DS] classes select the double/single register forms.
242 def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
243 def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>;
244 def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>;
245 def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
// Division and square root: 15 cycles for the S form, 23 for the D form.
246 def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
247 def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
248 def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
249 def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>;
250 def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
251 def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>;
252 def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>;
253 def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
254 def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;
256 // FP miscellaneous instructions.
// Conversions and FMOV variants.
257 def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
258 def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
259 def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
260 def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
// FMOV names with [WX] before [DS] use the store-unit write; names with [DS]
// before [WX] use M1WriteNEONI.
261 def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>;
262 def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;
264 // FP load instructions.
266 // FP store instructions.
268 // ASIMD instructions.
// Integer vector opcodes. Patterns ending in `v` match opcode names that
// continue with a vector-arrangement suffix.
// Absolute difference, absolute value and negate.
269 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
270 def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
271 def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
272 def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
// Add/subtract families (plain, halving, long/wide/pairwise, narrowing,
// saturating, rounding).
273 def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
274 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
275 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
276 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
277 def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
278 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
279 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
// Compares and bitwise logic.
280 def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
281 def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>;
282 def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
// Min/max: element-wise (1 cycle), pairwise (2 cycles), across-vector (3 cycles).
283 def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
284 def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
285 def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
// Multiplies and multiply-accumulates all use the 4-cycle NMISC write.
286 def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
287 def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
288 def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
289 def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
// Shifts: plain shifts and shift-inserts take 1 cycle; accumulating,
// saturating, rounding and narrowing shifts take 3 cycles on NAL1.
290 def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>;
291 def : InstRW<[M1WriteNALU1], (instregex "^[SU]?SH(L|LL|R)2?v")>;
292 def : InstRW<[M1WriteNALU1], (instregex "^S[LR]Iv")>;
293 def : InstRW<[M1WriteNAL13], (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>;
294 def : InstRW<[M1WriteNAL13], (instregex "^[SU](Q|QR|R)SHLU?v")>;
296 // ASIMD FP instructions.
// Vector FP opcodes. Every pattern is anchored with `^` so that it can only
// match from the start of an opcode name, like the other instregex patterns
// in this file.
297 def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>;
298 def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
299 def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>;
// The trailing [^1] rejects names in which `v` is immediately followed by `1`.
300 def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
301 def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
// Division and square root: 15 cycles for the f32 arrangements, 23 for v2f64.
// The `.` stands for the single lane-count character of the f32 arrangement.
302 def : InstRW<[M1WriteFVAR15], (instregex "^FDIVv.f32")>;
303 def : InstRW<[M1WriteFVAR23], (instregex "^FDIVv2f64")>;
304 def : InstRW<[M1WriteFVAR15], (instregex "^FSQRTv.f32")>;
305 def : InstRW<[M1WriteFVAR23], (instregex "^FSQRTv2f64")>;
// Min/max: element-wise and across-vector forms take 1 cycle, pairwise forms 2.
306 def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
307 def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
308 def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v")>;
309 def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v")>;
310 def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>;
312 // ASIMD miscellaneous instructions.
// Bit manipulation, element moves, reciprocal estimates, table lookups and
// permutes.
313 def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>;
314 def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>;
315 def : InstRW<[M1WriteNALU1], (instregex "^CPY")>;
// DUP/INS from a general-purpose register use multi-resource writes; the
// lane-to-lane forms are single-cycle NALU operations.
316 def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>;
317 def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>;
318 def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>;
319 def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>;
// Reciprocal / reciprocal-square-root estimate (E), exponent (X) and step (S).
320 def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev")>;
321 def : InstRW<[M1WriteNMISC1], (instregex "^[FU](RECP|RSQRT)Xv")>;
322 def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>;
323 def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>;
// Table lookups: the One..Four suffix in the opcode name is matched by a
// WriteSequence that repeats the base write that many times. The v8i8 forms
// use the 1-cycle NAL1 write, the v16i8 forms the 2-cycle one.
324 def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>;
325 def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
326 (instregex "^TB[LX]v8i8Two")>;
327 def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
328 (instregex "^TB[LX]v8i8Three")>;
329 def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
330 (instregex "^TB[LX]v8i8Four")>;
331 def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>;
332 def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
333 (instregex "^TB[LX]v16i8Two")>;
334 def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
335 (instregex "^TB[LX]v16i8Three")>;
336 def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
337 (instregex "^TB[LX]v16i8Four")>;
338 def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>;
339 def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>;
// TRN/UZP: the first pattern lists the v8i8/v4i16/v2i32 arrangements (1 cycle),
// the second the v16i8/v8i16/v4i32/v2i64 arrangements (2 cycles).
340 def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)(1|2)(v8i8|v4i16|v2i32)")>;
341 def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)(1|2)(v16i8|v8i16|v4i32|v2i64)")>;
342 def : InstRW<[M1WriteNALU1], (instregex "^ZIP(1|2)v")>;
344 // ASIMD load instructions.
346 // ASIMD store instructions.
348 // Cryptography instructions.
// All of these run on the cryptographic unit, with either a 1-cycle or a
// 5-cycle write.
349 def : InstRW<[M1WriteNCRYPT1], (instregex "^AES")>;
350 def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
351 def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
352 def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
353 def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
354 def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;
// CRC32 opcodes use the complex integer unit with a 2-cycle latency.
357 def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
359 } // SchedModel = ExynosM1Model