1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the AArch64TargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "AArch64ISelLowering.h"
15 #include "AArch64CallingConvention.h"
16 #include "AArch64MachineFunctionInfo.h"
17 #include "AArch64PerfectShuffle.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
20 #include "AArch64TargetObjectFile.h"
21 #include "MCTargetDesc/AArch64AddressingModes.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/CodeGen/CallingConvLower.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/IR/GetElementPtrTypeIterator.h"
29 #include "llvm/IR/Intrinsics.h"
30 #include "llvm/IR/Type.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Debug.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Target/TargetOptions.h"
38 #define DEBUG_TYPE "aarch64-lower"
40 STATISTIC(NumTailCalls, "Number of tail calls");
41 STATISTIC(NumShiftInserts, "Number of vector shift inserts");
43 // Placeholder until EXTR generation is tested fully.
45 EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
46 cl::desc("Allow AArch64 (or (shift)(shift))->extract"),
50 EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
51 cl::desc("Allow AArch64 SLI/SRI formation"),
54 // FIXME: The necessary dtprel relocations don't seem to be supported
55 // well in the GNU bfd and gold linkers at the moment. Therefore, by
56 // default, for now, fall back to GeneralDynamic code generation.
57 cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
58 "aarch64-elf-ldtls-generation", cl::Hidden,
59 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
62 /// Value type used for condition codes.
63 static const MVT MVT_CC = MVT::i32;
65 AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
66 const AArch64Subtarget &STI)
67 : TargetLowering(TM), Subtarget(&STI) {
69 // AArch64 doesn't have comparisons that set GPRs or dedicated setcc instructions,
70 // so we have to make something up. Arbitrarily, choose ZeroOrOne.
71 setBooleanContents(ZeroOrOneBooleanContent);
72 // When comparing vectors, each element of the result is set to all-ones or
73 // all-zeros.
74 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
76 // Set up the register classes.
77 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
78 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
80 if (Subtarget->hasFPARMv8()) {
81 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
82 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
83 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
84 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
87 if (Subtarget->hasNEON()) {
88 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
89 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
90 // Someone set us up the NEON.
91 addDRTypeForNEON(MVT::v2f32);
92 addDRTypeForNEON(MVT::v8i8);
93 addDRTypeForNEON(MVT::v4i16);
94 addDRTypeForNEON(MVT::v2i32);
95 addDRTypeForNEON(MVT::v1i64);
96 addDRTypeForNEON(MVT::v1f64);
97 addDRTypeForNEON(MVT::v4f16);
99 addQRTypeForNEON(MVT::v4f32);
100 addQRTypeForNEON(MVT::v2f64);
101 addQRTypeForNEON(MVT::v16i8);
102 addQRTypeForNEON(MVT::v8i16);
103 addQRTypeForNEON(MVT::v4i32);
104 addQRTypeForNEON(MVT::v2i64);
105 addQRTypeForNEON(MVT::v8f16);
108 // Compute derived properties from the register classes
109 computeRegisterProperties(Subtarget->getRegisterInfo());
111 // Provide all sorts of operation actions
112 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
113 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
114 setOperationAction(ISD::SETCC, MVT::i32, Custom);
115 setOperationAction(ISD::SETCC, MVT::i64, Custom);
116 setOperationAction(ISD::SETCC, MVT::f32, Custom);
117 setOperationAction(ISD::SETCC, MVT::f64, Custom);
118 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
119 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
120 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
121 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
122 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
123 setOperationAction(ISD::SELECT, MVT::i32, Custom);
124 setOperationAction(ISD::SELECT, MVT::i64, Custom);
125 setOperationAction(ISD::SELECT, MVT::f32, Custom);
126 setOperationAction(ISD::SELECT, MVT::f64, Custom);
127 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
128 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
129 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
130 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
131 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
132 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
134 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
135 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
136 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
138 setOperationAction(ISD::FREM, MVT::f32, Expand);
139 setOperationAction(ISD::FREM, MVT::f64, Expand);
140 setOperationAction(ISD::FREM, MVT::f80, Expand);
142 // Custom lowering hooks are needed for XOR
143 // to fold it into CSINC/CSINV.
144 setOperationAction(ISD::XOR, MVT::i32, Custom);
145 setOperationAction(ISD::XOR, MVT::i64, Custom);
147 // Virtually no operation on f128 is legal, but LLVM can't expand them when
148 // there's a valid register class, so we need custom operations in most cases.
149 setOperationAction(ISD::FABS, MVT::f128, Expand);
150 setOperationAction(ISD::FADD, MVT::f128, Custom);
151 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
152 setOperationAction(ISD::FCOS, MVT::f128, Expand);
153 setOperationAction(ISD::FDIV, MVT::f128, Custom);
154 setOperationAction(ISD::FMA, MVT::f128, Expand);
155 setOperationAction(ISD::FMUL, MVT::f128, Custom);
156 setOperationAction(ISD::FNEG, MVT::f128, Expand);
157 setOperationAction(ISD::FPOW, MVT::f128, Expand);
158 setOperationAction(ISD::FREM, MVT::f128, Expand);
159 setOperationAction(ISD::FRINT, MVT::f128, Expand);
160 setOperationAction(ISD::FSIN, MVT::f128, Expand);
161 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
162 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
163 setOperationAction(ISD::FSUB, MVT::f128, Custom);
164 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
165 setOperationAction(ISD::SETCC, MVT::f128, Custom);
166 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
167 setOperationAction(ISD::SELECT, MVT::f128, Custom);
168 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
169 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
171 // Lowering for many of the conversions is actually specified by the non-f128
172 // type. The LowerXXX function will be trivial when f128 isn't involved.
173 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
174 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
175 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
176 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
177 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
178 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
179 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
180 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
181 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
182 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
183 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
184 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
185 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
186 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
188 // Variable arguments.
189 setOperationAction(ISD::VASTART, MVT::Other, Custom);
190 setOperationAction(ISD::VAARG, MVT::Other, Custom);
191 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
192 setOperationAction(ISD::VAEND, MVT::Other, Expand);
194 // Variable-sized objects.
195 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
196 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
197 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
199 // Constant pool entries
200 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
203 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
205 // Add/Sub overflow ops with MVT::Glue are lowered to NZCV dependences.
206 setOperationAction(ISD::ADDC, MVT::i32, Custom);
207 setOperationAction(ISD::ADDE, MVT::i32, Custom);
208 setOperationAction(ISD::SUBC, MVT::i32, Custom);
209 setOperationAction(ISD::SUBE, MVT::i32, Custom);
210 setOperationAction(ISD::ADDC, MVT::i64, Custom);
211 setOperationAction(ISD::ADDE, MVT::i64, Custom);
212 setOperationAction(ISD::SUBC, MVT::i64, Custom);
213 setOperationAction(ISD::SUBE, MVT::i64, Custom);
215 // AArch64 lacks both left-rotate and popcount instructions.
216 setOperationAction(ISD::ROTL, MVT::i32, Expand);
217 setOperationAction(ISD::ROTL, MVT::i64, Expand);
218 for (MVT VT : MVT::vector_valuetypes()) {
219 setOperationAction(ISD::ROTL, VT, Expand);
220 setOperationAction(ISD::ROTR, VT, Expand);
223 // AArch64 doesn't have {U|S}MUL_LOHI.
224 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
225 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
228 // Expand the undefined-at-zero variants of cttz/ctlz to their defined-at-zero
229 // counterparts, which AArch64 supports directly.
230 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
231 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
232 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
233 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
235 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
236 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
238 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
239 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
240 for (MVT VT : MVT::vector_valuetypes()) {
241 setOperationAction(ISD::SDIVREM, VT, Expand);
242 setOperationAction(ISD::UDIVREM, VT, Expand);
244 setOperationAction(ISD::SREM, MVT::i32, Expand);
245 setOperationAction(ISD::SREM, MVT::i64, Expand);
246 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
247 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
248 setOperationAction(ISD::UREM, MVT::i32, Expand);
249 setOperationAction(ISD::UREM, MVT::i64, Expand);
251 // Custom lower Add/Sub/Mul with overflow.
252 setOperationAction(ISD::SADDO, MVT::i32, Custom);
253 setOperationAction(ISD::SADDO, MVT::i64, Custom);
254 setOperationAction(ISD::UADDO, MVT::i32, Custom);
255 setOperationAction(ISD::UADDO, MVT::i64, Custom);
256 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
257 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
258 setOperationAction(ISD::USUBO, MVT::i32, Custom);
259 setOperationAction(ISD::USUBO, MVT::i64, Custom);
260 setOperationAction(ISD::SMULO, MVT::i32, Custom);
261 setOperationAction(ISD::SMULO, MVT::i64, Custom);
262 setOperationAction(ISD::UMULO, MVT::i32, Custom);
263 setOperationAction(ISD::UMULO, MVT::i64, Custom);
265 setOperationAction(ISD::FSIN, MVT::f32, Expand);
266 setOperationAction(ISD::FSIN, MVT::f64, Expand);
267 setOperationAction(ISD::FCOS, MVT::f32, Expand);
268 setOperationAction(ISD::FCOS, MVT::f64, Expand);
269 setOperationAction(ISD::FPOW, MVT::f32, Expand);
270 setOperationAction(ISD::FPOW, MVT::f64, Expand);
271 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
272 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
274 // f16 is a storage-only type, always promote it to f32.
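// For example, a half-precision FADD is typically emitted as a pair of FCVT
// conversions around a single-precision FADD:
//   fcvt s0, h0
//   fcvt s1, h1
//   fadd s0, s0, s1
//   fcvt h0, s0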
275 setOperationAction(ISD::SETCC, MVT::f16, Promote);
276 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
277 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
278 setOperationAction(ISD::SELECT, MVT::f16, Promote);
279 setOperationAction(ISD::FADD, MVT::f16, Promote);
280 setOperationAction(ISD::FSUB, MVT::f16, Promote);
281 setOperationAction(ISD::FMUL, MVT::f16, Promote);
282 setOperationAction(ISD::FDIV, MVT::f16, Promote);
283 setOperationAction(ISD::FREM, MVT::f16, Promote);
284 setOperationAction(ISD::FMA, MVT::f16, Promote);
285 setOperationAction(ISD::FNEG, MVT::f16, Promote);
286 setOperationAction(ISD::FABS, MVT::f16, Promote);
287 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
288 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
289 setOperationAction(ISD::FCOS, MVT::f16, Promote);
290 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
291 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
292 setOperationAction(ISD::FPOW, MVT::f16, Promote);
293 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
294 setOperationAction(ISD::FRINT, MVT::f16, Promote);
295 setOperationAction(ISD::FSIN, MVT::f16, Promote);
296 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
297 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
298 setOperationAction(ISD::FEXP, MVT::f16, Promote);
299 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
300 setOperationAction(ISD::FLOG, MVT::f16, Promote);
301 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
302 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
303 setOperationAction(ISD::FROUND, MVT::f16, Promote);
304 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
305 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
306 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
307 setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
308 setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
310 // v4f16 is also a storage-only type, so promote it to v4f32 when that is safe.
312 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
313 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
314 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
315 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
316 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
317 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
318 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
319 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
320 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
321 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
322 AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
323 AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
325 // Expand all other v4f16 operations.
326 // FIXME: We could generate better code by promoting some operations to v4f32.
328 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
329 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
330 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
331 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
332 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
333 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
334 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
335 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
336 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
337 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
338 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
339 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
340 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
341 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
342 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
343 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
344 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
345 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
346 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
347 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
348 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
349 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
350 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
351 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
352 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
353 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
356 // v8f16 is also a storage-only type, so expand it.
357 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
358 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
359 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
360 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
361 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
362 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
363 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
364 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
365 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
366 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
367 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
368 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
369 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
370 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
371 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
372 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
373 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
374 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
375 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
376 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
377 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
378 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
379 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
380 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
381 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
382 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
383 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
384 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
385 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
386 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
387 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
389 // AArch64 has implementations of a lot of rounding-like FP operations.
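// These map directly onto the FRINT* family: FFLOOR -> FRINTM, FCEIL -> FRINTP,
// FTRUNC -> FRINTZ, FROUND -> FRINTA, FRINT -> FRINTX, FNEARBYINT -> FRINTI,
// while the min/max nodes map onto FMINNM/FMAXNM and FMIN/FMAX.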
390 for (MVT Ty : {MVT::f32, MVT::f64}) {
391 setOperationAction(ISD::FFLOOR, Ty, Legal);
392 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
393 setOperationAction(ISD::FCEIL, Ty, Legal);
394 setOperationAction(ISD::FRINT, Ty, Legal);
395 setOperationAction(ISD::FTRUNC, Ty, Legal);
396 setOperationAction(ISD::FROUND, Ty, Legal);
397 setOperationAction(ISD::FMINNUM, Ty, Legal);
398 setOperationAction(ISD::FMAXNUM, Ty, Legal);
399 setOperationAction(ISD::FMINNAN, Ty, Legal);
400 setOperationAction(ISD::FMAXNAN, Ty, Legal);
403 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
405 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
406 // This requires the Performance Monitors extension.
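// The cycle count is then read with a plain system-register move, e.g.:
//   mrs x8, PMCCNTR_EL0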
407 if (Subtarget->hasPerfMon())
408 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
410 if (Subtarget->isTargetMachO()) {
411 // For iOS, we don't want the normal expansion of a libcall to sincos.
412 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
414 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
415 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
417 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
418 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
421 // Make floating-point constants legal for the large code model, so they don't
422 // become loads from the constant pool.
423 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
424 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
425 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
428 // AArch64 does not have floating-point extending loads, i1 sign-extending
429 // loads, floating-point truncating stores, or v2i32->v2i16 truncating stores.
430 for (MVT VT : MVT::fp_valuetypes()) {
431 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
432 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
433 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
434 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
436 for (MVT VT : MVT::integer_valuetypes())
437 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
439 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
440 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
441 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
442 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
443 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
444 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
445 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
447 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
448 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
450 // Indexed loads and stores are supported.
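// These cover both pre-indexed and post-indexed addressing, e.g.:
//   ldr x0, [x1, #16]!   // pre-indexed: x1 is updated before the load
//   ldr x0, [x1], #16    // post-indexed: x1 is updated after the load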
451 for (unsigned im = (unsigned)ISD::PRE_INC;
452 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
453 setIndexedLoadAction(im, MVT::i8, Legal);
454 setIndexedLoadAction(im, MVT::i16, Legal);
455 setIndexedLoadAction(im, MVT::i32, Legal);
456 setIndexedLoadAction(im, MVT::i64, Legal);
457 setIndexedLoadAction(im, MVT::f64, Legal);
458 setIndexedLoadAction(im, MVT::f32, Legal);
459 setIndexedLoadAction(im, MVT::f16, Legal);
460 setIndexedStoreAction(im, MVT::i8, Legal);
461 setIndexedStoreAction(im, MVT::i16, Legal);
462 setIndexedStoreAction(im, MVT::i32, Legal);
463 setIndexedStoreAction(im, MVT::i64, Legal);
464 setIndexedStoreAction(im, MVT::f64, Legal);
465 setIndexedStoreAction(im, MVT::f32, Legal);
466 setIndexedStoreAction(im, MVT::f16, Legal);
470 setOperationAction(ISD::TRAP, MVT::Other, Legal);
472 // We combine OR nodes for bitfield operations.
473 setTargetDAGCombine(ISD::OR);
475 // Vector add and sub nodes may conceal a high-half opportunity.
476 // Also, try to fold ADD into CSINC/CSINV.
477 setTargetDAGCombine(ISD::ADD);
478 setTargetDAGCombine(ISD::SUB);
480 setTargetDAGCombine(ISD::XOR);
481 setTargetDAGCombine(ISD::SINT_TO_FP);
482 setTargetDAGCombine(ISD::UINT_TO_FP);
484 setTargetDAGCombine(ISD::FP_TO_SINT);
485 setTargetDAGCombine(ISD::FP_TO_UINT);
486 setTargetDAGCombine(ISD::FDIV);
488 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
490 setTargetDAGCombine(ISD::ANY_EXTEND);
491 setTargetDAGCombine(ISD::ZERO_EXTEND);
492 setTargetDAGCombine(ISD::SIGN_EXTEND);
493 setTargetDAGCombine(ISD::BITCAST);
494 setTargetDAGCombine(ISD::CONCAT_VECTORS);
495 setTargetDAGCombine(ISD::STORE);
496 if (Subtarget->supportsAddressTopByteIgnored())
497 setTargetDAGCombine(ISD::LOAD);
499 setTargetDAGCombine(ISD::MUL);
501 setTargetDAGCombine(ISD::SELECT);
502 setTargetDAGCombine(ISD::VSELECT);
504 setTargetDAGCombine(ISD::INTRINSIC_VOID);
505 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
506 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
507 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
509 MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
510 MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
511 MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
513 setStackPointerRegisterToSaveRestore(AArch64::SP);
515 setSchedulingPreference(Sched::Hybrid);
518 MaskAndBranchFoldingIsLegal = true;
519 EnableExtLdPromotion = true;
521 setMinFunctionAlignment(2);
523 setHasExtractBitsInsn(true);
525 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
527 if (Subtarget->hasNEON()) {
528 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
529 // silliness like this:
530 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
531 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
532 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
533 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
534 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
535 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
536 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
537 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
538 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
539 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
540 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
541 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
542 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
543 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
544 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
545 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
546 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
547 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
548 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
549 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
550 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
551 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
552 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
553 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
554 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
556 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
557 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
558 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
559 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
560 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
562 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
564 // AArch64 doesn't have direct vector->f32 conversion instructions for
565 // elements smaller than i32, so promote the input to i32 first.
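// For example, a v4i16 -> v4f32 unsigned conversion ends up roughly as:
//   ushll v0.4s, v0.4h, #0   // widen the i16 lanes to i32
//   ucvtf v0.4s, v0.4s       // convert the i32 lanes to f32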
566 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
567 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
568 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
569 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
570 // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
571 // -> v8f16 conversions.
572 setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
573 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
574 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
575 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
576 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
577 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
578 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
579 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
580 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
581 // Nor is there a direct i32 -> f16 vector conversion; set it to Custom so the
582 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16.
583 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
584 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
586 // AArch64 doesn't have MUL.2d:
587 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
588 // Custom handling for some quad-vector types to detect MULL.
589 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
590 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
591 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
593 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
594 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
595 // Likewise, narrowing and extending vector loads/stores aren't handled directly.
597 for (MVT VT : MVT::vector_valuetypes()) {
598 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
600 setOperationAction(ISD::MULHS, VT, Expand);
601 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
602 setOperationAction(ISD::MULHU, VT, Expand);
603 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
605 setOperationAction(ISD::BSWAP, VT, Expand);
607 for (MVT InnerVT : MVT::vector_valuetypes()) {
608 setTruncStoreAction(VT, InnerVT, Expand);
609 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
610 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
611 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
615 // AArch64 has implementations of a lot of rounding-like FP operations.
616 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
617 setOperationAction(ISD::FFLOOR, Ty, Legal);
618 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
619 setOperationAction(ISD::FCEIL, Ty, Legal);
620 setOperationAction(ISD::FRINT, Ty, Legal);
621 setOperationAction(ISD::FTRUNC, Ty, Legal);
622 setOperationAction(ISD::FROUND, Ty, Legal);
626 // Prefer likely predicted branches to selects on out-of-order cores.
627 if (Subtarget->isCortexA57())
628 PredictableSelectIsExpensive = true;
631 void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
632 if (VT == MVT::v2f32 || VT == MVT::v4f16) {
633 setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
634 AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32);
636 setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
637 AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32);
638 } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) {
639 setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
640 AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64);
642 setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
643 AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i64);
646 // Mark vector float intrinsics as expand.
647 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
648 setOperationAction(ISD::FSIN, VT.getSimpleVT(), Expand);
649 setOperationAction(ISD::FCOS, VT.getSimpleVT(), Expand);
650 setOperationAction(ISD::FPOWI, VT.getSimpleVT(), Expand);
651 setOperationAction(ISD::FPOW, VT.getSimpleVT(), Expand);
652 setOperationAction(ISD::FLOG, VT.getSimpleVT(), Expand);
653 setOperationAction(ISD::FLOG2, VT.getSimpleVT(), Expand);
654 setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand);
655 setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand);
656 setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand);
658 // But we do support custom-lowering for FCOPYSIGN.
659 setOperationAction(ISD::FCOPYSIGN, VT.getSimpleVT(), Custom);
662 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
663 setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
664 setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
665 setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
666 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Custom);
667 setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
668 setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
669 setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
670 setOperationAction(ISD::AND, VT.getSimpleVT(), Custom);
671 setOperationAction(ISD::OR, VT.getSimpleVT(), Custom);
672 setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
673 setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
675 setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
676 setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
677 setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand);
678 for (MVT InnerVT : MVT::all_valuetypes())
679 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT.getSimpleVT(), Expand);
681 // CNT supports only B element sizes.
682 if (VT != MVT::v8i8 && VT != MVT::v16i8)
683 setOperationAction(ISD::CTPOP, VT.getSimpleVT(), Expand);
685 setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
686 setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
687 setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
688 setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
689 setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
691 setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
692 setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
694 // [SU][MIN|MAX] are available for all NEON types apart from i64.
695 if (!VT.isFloatingPoint() &&
696 VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
697 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
698 setOperationAction(Opcode, VT.getSimpleVT(), Legal);
700 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!).
701 if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::f16)
702 for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
703 ISD::FMINNUM, ISD::FMAXNUM})
704 setOperationAction(Opcode, VT.getSimpleVT(), Legal);
706 if (Subtarget->isLittleEndian()) {
707 for (unsigned im = (unsigned)ISD::PRE_INC;
708 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
709 setIndexedLoadAction(im, VT.getSimpleVT(), Legal);
710 setIndexedStoreAction(im, VT.getSimpleVT(), Legal);
715 void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
716 addRegisterClass(VT, &AArch64::FPR64RegClass);
717 addTypeForNEON(VT, MVT::v2i32);
720 void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
721 addRegisterClass(VT, &AArch64::FPR128RegClass);
722 addTypeForNEON(VT, MVT::v4i32);
725 EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
729 return VT.changeVectorElementTypeToInteger();
732 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
733 /// Mask are known to be either zero or one and return them in the
734 /// KnownZero/KnownOne bitsets.
735 void AArch64TargetLowering::computeKnownBitsForTargetNode(
736 const SDValue Op, APInt &KnownZero, APInt &KnownOne,
737 const SelectionDAG &DAG, unsigned Depth) const {
738 switch (Op.getOpcode()) {
741 case AArch64ISD::CSEL: {
742 APInt KnownZero2, KnownOne2;
743 DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1);
744 DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1);
745 KnownZero &= KnownZero2;
746 KnownOne &= KnownOne2;
749 case ISD::INTRINSIC_W_CHAIN: {
750 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
751 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
754 case Intrinsic::aarch64_ldaxr:
755 case Intrinsic::aarch64_ldxr: {
756 unsigned BitWidth = KnownOne.getBitWidth();
757 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
758 unsigned MemBits = VT.getScalarType().getSizeInBits();
759 KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
765 case ISD::INTRINSIC_WO_CHAIN:
766 case ISD::INTRINSIC_VOID: {
767 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
771 case Intrinsic::aarch64_neon_umaxv:
772 case Intrinsic::aarch64_neon_uminv: {
773 // Figure out the datatype of the vector operand. The UMINV instruction
774 // will zero extend the result, so we can mark as known zero all the
775 // bits larger than the element datatype. 32-bit or larger doesn't need
776 // this as those are legal types and will be handled by isel directly.
777 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
778 unsigned BitWidth = KnownZero.getBitWidth();
779 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
780 assert(BitWidth >= 8 && "Unexpected width!");
781 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
783 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
784 assert(BitWidth >= 16 && "Unexpected width!");
785 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
795 MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
800 bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
804 if (Subtarget->requiresStrictAlign())
807 // FIXME: This is mostly true for Cyclone, but not necessarily others.
809 // FIXME: Define an attribute for slow unaligned accesses instead of
810 // relying on the CPU type as a proxy.
811 // On Cyclone, unaligned 128-bit stores are slow.
812 *Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 ||
813 // See comments in performSTORECombine() for more details about
816 // Code that uses clang vector extensions can mark that it
817 // wants unaligned accesses to be treated as fast by
818 // underspecifying alignment to be 1 or 2.
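// For instance, a type declared along the lines of
//   typedef int v4i32 __attribute__((vector_size(16), aligned(2)));
// asks for 2-byte alignment on a 16-byte vector, and accesses through it
// should still be reported as fast here.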
821 // Disregard v2i64. Memcpy lowering produces those and splitting
822 // them regresses performance on micro-benchmarks and olden/bh.
829 AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
830 const TargetLibraryInfo *libInfo) const {
831 return AArch64::createFastISel(funcInfo, libInfo);
834 const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
835 switch ((AArch64ISD::NodeType)Opcode) {
836 case AArch64ISD::FIRST_NUMBER: break;
837 case AArch64ISD::CALL: return "AArch64ISD::CALL";
838 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
839 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
840 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
841 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
842 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
843 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
844 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
845 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
846 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
847 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
848 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
849 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
850 case AArch64ISD::ADC: return "AArch64ISD::ADC";
851 case AArch64ISD::SBC: return "AArch64ISD::SBC";
852 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
853 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
854 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
855 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
856 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
857 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
858 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
859 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
860 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
861 case AArch64ISD::DUP: return "AArch64ISD::DUP";
862 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
863 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
864 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
865 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
866 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
867 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
868 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
869 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
870 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
871 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
872 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
873 case AArch64ISD::BICi: return "AArch64ISD::BICi";
874 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
875 case AArch64ISD::BSL: return "AArch64ISD::BSL";
876 case AArch64ISD::NEG: return "AArch64ISD::NEG";
877 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
878 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
879 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
880 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
881 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
882 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
883 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
884 case AArch64ISD::REV16: return "AArch64ISD::REV16";
885 case AArch64ISD::REV32: return "AArch64ISD::REV32";
886 case AArch64ISD::REV64: return "AArch64ISD::REV64";
887 case AArch64ISD::EXT: return "AArch64ISD::EXT";
888 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
889 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
890 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
891 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
892 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
893 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
894 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
895 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
896 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
897 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
898 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
899 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
900 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
901 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
902 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
903 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
904 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
905 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
906 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
907 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
908 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
909 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
910 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
911 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
912 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
913 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
914 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
915 case AArch64ISD::NOT: return "AArch64ISD::NOT";
916 case AArch64ISD::BIT: return "AArch64ISD::BIT";
917 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
918 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
919 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
920 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
921 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
922 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
923 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
924 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
925 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
926 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
927 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
928 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
929 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
930 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
931 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
932 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
933 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
934 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
935 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
936 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
937 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
938 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
939 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
940 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
941 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
942 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
943 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
944 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
945 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
946 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
947 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
948 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
949 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
950 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
951 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
952 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
953 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
954 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
955 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
956 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
962 AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
963 MachineBasicBlock *MBB) const {
964 // We materialise the F128CSEL pseudo-instruction as some control flow and a phi node:
968 // [... previous instrs leading to comparison ...]
974 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
976 MachineFunction *MF = MBB->getParent();
977 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
978 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
979 DebugLoc DL = MI->getDebugLoc();
980 MachineFunction::iterator It = ++MBB->getIterator();
982 unsigned DestReg = MI->getOperand(0).getReg();
983 unsigned IfTrueReg = MI->getOperand(1).getReg();
984 unsigned IfFalseReg = MI->getOperand(2).getReg();
985 unsigned CondCode = MI->getOperand(3).getImm();
986 bool NZCVKilled = MI->getOperand(4).isKill();
988 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
989 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
990 MF->insert(It, TrueBB);
991 MF->insert(It, EndBB);
993 // Transfer rest of current basic-block to EndBB
994 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
996 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
998 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
999 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1000 MBB->addSuccessor(TrueBB);
1001 MBB->addSuccessor(EndBB);
1003 // TrueBB falls through to the end.
1004 TrueBB->addSuccessor(EndBB);
1007 TrueBB->addLiveIn(AArch64::NZCV);
1008 EndBB->addLiveIn(AArch64::NZCV);
1011 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1017 MI->eraseFromParent();
1022 AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
1023 MachineBasicBlock *BB) const {
1024 switch (MI->getOpcode()) {
1029 llvm_unreachable("Unexpected instruction for custom inserter!");
1031 case AArch64::F128CSEL:
1032 return EmitF128CSEL(MI, BB);
1034 case TargetOpcode::STACKMAP:
1035 case TargetOpcode::PATCHPOINT:
1036 return emitPatchPoint(MI, BB);
1040 //===----------------------------------------------------------------------===//
1041 // AArch64 Lowering private implementation.
1042 //===----------------------------------------------------------------------===//
1044 //===----------------------------------------------------------------------===//
1046 //===----------------------------------------------------------------------===//
1048 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC.
1050 static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1053 llvm_unreachable("Unknown condition code!");
1055 return AArch64CC::NE;
1057 return AArch64CC::EQ;
1059 return AArch64CC::GT;
1061 return AArch64CC::GE;
1063 return AArch64CC::LT;
1065 return AArch64CC::LE;
1067 return AArch64CC::HI;
1069 return AArch64CC::HS;
1071 return AArch64CC::LO;
1073 return AArch64CC::LS;
1077 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1078 static void changeFPCCToAArch64CC(ISD::CondCode CC,
1079 AArch64CC::CondCode &CondCode,
1080 AArch64CC::CondCode &CondCode2) {
1081 CondCode2 = AArch64CC::AL;
1084 llvm_unreachable("Unknown FP condition!");
1087 CondCode = AArch64CC::EQ;
1091 CondCode = AArch64CC::GT;
1095 CondCode = AArch64CC::GE;
1098 CondCode = AArch64CC::MI;
1101 CondCode = AArch64CC::LS;
1104 CondCode = AArch64CC::MI;
1105 CondCode2 = AArch64CC::GT;
1108 CondCode = AArch64CC::VC;
1111 CondCode = AArch64CC::VS;
1114 CondCode = AArch64CC::EQ;
1115 CondCode2 = AArch64CC::VS;
1118 CondCode = AArch64CC::HI;
1121 CondCode = AArch64CC::PL;
1125 CondCode = AArch64CC::LT;
1129 CondCode = AArch64CC::LE;
1133 CondCode = AArch64CC::NE;
1138 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1139 /// CC usable with the vector instructions. Fewer operations are available
1140 /// without a real NZCV register, so we have to use less efficient combinations
1141 /// to get the same effect.
1142 static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1143 AArch64CC::CondCode &CondCode,
1144 AArch64CC::CondCode &CondCode2,
1149 // Mostly the scalar mappings work fine.
1150 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1153 Invert = true; // Fallthrough
1155 CondCode = AArch64CC::MI;
1156 CondCode2 = AArch64CC::GE;
1163 // All of the compare-mask comparisons are ordered, but we can switch
1164 // between the two by a double inversion. E.g. ULE == !OGT.
1166 changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1171 static bool isLegalArithImmed(uint64_t C) {
1172 // Matches AArch64DAGToDAGISel::SelectArithImmed().
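// An arithmetic immediate is a 12-bit value, optionally shifted left by 12.
// For example, 0xFFF and 0xFFF000 are legal, while 0x1001 is not.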
1173 return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1176 static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1177 SDLoc dl, SelectionDAG &DAG) {
1178 EVT VT = LHS.getValueType();
1180 if (VT.isFloatingPoint())
1181 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1183 // The CMP instruction is just an alias for SUBS, and representing it as
1184 // SUBS means that it's possible to get CSE with subtract operations.
1185 // A later phase can perform the optimization of setting the destination
1186 // register to WZR/XZR if it ends up being unused.
1187 unsigned Opcode = AArch64ISD::SUBS;
1189 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
1190 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1191 // We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on
1192 // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
1193 // can be set differently by this operation. It comes down to whether
1194 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1195 // everything is fine. If not then the optimization is wrong. Thus general
1196 // comparisons are only valid if op2 != 0.
1198 // So, finally, the only LLVM-native comparisons that don't mention C and V
1199 // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1200 // the absence of information about op2.
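// For example, with op2 == 0: CMP op1, #0 always sets the carry flag, while
// the corresponding CMN op1, #0 (an ADDS) never does, so any comparison that
// reads C or V could observe different flags after the rewrite.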
1201 Opcode = AArch64ISD::ADDS;
1202 RHS = RHS.getOperand(1);
1203 } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1204 !isUnsignedIntSetCC(CC)) {
1205 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1206 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1207 // of the signed comparisons.
1208 Opcode = AArch64ISD::ANDS;
1209 RHS = LHS.getOperand(1);
1210 LHS = LHS.getOperand(0);
1213 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1217 /// \defgroup AArch64CCMP CMP;CCMP matching
1219 /// These functions deal with the formation of CMP;CCMP;... sequences.
1220 /// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1221 /// a comparison. They set the NZCV flags to a predefined value if their
1222 /// predicate is false. This allows expressing arbitrary conjunctions, for
1223 /// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1226 /// ccmp B, inv(CB), CA
1227 /// check for CB flags
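/// As a concrete example, a condition such as "a == 0 && b == 5" can be
/// lowered to:
///     cmp  w0, #0
///     ccmp w1, #5, #0, eq   ; if a != 0, NZCV becomes 0000 so EQ fails
///     ; branch/select on EQ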
1229 /// In general we can create code for arbitrary "... (and (and A B) C)"
1230 /// sequences. We can also implement some "or" expressions, because "(or A B)"
1231 /// is equivalent to "not (and (not A) (not B))" and we can implement some
1232 /// negation operations:
1233 /// We can negate the results of a single comparison by inverting the flags
1234 /// used when the predicate fails and inverting the flags tested in the next
1235 /// instruction; We can also negate the results of the whole previous
1236 /// conditional compare sequence by inverting the flags tested in the next
1237 /// instruction. However there is no way to negate the result of a partial sequence.
1240 /// Therefore, on encountering an "or" expression, we can negate the subtree on
1241 /// one side and have to be able to push the negate to the leaves of the subtree
1242 /// on the other side (see also the comments in code). As complete example:
1243 /// "or (or (setCA (cmp A)) (setCB (cmp B)))
1244 /// (and (setCC (cmp C)) (setCD (cmp D)))"
1245 /// is transformed to
1246 /// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
1247 /// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1248 /// and implemented as:
1250 /// ccmp D, inv(CD), CC
1251 /// ccmp A, CA, inv(CD)
1252 /// ccmp B, CB, inv(CA)
1253 /// check for CB flags
1254 /// A counterexample is "or (and A B) (and C D)" which cannot be implemented
1255 /// by conditional compare sequences.
1258 /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1259 static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1260 ISD::CondCode CC, SDValue CCOp,
1261 SDValue Condition, unsigned NZCV,
1262 SDLoc DL, SelectionDAG &DAG) {
1263 unsigned Opcode = 0;
1264 if (LHS.getValueType().isFloatingPoint())
1265 Opcode = AArch64ISD::FCCMP;
1266 else if (RHS.getOpcode() == ISD::SUB) {
1267 SDValue SubOp0 = RHS.getOperand(0);
1268 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1269 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1270 Opcode = AArch64ISD::CCMN;
1271 RHS = RHS.getOperand(1);
1275 Opcode = AArch64ISD::CCMP;
1277 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1278 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1281 /// Returns true if @p Val is a tree of AND/OR/SETCC operations.
1282 /// CanPushNegate is set to true if we can push a negate operation through
1283 /// the tree in a way that we are left with AND operations and negate operations
1284 /// at the leaves only, i.e. "not (or (or x y) z)" can be changed to
1285 /// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
1286 /// brought into such a form.
1287 static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate,
1288 unsigned Depth = 0) {
1289 if (!Val.hasOneUse())
1291 unsigned Opcode = Val->getOpcode();
1292 if (Opcode == ISD::SETCC) {
1293 CanPushNegate = true;
1296 // Protect against stack overflow.
1299 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1300 SDValue O0 = Val->getOperand(0);
1301 SDValue O1 = Val->getOperand(1);
1302 bool CanPushNegateL;
1303 if (!isConjunctionDisjunctionTree(O0, CanPushNegateL, Depth+1))
1305 bool CanPushNegateR;
1306 if (!isConjunctionDisjunctionTree(O1, CanPushNegateR, Depth+1))
1308 // We cannot push a negate through an AND operation (it would become an OR),
1309 // we can however change a (not (or x y)) to (and (not x) (not y)) if we can
1310 // push the negate through the x/y subtrees.
1311 CanPushNegate = (Opcode == ISD::OR) && CanPushNegateL && CanPushNegateR;
1317 /// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1318 /// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1319 /// Tries to transform the given i1 producing node @p Val to a series of compare
1320 /// and conditional compare operations. @returns an NZCV flags producing node
1321 /// and sets @p OutCC to the flags that should be tested or returns SDValue() if
1322 /// transformation was not possible.
1323 /// On recursive invocations @p PushNegate may be set to true to have negation
1324 /// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate
1325 /// for the comparisons in the current subtree; @p Depth limits the search
1326 /// depth to avoid stack overflow.
1327 static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
1328 AArch64CC::CondCode &OutCC, bool PushNegate = false,
1329 SDValue CCOp = SDValue(), AArch64CC::CondCode Predicate = AArch64CC::AL,
1330 unsigned Depth = 0) {
1331 // We're at a tree leaf, produce a conditional comparison operation.
1332 unsigned Opcode = Val->getOpcode();
1333 if (Opcode == ISD::SETCC) {
1334 SDValue LHS = Val->getOperand(0);
1335 SDValue RHS = Val->getOperand(1);
1336 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1337 bool isInteger = LHS.getValueType().isInteger();
1339 CC = getSetCCInverse(CC, isInteger);
1341 // Determine OutCC and handle FP special case.
1343 OutCC = changeIntCCToAArch64CC(CC);
1345 assert(LHS.getValueType().isFloatingPoint());
1346 AArch64CC::CondCode ExtraCC;
1347 changeFPCCToAArch64CC(CC, OutCC, ExtraCC);
1348 // Surprisingly, some floating point conditions can't be tested with a
1349 // single condition code. Construct an additional comparison in this case.
1350 // See comment below on how we deal with OR conditions.
1351 if (ExtraCC != AArch64CC::AL) {
1353 if (!CCOp.getNode())
1354 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1356 SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
1357 // Note that we want the inverse of ExtraCC, so NZCV is not inverted.
1358 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC);
1359 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp,
1363 Predicate = AArch64CC::getInvertedCondCode(ExtraCC);
1364 OutCC = AArch64CC::getInvertedCondCode(OutCC);
1368 // Produce a normal comparison if we are first in the chain
1369 if (!CCOp.getNode())
1370 return emitComparison(LHS, RHS, CC, DL, DAG);
1371 // Otherwise produce a ccmp.
1372 SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
1373 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1374 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1375 return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL,
1377 } else if ((Opcode != ISD::AND && Opcode != ISD::OR) || !Val->hasOneUse())
1380 assert((Opcode == ISD::OR || !PushNegate)
1381 && "Can only push negate through OR operation");
1383 // Check if both sides can be transformed.
1384 SDValue LHS = Val->getOperand(0);
1385 SDValue RHS = Val->getOperand(1);
1386 bool CanPushNegateL;
1387 if (!isConjunctionDisjunctionTree(LHS, CanPushNegateL, Depth+1))
1389 bool CanPushNegateR;
1390 if (!isConjunctionDisjunctionTree(RHS, CanPushNegateR, Depth+1))
1393 // Do we need to negate our operands?
1394 bool NegateOperands = Opcode == ISD::OR;
1395 // We can negate the results of all previous operations by inverting the
1396 // predicate flags, giving us a free negation for one side. For the other side
1397 // we need to be able to push the negation to the leaves of the tree.
1398 if (NegateOperands) {
1399 if (!CanPushNegateL && !CanPushNegateR)
1401 // Order the side where we can push the negate through to LHS.
1402 if (!CanPushNegateL && CanPushNegateR)
1403 std::swap(LHS, RHS);
1405 bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
1406 bool NeedsNegOutR = RHS->getOpcode() == ISD::OR;
1407 if (NeedsNegOutL && NeedsNegOutR)
1409 // Order the side where we need to negate the output flags to RHS so it
1410 // gets emitted first.
1412 std::swap(LHS, RHS);
1415 // Emit RHS. If we want to negate the tree we only need to push a negate
1416 // through if we are already in a PushNegate case, otherwise we can negate
1417 // the "flags to test" afterwards.
1418 AArch64CC::CondCode RHSCC;
1419 SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, PushNegate,
1420 CCOp, Predicate, Depth+1);
1421 if (NegateOperands && !PushNegate)
1422 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1423 // Emit LHS. We must push the negate through if we need to negate it.
1424 SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, OutCC, NegateOperands,
1425 CmpR, RHSCC, Depth+1);
1426 // If we transformed an OR into an AND then we have to negate the result
1427 // (or absorb a PushNegate resulting in a double negation).
1428 if (Opcode == ISD::OR && !PushNegate)
1429 OutCC = AArch64CC::getInvertedCondCode(OutCC);
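//
// Illustrative sketch (not a literal dump of what this function builds): for
// an input like
//   (and (setcc %a, %b, seteq), (setcc %c, %d, setlt))
// the recursion above emits a plain compare for one leaf and chains the other
// leaf as a conditional compare, ending up with something roughly like
//   cmp  x2, x3              ; the setlt leaf
//   ccmp x0, x1, #0, lt      ; only meaningful if the first compare was "lt";
//                            ; otherwise NZCV is forced to a value failing "eq"
// and the caller then tests the "eq" flag. The exact NZCV immediate and
// condition predicates depend on the negation bookkeeping above; register
// names here are placeholders.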
1435 static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1436 SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
1437 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1438 EVT VT = RHS.getValueType();
1439 uint64_t C = RHSC->getZExtValue();
1440 if (!isLegalArithImmed(C)) {
1441 // Constant does not fit, try adjusting it by one?
1447 if ((VT == MVT::i32 && C != 0x80000000 &&
1448 isLegalArithImmed((uint32_t)(C - 1))) ||
1449 (VT == MVT::i64 && C != 0x8000000000000000ULL &&
1450 isLegalArithImmed(C - 1ULL))) {
1451 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1452 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1453 RHS = DAG.getConstant(C, dl, VT);
1458 if ((VT == MVT::i32 && C != 0 &&
1459 isLegalArithImmed((uint32_t)(C - 1))) ||
1460 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1461 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1462 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1463 RHS = DAG.getConstant(C, dl, VT);
1468 if ((VT == MVT::i32 && C != INT32_MAX &&
1469 isLegalArithImmed((uint32_t)(C + 1))) ||
1470 (VT == MVT::i64 && C != INT64_MAX &&
1471 isLegalArithImmed(C + 1ULL))) {
1472 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1473 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1474 RHS = DAG.getConstant(C, dl, VT);
1479 if ((VT == MVT::i32 && C != UINT32_MAX &&
1480 isLegalArithImmed((uint32_t)(C + 1))) ||
1481 (VT == MVT::i64 && C != UINT64_MAX &&
1482 isLegalArithImmed(C + 1ULL))) {
1483 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1484 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1485 RHS = DAG.getConstant(C, dl, VT);
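// For example (illustrative only): (setlt x, 0x1001) has an immediate that is
// not encodable as a 12-bit (optionally shifted) arithmetic immediate, but
// 0x1000 is, so the adjustment above rewrites it as (setle x, 0x1000) and
// avoids materializing the constant in a register.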
1492 AArch64CC::CondCode AArch64CC;
1493 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1494 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1496 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1497 // For the i8 operand, the largest immediate is 255, so this can be easily
1498 // encoded in the compare instruction. For the i16 operand, however, the
1499 // largest immediate cannot be encoded in the compare.
1500 // Therefore, use a sign extending load and cmn to avoid materializing the
1501 // -1 constant. For example,
1503 // ldrh w0, [x0, #0]
// cmp w0, w1      (with w1 materialized as #65535)
// is better emitted as
1506 // ldrsh w0, [x0, #0]
// cmn w0, #1
1508 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1509 // if and only if (sext LHS) == (sext RHS). The checks are in place to
1510 // ensure both the LHS and RHS are truly zero extended and to make sure the
1511 // transformation is profitable.
1512 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1513 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1514 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1515 LHS.getNode()->hasNUsesOfValue(1, 0)) {
1516 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1517 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1519 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1520 DAG.getValueType(MVT::i16));
1521 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
1522 RHS.getValueType()),
1524 AArch64CC = changeIntCCToAArch64CC(CC);
1528 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
1529 if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
1530 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
1531 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
1537 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1538 AArch64CC = changeIntCCToAArch64CC(CC);
1540 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
1544 static std::pair<SDValue, SDValue>
1545 getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
1546 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
1547 "Unsupported value type");
1548 SDValue Value, Overflow;
1550 SDValue LHS = Op.getOperand(0);
1551 SDValue RHS = Op.getOperand(1);
1553 switch (Op.getOpcode()) {
1555 llvm_unreachable("Unknown overflow instruction!");
1557 Opc = AArch64ISD::ADDS;
1561 Opc = AArch64ISD::ADDS;
1565 Opc = AArch64ISD::SUBS;
1569 Opc = AArch64ISD::SUBS;
1572 // Multiply needs a little bit of extra work.
1576 bool IsSigned = Op.getOpcode() == ISD::SMULO;
1577 if (Op.getValueType() == MVT::i32) {
1578 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1579 // For a 32 bit multiply with overflow check we want the instruction
1580 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
1581 // need to generate the following pattern:
1582 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
1583 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
1584 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
1585 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1586 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
1587 DAG.getConstant(0, DL, MVT::i64));
1588 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
1589 // operation. We need to clear out the upper 32 bits, because we used a
1590 // widening multiply that wrote all 64 bits. In the end this should be a no-op.
1592 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
1594 // The signed overflow check requires more than just a simple check for
1595 // any bit set in the upper 32 bits of the result. These bits could be
1596 // just the sign bits of a negative number. To perform the overflow check
1597 // we arithmetic-shift the low 32 bits of the result right by 31 bits,
1598 // replicating its sign bit, and then compare that to the upper 32 bits.
1599 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
1600 DAG.getConstant(32, DL, MVT::i64));
1601 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
1602 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
1603 DAG.getConstant(31, DL, MVT::i64));
1604 // It is important that LowerBits is last, otherwise the arithmetic
1605 // shift will not be folded into the compare (SUBS).
1606 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
1607 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
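// Illustrative sketch of the signed i32 case (scheduling and register choice
// are up to the selector; names are placeholders):
//   smull x8, w0, w1          ; widening multiply (SMADDL with a zero addend)
//   lsr   x9, x8, #32         ; upper 32 bits of the result
//   cmp   w9, w8, asr #31     ; SUBS with the arithmetic shift folded in
// Overflow is then signalled by the "ne" condition.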
1610 // The overflow check for unsigned multiply is easy. We only need to
1611 // check if any of the upper 32 bits are set. This can be done with a
1612 // CMP (shifted register). For that we need to generate the following
1614 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32))
1615 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
1616 DAG.getConstant(32, DL, MVT::i64));
1617 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1619 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1620 DAG.getConstant(0, DL, MVT::i64),
1621 UpperBits).getValue(1);
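// Illustrative sketch of the unsigned i32 case (registers are placeholders):
//   umull x8, w0, w1
//   cmp   xzr, x8, lsr #32    ; SUBS 0, (srl %Mul, 32); any set upper bit => "ne"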
1625 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
1626 // For the 64-bit multiply case.
1627 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1629 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
1630 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
1631 DAG.getConstant(63, DL, MVT::i64));
1632 // It is important that LowerBits is last, otherwise the arithmetic
1633 // shift will not be folded into the compare (SUBS).
1634 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1635 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1638 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
1639 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1641 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1642 DAG.getConstant(0, DL, MVT::i64),
1643 UpperBits).getValue(1);
1650 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
1652 // Emit the AArch64 operation with overflow check.
1653 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
1654 Overflow = Value.getValue(1);
1656 return std::make_pair(Value, Overflow);
1659 SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
1660 RTLIB::Libcall Call) const {
1661 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
1662 return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
1665 static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
1666 SDValue Sel = Op.getOperand(0);
1667 SDValue Other = Op.getOperand(1);
1669 // If neither operand is a SELECT_CC, give up.
1670 if (Sel.getOpcode() != ISD::SELECT_CC)
1671 std::swap(Sel, Other);
1672 if (Sel.getOpcode() != ISD::SELECT_CC)
1675 // The folding we want to perform is:
1676 // (xor x, (select_cc a, b, cc, 0, -1) )
1678 // (csel x, (xor x, -1), cc ...)
1680 // The latter will get matched to a CSINV instruction.
1682 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
1683 SDValue LHS = Sel.getOperand(0);
1684 SDValue RHS = Sel.getOperand(1);
1685 SDValue TVal = Sel.getOperand(2);
1686 SDValue FVal = Sel.getOperand(3);
1689 // FIXME: This could be generalized to non-integer comparisons.
1690 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
1693 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
1694 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
1696 // The values aren't constants, this isn't the pattern we're looking for.
1697 if (!CFVal || !CTVal)
1700 // We can commute the SELECT_CC by inverting the condition. This
1701 // might be needed to make this fit into a CSINV pattern.
1702 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
1703 std::swap(TVal, FVal);
1704 std::swap(CTVal, CFVal);
1705 CC = ISD::getSetCCInverse(CC, true);
1708 // If the constants line up, perform the transform!
1709 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
1711 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
1714 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
1715 DAG.getConstant(-1ULL, dl, Other.getValueType()));
1717 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
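//
// For example (sketch, placeholder registers): for
//   %sel = (select_cc %a, %b, eq, 0, -1)
//   %res = (xor %x, %sel)
// the result is roughly
//   cmp   x0, x1
//   csinv x2, x2, x2, eq      ; %x when %a == %b, ~%x otherwise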
1724 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
1725 EVT VT = Op.getValueType();
1727 // Let legalize expand this if it isn't a legal type yet.
1728 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
1731 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
1734 bool ExtraOp = false;
1735 switch (Op.getOpcode()) {
1737 llvm_unreachable("Invalid code");
1739 Opc = AArch64ISD::ADDS;
1742 Opc = AArch64ISD::SUBS;
1745 Opc = AArch64ISD::ADCS;
1749 Opc = AArch64ISD::SBCS;
1755 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
1756 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
1760 static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
1761 // Let legalize expand this if it isn't a legal type yet.
1762 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
1766 AArch64CC::CondCode CC;
1767 // The actual operation that sets the overflow or carry flag.
1768 SDValue Value, Overflow;
1769 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
1771 // We use 0 and 1 as false and true values.
1772 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
1773 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
1775 // We use an inverted condition, because the conditional select is inverted
1776 // too. This will allow it to be selected to a single instruction:
1777 // CSINC Wd, WZR, WZR, invert(cond).
1778 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
1779 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
1782 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
1783 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
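// For example (sketch), an i32 llvm.sadd.with.overflow becomes roughly
//   adds w8, w0, w1
//   cset w9, vs               ; i.e. CSINC wzr, wzr with the inverted condition
// where w8 is the value result and w9 the overflow bit.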
1786 // Prefetch operands are:
1787 // 1: Address to prefetch
// 2: bool isWrite
1789 // 3: int locality (0 = no locality ... 3 = extreme locality)
1790 // 4: bool isDataCache
1791 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
1793 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
1794 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
1795 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
1797 bool IsStream = !Locality;
1798 // When the locality number is set
1800 // The front-end should have filtered out the out-of-range values
1801 assert(Locality <= 3 && "Prefetch locality out-of-range");
1802 // The locality degree is the opposite of the cache level: the target
1803 // encoding starts at 0 for L1, so flip the number around
1804 // (IR locality 3 = keep in L1 becomes 0, locality 1 becomes 2).
1805 Locality = 3 - Locality;
1808 // Build the mask value encoding the expected behavior.
1809 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
1810 (!IsData << 3) | // IsDataCache bit
1811 (Locality << 1) | // Cache level bits
1812 (unsigned)IsStream; // Stream bit
1813 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
1814 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
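// For example (sketch): a read prefetch of the data cache with extreme
// locality (IsWrite = 0, Locality = 3, IsData = 1) yields
//   PrfOp = (0 << 4) | (0 << 3) | (0 << 1) | 0 = 0
// which corresponds to "prfm pldl1keep, [addr]".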
1817 SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
1818 SelectionDAG &DAG) const {
1819 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
1822 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
1824 return LowerF128Call(Op, DAG, LC);
1827 SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
1828 SelectionDAG &DAG) const {
1829 if (Op.getOperand(0).getValueType() != MVT::f128) {
1830 // It's legal except when f128 is involved
1835 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
1837 // The FP_ROUND node has a second operand indicating whether it is known to be
1838 // precise. That doesn't take part in the libcall, so we can't directly use
// LowerF128Call.
1840 SDValue SrcVal = Op.getOperand(0);
1841 return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
1845 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
1846 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
1847 // Any additional optimization in this function should be recorded
1848 // in the cost tables.
1849 EVT InVT = Op.getOperand(0).getValueType();
1850 EVT VT = Op.getValueType();
1851 unsigned NumElts = InVT.getVectorNumElements();
1853 // f16 vectors are promoted to f32 before a conversion.
1854 if (InVT.getVectorElementType() == MVT::f16) {
1855 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
1858 Op.getOpcode(), dl, Op.getValueType(),
1859 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
1862 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
1865 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
1867 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
1870 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
1873 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
1874 VT.getVectorNumElements());
1875 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
1876 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
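// Illustrative shapes (sketch): v4f16 -> v4i16 is promoted through v4f32 and
// converted via v4i32 (fcvtl + fcvtzs + xtn), while v2f32 -> v2i64 widens to
// v2f64 first (fcvtl + fcvtzs).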
1879 // Type changing conversions are illegal.
1883 SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
1884 SelectionDAG &DAG) const {
1885 if (Op.getOperand(0).getValueType().isVector())
1886 return LowerVectorFP_TO_INT(Op, DAG);
1888 // f16 conversions are promoted to f32.
1889 if (Op.getOperand(0).getValueType() == MVT::f16) {
1892 Op.getOpcode(), dl, Op.getValueType(),
1893 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
1896 if (Op.getOperand(0).getValueType() != MVT::f128) {
1897 // It's legal except when f128 is involved
1902 if (Op.getOpcode() == ISD::FP_TO_SINT)
1903 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
1905 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
1907 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
1908 return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
1911 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
1912 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
1913 // Any additional optimization in this function should be recorded
1914 // in the cost tables.
1915 EVT VT = Op.getValueType();
1917 SDValue In = Op.getOperand(0);
1918 EVT InVT = In.getValueType();
1920 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
1922 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
1923 InVT.getVectorNumElements());
1924 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
1925 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
1928 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
1930 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1931 EVT CastVT = VT.changeVectorElementTypeToInteger();
1932 In = DAG.getNode(CastOpc, dl, CastVT, In);
1933 return DAG.getNode(Op.getOpcode(), dl, VT, In);
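// Illustrative shapes (sketch): v2i32 -> v2f64 widens the integers first
// (sshll/ushll + scvtf/ucvtf), while v2i64 -> v2f32 converts in v2f64 and
// rounds the result down (scvtf/ucvtf + fcvtn).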
1939 SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
1940 SelectionDAG &DAG) const {
1941 if (Op.getValueType().isVector())
1942 return LowerVectorINT_TO_FP(Op, DAG);
1944 // f16 conversions are promoted to f32.
1945 if (Op.getValueType() == MVT::f16) {
1948 ISD::FP_ROUND, dl, MVT::f16,
1949 DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
1950 DAG.getIntPtrConstant(0, dl));
1953 // i128 conversions are libcalls.
1954 if (Op.getOperand(0).getValueType() == MVT::i128)
1957 // Other conversions are legal, unless it's to the completely software-based fp128.
1959 if (Op.getValueType() != MVT::f128)
1963 if (Op.getOpcode() == ISD::SINT_TO_FP)
1964 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
1966 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
1968 return LowerF128Call(Op, DAG, LC);
1971 SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
1972 SelectionDAG &DAG) const {
1973 // For iOS, we want to call an alternative entry point: __sincos_stret,
1974 // which returns the values in two S / D registers.
1976 SDValue Arg = Op.getOperand(0);
1977 EVT ArgVT = Arg.getValueType();
1978 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
1985 Entry.isSExt = false;
1986 Entry.isZExt = false;
1987 Args.push_back(Entry);
1989 const char *LibcallName =
1990 (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
1992 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
1994 StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
1995 TargetLowering::CallLoweringInfo CLI(DAG);
1996 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
1997 .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args), 0);
1999 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2000 return CallResult.first;
2003 static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2004 if (Op.getValueType() != MVT::f16)
2007 assert(Op.getOperand(0).getValueType() == MVT::i16);
2010 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2011 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2013 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2014 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2018 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2019 if (OrigVT.getSizeInBits() >= 64)
2022 assert(OrigVT.isSimple() && "Expecting a simple value type");
2024 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2025 switch (OrigSimpleTy) {
2026 default: llvm_unreachable("Unexpected Vector Type");
2035 static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2038 unsigned ExtOpcode) {
2039 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2040 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2041 // 64-bits we need to insert a new extension so that it will be 64-bits.
2042 assert(ExtTy.is128BitVector() && "Unexpected extension size");
2043 if (OrigTy.getSizeInBits() >= 64)
2046 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2047 EVT NewVT = getExtensionTo64Bits(OrigTy);
2049 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2052 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2054 EVT VT = N->getValueType(0);
2056 if (N->getOpcode() != ISD::BUILD_VECTOR)
2059 for (const SDValue &Elt : N->op_values()) {
2060 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2061 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
2062 unsigned HalfSize = EltSize / 2;
2064 if (!isIntN(HalfSize, C->getSExtValue()))
2067 if (!isUIntN(HalfSize, C->getZExtValue()))
2078 static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2079 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2080 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2081 N->getOperand(0)->getValueType(0),
2085 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2086 EVT VT = N->getValueType(0);
2088 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
2089 unsigned NumElts = VT.getVectorNumElements();
2090 MVT TruncVT = MVT::getIntegerVT(EltSize);
2091 SmallVector<SDValue, 8> Ops;
2092 for (unsigned i = 0; i != NumElts; ++i) {
2093 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2094 const APInt &CInt = C->getAPIntValue();
2095 // Element types smaller than 32 bits are not legal, so use i32 elements.
2096 // The values are implicitly truncated so sext vs. zext doesn't matter.
2097 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2099 return DAG.getNode(ISD::BUILD_VECTOR, dl,
2100 MVT::getVectorVT(TruncVT, NumElts), Ops);
2103 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2104 if (N->getOpcode() == ISD::SIGN_EXTEND)
2106 if (isExtendedBUILD_VECTOR(N, DAG, true))
2111 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2112 if (N->getOpcode() == ISD::ZERO_EXTEND)
2114 if (isExtendedBUILD_VECTOR(N, DAG, false))
2119 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2120 unsigned Opcode = N->getOpcode();
2121 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2122 SDNode *N0 = N->getOperand(0).getNode();
2123 SDNode *N1 = N->getOperand(1).getNode();
2124 return N0->hasOneUse() && N1->hasOneUse() &&
2125 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2130 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2131 unsigned Opcode = N->getOpcode();
2132 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2133 SDNode *N0 = N->getOperand(0).getNode();
2134 SDNode *N1 = N->getOperand(1).getNode();
2135 return N0->hasOneUse() && N1->hasOneUse() &&
2136 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2141 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2142 // Multiplications are only custom-lowered for 128-bit vectors so that
2143 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2144 EVT VT = Op.getValueType();
2145 assert(VT.is128BitVector() && VT.isInteger() &&
2146 "unexpected type for custom-lowering ISD::MUL");
2147 SDNode *N0 = Op.getOperand(0).getNode();
2148 SDNode *N1 = Op.getOperand(1).getNode();
2149 unsigned NewOpc = 0;
2151 bool isN0SExt = isSignExtended(N0, DAG);
2152 bool isN1SExt = isSignExtended(N1, DAG);
2153 if (isN0SExt && isN1SExt)
2154 NewOpc = AArch64ISD::SMULL;
2156 bool isN0ZExt = isZeroExtended(N0, DAG);
2157 bool isN1ZExt = isZeroExtended(N1, DAG);
2158 if (isN0ZExt && isN1ZExt)
2159 NewOpc = AArch64ISD::UMULL;
2160 else if (isN1SExt || isN1ZExt) {
2161 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2162 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2163 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2164 NewOpc = AArch64ISD::SMULL;
2166 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2167 NewOpc = AArch64ISD::UMULL;
2169 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2171 NewOpc = AArch64ISD::UMULL;
2177 if (VT == MVT::v2i64)
2178 // Fall through to expand this. It is not legal.
2181 // Other vector multiplications are legal.
2186 // Legalize to a S/UMULL instruction
2189 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2191 Op0 = skipExtensionForVectorMULL(N0, DAG);
2192 assert(Op0.getValueType().is64BitVector() &&
2193 Op1.getValueType().is64BitVector() &&
2194 "unexpected types for extended operands to VMULL");
2195 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2197 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
2198 // isel lowering to take advantage of back-to-back s/umull + s/umlal with no
2199 // stall. This holds for CPUs with accumulate forwarding such as Cortex-A53/A57.
2200 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2201 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2202 EVT Op1VT = Op1.getValueType();
2203 return DAG.getNode(N0->getOpcode(), DL, VT,
2204 DAG.getNode(NewOpc, DL, VT,
2205 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2206 DAG.getNode(NewOpc, DL, VT,
2207 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
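//
// Illustrative sketch of the distributed form above, for v8i8 operands that
// are zero-extended to a v8i16 product (register names are placeholders):
//   umull v0.8h, vA.8b, vC.8b
//   umlal v0.8h, vB.8b, vC.8b   ; the ADD of the two UMULLs can select as umlal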
2210 SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2211 SelectionDAG &DAG) const {
2212 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2215 default: return SDValue(); // Don't custom lower most intrinsics.
2216 case Intrinsic::aarch64_thread_pointer: {
2217 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2218 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2220 case Intrinsic::aarch64_neon_smax:
2221 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2222 Op.getOperand(1), Op.getOperand(2));
2223 case Intrinsic::aarch64_neon_umax:
2224 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2225 Op.getOperand(1), Op.getOperand(2));
2226 case Intrinsic::aarch64_neon_smin:
2227 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2228 Op.getOperand(1), Op.getOperand(2));
2229 case Intrinsic::aarch64_neon_umin:
2230 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2231 Op.getOperand(1), Op.getOperand(2));
2235 SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2236 SelectionDAG &DAG) const {
2237 switch (Op.getOpcode()) {
2239 llvm_unreachable("unimplemented operand");
2242 return LowerBITCAST(Op, DAG);
2243 case ISD::GlobalAddress:
2244 return LowerGlobalAddress(Op, DAG);
2245 case ISD::GlobalTLSAddress:
2246 return LowerGlobalTLSAddress(Op, DAG);
2248 return LowerSETCC(Op, DAG);
2250 return LowerBR_CC(Op, DAG);
2252 return LowerSELECT(Op, DAG);
2253 case ISD::SELECT_CC:
2254 return LowerSELECT_CC(Op, DAG);
2255 case ISD::JumpTable:
2256 return LowerJumpTable(Op, DAG);
2257 case ISD::ConstantPool:
2258 return LowerConstantPool(Op, DAG);
2259 case ISD::BlockAddress:
2260 return LowerBlockAddress(Op, DAG);
2262 return LowerVASTART(Op, DAG);
2264 return LowerVACOPY(Op, DAG);
2266 return LowerVAARG(Op, DAG);
2271 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2278 return LowerXALUO(Op, DAG);
2280 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2282 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2284 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2286 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2288 return LowerFP_ROUND(Op, DAG);
2289 case ISD::FP_EXTEND:
2290 return LowerFP_EXTEND(Op, DAG);
2291 case ISD::FRAMEADDR:
2292 return LowerFRAMEADDR(Op, DAG);
2293 case ISD::RETURNADDR:
2294 return LowerRETURNADDR(Op, DAG);
2295 case ISD::INSERT_VECTOR_ELT:
2296 return LowerINSERT_VECTOR_ELT(Op, DAG);
2297 case ISD::EXTRACT_VECTOR_ELT:
2298 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2299 case ISD::BUILD_VECTOR:
2300 return LowerBUILD_VECTOR(Op, DAG);
2301 case ISD::VECTOR_SHUFFLE:
2302 return LowerVECTOR_SHUFFLE(Op, DAG);
2303 case ISD::EXTRACT_SUBVECTOR:
2304 return LowerEXTRACT_SUBVECTOR(Op, DAG);
2308 return LowerVectorSRA_SRL_SHL(Op, DAG);
2309 case ISD::SHL_PARTS:
2310 return LowerShiftLeftParts(Op, DAG);
2311 case ISD::SRL_PARTS:
2312 case ISD::SRA_PARTS:
2313 return LowerShiftRightParts(Op, DAG);
2315 return LowerCTPOP(Op, DAG);
2316 case ISD::FCOPYSIGN:
2317 return LowerFCOPYSIGN(Op, DAG);
2319 return LowerVectorAND(Op, DAG);
2321 return LowerVectorOR(Op, DAG);
2323 return LowerXOR(Op, DAG);
2325 return LowerPREFETCH(Op, DAG);
2326 case ISD::SINT_TO_FP:
2327 case ISD::UINT_TO_FP:
2328 return LowerINT_TO_FP(Op, DAG);
2329 case ISD::FP_TO_SINT:
2330 case ISD::FP_TO_UINT:
2331 return LowerFP_TO_INT(Op, DAG);
2333 return LowerFSINCOS(Op, DAG);
2335 return LowerMUL(Op, DAG);
2336 case ISD::INTRINSIC_WO_CHAIN:
2337 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2341 //===----------------------------------------------------------------------===//
2342 // Calling Convention Implementation
2343 //===----------------------------------------------------------------------===//
2345 #include "AArch64GenCallingConv.inc"
2347 /// Selects the correct CCAssignFn for a given CallingConvention value.
2348 CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2349 bool IsVarArg) const {
2352 llvm_unreachable("Unsupported calling convention.");
2353 case CallingConv::WebKit_JS:
2354 return CC_AArch64_WebKit_JS;
2355 case CallingConv::GHC:
2356 return CC_AArch64_GHC;
2357 case CallingConv::C:
2358 case CallingConv::Fast:
2359 if (!Subtarget->isTargetDarwin())
2360 return CC_AArch64_AAPCS;
2361 return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2365 SDValue AArch64TargetLowering::LowerFormalArguments(
2366 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2367 const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
2368 SmallVectorImpl<SDValue> &InVals) const {
2369 MachineFunction &MF = DAG.getMachineFunction();
2370 MachineFrameInfo *MFI = MF.getFrameInfo();
2372 // Assign locations to all of the incoming arguments.
2373 SmallVector<CCValAssign, 16> ArgLocs;
2374 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2377 // At this point, Ins[].VT may already be promoted to i32. To correctly
2378 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2379 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2380 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
2381 // we use a special version of AnalyzeFormalArguments to pass in ValVT and LocVT.
2383 unsigned NumArgs = Ins.size();
2384 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
2385 unsigned CurArgIdx = 0;
2386 for (unsigned i = 0; i != NumArgs; ++i) {
2387 MVT ValVT = Ins[i].VT;
2388 if (Ins[i].isOrigArg()) {
2389 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
2390 CurArgIdx = Ins[i].getOrigArgIndex();
2392 // Get type of the original argument.
2393 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
2394 /*AllowUnknown*/ true);
2395 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
2396 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
2397 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
2399 else if (ActualMVT == MVT::i16)
2402 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
2404 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
2405 assert(!Res && "Call operand has unhandled type");
2408 assert(ArgLocs.size() == Ins.size());
2409 SmallVector<SDValue, 16> ArgValues;
2410 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2411 CCValAssign &VA = ArgLocs[i];
2413 if (Ins[i].Flags.isByVal()) {
2414 // Byval is used for HFAs in the PCS, but the system should work in a
2415 // non-compliant manner for larger structs.
2416 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2417 int Size = Ins[i].Flags.getByValSize();
2418 unsigned NumRegs = (Size + 7) / 8;
2420 // FIXME: This works on big-endian for composite byvals, which are the common
2421 // case. It should also work for fundamental types.
2423 MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
2424 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
2425 InVals.push_back(FrameIdxN);
2430 if (VA.isRegLoc()) {
2431 // Arguments stored in registers.
2432 EVT RegVT = VA.getLocVT();
2435 const TargetRegisterClass *RC;
2437 if (RegVT == MVT::i32)
2438 RC = &AArch64::GPR32RegClass;
2439 else if (RegVT == MVT::i64)
2440 RC = &AArch64::GPR64RegClass;
2441 else if (RegVT == MVT::f16)
2442 RC = &AArch64::FPR16RegClass;
2443 else if (RegVT == MVT::f32)
2444 RC = &AArch64::FPR32RegClass;
2445 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
2446 RC = &AArch64::FPR64RegClass;
2447 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
2448 RC = &AArch64::FPR128RegClass;
2450 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2452 // Transform the arguments in physical registers into virtual ones.
2453 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2454 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
2456 // If this is an 8, 16 or 32-bit value, it is really passed promoted
2457 // to 64 bits. Insert an assert[sz]ext to capture this, then
2458 // truncate to the right size.
2459 switch (VA.getLocInfo()) {
2461 llvm_unreachable("Unknown loc info!");
2462 case CCValAssign::Full:
2464 case CCValAssign::BCvt:
2465 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
2467 case CCValAssign::AExt:
2468 case CCValAssign::SExt:
2469 case CCValAssign::ZExt:
2470 // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
2471 // nodes after our lowering.
2472 assert(RegVT == Ins[i].VT && "incorrect register location selected");
2476 InVals.push_back(ArgValue);
2478 } else { // VA.isRegLoc()
2479 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
2480 unsigned ArgOffset = VA.getLocMemOffset();
2481 unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
2483 uint32_t BEAlign = 0;
2484 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
2485 !Ins[i].Flags.isInConsecutiveRegs())
2486 BEAlign = 8 - ArgSize;
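// For example (sketch): on big-endian, a 4-byte stack argument sits in the
// least-significant (higher-addressed) bytes of its 8-byte slot, so
// BEAlign = 4 moves the load to the correct offset within the slot.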
2488 int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
2490 // Create load nodes to retrieve arguments from the stack.
2491 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2494 // For NON_EXTLOAD, the generic code in getLoad asserts that ValVT == MemVT.
2495 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
2496 MVT MemVT = VA.getValVT();
2498 switch (VA.getLocInfo()) {
2501 case CCValAssign::BCvt:
2502 MemVT = VA.getLocVT();
2504 case CCValAssign::SExt:
2505 ExtType = ISD::SEXTLOAD;
2507 case CCValAssign::ZExt:
2508 ExtType = ISD::ZEXTLOAD;
2510 case CCValAssign::AExt:
2511 ExtType = ISD::EXTLOAD;
2515 ArgValue = DAG.getExtLoad(
2516 ExtType, DL, VA.getLocVT(), Chain, FIN,
2517 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
2518 MemVT, false, false, false, 0);
2520 InVals.push_back(ArgValue);
2526 if (!Subtarget->isTargetDarwin()) {
2527 // The AAPCS variadic function ABI is identical to the non-variadic
2528 // one. As a result there may be more arguments in registers and we should
2529 // save them for future reference.
2530 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
2533 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2534 // This will point to the next argument passed via stack.
2535 unsigned StackOffset = CCInfo.getNextStackOffset();
2536 // We currently pass all varargs at 8-byte alignment.
2537 StackOffset = ((StackOffset + 7) & ~7);
2538 AFI->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true));
2541 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2542 unsigned StackArgSize = CCInfo.getNextStackOffset();
2543 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
2544 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
2545 // This is a non-standard ABI so by fiat I say we're allowed to make full
2546 // use of the stack area to be popped, which must be aligned to 16 bytes in any case.
2548 StackArgSize = RoundUpToAlignment(StackArgSize, 16);
2550 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
2551 // a multiple of 16.
2552 FuncInfo->setArgumentStackToRestore(StackArgSize);
2554 // This realignment carries over to the available bytes below. Our own
2555 // callers will guarantee the space is free by giving an aligned value to CALLSEQ_START.
2558 // Even if we're not expected to free up the space, it's useful to know how
2559 // much is there while considering tail calls (because we can reuse it).
2560 FuncInfo->setBytesInStackArgArea(StackArgSize);
2565 void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
2566 SelectionDAG &DAG, SDLoc DL,
2567 SDValue &Chain) const {
2568 MachineFunction &MF = DAG.getMachineFunction();
2569 MachineFrameInfo *MFI = MF.getFrameInfo();
2570 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2571 auto PtrVT = getPointerTy(DAG.getDataLayout());
2573 SmallVector<SDValue, 8> MemOps;
2575 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
2576 AArch64::X3, AArch64::X4, AArch64::X5,
2577 AArch64::X6, AArch64::X7 };
2578 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
2579 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
2581 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
2583 if (GPRSaveSize != 0) {
2584 GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
2586 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
2588 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
2589 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
2590 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
2591 SDValue Store = DAG.getStore(
2592 Val.getValue(1), DL, Val, FIN,
2593 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8), false,
2595 MemOps.push_back(Store);
2597 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
2600 FuncInfo->setVarArgsGPRIndex(GPRIdx);
2601 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
2603 if (Subtarget->hasFPARMv8()) {
2604 static const MCPhysReg FPRArgRegs[] = {
2605 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
2606 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
2607 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
2608 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
2610 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
2612 if (FPRSaveSize != 0) {
2613 FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
2615 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
2617 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
2618 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
2619 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
2621 SDValue Store = DAG.getStore(
2622 Val.getValue(1), DL, Val, FIN,
2623 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16),
2625 MemOps.push_back(Store);
2626 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2627 DAG.getConstant(16, DL, PtrVT));
2630 FuncInfo->setVarArgsFPRIndex(FPRIdx);
2631 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
2634 if (!MemOps.empty()) {
2635 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
2639 /// LowerCallResult - Lower the result values of a call into the
2640 /// appropriate copies out of appropriate physical registers.
2641 SDValue AArch64TargetLowering::LowerCallResult(
2642 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2643 const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
2644 SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2645 SDValue ThisVal) const {
2646 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
2647 ? RetCC_AArch64_WebKit_JS
2648 : RetCC_AArch64_AAPCS;
2649 // Assign locations to each value returned by this call.
2650 SmallVector<CCValAssign, 16> RVLocs;
2651 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2653 CCInfo.AnalyzeCallResult(Ins, RetCC);
2655 // Copy all of the result registers out of their specified physreg.
2656 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2657 CCValAssign VA = RVLocs[i];
2659 // Pass 'this' value directly from the argument to return value, to avoid
2660 // reg unit interference
2661 if (i == 0 && isThisReturn) {
2662 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
2663 "unexpected return calling convention register assignment");
2664 InVals.push_back(ThisVal);
2669 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
2670 Chain = Val.getValue(1);
2671 InFlag = Val.getValue(2);
2673 switch (VA.getLocInfo()) {
2675 llvm_unreachable("Unknown loc info!");
2676 case CCValAssign::Full:
2678 case CCValAssign::BCvt:
2679 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
2683 InVals.push_back(Val);
2689 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
2690 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2691 bool isCalleeStructRet, bool isCallerStructRet,
2692 const SmallVectorImpl<ISD::OutputArg> &Outs,
2693 const SmallVectorImpl<SDValue> &OutVals,
2694 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
2695 // For CallingConv::C this function knows whether the ABI needs
2696 // changing. That's not true for other conventions, so they will have to opt in manually.
2698 if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
2701 const MachineFunction &MF = DAG.getMachineFunction();
2702 const Function *CallerF = MF.getFunction();
2703 CallingConv::ID CallerCC = CallerF->getCallingConv();
2704 bool CCMatch = CallerCC == CalleeCC;
2706 // Byval parameters hand the function a pointer directly into the stack area
2707 // we want to reuse during a tail call. Working around this *is* possible (see
2708 // X86) but less efficient and uglier in LowerCall.
2709 for (Function::const_arg_iterator i = CallerF->arg_begin(),
2710 e = CallerF->arg_end();
2712 if (i->hasByValAttr())
2715 if (getTargetMachine().Options.GuaranteedTailCallOpt) {
2716 if (IsTailCallConvention(CalleeCC) && CCMatch)
2721 // Externally-defined functions with weak linkage should not be
2722 // tail-called on AArch64 when the OS does not support dynamic
2723 // pre-emption of symbols, as the AAELF spec requires normal calls
2724 // to undefined weak functions to be replaced with a NOP or jump to the
2725 // next instruction. The behaviour of branch instructions in this
2726 // situation (as used for tail calls) is implementation-defined, so we
2727 // cannot rely on the linker replacing the tail call with a return.
2728 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2729 const GlobalValue *GV = G->getGlobal();
2730 const Triple &TT = getTargetMachine().getTargetTriple();
2731 if (GV->hasExternalWeakLinkage() &&
2732 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2736 // Now we search for cases where we can use a tail call without changing the
2737 // ABI. Sibcall is used in some places (particularly gcc) to refer to this kind of tail call.
2740 // I want anyone implementing a new calling convention to think long and hard
2741 // about this assert.
2742 assert((!isVarArg || CalleeCC == CallingConv::C) &&
2743 "Unexpected variadic calling convention");
2745 if (isVarArg && !Outs.empty()) {
2746 // At least two cases here: if caller is fastcc then we can't have any
2747 // memory arguments (we'd be expected to clean up the stack afterwards). If
2748 // caller is C then we could potentially use its argument area.
2750 // FIXME: for now we take the most conservative of these in both cases:
2751 // disallow all variadic memory operands.
2752 SmallVector<CCValAssign, 16> ArgLocs;
2753 CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
2756 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
2757 for (const CCValAssign &ArgLoc : ArgLocs)
2758 if (!ArgLoc.isRegLoc())
2762 // If the calling conventions do not match, then we'd better make sure the
2763 // results are returned in the same way as what the caller expects.
2765 SmallVector<CCValAssign, 16> RVLocs1;
2766 CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1,
2768 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForCall(CalleeCC, isVarArg));
2770 SmallVector<CCValAssign, 16> RVLocs2;
2771 CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2,
2773 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForCall(CallerCC, isVarArg));
2775 if (RVLocs1.size() != RVLocs2.size())
2777 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
2778 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
2780 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
2782 if (RVLocs1[i].isRegLoc()) {
2783 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
2786 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
2792 // Nothing more to check if the callee is taking no arguments
2796 SmallVector<CCValAssign, 16> ArgLocs;
2797 CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs,
2800 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2802 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2804 // If the stack arguments for this call would fit into our own save area then
2805 // the call can be made tail.
2806 return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
2809 SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
2811 MachineFrameInfo *MFI,
2812 int ClobberedFI) const {
2813 SmallVector<SDValue, 8> ArgChains;
2814 int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
2815 int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
2817 // Include the original chain at the beginning of the list. When this is
2818 // used by target LowerCall hooks, this helps legalize find the
2819 // CALLSEQ_BEGIN node.
2820 ArgChains.push_back(Chain);
2822 // Add a chain value for each stack argument load that overlaps the clobbered frame index.
2823 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
2824 UE = DAG.getEntryNode().getNode()->use_end();
2826 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
2827 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
2828 if (FI->getIndex() < 0) {
2829 int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
2830 int64_t InLastByte = InFirstByte;
2831 InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
2833 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
2834 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
2835 ArgChains.push_back(SDValue(L, 1));
2838 // Build a tokenfactor for all the chains.
2839 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
2842 bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
2843 bool TailCallOpt) const {
2844 return CallCC == CallingConv::Fast && TailCallOpt;
2847 bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
2848 return CallCC == CallingConv::Fast;
2851 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
2852 /// and add input and output parameter nodes.
2854 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
2855 SmallVectorImpl<SDValue> &InVals) const {
2856 SelectionDAG &DAG = CLI.DAG;
2858 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
2859 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
2860 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
2861 SDValue Chain = CLI.Chain;
2862 SDValue Callee = CLI.Callee;
2863 bool &IsTailCall = CLI.IsTailCall;
2864 CallingConv::ID CallConv = CLI.CallConv;
2865 bool IsVarArg = CLI.IsVarArg;
2867 MachineFunction &MF = DAG.getMachineFunction();
2868 bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2869 bool IsThisReturn = false;
2871 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2872 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
2873 bool IsSibCall = false;
2876 // Check if it's really possible to do a tail call.
2877 IsTailCall = isEligibleForTailCallOptimization(
2878 Callee, CallConv, IsVarArg, IsStructRet,
2879 MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG);
2880 if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
2881 report_fatal_error("failed to perform tail call elimination on a call "
2882 "site marked musttail");
2884 // A sibling call is one where we're under the usual C ABI and not planning
2885 // to change that but can still do a tail call:
2886 if (!TailCallOpt && IsTailCall)
2893 // Analyze operands of the call, assigning locations to each operand.
2894 SmallVector<CCValAssign, 16> ArgLocs;
2895 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
2899 // Handle fixed and variable vector arguments differently.
2900 // Variable vector arguments always go into memory.
2901 unsigned NumArgs = Outs.size();
2903 for (unsigned i = 0; i != NumArgs; ++i) {
2904 MVT ArgVT = Outs[i].VT;
2905 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2906 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
2907 /*IsVarArg=*/ !Outs[i].IsFixed);
2908 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
2909 assert(!Res && "Call operand has unhandled type");
2913 // At this point, Outs[].VT may already be promoted to i32. To correctly
2914 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2915 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2916 // Since AnalyzeCallOperands uses Outs[].VT for both ValVT and LocVT, here
2917 // we use a special version of AnalyzeCallOperands to pass in ValVT and LocVT.
2919 unsigned NumArgs = Outs.size();
2920 for (unsigned i = 0; i != NumArgs; ++i) {
2921 MVT ValVT = Outs[i].VT;
2922 // Get type of the original argument.
2923 EVT ActualVT = getValueType(DAG.getDataLayout(),
2924 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
2925 /*AllowUnknown*/ true);
2926 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
2927 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
2928 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
2929 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
2931 else if (ActualMVT == MVT::i16)
2934 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
2935 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
2936 assert(!Res && "Call operand has unhandled type");
2941 // Get a count of how many bytes are to be pushed on the stack.
2942 unsigned NumBytes = CCInfo.getNextStackOffset();
2945 // Since we're not changing the ABI to make this a tail call, the memory
2946 // operands are already available in the caller's incoming argument space.
2950 // FPDiff is the byte offset of the call's argument area from the callee's.
2951 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2952 // by this amount for a tail call. In a sibling call it must be 0 because the
2953 // caller will deallocate the entire stack and the callee still expects its
2954 // arguments to begin at SP+0. Completely unused for non-tail calls.
2957 if (IsTailCall && !IsSibCall) {
2958 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
2960 // Since callee will pop argument stack as a tail call, we must keep the
2961 // popped size 16-byte aligned.
2962 NumBytes = RoundUpToAlignment(NumBytes, 16);
2964 // FPDiff will be negative if this tail call requires more space than we
2965 // would automatically have in our incoming argument space. Positive if we
2966 // can actually shrink the stack.
2967 FPDiff = NumReusableBytes - NumBytes;
2969 // The stack pointer must be 16-byte aligned at all times it's used for a
2970 // memory operation, which in practice means at *all* times and in
2971 // particular across call boundaries. Therefore our own arguments started at
2972 // a 16-byte aligned SP and the delta applied for the tail call should
2973 // satisfy the same constraint.
2974 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
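// For example (sketch): a caller with 32 bytes of incoming stack-argument
// space tail-calling a callee that needs 16 bytes gets FPDiff = 32 - 16 = 16,
// which is positive, so the stack can actually shrink.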
2977 // Adjust the stack pointer for the new arguments...
2978 // These operations are automatically eliminated by the prolog/epilog pass
2980 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL,
2984 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
2985 getPointerTy(DAG.getDataLayout()));
2987 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2988 SmallVector<SDValue, 8> MemOpChains;
2989 auto PtrVT = getPointerTy(DAG.getDataLayout());
2991 // Walk the register/memloc assignments, inserting copies/loads.
2992 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
2993 ++i, ++realArgIdx) {
2994 CCValAssign &VA = ArgLocs[i];
2995 SDValue Arg = OutVals[realArgIdx];
2996 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2998 // Promote the value if needed.
2999 switch (VA.getLocInfo()) {
3001 llvm_unreachable("Unknown loc info!");
3002 case CCValAssign::Full:
3004 case CCValAssign::SExt:
3005 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3007 case CCValAssign::ZExt:
3008 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3010 case CCValAssign::AExt:
3011 if (Outs[realArgIdx].ArgVT == MVT::i1) {
3012 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3013 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3014 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3016 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3018 case CCValAssign::BCvt:
3019 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3021 case CCValAssign::FPExt:
3022 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3026 if (VA.isRegLoc()) {
3027 if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) {
3028 assert(VA.getLocVT() == MVT::i64 &&
3029 "unexpected calling convention register assignment");
3030 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3031 "unexpected use of 'returned'");
3032 IsThisReturn = true;
3034 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3036 assert(VA.isMemLoc());
3039 MachinePointerInfo DstInfo;
3041 // FIXME: This works on big-endian for composite byvals, which are the
3042 // common case. It should also work for fundamental types.
3043 uint32_t BEAlign = 0;
3044 unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3045 : VA.getValVT().getSizeInBits();
3046 OpSize = (OpSize + 7) / 8;
3047 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3048 !Flags.isInConsecutiveRegs()) {
3050 BEAlign = 8 - OpSize;
3052 unsigned LocMemOffset = VA.getLocMemOffset();
3053 int32_t Offset = LocMemOffset + BEAlign;
3054 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3055 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3058 Offset = Offset + FPDiff;
3059 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3061 DstAddr = DAG.getFrameIndex(FI, PtrVT);
3063 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3065 // Make sure any stack arguments overlapping with where we're storing
3066 // are loaded before this eventual operation. Otherwise they'll be clobbered.
3068 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3070 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3072 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3073 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3077 if (Outs[i].Flags.isByVal()) {
3079 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3080 SDValue Cpy = DAG.getMemcpy(
3081 Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3082 /*isVol = */ false, /*AlwaysInline = */ false,
3083 /*isTailCall = */ false,
3084 DstInfo, MachinePointerInfo());
3086 MemOpChains.push_back(Cpy);
3088 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3089 // promoted to a legal register type i32, we should truncate Arg back to the original type.
3091 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3092 VA.getValVT() == MVT::i16)
3093 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3096 DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, false, false, 0);
3097 MemOpChains.push_back(Store);
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
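  // The glue (InFlag) ties each copy to the next and ultimately to the call
  // node itself, which keeps the argument registers live up to the call during
  // scheduling and register allocation.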
  SDValue InFlag;
  for (auto &RegToPass : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
                             RegToPass.second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, as
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol node so that legalize doesn't hack it.
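  // Under the large code model on Mach-O, symbols that may live outside the
  // current module are reached through the GOT (LOADgot) rather than being
  // materialized directly; internal symbols can still be referenced directly.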
  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
      Subtarget->isTargetMachO()) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      bool InternalLinkage = GV->hasInternalLinkage();
      if (InternalLinkage)
        Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      else {
        Callee =
            DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
        Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
      }
    } else if (ExternalSymbolSDNode *S =
                   dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();
      Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
      Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const char *Sym = S->getSymbol();
    Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
  }
  // We don't usually want to end the call-sequence here because we would tidy
  // the frame up *after* the call; however, in the ABI-changing tail-call case
  // we've carefully laid out the parameters so that when sp is reset they'll
  // be in the correct location.
  if (IsTailCall && !IsSibCall) {
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
                               DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
    InFlag = Chain.getValue(1);
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (IsTailCall) {
    // Each tail call may have to adjust the stack by a different amount, so
    // this information must travel along with the operation for eventual
    // consumption by emitEpilogue.
    Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
  }
  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (auto &RegToPass : RegsToPass)
    Ops.push_back(DAG.getRegister(RegToPass.first,
                                  RegToPass.second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const uint32_t *Mask;
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  if (IsThisReturn) {
    // For 'this' returns, use the X0-preserving mask if applicable.
    Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
    if (!Mask) {
      IsThisReturn = false;
      Mask = TRI->getCallPreservedMask(MF, CallConv);
    }
  } else
    Mask = TRI->getCallPreservedMask(MF, CallConv);

  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

  // If we're doing a tail call, use a TC_RETURN here rather than an
  // actual call instruction.
  if (IsTailCall) {
    MF.getFrameInfo()->setHasTailCall();
    return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
  }

  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
  InFlag = Chain.getValue(1);
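  // Conventions using the tail-call optimisation expect the callee to pop its
  // own stack arguments, so tell CALLSEQ_END how many bytes the callee
  // restores (rounded up to the 16-byte stack alignment); otherwise the
  // caller cleans up and this is zero.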
  uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt)
                                ? RoundUpToAlignment(NumBytes, 16)
                                : 0;

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
                             DAG.getIntPtrConstant(CalleePopBytes, DL, true),
                             InFlag, DL);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
                         InVals, IsThisReturn,
                         IsThisReturn ? OutVals[0] : SDValue());
}
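// Check whether the return values described by Outs can all be lowered to
// registers of the return calling convention; if not, the generic call
// lowering code demotes the return value (passing it through a hidden sret
// pointer instead).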
bool AArch64TargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
                          ? RetCC_AArch64_WebKit_JS
                          : RetCC_AArch64_AAPCS;
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}
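// Build the AArch64ISD::RET_FLAG node: each return value is copied into its
// assigned register (per the AAPCS or WebKit JS return convention) and the
// registers are added as operands so they stay live until the return.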
SDValue
AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool isVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   SDLoc DL, SelectionDAG &DAG) const {
  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
                          ? RetCC_AArch64_WebKit_JS
                          : RetCC_AArch64_AAPCS;
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC);

  // Copy the result values into the output registers.
  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);
  for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue Arg = OutVals[realRVLocIdx];
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      if (Outs[i].ArgVT == MVT::i1) {
        // AAPCS requires i1 to be zero-extended to i8 by the producer of the
        // value. This is strictly redundant on Darwin (which uses "zeroext
        // i1"), but will be optimised out before ISel.
        Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
        Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      }
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
      break;
    }

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }
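  // Registers that are saved and restored via explicit copies rather than in
  // the prologue/epilogue (used by conventions such as CXX_FAST_TLS) must
  // also be listed as live-outs of the return.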
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      if (AArch64::GPR64RegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (AArch64::FPR64RegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
}
//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
                                                  SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDLoc DL(Op);
  const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GN->getGlobal();
  unsigned char OpFlags =
      Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
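  // OpFlags describes how this reference must be materialized for the current
  // code and relocation model (e.g. whether it has to go through the GOT).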
  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
         "unexpected offset in global node");

  // This also catches the large code model case for Darwin.