//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions."),
  cl::init(false));

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State);
static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State);
static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State);
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  if (llvm::ModelWithRegSequence())
    setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  else
    setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}
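
// A small worked example of the promotion scheme above: with VT = v8i8 and
// PromotedBitwiseVT = v2i32, an AND of two v8i8 values is selected as if it
// had been written
//
//   bitcast (and (v2i32 (bitcast a)), (v2i32 (bitcast b))) to v8i8
//
// so only one bitwise pattern per register size has to exist in the .td
// files. (Sketch only; the legalizer performs the equivalent rewrite on the
// DAG, not on IR.)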

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
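
      // The *vfp comparison routines return an i32 that is nonzero when the
      // predicate holds, so the SETNE conditions registered above make the
      // legalizer test the libcall result against zero; O_F32/O_F64 reuse
      // the __unord* routines with SETEQ to invert that result.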

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  // Libcalls should use the AAPCS base standard ABI, even if hard float
  // is in effect, as per the ARM RTABI specification, section 4.1.2.
  if (Subtarget->isAAPCS_ABI()) {
    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
                            CallingConv::ARM_AAPCS);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);

    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }
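
  // For example, a post-indexed load "ldr r0, [r1], #4" loads through r1 and
  // then advances it by 4 in the same instruction; marking these modes Legal
  // lets the DAG combiner fold such pointer updates into the memory access
  // instead of emitting a separate ADD.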

  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL,  MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    setOperationAction(ISD::MULHS, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL,  MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasV7MOps()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM,  MVT::i32, Expand);
  setOperationAction(ISD::UREM,  MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
  // FIXME: Shouldn't need this, since no register is used, but the legalizer
  // doesn't yet know how to not do that for SjLj.
  setExceptionSelectorRegister(ARM::R0);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  setOperationAction(ISD::MEMBARRIER,         MVT::Other, Custom);

  // Pre-v6, non-Thumb2 targets lack the SXTB/SXTH extract instructions, so
  // sign_extend_inreg must be expanded there. (The stray "|| hasV7MOps()"
  // clause that crept into this condition was dropped: v7-M does provide the
  // v6 sign-extension instructions, and operator precedence made the clause
  // override the whole test anyway.)
  if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Expand);
  setOperationAction(ISD::SELECT,    MVT::f32, Expand);
  setOperationAction(ISD::SELECT,    MVT::f64, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
  setOperationAction(ISD::FPOW,      MVT::f32, Expand);

  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);

  setStackPointerRegisterToSaveRestore(ARM::SP);
  setSchedulingPreference(SchedulingForRegPressure);

  // FIXME: If-converter should use instruction latency to determine
  // profitability rather than relying on fixed limits.
  if (Subtarget->getCPUString() == "generic") {
    // Generic (and overly aggressive) if-conversion limits.
    setIfCvtBlockSizeLimit(10);
    setIfCvtDupBlockSizeLimit(2);
  } else if (Subtarget->hasV7Ops()) {
    setIfCvtBlockSizeLimit(3);
    setIfCvtDupBlockSizeLimit(1);
  } else if (Subtarget->hasV6Ops()) {
    setIfCvtBlockSizeLimit(2);
    setIfCvtDupBlockSizeLimit(1);
  } else {
    setIfCvtBlockSizeLimit(3);
    setIfCvtDupBlockSizeLimit(2);
  }

  maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
  // Do not enable CodePlacementOpt for now: it currently runs after the
  // ARMConstantIslandPass and messes up branch relaxation and placement
  // of constant islands.
  // benefitFromCodePlacementOpt = true;
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
  case ARMISD::CMOV:          return "ARMISD::CMOV";
  case ARMISD::CNEG:          return "ARMISD::CNEG";

  case ARMISD::RBIT:          return "ARMISD::RBIT";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP:  return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
  case ARMISD::SYNCBARRIER:   return "ARMISD::SYNCBARRIER";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
  case ARMISD::VDUP:          return "ARMISD::VDUP";
  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
  case ARMISD::VEXT:          return "ARMISD::VEXT";
  case ARMISD::VREV64:        return "ARMISD::VREV64";
  case ARMISD::VREV32:        return "ARMISD::VREV32";
  case ARMISD::VREV16:        return "ARMISD::VREV16";
  case ARMISD::VZIP:          return "ARMISD::VZIP";
  case ARMISD::VUZP:          return "ARMISD::VUZP";
  case ARMISD::VTRN:          return "ARMISD::VTRN";
  case ARMISD::FMAX:          return "ARMISD::FMAX";
  case ARMISD::FMIN:          return "ARMISD::FMIN";
  }
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}
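
// Some predicates need two ARM conditions. SETONE, for instance, yields MI
// with GT as the second condition: the caller emits one instruction
// predicated on each, so "ordered and not equal" is covered by "less than"
// or "greater than" on the VFP flags.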

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

// APCS f64 is in register pairs, possibly split to stack
static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          CCState &State, bool CanFail) {
  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };

  // Try to get the first register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else {
    // For the 2nd half of a v2f64, do not fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 4),
                                           LocVT, LocInfo));
    return true;
  }

  // Try to get the second register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(4, 4),
                                           LocVT, LocInfo));
  return true;
}

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State) {
  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}
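
// Under APCS an f64 is thus just two consecutive i32 locations: the first
// f64 argument lands in r0/r1, the next in r2/r3, and a half that gets no
// register falls back to a 4-byte stack slot.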

// AAPCS f64 is in aligned register pairs
static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           CCState &State, bool CanFail) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0) {
    // For the 2nd half of a v2f64, do not just fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 8),
                                           LocVT, LocInfo));
    return true;
  }

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}
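
// For example, the first AAPCS f64 takes the r0/r1 pair and the second takes
// r2/r3; when no aligned pair is left, the whole value goes to an
// 8-byte-aligned stack slot rather than being split between a register and
// memory.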

static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State) {
  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                         CCValAssign::LocInfo &LocInfo, CCState &State) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0)
    return false; // we didn't handle it

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  return true;  // we handled it
}

static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State) {
  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                   State);
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       NULL, 0, NULL, 0);
}

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
  }
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset,
                      false, false, 0);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}
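
// In other words, an f64 travelling in core registers is split with a single
// VMOVRRD (one D register to two GPRs); when only one register remains, the
// second half is stored to the stack, which is how an f64 straddling r3 and
// memory is passed.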

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // ARM target does not yet support tail call optimization.
  isTailCall = false;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads.  In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = Outs[realArgIdx].Val;
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert (getTargetMachine().getRelocationModel() == Reloc::Static
            && "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                   getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && !isExt;
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                           Sym, ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }
  if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
    // implicit def LR - LR mustn't be allocated as GPR:$dst of CALL_NOLINK
    Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}

SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) const {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = Outs[realRVLocIdx].Val;

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                 HalfGPRs.getValue(1), Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                               Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are
    // stuck together, avoiding something bad.
    Flag = Chain.getValue(1);
  }

  SDValue result;
  if (Flag.getNode())
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else // Return Void
    result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);

  return result;
}
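
// The net effect for a simple f64 return is a VMOVRRD followed by copies
// into r0 and r1 (four copies for v2f64), with the glue value keeping the
// copies stuck to the final RET_FLAG node.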

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Res;
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}

SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  DebugLoc DL = Op.getDebugLoc();
  EVT PtrVT = getPointerTy();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
  } else {
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
                                                         ARMCP::CPBlockAddress,
                                                         PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
                               PseudoSourceValue::getConstantPool(), 0,
                               false, false, 0);
  if (RelocM == Reloc::Static)
    return Result;
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) const {
  DebugLoc dl = GA->getDebugLoc();
  EVT PtrVT = getPointerTy();
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
  ARMConstantPoolValue *CPV =
    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                             ARMCP::CPValue, PCAdj, "tlsgd", true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);
  // FIXME: is there useful debug info available here?
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
                false, false, false, false,
                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
  return CallResult.first;
}

// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) const {
  const GlobalValue *GV = GA->getGlobal();
  DebugLoc dl = GA->getDebugLoc();
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy();
  // Get the Thread Pointer
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (GV->isDeclaration()) {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    // Initial exec model.
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                               ARMCP::CPValue, PCAdj, "gottpoff", true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
  } else {
    // local exec model
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff");
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
  }

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
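
// To summarize the two models handled above: "initial exec" loads the
// variable's offset from a GOT-relative "gottpoff" slot (one extra load),
// while "local exec" folds the "tpoff" constant in directly; both then just
// add the thread pointer, avoiding the __tls_get_addr call that the general
// dynamic model requires.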

SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
  // otherwise use the "Local Exec" TLS Model
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG);
  else
    return LowerToTLSExecModels(GA, DAG);
}

SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  if (RelocM == Reloc::PIC_) {
    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT");
    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                                 PseudoSourceValue::getConstantPool(), 0,
                                 false, false, 0);
    SDValue Chain = Result.getValue(1);
    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
    if (!UseGOTOFF)
      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
                           PseudoSourceValue::getGOT(), 0,
                           false, false, 0);
    return Result;
  } else {
    // If we have T2 ops, we can materialize the address directly via movt/movw
    // pair. This is always cheaper.
    if (Subtarget->useMovt()) {
      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                         DAG.getTargetGlobalAddress(GV, PtrVT));
    } else {
      SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                         PseudoSourceValue::getConstantPool(), 0,
                         false, false, 0);
    }
  }
}

SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static)
    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
  else {
    ARMPCLabelIndex = AFI->createConstPoolEntryUId();
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               PseudoSourceValue::getConstantPool(), 0,
                               false, false, 0);
  SDValue Chain = Result.getValue(1);

  if (RelocM == Reloc::PIC_) {
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
  }

  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
    Result = DAG.getLoad(PtrVT, dl, Chain, Result,
                         PseudoSourceValue::getGOT(), 0,
                         false, false, 0);

  return Result;
}
1493 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
1494 SelectionDAG &DAG) const {
1495 assert(Subtarget->isTargetELF() &&
1496 "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
1497 MachineFunction &MF = DAG.getMachineFunction();
1498 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1499 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1500 EVT PtrVT = getPointerTy();
1501 DebugLoc dl = Op.getDebugLoc();
1502 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
1503 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
1504 "_GLOBAL_OFFSET_TABLE_",
1505 ARMPCLabelIndex, PCAdj);
1506 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1507 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1508 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1509 PseudoSourceValue::getConstantPool(), 0,
1510 false, false, 0);
1511 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1512 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1513 }
1515 SDValue
1516 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
1517 const ARMSubtarget *Subtarget)
1518 const {
1519 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1520 DebugLoc dl = Op.getDebugLoc();
1521 switch (IntNo) {
1522 default: return SDValue();    // Don't custom lower most intrinsics.
1523 case Intrinsic::arm_thread_pointer: {
1524 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1525 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
1526 }
1527 case Intrinsic::eh_sjlj_lsda: {
1528 MachineFunction &MF = DAG.getMachineFunction();
1529 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1530 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
1531 EVT PtrVT = getPointerTy();
1532 DebugLoc dl = Op.getDebugLoc();
1533 Reloc::Model RelocM = getTargetMachine().getRelocationModel();
1534 SDValue CPAddr;
1535 unsigned PCAdj = (RelocM != Reloc::PIC_)
1536 ? 0 : (Subtarget->isThumb() ? 4 : 8);
1537 ARMConstantPoolValue *CPV =
1538 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
1539 ARMCP::CPLSDA, PCAdj);
1540 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
1541 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
1542 SDValue Result =
1543 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
1544 PseudoSourceValue::getConstantPool(), 0,
1545 false, false, 0);
1546 SDValue Chain = Result.getValue(1);
1548 if (RelocM == Reloc::PIC_) {
1549 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
1550 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
1551 }
1552 return Result;
1553 }
1554 case Intrinsic::eh_sjlj_setjmp:
1555 SDValue Val = Subtarget->isThumb() ?
1556 DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
1557 DAG.getConstant(0, MVT::i32);
1558 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1),
1559 Val);
1560 }
1561 }
1563 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
1564 const ARMSubtarget *Subtarget) {
1565 DebugLoc dl = Op.getDebugLoc();
1566 SDValue Op5 = Op.getOperand(5);
1567 SDValue Res;
1568 unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
1569 if (isDeviceBarrier) {
1570 if (Subtarget->hasV7Ops())
1571 Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
1572 else
1573 Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
1574 DAG.getConstant(0, MVT::i32));
1575 } else {
1576 if (Subtarget->hasV7Ops())
1577 Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
1578 else
1579 Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
1580 DAG.getConstant(0, MVT::i32));
1581 }
1582 return Res;
1583 }
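// Note (editorial, believed but not stated in the source): on ARMv7 these
// nodes select to the dedicated barrier instructions (e.g. DMB), while the
// zero operand added for pre-v7 targets feeds the CP15 barrier encoding
// (mcr p15, 0, <reg>, c7, c10, 5).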
1585 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
1586 MachineFunction &MF = DAG.getMachineFunction();
1587 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
1589 // vastart just stores the address of the VarArgsFrameIndex slot into the
1590 // memory location argument.
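// Rough C equivalent of what is emitted here (illustrative only; the name
// overflow_arg_area is hypothetical):
//   *(void **)ap = &overflow_arg_area;
// va_arg then simply advances this pointer.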
1591 DebugLoc dl = Op.getDebugLoc();
1592 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1593 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1594 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1595 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
1596 false, false, 0);
1597 }
1599 SDValue
1600 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
1601 SelectionDAG &DAG) const {
1602 SDNode *Node = Op.getNode();
1603 DebugLoc dl = Node->getDebugLoc();
1604 EVT VT = Node->getValueType(0);
1605 SDValue Chain = Op.getOperand(0);
1606 SDValue Size = Op.getOperand(1);
1607 SDValue Align = Op.getOperand(2);
1609 // Chain the dynamic stack allocation so that it doesn't modify the stack
1610 // pointer when other instructions are using the stack.
1611 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
1613 unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
1614 unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
1615 if (AlignVal > StackAlign)
1616 // Do this now since selection pass cannot introduce new target
1617 // independent node.
1618 Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
1620 // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, so we will end
1621 // up using an "add r, sp, r" instead. Negate the size now so we don't have
1622 // to do an even more horrible hack later.
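// Illustrative example (not from the source): "alloca(64)" can keep a
// positive size, since Thumb1 SP-relative add/sub encodes word-aligned
// immediates up to 508; larger or unaligned sizes take the negate path.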
1623 MachineFunction &MF = DAG.getMachineFunction();
1624 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1625 if (AFI->isThumb1OnlyFunction()) {
1626 bool Negate = true;
1627 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
1628 if (C) {
1629 uint32_t Val = C->getZExtValue();
1630 if (Val <= 508 && ((Val & 3) == 0))
1631 Negate = false;
1632 }
1633 if (Negate)
1634 Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
1635 }
1637 SDVTList VTList = DAG.getVTList(VT, MVT::Other);
1638 SDValue Ops1[] = { Chain, Size, Align };
1639 SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
1640 Chain = Res.getValue(1);
1641 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
1642 DAG.getIntPtrConstant(0, true), SDValue());
1643 SDValue Ops2[] = { Res, Chain };
1644 return DAG.getMergeValues(Ops2, 2, dl);
1645 }
1647 SDValue
1648 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
1649 SDValue &Root, SelectionDAG &DAG,
1650 DebugLoc dl) const {
1651 MachineFunction &MF = DAG.getMachineFunction();
1652 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1654 TargetRegisterClass *RC;
1655 if (AFI->isThumb1OnlyFunction())
1656 RC = ARM::tGPRRegisterClass;
1657 else
1658 RC = ARM::GPRRegisterClass;
1660 // Transform the arguments stored in physical registers into virtual ones.
1661 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1662 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
1664 SDValue ArgValue2;
1665 if (NextVA.isMemLoc()) {
1666 MachineFrameInfo *MFI = MF.getFrameInfo();
1667 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false);
1669 // Create load node to retrieve arguments from the stack.
1670 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1671 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
1672 PseudoSourceValue::getFixedStack(FI), 0,
1673 false, false, 0);
1674 } else {
1675 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1676 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
1677 }
1679 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
1680 }
1682 SDValue
1683 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
1684 CallingConv::ID CallConv, bool isVarArg,
1685 const SmallVectorImpl<ISD::InputArg>
1686 &Ins,
1687 DebugLoc dl, SelectionDAG &DAG,
1688 SmallVectorImpl<SDValue> &InVals)
1689 const {
1691 MachineFunction &MF = DAG.getMachineFunction();
1692 MachineFrameInfo *MFI = MF.getFrameInfo();
1694 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1696 // Assign locations to all of the incoming arguments.
1697 SmallVector<CCValAssign, 16> ArgLocs;
1698 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
1699 *DAG.getContext());
1700 CCInfo.AnalyzeFormalArguments(Ins,
1701 CCAssignFnForNode(CallConv, /* Return*/ false,
1702 isVarArg));
1704 SmallVector<SDValue, 16> ArgValues;
1706 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1707 CCValAssign &VA = ArgLocs[i];
1709 // Arguments stored in registers.
1710 if (VA.isRegLoc()) {
1711 EVT RegVT = VA.getLocVT();
1713 SDValue ArgValue;
1714 if (VA.needsCustom()) {
1715 // f64 and vector types are split up into multiple registers or
1716 // combinations of registers and stack slots.
1717 if (VA.getLocVT() == MVT::v2f64) {
1718 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
1719 Chain, DAG, dl);
1720 VA = ArgLocs[++i]; // skip ahead to next loc
1721 SDValue ArgValue2;
1722 if (VA.isMemLoc()) {
1723 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(),
1724 true, false);
1725 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1726 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
1727 PseudoSourceValue::getFixedStack(FI), 0,
1728 false, false, 0);
1729 } else
1730 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
1731 Chain, DAG, dl);
1733 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1734 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
1735 ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
1736 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
1737 ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
1738 } else
1739 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
1741 } else {
1742 TargetRegisterClass *RC;
1744 if (RegVT == MVT::f32)
1745 RC = ARM::SPRRegisterClass;
1746 else if (RegVT == MVT::f64)
1747 RC = ARM::DPRRegisterClass;
1748 else if (RegVT == MVT::v2f64)
1749 RC = ARM::QPRRegisterClass;
1750 else if (RegVT == MVT::i32)
1751 RC = (AFI->isThumb1OnlyFunction() ?
1752 ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
1754 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
1756 // Transform the arguments in physical registers into virtual ones.
1757 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
1758 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1759 }
1761 // If this is an 8 or 16-bit value, it is really passed promoted
1762 // to 32 bits. Insert an assert[sz]ext to capture this, then
1763 // truncate to the right size.
1764 switch (VA.getLocInfo()) {
1765 default: llvm_unreachable("Unknown loc info!");
1766 case CCValAssign::Full: break;
1767 case CCValAssign::BCvt:
1768 ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue);
1769 break;
1770 case CCValAssign::SExt:
1771 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1772 DAG.getValueType(VA.getValVT()));
1773 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1774 break;
1775 case CCValAssign::ZExt:
1776 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1777 DAG.getValueType(VA.getValVT()));
1778 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1779 break;
1780 }
1782 InVals.push_back(ArgValue);
1784 } else { // VA.isRegLoc()
1787 assert(VA.isMemLoc());
1788 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
1790 unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
1791 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
1792 true, false);
1794 // Create load nodes to retrieve arguments from the stack.
1795 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
1796 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
1797 PseudoSourceValue::getFixedStack(FI), 0,
1798 false, false, 0));
1799 }
1800 }
1802 // varargs
1803 if (isVarArg) {
1804 static const unsigned GPRArgRegs[] = {
1805 ARM::R0, ARM::R1, ARM::R2, ARM::R3
1806 };
1808 unsigned NumGPRs = CCInfo.getFirstUnallocated
1809 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
1811 unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
1812 unsigned VARegSize = (4 - NumGPRs) * 4;
1813 unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
1814 unsigned ArgOffset = CCInfo.getNextStackOffset();
1815 if (VARegSaveSize) {
1816 // If this function is vararg, store any remaining integer argument regs
1817 // to their spots on the stack so that they may be loaded by dereferencing
1818 // the result of va_next.
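// E.g. (illustrative): for "int f(int a, ...)", a arrives in r0 and r1-r3
// are spilled here, so the register and stack varargs form one contiguous
// area that va_arg can walk linearly.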
1819 AFI->setVarArgsRegSaveSize(VARegSaveSize);
1820 AFI->setVarArgsFrameIndex(
1821 MFI->CreateFixedObject(VARegSaveSize,
1822 ArgOffset + VARegSaveSize - VARegSize,
1823 true, false));
1824 SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
1825 getPointerTy());
1827 SmallVector<SDValue, 4> MemOps;
1828 for (; NumGPRs < 4; ++NumGPRs) {
1829 TargetRegisterClass *RC;
1830 if (AFI->isThumb1OnlyFunction())
1831 RC = ARM::tGPRRegisterClass;
1832 else
1833 RC = ARM::GPRRegisterClass;
1835 unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
1836 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
1837 SDValue Store =
1838 DAG.getStore(Val.getValue(1), dl, Val, FIN,
1839 PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0,
1840 false, false, 0);
1841 MemOps.push_back(Store);
1842 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
1843 DAG.getConstant(4, getPointerTy()));
1844 }
1845 if (!MemOps.empty())
1846 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1847 &MemOps[0], MemOps.size());
1848 } else
1849 // This will point to the next argument passed via stack.
1850 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset,
1851 true, false));
1852 }
1854 return Chain;
1855 }
1857 /// isFloatingPointZero - Return true if this is +0.0.
1858 static bool isFloatingPointZero(SDValue Op) {
1859 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1860 return CFP->getValueAPF().isPosZero();
1861 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1862 // Maybe this has already been legalized into the constant pool?
1863 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
1864 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
1865 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
1866 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1867 return CFP->getValueAPF().isPosZero();
1868 }
1869 }
1870 return false;
1871 }
1873 /// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
1874 /// the given operands.
1875 SDValue
1876 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1877 SDValue &ARMCC, SelectionDAG &DAG,
1878 DebugLoc dl) const {
1879 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1880 unsigned C = RHSC->getZExtValue();
1881 if (!isLegalICmpImmediate(C)) {
1882 // Constant does not fit, try adjusting it by one?
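// Illustrative rewrite (assuming 0x101 is not an encodable immediate but
// 0x100 is):
//   "x <s 0x101"  (SETLT)  ==>  "x <=s 0x100"  (SETLE)
// The switch below applies this C-1/C+1 adjustment per condition code.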
1883 switch (CC) {
1884 default: break;
1885 case ISD::SETLT:
1886 case ISD::SETGE:
1887 if (isLegalICmpImmediate(C-1)) {
1888 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1889 RHS = DAG.getConstant(C-1, MVT::i32);
1890 }
1891 break;
1892 case ISD::SETULT:
1893 case ISD::SETUGE:
1894 if (C > 0 && isLegalICmpImmediate(C-1)) {
1895 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1896 RHS = DAG.getConstant(C-1, MVT::i32);
1897 }
1898 break;
1899 case ISD::SETLE:
1900 case ISD::SETGT:
1901 if (isLegalICmpImmediate(C+1)) {
1902 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1903 RHS = DAG.getConstant(C+1, MVT::i32);
1904 }
1905 break;
1906 case ISD::SETULE:
1907 case ISD::SETUGT:
1908 if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
1909 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1910 RHS = DAG.getConstant(C+1, MVT::i32);
1911 }
1912 break;
1913 }
1914 }
1915 }
1917 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
1918 ARMISD::NodeType CompareType;
1919 switch (CondCode) {
1920 default:
1921 CompareType = ARMISD::CMP;
1922 break;
1923 case ARMCC::EQ:
1924 case ARMCC::NE:
1925 // Uses only Z Flag
1926 CompareType = ARMISD::CMPZ;
1927 break;
1928 }
1929 ARMCC = DAG.getConstant(CondCode, MVT::i32);
1930 return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
1931 }
1933 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
1934 static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
1935 DebugLoc dl) {
1936 SDValue Cmp;
1937 if (!isFloatingPointZero(RHS))
1938 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
1939 else
1940 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS);
1941 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
1942 }
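// Note: the CMPFPw0 path above uses the VFP compare-with-zero form, which
// compares against an implicit +0.0 and so avoids materializing an FP zero.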
1944 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
1945 EVT VT = Op.getValueType();
1946 SDValue LHS = Op.getOperand(0);
1947 SDValue RHS = Op.getOperand(1);
1948 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
1949 SDValue TrueVal = Op.getOperand(2);
1950 SDValue FalseVal = Op.getOperand(3);
1951 DebugLoc dl = Op.getDebugLoc();
1953 if (LHS.getValueType() == MVT::i32) {
1954 SDValue ARMCC;
1955 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
1956 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
1957 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR, Cmp);
1958 }
1960 ARMCC::CondCodes CondCode, CondCode2;
1961 FPCCToARMCC(CC, CondCode, CondCode2);
1963 SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
1964 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
1965 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
1966 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
1967 ARMCC, CCR, Cmp);
1968 if (CondCode2 != ARMCC::AL) {
1969 SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
1970 // FIXME: Needs another CMP because flag can have but one use.
1971 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
1972 Result = DAG.getNode(ARMISD::CMOV, dl, VT,
1973 Result, TrueVal, ARMCC2, CCR, Cmp2);
1974 }
1976 return Result;
1977 }
1978 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
1979 SDValue Chain = Op.getOperand(0);
1980 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
1981 SDValue LHS = Op.getOperand(2);
1982 SDValue RHS = Op.getOperand(3);
1983 SDValue Dest = Op.getOperand(4);
1984 DebugLoc dl = Op.getDebugLoc();
1986 if (LHS.getValueType() == MVT::i32) {
1987 SDValue ARMCC;
1988 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
1989 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
1990 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
1991 Chain, Dest, ARMCC, CCR, Cmp);
1992 }
1994 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
1995 ARMCC::CondCodes CondCode, CondCode2;
1996 FPCCToARMCC(CC, CondCode, CondCode2);
1998 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
1999 SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
2000 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2001 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
2002 SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
2003 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2004 if (CondCode2 != ARMCC::AL) {
2005 ARMCC = DAG.getConstant(CondCode2, MVT::i32);
2006 SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
2007 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
2008 }
2010 return Res;
2011 }
2012 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
2013 SDValue Chain = Op.getOperand(0);
2014 SDValue Table = Op.getOperand(1);
2015 SDValue Index = Op.getOperand(2);
2016 DebugLoc dl = Op.getDebugLoc();
2018 EVT PTy = getPointerTy();
2019 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
2020 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2021 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
2022 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
2023 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
2024 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
2025 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
2026 if (Subtarget->isThumb2()) {
2027 // Thumb2 uses a two-level jump. That is, it jumps into the jump table
2028 // which does another jump to the destination. This also makes it easier
2029 // to translate it to TBB / TBH later.
2030 // FIXME: This might not work if the function is extremely large.
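// In this scheme the jump table entries are themselves branches rather
// than plain addresses, which is what later allows compressing the table
// into TBB/TBH byte/halfword offset form.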
2031 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
2032 Addr, Op.getOperand(2), JTI, UId);
2033 }
2034 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
2035 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
2036 PseudoSourceValue::getJumpTable(), 0,
2037 false, false, 0);
2038 Chain = Addr.getValue(1);
2039 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
2040 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2041 }
2042 Addr = DAG.getLoad(PTy, dl, Chain, Addr,
2043 PseudoSourceValue::getJumpTable(), 0, false, false, 0);
2044 Chain = Addr.getValue(1);
2045 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
2046 }
2049 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2050 DebugLoc dl = Op.getDebugLoc();
2051 unsigned Opc;
2053 switch (Op.getOpcode()) {
2054 default:
2055 assert(0 && "Invalid opcode!");
2056 case ISD::FP_TO_SINT:
2057 Opc = ARMISD::FTOSI;
2058 break;
2059 case ISD::FP_TO_UINT:
2060 Opc = ARMISD::FTOUI;
2061 break;
2062 }
2063 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
2064 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
2065 }
2067 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2068 EVT VT = Op.getValueType();
2069 DebugLoc dl = Op.getDebugLoc();
2070 unsigned Opc;
2072 switch (Op.getOpcode()) {
2073 default:
2074 assert(0 && "Invalid opcode!");
2075 case ISD::SINT_TO_FP:
2076 Opc = ARMISD::SITOF;
2077 break;
2078 case ISD::UINT_TO_FP:
2079 Opc = ARMISD::UITOF;
2080 break;
2081 }
2083 Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
2084 return DAG.getNode(Opc, dl, VT, Op);
2085 }
2087 static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
2088 // Implement fcopysign with a fabs and a conditional fneg.
2089 SDValue Tmp0 = Op.getOperand(0);
2090 SDValue Tmp1 = Op.getOperand(1);
2091 DebugLoc dl = Op.getDebugLoc();
2092 EVT VT = Op.getValueType();
2093 EVT SrcVT = Tmp1.getValueType();
2094 SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
2095 SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
2096 SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
2097 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2098 return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
2099 }
2101 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
2102 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
2103 MFI->setFrameAddressIsTaken(true);
2104 EVT VT = Op.getValueType();
2105 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
2106 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2107 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
2108 ? ARM::R7 : ARM::R11;
2109 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
2110 while (Depth--)
2111 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
2112 false, false, 0);
2113 return FrameAddr;
2114 }
2116 SDValue
2117 ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
2118 SDValue Chain,
2119 SDValue Dst, SDValue Src,
2120 SDValue Size, unsigned Align,
2121 bool isVolatile, bool AlwaysInline,
2122 const Value *DstSV,
2123 uint64_t DstSVOff,
2124 const Value *SrcSV,
2125 uint64_t SrcSVOff) const {
2126 // Do repeated 4-byte loads and stores. To be improved.
2127 // This requires 4-byte alignment.
2128 if ((Align & 3) != 0)
2129 return SDValue();
2130 // This requires the copy size to be a constant, preferably
2131 // within a subtarget-specific limit.
2132 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
2133 if (!ConstantSize)
2134 return SDValue();
2135 uint64_t SizeVal = ConstantSize->getZExtValue();
2136 if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
2137 return SDValue();
2139 unsigned BytesLeft = SizeVal & 3;
2140 unsigned NumMemOps = SizeVal >> 2;
2141 unsigned EmittedNumMemOps = 0;
2142 EVT VT = MVT::i32;
2143 unsigned VTSize = 4;
2144 unsigned i = 0;
2145 const unsigned MAX_LOADS_IN_LDM = 6;
2146 SDValue TFOps[MAX_LOADS_IN_LDM];
2147 SDValue Loads[MAX_LOADS_IN_LDM];
2148 uint64_t SrcOff = 0, DstOff = 0;
2150 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
2151 // same number of stores. The loads and stores will get combined into
2152 // ldm/stm later on.
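// E.g. (illustrative) a 26-byte copy emits one run of 6 word loads, a
// TokenFactor, 6 word stores, and then the trailing-bytes code below
// handles the remaining 2 bytes with a single i16 load/store.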
2153 while (EmittedNumMemOps < NumMemOps) {
2154 for (i = 0;
2155 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
2156 Loads[i] = DAG.getLoad(VT, dl, Chain,
2157 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
2158 DAG.getConstant(SrcOff, MVT::i32)),
2159 SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
2160 TFOps[i] = Loads[i].getValue(1);
2161 SrcOff += VTSize;
2162 }
2163 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
2165 for (i = 0;
2166 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
2167 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
2168 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
2169 DAG.getConstant(DstOff, MVT::i32)),
2170 DstSV, DstSVOff + DstOff, isVolatile, false, 0);
2171 DstOff += VTSize;
2172 }
2173 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
2175 EmittedNumMemOps += i;
2176 }
2178 if (BytesLeft == 0)
2179 return Chain;
2181 // Issue loads / stores for the trailing (1 - 3) bytes.
2182 unsigned BytesLeftSave = BytesLeft;
2183 i = 0;
2184 while (BytesLeft) {
2185 if (BytesLeft >= 2) {
2186 VT = MVT::i16;
2187 VTSize = 2;
2188 } else {
2189 VT = MVT::i8;
2190 VTSize = 1;
2191 }
2193 Loads[i] = DAG.getLoad(VT, dl, Chain,
2194 DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
2195 DAG.getConstant(SrcOff, MVT::i32)),
2196 SrcSV, SrcSVOff + SrcOff, false, false, 0);
2197 TFOps[i] = Loads[i].getValue(1);
2198 ++i;
2199 SrcOff += VTSize;
2200 BytesLeft -= VTSize;
2201 }
2202 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
2204 i = 0;
2205 BytesLeft = BytesLeftSave;
2206 while (BytesLeft) {
2207 if (BytesLeft >= 2) {
2208 VT = MVT::i16;
2209 VTSize = 2;
2210 } else {
2211 VT = MVT::i8;
2212 VTSize = 1;
2213 }
2215 TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
2216 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
2217 DAG.getConstant(DstOff, MVT::i32)),
2218 DstSV, DstSVOff + DstOff, false, false, 0);
2219 ++i;
2220 DstOff += VTSize;
2221 BytesLeft -= VTSize;
2222 }
2223 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
2224 }
2226 /// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
2227 /// expand a bit convert where either the source or destination type is i64 to
2228 /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
2229 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
2230 /// vectors), since the legalizer won't know what to do with that.
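// Sketch of the two directions handled below:
//   i64 -> f64:  (VMOVDRR (extract_element x, 0), (extract_element x, 1))
//   f64 -> i64:  (build_pair (VMOVRRD f64):0, (VMOVRRD f64):1)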
2231 static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
2232 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2233 DebugLoc dl = N->getDebugLoc();
2234 SDValue Op = N->getOperand(0);
2236 // This function is only supposed to be called for i64 types, either as the
2237 // source or destination of the bit convert.
2238 EVT SrcVT = Op.getValueType();
2239 EVT DstVT = N->getValueType(0);
2240 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
2241 "ExpandBIT_CONVERT called for non-i64 type");
2243 // Turn i64->f64 into VMOVDRR.
2244 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
2245 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2246 DAG.getConstant(0, MVT::i32));
2247 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
2248 DAG.getConstant(1, MVT::i32));
2249 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2250 }
2252 // Turn f64->i64 into VMOVRRD.
2253 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
2254 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
2255 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
2256 // Merge the pieces into a single i64 value.
2257 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
2258 }
2260 return SDValue();
2261 }
2263 /// getZeroVector - Returns a vector of specified type with all zero elements.
2265 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2266 assert(VT.isVector() && "Expected a vector type");
2268 // Zero vectors are used to represent vector negation and in those cases
2269 // will be implemented with the NEON VNEG instruction. However, VNEG does
2270 // not support i64 elements, so sometimes the zero vectors will need to be
2271 // explicitly constructed. For those cases, and potentially other uses in
2272 // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted
2273 // to their dest type. This ensures they get CSE'd.
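// E.g. a v2i32 zero is built as a v8i8 BUILD_VECTOR of zero bytes and then
// bitcast to v2i32, so all zero vectors share one canonical form.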
2274 SDValue Vec;
2275 SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
2276 SmallVector<SDValue, 8> Ops;
2277 MVT TVT;
2279 if (VT.getSizeInBits() == 64) {
2280 Ops.assign(8, Cst); TVT = MVT::v8i8;
2281 } else {
2282 Ops.assign(16, Cst); TVT = MVT::v16i8;
2283 }
2284 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
2286 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
2287 }
2289 /// getOnesVector - Returns a vector of specified type with all bits set.
2291 static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2292 assert(VT.isVector() && "Expected a vector type");
2294 // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
2295 // dest type. This ensures they get CSE'd.
2296 SDValue Vec;
2297 SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
2298 SmallVector<SDValue, 8> Ops;
2299 MVT TVT;
2301 if (VT.getSizeInBits() == 64) {
2302 Ops.assign(8, Cst); TVT = MVT::v8i8;
2303 } else {
2304 Ops.assign(16, Cst); TVT = MVT::v16i8;
2305 }
2306 Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
2308 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
2309 }
2311 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
2312 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
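// Sketch of the lowering for (hi,lo) >> amt with 32-bit halves:
//   lo' = amt < 32 ? (lo >> amt) | (hi << (32-amt)) : hi >> (amt-32)
//   hi' = hi >> amt     (arithmetic or logical, per the opcode)
// with the lo' selection done via a CMOV on the sign of (amt - 32).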
2313 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
2314 SelectionDAG &DAG) const {
2315 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2316 EVT VT = Op.getValueType();
2317 unsigned VTBits = VT.getSizeInBits();
2318 DebugLoc dl = Op.getDebugLoc();
2319 SDValue ShOpLo = Op.getOperand(0);
2320 SDValue ShOpHi = Op.getOperand(1);
2321 SDValue ShAmt = Op.getOperand(2);
2322 SDValue ARMCC;
2323 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
2325 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
2327 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2328 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2329 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
2330 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2331 DAG.getConstant(VTBits, MVT::i32));
2332 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
2333 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2334 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
2336 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2337 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2338 ARMCC, DAG, dl);
2339 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
2340 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
2341 CCR, Cmp);
2343 SDValue Ops[2] = { Lo, Hi };
2344 return DAG.getMergeValues(Ops, 2, dl);
2345 }
2347 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
2348 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
2349 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
2350 SelectionDAG &DAG) const {
2351 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
2352 EVT VT = Op.getValueType();
2353 unsigned VTBits = VT.getSizeInBits();
2354 DebugLoc dl = Op.getDebugLoc();
2355 SDValue ShOpLo = Op.getOperand(0);
2356 SDValue ShOpHi = Op.getOperand(1);
2357 SDValue ShAmt = Op.getOperand(2);
2358 SDValue ARMCC;
2360 assert(Op.getOpcode() == ISD::SHL_PARTS);
2361 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
2362 DAG.getConstant(VTBits, MVT::i32), ShAmt);
2363 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
2364 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
2365 DAG.getConstant(VTBits, MVT::i32));
2366 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
2367 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
2369 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
2370 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
2371 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
2372 ARMCC, DAG, dl);
2373 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
2374 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
2375 CCR, Cmp);
2377 SDValue Ops[2] = { Lo, Hi };
2378 return DAG.getMergeValues(Ops, 2, dl);
2379 }
2381 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
2382 const ARMSubtarget *ST) {
2383 EVT VT = N->getValueType(0);
2384 DebugLoc dl = N->getDebugLoc();
2386 if (!ST->hasV6T2Ops())
2387 return SDValue();
2389 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
2390 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
2391 }
2393 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
2394 const ARMSubtarget *ST) {
2395 EVT VT = N->getValueType(0);
2396 DebugLoc dl = N->getDebugLoc();
2398 // Lower vector shifts on NEON to use VSHL.
2399 if (VT.isVector()) {
2400 assert(ST->hasNEON() && "unexpected vector shift");
2402 // Left shifts translate directly to the vshiftu intrinsic.
2403 if (N->getOpcode() == ISD::SHL)
2404 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2405 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
2406 N->getOperand(0), N->getOperand(1));
2408 assert((N->getOpcode() == ISD::SRA ||
2409 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
2411 // NEON uses the same intrinsics for both left and right shifts. For
2412 // right shifts, the shift amounts are negative, so negate the vector of
2413 // shift amounts.
2414 EVT ShiftVT = N->getOperand(1).getValueType();
2415 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
2416 getZeroVector(ShiftVT, DAG, dl),
2417 N->getOperand(1));
2418 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
2419 Intrinsic::arm_neon_vshifts :
2420 Intrinsic::arm_neon_vshiftu);
2421 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
2422 DAG.getConstant(vshiftInt, MVT::i32),
2423 N->getOperand(0), NegatedCount);
2424 }
2426 // We can get here for a node like i32 = ISD::SHL i32, i64
2427 if (VT != MVT::i64)
2428 return SDValue();
2430 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
2431 "Unknown shift to lower!");
2433 // We only lower SRA, SRL of 1 here, all others use generic lowering.
2434 if (!isa<ConstantSDNode>(N->getOperand(1)) ||
2435 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
2436 return SDValue();
2438 // If we are in thumb mode, we don't have RRX.
2439 if (ST->isThumb1Only()) return SDValue();
2441 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
2442 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2443 DAG.getConstant(0, MVT::i32));
2444 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
2445 DAG.getConstant(1, MVT::i32));
2447 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
2448 // captures the result into a carry flag.
2449 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
2450 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1);
2452 // The low part is an ARMISD::RRX operand, which shifts the carry in.
2453 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
2455 // Merge the pieces into a single i64 value.
2456 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
2457 }
2459 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
2460 SDValue TmpOp0, TmpOp1;
2461 bool Invert = false;
2462 bool Swap = false;
2463 unsigned Opc = 0;
2465 SDValue Op0 = Op.getOperand(0);
2466 SDValue Op1 = Op.getOperand(1);
2467 SDValue CC = Op.getOperand(2);
2468 EVT VT = Op.getValueType();
2469 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
2470 DebugLoc dl = Op.getDebugLoc();
2472 if (Op.getOperand(1).getValueType().isFloatingPoint()) {
2473 switch (SetCCOpcode) {
2474 default: llvm_unreachable("Illegal FP comparison"); break;
2475 case ISD::SETUNE:
2476 case ISD::SETNE:  Invert = true; // Fallthrough
2477 case ISD::SETOEQ:
2478 case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
2479 case ISD::SETOLT:
2480 case ISD::SETLT: Swap = true; // Fallthrough
2481 case ISD::SETOGT:
2482 case ISD::SETGT:  Opc = ARMISD::VCGT; break;
2483 case ISD::SETOLE:
2484 case ISD::SETLE:  Swap = true; // Fallthrough
2485 case ISD::SETOGE:
2486 case ISD::SETGE: Opc = ARMISD::VCGE; break;
2487 case ISD::SETUGE: Swap = true; // Fallthrough
2488 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
2489 case ISD::SETUGT: Swap = true; // Fallthrough
2490 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
2491 case ISD::SETUEQ: Invert = true; // Fallthrough
2492 case ISD::SETONE:
2493 // Expand this to (OLT | OGT).
2494 TmpOp0 = Op0;
2495 TmpOp1 = Op1;
2496 Opc = ISD::OR;
2497 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2498 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
2499 break;
2500 case ISD::SETUO: Invert = true; // Fallthrough
2501 case ISD::SETO:
2502 // Expand this to (OLT | OGE).
2503 TmpOp0 = Op0;
2504 TmpOp1 = Op1;
2505 Opc = ISD::OR;
2506 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
2507 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
2508 break;
2509 }
2510 } else {
2511 // Integer comparisons.
2512 switch (SetCCOpcode) {
2513 default: llvm_unreachable("Illegal integer comparison"); break;
2514 case ISD::SETNE: Invert = true;
2515 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
2516 case ISD::SETLT: Swap = true;
2517 case ISD::SETGT: Opc = ARMISD::VCGT; break;
2518 case ISD::SETLE: Swap = true;
2519 case ISD::SETGE: Opc = ARMISD::VCGE; break;
2520 case ISD::SETULT: Swap = true;
2521 case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
2522 case ISD::SETULE: Swap = true;
2523 case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
2524 }
2525 }
2526 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
2527 if (Opc == ARMISD::VCEQ) {
2529 SDValue AndOp;
2530 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
2531 AndOp = Op0;
2532 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
2533 AndOp = Op1;
2535 // Ignore bitconvert.
2536 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT)
2537 AndOp = AndOp.getOperand(0);
2539 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
2540 Opc = ARMISD::VTST;
2541 Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0));
2542 Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1));
2543 Invert = !Invert;
2544 }
2545 }
2548 if (Swap)
2549 std::swap(Op0, Op1);
2551 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
2553 if (Invert)
2554 Result = DAG.getNOT(dl, Result, VT);
2556 return Result;
2557 }
2559 /// isVMOVSplat - Check if the specified splat value corresponds to an immediate
2560 /// VMOV instruction, and if so, return the constant being splatted.
2561 static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
2562 unsigned SplatBitSize, SelectionDAG &DAG) {
2563 switch (SplatBitSize) {
2564 case 8:
2565 // Any 1-byte value is OK.
2566 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
2567 return DAG.getTargetConstant(SplatBits, MVT::i8);
2569 case 16:
2570 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
2571 if ((SplatBits & ~0xff) == 0 ||
2572 (SplatBits & ~0xff00) == 0)
2573 return DAG.getTargetConstant(SplatBits, MVT::i16);
2574 break;
2576 case 32:
2577 // NEON's 32-bit VMOV supports splat values where:
2578 // * only one byte is nonzero, or
2579 // * the least significant byte is 0xff and the second byte is nonzero, or
2580 // * the least significant 2 bytes are 0xff and the third is nonzero.
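// Illustrative examples: 0x00004500, 0x000045ff, and 0x0045ffff are all
// representable as a 32-bit VMOV splat under these rules; 0x00450045 is not.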
2581 if ((SplatBits & ~0xff) == 0 ||
2582 (SplatBits & ~0xff00) == 0 ||
2583 (SplatBits & ~0xff0000) == 0 ||
2584 (SplatBits & ~0xff000000) == 0)
2585 return DAG.getTargetConstant(SplatBits, MVT::i32);
2587 if ((SplatBits & ~0xffff) == 0 &&
2588 ((SplatBits | SplatUndef) & 0xff) == 0xff)
2589 return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32);
2591 if ((SplatBits & ~0xffffff) == 0 &&
2592 ((SplatBits | SplatUndef) & 0xffff) == 0xffff)
2593 return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32);
2595 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
2596 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
2597 // VMOV.I32. A (very) minor optimization would be to replicate the value
2598 // and fall through here to test for a valid 64-bit splat. But, then the
2599 // caller would also need to check and handle the change in size.
2600 break;
2602 case 64: {
2603 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
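// E.g. 0xff0000ffff00ffff can be encoded; any byte that is neither 0x00
// nor 0xff (ignoring undef bits) causes the loop below to bail out.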
2604 uint64_t BitMask = 0xff;
2605 uint64_t Val = 0;
2606 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
2607 if (((SplatBits | SplatUndef) & BitMask) == BitMask)
2608 Val |= BitMask;
2609 else if ((SplatBits & BitMask) != 0)
2610 return SDValue();
2611 BitMask <<= 8;
2612 }
2613 return DAG.getTargetConstant(Val, MVT::i64);
2614 }
2616 default:
2617 llvm_unreachable("unexpected size for isVMOVSplat");
2618 break;
2619 }
2621 return SDValue();
2622 }
2624 /// getVMOVImm - If this is a build_vector of constants which can be
2625 /// formed by using a VMOV instruction of the specified element size,
2626 /// return the constant being splatted. The ByteSize field indicates the
2627 /// number of bytes of each element [1248].
2628 SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2629 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
2630 APInt SplatBits, SplatUndef;
2631 unsigned SplatBitSize;
2632 bool HasAnyUndefs;
2633 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
2634 HasAnyUndefs, ByteSize * 8))
2635 return SDValue();
2637 if (SplatBitSize > ByteSize * 8)
2638 return SDValue();
2640 return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
2641 SplatBitSize, DAG);
2642 }
2644 static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
2645 bool &ReverseVEXT, unsigned &Imm) {
2646 unsigned NumElts = VT.getVectorNumElements();
2647 ReverseVEXT = false;
2648 Imm = M[0];
2650 // If this is a VEXT shuffle, the immediate value is the index of the first
2651 // element.  The other shuffle indices must be the successive elements after
2652 // the first one.
2653 unsigned ExpectedElt = Imm;
2654 for (unsigned i = 1; i < NumElts; ++i) {
2655 // Increment the expected index.  If it wraps around, it may still be
2656 // a VEXT but the source vectors must be swapped.
2657 ExpectedElt += 1;
2658 if (ExpectedElt == NumElts * 2) {
2659 ExpectedElt = 0;
2660 ReverseVEXT = true;
2661 }
2663 if (ExpectedElt != static_cast<unsigned>(M[i]))
2664 return false;
2665 }
2667 // Adjust the index value if the source operands will be swapped.
2668 if (ReverseVEXT)
2669 Imm -= NumElts;
2671 return true;
2672 }
2674 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
2675 /// instruction with the specified blocksize. (The order of the elements
2676 /// within each block of the vector is reversed.)
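// E.g. for v4i32, isVREVMask(M, VT, 64) matches mask <1,0,3,2>: BlockElts
// is 2, and each 2-element (64-bit) block is element-reversed.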
2677 static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
2678 unsigned BlockSize) {
2679 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
2680 "Only possible block sizes for VREV are: 16, 32, 64");
2682 unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2683 if (EltSz == 64)
2684 return false;
2686 unsigned NumElts = VT.getVectorNumElements();
2687 unsigned BlockElts = M[0] + 1;
2689 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
2690 return false;
2692 for (unsigned i = 0; i < NumElts; ++i) {
2693 if ((unsigned) M[i] !=
2694 (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
2695 return false;
2696 }
2698 return true;
2699 }
2701 static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
2702 unsigned &WhichResult) {
2703 unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2704 if (EltSz == 64)
2705 return false;
2707 unsigned NumElts = VT.getVectorNumElements();
2708 WhichResult = (M[0] == 0 ? 0 : 1);
2709 for (unsigned i = 0; i < NumElts; i += 2) {
2710 if ((unsigned) M[i] != i + WhichResult ||
2711 (unsigned) M[i+1] != i + NumElts + WhichResult)
2712 return false;
2713 }
2714 return true;
2715 }
2717 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
2718 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
2719 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
2720 static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
2721 unsigned &WhichResult) {
2722 unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2723 if (EltSz == 64)
2724 return false;
2726 unsigned NumElts = VT.getVectorNumElements();
2727 WhichResult = (M[0] == 0 ? 0 : 1);
2728 for (unsigned i = 0; i < NumElts; i += 2) {
2729 if ((unsigned) M[i] != i + WhichResult ||
2730 (unsigned) M[i+1] != i + WhichResult)
2731 return false;
2732 }
2733 return true;
2734 }
2736 static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
2737 unsigned &WhichResult) {
2738 unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2739 if (EltSz == 64)
2740 return false;
2742 unsigned NumElts = VT.getVectorNumElements();
2743 WhichResult = (M[0] == 0 ? 0 : 1);
2744 for (unsigned i = 0; i != NumElts; ++i) {
2745 if ((unsigned) M[i] != 2 * i + WhichResult)
2746 return false;
2747 }
2749 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2750 if (VT.is64BitVector() && EltSz == 32)
2751 return false;
2753 return true;
2754 }
2756 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
2757 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
2758 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
2759 static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
2760 unsigned &WhichResult) {
2761 unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2762 if (EltSz == 64)
2763 return false;
2765 unsigned Half = VT.getVectorNumElements() / 2;
2766 WhichResult = (M[0] == 0 ? 0 : 1);
2767 for (unsigned j = 0; j != 2; ++j) {
2768 unsigned Idx = WhichResult;
2769 for (unsigned i = 0; i != Half; ++i) {
2770 if ((unsigned) M[i + j * Half] != Idx)
2771 return false;
2772 Idx += 2;
2773 }
2774 }
2776 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2777 if (VT.is64BitVector() && EltSz == 32)
2778 return false;
2780 return true;
2781 }
2783 static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
2784 unsigned &WhichResult) {
2785 unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2786 if (EltSz == 64)
2787 return false;
2789 unsigned NumElts = VT.getVectorNumElements();
2790 WhichResult = (M[0] == 0 ? 0 : 1);
2791 unsigned Idx = WhichResult * NumElts / 2;
2792 for (unsigned i = 0; i != NumElts; i += 2) {
2793 if ((unsigned) M[i] != Idx ||
2794 (unsigned) M[i+1] != Idx + NumElts)
2795 return false;
2796 Idx += 1;
2797 }
2799 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2800 if (VT.is64BitVector() && EltSz == 32)
2801 return false;
2803 return true;
2804 }
2806 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
2807 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
2808 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
2809 static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
2810 unsigned &WhichResult) {
2811 unsigned EltSz = VT.getVectorElementType().getSizeInBits();
2812 if (EltSz == 64)
2813 return false;
2815 unsigned NumElts = VT.getVectorNumElements();
2816 WhichResult = (M[0] == 0 ? 0 : 1);
2817 unsigned Idx = WhichResult * NumElts / 2;
2818 for (unsigned i = 0; i != NumElts; i += 2) {
2819 if ((unsigned) M[i] != Idx ||
2820 (unsigned) M[i+1] != Idx)
2821 return false;
2822 Idx += 1;
2823 }
2825 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
2826 if (VT.is64BitVector() && EltSz == 32)
2827 return false;
2829 return true;
2830 }
2833 static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
2834 // Canonicalize all-zeros and all-ones vectors.
2835 ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
2836 if (ConstVal->isNullValue())
2837 return getZeroVector(VT, DAG, dl);
2838 if (ConstVal->isAllOnesValue())
2839 return getOnesVector(VT, DAG, dl);
2841 EVT CanonicalVT;
2842 if (VT.is64BitVector()) {
2843 switch (Val.getValueType().getSizeInBits()) {
2844 case 8: CanonicalVT = MVT::v8i8; break;
2845 case 16: CanonicalVT = MVT::v4i16; break;
2846 case 32: CanonicalVT = MVT::v2i32; break;
2847 case 64: CanonicalVT = MVT::v1i64; break;
2848 default: llvm_unreachable("unexpected splat element type"); break;
2849 }
2850 } else {
2851 assert(VT.is128BitVector() && "unknown splat vector size");
2852 switch (Val.getValueType().getSizeInBits()) {
2853 case 8: CanonicalVT = MVT::v16i8; break;
2854 case 16: CanonicalVT = MVT::v8i16; break;
2855 case 32: CanonicalVT = MVT::v4i32; break;
2856 case 64: CanonicalVT = MVT::v2i64; break;
2857 default: llvm_unreachable("unexpected splat element type"); break;
2858 }
2859 }
2861 // Build a canonical splat for this value.
2862 SmallVector<SDValue, 8> Ops;
2863 Ops.assign(CanonicalVT.getVectorNumElements(), Val);
2864 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0],
2865 Ops.size());
2866 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res);
2867 }
2869 // If this is a case we can't handle, return null and let the default
2870 // expansion code take care of it.
2871 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
2872 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
2873 DebugLoc dl = Op.getDebugLoc();
2874 EVT VT = Op.getValueType();
2876 APInt SplatBits, SplatUndef;
2877 unsigned SplatBitSize;
2878 bool HasAnyUndefs;
2879 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
2880 if (SplatBitSize <= 64) {
2881 SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
2882 SplatUndef.getZExtValue(), SplatBitSize, DAG);
2883 if (Val.getNode())
2884 return BuildSplat(Val, VT, DAG, dl);
2885 }
2886 }
2888 // If there are only 2 elements in a 128-bit vector, insert them into an
2889 // undef vector. This handles the common case for 128-bit vector argument
2890 // passing, where the insertions should be translated to subreg accesses
2891 // with no real instructions.
2892 if (VT.is128BitVector() && Op.getNumOperands() == 2) {
2893 SDValue Val = DAG.getUNDEF(VT);
2894 SDValue Op0 = Op.getOperand(0);
2895 SDValue Op1 = Op.getOperand(1);
2896 if (Op0.getOpcode() != ISD::UNDEF)
2897 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0,
2898 DAG.getIntPtrConstant(0));
2899 if (Op1.getOpcode() != ISD::UNDEF)
2900 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1,
2901 DAG.getIntPtrConstant(1));
2902 return Val;
2903 }
2905 return SDValue();
2906 }
2908 /// isShuffleMaskLegal - Targets can use this to indicate that they only
2909 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
2910 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
2911 /// are assumed to be legal.
2912 bool
2913 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
2914 EVT VT) const {
2915 if (VT.getVectorNumElements() == 4 &&
2916 (VT.is128BitVector() || VT.is64BitVector())) {
2917 unsigned PFIndexes[4];
2918 for (unsigned i = 0; i != 4; ++i) {
2919 if (M[i] < 0)
2920 PFIndexes[i] = 8;
2921 else
2922 PFIndexes[i] = M[i];
2923 }
2925 // Compute the index in the perfect shuffle table.
2926 unsigned PFTableIndex =
2927 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
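// The four lane indices (0-7, with 8 meaning undef) are treated as digits
// of a base-9 number; each of the 9^4 table entries packs a 2-bit cost and
// a 30-bit recipe (an opcode plus two 13-bit operand entries).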
2928 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
2929 unsigned Cost = (PFEntry >> 30);
2931 if (Cost <= 4)
2932 return true;
2933 }
2935 bool ReverseVEXT;
2936 unsigned Imm, WhichResult;
2938 return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
2939 isVREVMask(M, VT, 64) ||
2940 isVREVMask(M, VT, 32) ||
2941 isVREVMask(M, VT, 16) ||
2942 isVEXTMask(M, VT, ReverseVEXT, Imm) ||
2943 isVTRNMask(M, VT, WhichResult) ||
2944 isVUZPMask(M, VT, WhichResult) ||
2945 isVZIPMask(M, VT, WhichResult) ||
2946 isVTRN_v_undef_Mask(M, VT, WhichResult) ||
2947 isVUZP_v_undef_Mask(M, VT, WhichResult) ||
2948 isVZIP_v_undef_Mask(M, VT, WhichResult));
2949 }
2951 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
2952 /// the specified operations to build the shuffle.
2953 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
2954 SDValue RHS, SelectionDAG &DAG,
2955 DebugLoc dl) {
2956 unsigned OpNum = (PFEntry >> 26) & 0x0F;
2957 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
2958 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
2960 enum {
2961 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
2962 OP_VREV,
2963 OP_VDUP0,
2964 OP_VDUP1,
2965 OP_VDUP2,
2966 OP_VDUP3,
2967 OP_VEXT1,
2968 OP_VEXT2,
2969 OP_VEXT3,
2970 OP_VUZPL, // VUZP, left result
2971 OP_VUZPR, // VUZP, right result
2972 OP_VZIPL, // VZIP, left result
2973 OP_VZIPR, // VZIP, right result
2974 OP_VTRNL, // VTRN, left result
2975 OP_VTRNR  // VTRN, right result
2976 };
2978 if (OpNum == OP_COPY) {
2979 if (LHSID == (1*9+2)*9+3) return LHS;
2980 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
2981 return RHS;
2982 }
2984 SDValue OpLHS, OpRHS;
2985 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
2986 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
2987 EVT VT = OpLHS.getValueType();
2989 switch (OpNum) {
2990 default: llvm_unreachable("Unknown shuffle opcode!");
2991 case OP_VREV:
2992 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
2993 case OP_VDUP0:
2994 case OP_VDUP1:
2995 case OP_VDUP2:
2996 case OP_VDUP3:
2997 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
2998 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
2999 case OP_VEXT1:
3000 case OP_VEXT2:
3001 case OP_VEXT3:
3002 return DAG.getNode(ARMISD::VEXT, dl, VT,
3003 OpLHS, OpRHS,
3004 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
3005 case OP_VUZPL:
3006 case OP_VUZPR:
3007 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3008 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
3009 case OP_VZIPL:
3010 case OP_VZIPR:
3011 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3012 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
3013 case OP_VTRNL:
3014 case OP_VTRNR:
3015 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3016 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
3017 }
3018 }
3020 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
3021 SDValue V1 = Op.getOperand(0);
3022 SDValue V2 = Op.getOperand(1);
3023 DebugLoc dl = Op.getDebugLoc();
3024 EVT VT = Op.getValueType();
3025 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
3026 SmallVector<int, 8> ShuffleMask;
3028 // Convert shuffles that are directly supported on NEON to target-specific
3029 // DAG nodes, instead of keeping them as shuffles and matching them again
3030 // during code selection. This is more efficient and avoids the possibility
3031 // of inconsistencies between legalization and selection.
3032 // FIXME: floating-point vectors should be canonicalized to integer vectors
3033 // of the same size so that they get CSEd properly.
3034 SVN->getMask(ShuffleMask);
3036 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
3037 int Lane = SVN->getSplatIndex();
3038 // If this is undef splat, generate it via "just" vdup, if possible.
3039 if (Lane == -1) Lane = 0;
3041 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
3042 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
3043 }
3044 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
3045 DAG.getConstant(Lane, MVT::i32));
3046 }
3048 bool ReverseVEXT;
3049 unsigned Imm;
3050 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
3051 if (ReverseVEXT)
3052 std::swap(V1, V2);
3053 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
3054 DAG.getConstant(Imm, MVT::i32));
3055 }
3057 if (isVREVMask(ShuffleMask, VT, 64))
3058 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
3059 if (isVREVMask(ShuffleMask, VT, 32))
3060 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
3061 if (isVREVMask(ShuffleMask, VT, 16))
3062 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
3064 // Check for Neon shuffles that modify both input vectors in place.
3065 // If both results are used, i.e., if there are two shuffles with the same
3066 // source operands and with masks corresponding to both results of one of
3067 // these operations, DAG memoization will ensure that a single node is
3068 // used for both shuffles.
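// E.g. the even- and odd-result shuffles of the same V1/V2 pair both map
// to one ARMISD::VTRN node; each user simply reads a different result.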
3069 unsigned WhichResult;
3070 if (isVTRNMask(ShuffleMask, VT, WhichResult))
3071 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3072 V1, V2).getValue(WhichResult);
3073 if (isVUZPMask(ShuffleMask, VT, WhichResult))
3074 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3075 V1, V2).getValue(WhichResult);
3076 if (isVZIPMask(ShuffleMask, VT, WhichResult))
3077 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3078 V1, V2).getValue(WhichResult);
3080 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
3081 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
3082 V1, V1).getValue(WhichResult);
3083 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3084 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
3085 V1, V1).getValue(WhichResult);
3086 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
3087 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
3088 V1, V1).getValue(WhichResult);
3090 // If the shuffle is not directly supported and it has 4 elements, use
3091 // the PerfectShuffle-generated table to synthesize it from other shuffles.
3092 if (VT.getVectorNumElements() == 4 &&
3093 (VT.is128BitVector() || VT.is64BitVector())) {
3094 unsigned PFIndexes[4];
3095 for (unsigned i = 0; i != 4; ++i) {
3096 if (ShuffleMask[i] < 0)
3097 PFIndexes[i] = 8;
3098 else
3099 PFIndexes[i] = ShuffleMask[i];
3100 }
3102 // Compute the index in the perfect shuffle table.
3103 unsigned PFTableIndex =
3104 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
3106 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
3107 unsigned Cost = (PFEntry >> 30);
3109 if (Cost <= 4)
3110 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
3111 }
3113 return SDValue();
3114 }
3116 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
3117 EVT VT = Op.getValueType();
3118 DebugLoc dl = Op.getDebugLoc();
3119 SDValue Vec = Op.getOperand(0);
3120 SDValue Lane = Op.getOperand(1);
3121 assert(VT == MVT::i32 &&
3122 Vec.getValueType().getVectorElementType().getSizeInBits() < 32 &&
3123 "unexpected type for custom-lowering vector extract");
3124 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
3125 }
3127 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
3128 // The only time a CONCAT_VECTORS operation can have legal types is when
3129 // two 64-bit vectors are concatenated to a 128-bit vector.
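// E.g. (v4i32 concat_vectors a, b) becomes two INSERT_VECTOR_ELT of the
// f64-bitcast halves into a v2f64 (i.e. writes to the two D subregisters
// of the destination Q register), followed by a bitcast back.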
3130 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
3131 "unexpected CONCAT_VECTORS");
3132 DebugLoc dl = Op.getDebugLoc();
3133 SDValue Val = DAG.getUNDEF(MVT::v2f64);
3134 SDValue Op0 = Op.getOperand(0);
3135 SDValue Op1 = Op.getOperand(1);
3136 if (Op0.getOpcode() != ISD::UNDEF)
3137 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3138 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0),
3139 DAG.getIntPtrConstant(0));
3140 if (Op1.getOpcode() != ISD::UNDEF)
3141 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
3142 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1),
3143 DAG.getIntPtrConstant(1));
3144 return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
3145 }
3147 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3148 switch (Op.getOpcode()) {
3149 default: llvm_unreachable("Don't know how to custom lower this!");
3150 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
3151 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
3152 case ISD::GlobalAddress:
3153 return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
3154 LowerGlobalAddressELF(Op, DAG);
3155 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
3156 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
3157 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
3158 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
3159 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
3160 case ISD::VASTART: return LowerVASTART(Op, DAG);
3161 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
3162 case ISD::SINT_TO_FP:
3163 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
3164 case ISD::FP_TO_SINT:
3165 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
3166 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
3167 case ISD::RETURNADDR: break;
3168 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
3169 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3170 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
3171 Subtarget);
3172 case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
3173 case ISD::SHL:
3174 case ISD::SRL:
3175 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
3176 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
3177 case ISD::SRL_PARTS:
3178 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
3179 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
3180 case ISD::VSETCC: return LowerVSETCC(Op, DAG);
3181 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
3182 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3183 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3184 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3185 }
3186 return SDValue();
3187 }
3189 /// ReplaceNodeResults - Replace the results of node with an illegal result
3190 /// type with new values built out of custom code.
3191 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
3192 SmallVectorImpl<SDValue>&Results,
3193 SelectionDAG &DAG) const {
3194 SDValue Res;
3195 switch (N->getOpcode()) {
3196 default:
3197 llvm_unreachable("Don't know how to custom expand this!");
3198 break;
3199 case ISD::BIT_CONVERT:
3200 Res = ExpandBIT_CONVERT(N, DAG);
3201 break;
3202 case ISD::SRL:
3203 case ISD::SRA:
3204 Res = LowerShift(N, DAG, Subtarget);
3205 break;
3206 }
3207 if (Res.getNode())
3208 Results.push_back(Res);
3209 }
3211 //===----------------------------------------------------------------------===//
3212 // ARM Scheduler Hooks
3213 //===----------------------------------------------------------------------===//
3215 MachineBasicBlock *
3216 ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
3217 MachineBasicBlock *BB,
3218 unsigned Size) const {
3219 unsigned dest = MI->getOperand(0).getReg();
3220 unsigned ptr = MI->getOperand(1).getReg();
3221 unsigned oldval = MI->getOperand(2).getReg();
3222 unsigned newval = MI->getOperand(3).getReg();
3223 unsigned scratch = BB->getParent()->getRegInfo()
3224 .createVirtualRegister(ARM::GPRRegisterClass);
3225 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3226 DebugLoc dl = MI->getDebugLoc();
3227 bool isThumb2 = Subtarget->isThumb2();
3229 unsigned ldrOpc, strOpc;
3231 default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
3233 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3234 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3237 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3238 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3241 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3242 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3246 MachineFunction *MF = BB->getParent();
3247 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3248 MachineFunction::iterator It = BB;
3249 ++It; // insert the new blocks after the current block
3251 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3252 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
3253 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3254 MF->insert(It, loop1MBB);
3255 MF->insert(It, loop2MBB);
3256 MF->insert(It, exitMBB);
3257 exitMBB->transferSuccessors(BB);
3261 // fallthrough --> loop1MBB
3262 BB->addSuccessor(loop1MBB);
3265 // ldrex dest, [ptr]
3269 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3270 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
3271 .addReg(dest).addReg(oldval));
3272 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3273 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3274 BB->addSuccessor(loop2MBB);
3275 BB->addSuccessor(exitMBB);
3278 // strex scratch, newval, [ptr]
3282 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
3284 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3285 .addReg(scratch).addImm(0));
3286 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3287 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3288 BB->addSuccessor(loop1MBB);
3289 BB->addSuccessor(exitMBB);
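// Note: strex writes 0 to the status register (scratch) when the store
// succeeds and 1 when the exclusive monitor was lost, so the cmp/bne above
// loops back to loop1MBB until the load/store pair completes atomically.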
3295 MF->DeleteMachineInstr(MI); // The instruction is gone now.
3301 ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
3302 unsigned Size, unsigned BinOpcode) const {
3303 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
3304 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3306 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3307 MachineFunction *MF = BB->getParent();
3308 MachineFunction::iterator It = BB;
3311 unsigned dest = MI->getOperand(0).getReg();
3312 unsigned ptr = MI->getOperand(1).getReg();
3313 unsigned incr = MI->getOperand(2).getReg();
3314 DebugLoc dl = MI->getDebugLoc();
3316 bool isThumb2 = Subtarget->isThumb2();
3317 unsigned ldrOpc, strOpc;
3319 default: llvm_unreachable("unsupported size for AtomicBinary!");
3321 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
3322 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
3325 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
3326 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
3329 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
3330 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
3334 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3335 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3336 MF->insert(It, loopMBB);
3337 MF->insert(It, exitMBB);
3338 exitMBB->transferSuccessors(BB);
3340 MachineRegisterInfo &RegInfo = MF->getRegInfo();
3341 unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
3342 unsigned scratch2 = (!BinOpcode) ? incr :
3343 RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
3347 // fallthrough --> loopMBB
3348 BB->addSuccessor(loopMBB);
3352 // <binop> scratch2, dest, incr
3353 // strex scratch, scratch2, ptr
3356 // fallthrough --> exitMBB
3358 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
3360 // operand order needs to go the other way for NAND
3361 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
3362 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
3363 addReg(incr).addReg(dest)).addReg(0);
3365 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
3366 addReg(dest).addReg(incr)).addReg(0);
3369 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
3371 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
3372 .addReg(scratch).addImm(0));
3373 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
3374 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
3376 BB->addSuccessor(loopMBB);
3377 BB->addSuccessor(exitMBB);
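// Note: when BinOpcode is 0 (ATOMIC_SWAP), scratch2 is simply incr (see
// above), so the loop stores the new value directly with no arithmetic.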
3383 MF->DeleteMachineInstr(MI); // The instruction is gone now.
3389 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
3390 MachineBasicBlock *BB) const {
3391 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3392 DebugLoc dl = MI->getDebugLoc();
3393 bool isThumb2 = Subtarget->isThumb2();
3394 switch (MI->getOpcode()) {
3397 llvm_unreachable("Unexpected instr type to insert");
3399 case ARM::ATOMIC_LOAD_ADD_I8:
3400 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3401 case ARM::ATOMIC_LOAD_ADD_I16:
3402 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3403 case ARM::ATOMIC_LOAD_ADD_I32:
3404 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
3406 case ARM::ATOMIC_LOAD_AND_I8:
3407 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3408 case ARM::ATOMIC_LOAD_AND_I16:
3409 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3410 case ARM::ATOMIC_LOAD_AND_I32:
3411 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
3413 case ARM::ATOMIC_LOAD_OR_I8:
3414 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3415 case ARM::ATOMIC_LOAD_OR_I16:
3416 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3417 case ARM::ATOMIC_LOAD_OR_I32:
3418 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
3420 case ARM::ATOMIC_LOAD_XOR_I8:
3421 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3422 case ARM::ATOMIC_LOAD_XOR_I16:
3423 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3424 case ARM::ATOMIC_LOAD_XOR_I32:
3425 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
3427 case ARM::ATOMIC_LOAD_NAND_I8:
3428 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3429 case ARM::ATOMIC_LOAD_NAND_I16:
3430 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3431 case ARM::ATOMIC_LOAD_NAND_I32:
3432 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
3434 case ARM::ATOMIC_LOAD_SUB_I8:
3435 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3436 case ARM::ATOMIC_LOAD_SUB_I16:
3437 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3438 case ARM::ATOMIC_LOAD_SUB_I32:
3439 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
3441 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
3442 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
3443 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
3445 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
3446 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
3447 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
3449 case ARM::tMOVCCr_pseudo: {
3450 // To "insert" a SELECT_CC instruction, we actually have to insert the
3451 // diamond control-flow pattern. The incoming instruction knows the
3452 // destination vreg to set, the condition code register to branch on, the
3453 // true/false values to select between, and a branch opcode to use.
3454 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3455 MachineFunction::iterator It = BB;
3461 // cmpTY ccX, r1, r2
3463 // fallthrough --> copy0MBB
3464 MachineBasicBlock *thisMBB = BB;
3465 MachineFunction *F = BB->getParent();
3466 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
3467 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
3468 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
3469 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
3470 F->insert(It, copy0MBB);
3471 F->insert(It, sinkMBB);
3472 // Update machine-CFG edges by first adding all successors of the current
3473 // block to the new block which will contain the Phi node for the select.
3474 for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
3475 E = BB->succ_end(); I != E; ++I)
3476 sinkMBB->addSuccessor(*I);
3477 // Next, remove all successors of the current block, and add the true
3478 // and fallthrough blocks as its successors.
3479 while (!BB->succ_empty())
3480 BB->removeSuccessor(BB->succ_begin());
3481 BB->addSuccessor(copy0MBB);
3482 BB->addSuccessor(sinkMBB);
3485 // %FalseValue = ...
3486 // # fallthrough to sinkMBB
3489 // Update machine-CFG edges
3490 BB->addSuccessor(sinkMBB);
3493 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
3496 BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
3497 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
3498 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
3500 F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
3507 case ARM::t2SUBrSPi_:
3508 case ARM::t2SUBrSPi12_:
3509 case ARM::t2SUBrSPs_: {
3510 MachineFunction *MF = BB->getParent();
3511 unsigned DstReg = MI->getOperand(0).getReg();
3512 unsigned SrcReg = MI->getOperand(1).getReg();
3513 bool DstIsDead = MI->getOperand(0).isDead();
3514 bool SrcIsKill = MI->getOperand(1).isKill();
3516 if (SrcReg != ARM::SP) {
3517 // Copy the source to SP from virtual register.
3518 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
3519 unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
3520 ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
3521 BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
3522 .addReg(SrcReg, getKillRegState(SrcIsKill));
3526 bool NeedPred = false, NeedCC = false, NeedOp3 = false;
3527 switch (MI->getOpcode()) {
3529 llvm_unreachable("Unexpected pseudo instruction!");
3535 OpOpc = ARM::tADDspr;
3538 OpOpc = ARM::tSUBspi;
3540 case ARM::t2SUBrSPi_:
3541 OpOpc = ARM::t2SUBrSPi;
3542 NeedPred = true; NeedCC = true;
3544 case ARM::t2SUBrSPi12_:
3545 OpOpc = ARM::t2SUBrSPi12;
3548 case ARM::t2SUBrSPs_:
3549 OpOpc = ARM::t2SUBrSPs;
3550 NeedPred = true; NeedCC = true; NeedOp3 = true;
3553 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
3554 if (OpOpc == ARM::tAND)
3555 AddDefaultT1CC(MIB);
3556 MIB.addReg(ARM::SP);
3557 MIB.addOperand(MI->getOperand(2));
3559 MIB.addOperand(MI->getOperand(3));
3561 AddDefaultPred(MIB);
3565 // Copy the result from SP to virtual register.
3566 const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
3567 unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
3568 ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
3569 BuildMI(BB, dl, TII->get(CopyOpc))
3570 .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
3572 MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
3578 //===----------------------------------------------------------------------===//
3579 // ARM Optimization Hooks
3580 //===----------------------------------------------------------------------===//
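/// combineSelectAndUse - A sketch of the fold performed below: given
/// (add x, (select cc, 0, c)) where the select has a single use, move the
/// add inside the select so that one arm becomes x itself:
/// (select cc, x, (add x, c)).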
3583 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
3584 TargetLowering::DAGCombinerInfo &DCI) {
3585 SelectionDAG &DAG = DCI.DAG;
3586 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3587 EVT VT = N->getValueType(0);
3588 unsigned Opc = N->getOpcode();
3589 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
3590 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
3591 SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
3592 ISD::CondCode CC = ISD::SETCC_INVALID;
3595 CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
3597 SDValue CCOp = Slct.getOperand(0);
3598 if (CCOp.getOpcode() == ISD::SETCC)
3599 CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
3602 bool DoXform = false;
3604 assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
3607 if (LHS.getOpcode() == ISD::Constant &&
3608 cast<ConstantSDNode>(LHS)->isNullValue()) {
3610 } else if (CC != ISD::SETCC_INVALID &&
3611 RHS.getOpcode() == ISD::Constant &&
3612 cast<ConstantSDNode>(RHS)->isNullValue()) {
3613 std::swap(LHS, RHS);
3614 SDValue Op0 = Slct.getOperand(0);
3615 EVT OpVT = isSlctCC ? Op0.getValueType() :
3616 Op0.getOperand(0).getValueType();
3617 bool isInt = OpVT.isInteger();
3618 CC = ISD::getSetCCInverse(CC, isInt);
3620 if (!TLI.isCondCodeLegal(CC, OpVT))
3621 return SDValue(); // Inverse operator isn't legal.
3628 SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
3630 return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
3631 Slct.getOperand(0), Slct.getOperand(1), CC);
3632 SDValue CCOp = Slct.getOperand(0);
3634 CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
3635 CCOp.getOperand(0), CCOp.getOperand(1), CC);
3636 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
3637 CCOp, OtherOp, Result);
3642 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
3643 static SDValue PerformADDCombine(SDNode *N,
3644 TargetLowering::DAGCombinerInfo &DCI) {
3645 // Added by Evan in r37685 with no testcase.
3646 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3648 // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
3649 if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
3650 SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
3651 if (Result.getNode()) return Result;
3653 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
3654 SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
3655 if (Result.getNode()) return Result;
3661 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
3662 static SDValue PerformSUBCombine(SDNode *N,
3663 TargetLowering::DAGCombinerInfo &DCI) {
3664 // Added by Evan in r37685 with no testcase.
3665 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3667 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
3668 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
3669 SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
3670 if (Result.getNode()) return Result;
3676 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
3677 /// ARMISD::VMOVRRD.
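/// For example, VMOVRRD(VMOVDRR(lo, hi)) is replaced directly by the pair
/// (lo, hi), removing a round trip through a VFP double register.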
3678 static SDValue PerformVMOVRRDCombine(SDNode *N,
3679 TargetLowering::DAGCombinerInfo &DCI) {
3680 // fmrrd(fmdrr x, y) -> x,y
3681 SDValue InDouble = N->getOperand(0);
3682 if (InDouble.getOpcode() == ARMISD::VMOVDRR)
3683 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
3687 /// getVShiftImm - Check if this is a valid build_vector for the immediate
3688 /// operand of a vector shift operation, where all the elements of the
3689 /// build_vector must have the same constant integer value.
3690 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
3691 // Ignore bit_converts.
3692 while (Op.getOpcode() == ISD::BIT_CONVERT)
3693 Op = Op.getOperand(0);
3694 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
3695 APInt SplatBits, SplatUndef;
3696 unsigned SplatBitSize;
3698 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
3699 HasAnyUndefs, ElementBits) ||
3700 SplatBitSize > ElementBits)
3702 Cnt = SplatBits.getSExtValue();
3706 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
3707 /// operand of a vector shift left operation. That value must be in the range:
3708 /// 0 <= Value < ElementBits for a left shift; or
3709 /// 0 <= Value <= ElementBits for a long left shift.
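/// For example, with <4 x i16> elements a splat count of 15 is a valid vshl
/// immediate, while a count of 16 is valid only for the long form (vshll).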
3710 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
3711 assert(VT.isVector() && "vector shift count is not a vector type");
3712 unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
3713 if (! getVShiftImm(Op, ElementBits, Cnt))
3715 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
3718 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
3719 /// operand of a vector shift right operation. For a shift opcode, the count
3720 /// is positive, but for an intrinsic the count must be negative. The
3721 /// absolute value must be in the range:
3722 /// 1 <= |Value| <= ElementBits for a right shift; or
3723 /// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
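/// For example, narrowing <8 x i16> to <8 x i8> with vshrn accepts counts 1
/// through 8 (ElementBits here is the source element size, 16).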
3724 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
3726 assert(VT.isVector() && "vector shift count is not a vector type");
3727 unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
3728 if (! getVShiftImm(Op, ElementBits, Cnt))
3732 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
3735 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
3736 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
3737 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
3740 // Don't do anything for most intrinsics.
3743 // Vector shifts: check for immediate versions and lower them.
3744 // Note: This is done during DAG combining instead of DAG legalizing because
3745 // the build_vectors for 64-bit vector element shift counts are generally
3746 // not legal, and it is hard to see their values after they get legalized to
3747 // loads from a constant pool.
3748 case Intrinsic::arm_neon_vshifts:
3749 case Intrinsic::arm_neon_vshiftu:
3750 case Intrinsic::arm_neon_vshiftls:
3751 case Intrinsic::arm_neon_vshiftlu:
3752 case Intrinsic::arm_neon_vshiftn:
3753 case Intrinsic::arm_neon_vrshifts:
3754 case Intrinsic::arm_neon_vrshiftu:
3755 case Intrinsic::arm_neon_vrshiftn:
3756 case Intrinsic::arm_neon_vqshifts:
3757 case Intrinsic::arm_neon_vqshiftu:
3758 case Intrinsic::arm_neon_vqshiftsu:
3759 case Intrinsic::arm_neon_vqshiftns:
3760 case Intrinsic::arm_neon_vqshiftnu:
3761 case Intrinsic::arm_neon_vqshiftnsu:
3762 case Intrinsic::arm_neon_vqrshiftns:
3763 case Intrinsic::arm_neon_vqrshiftnu:
3764 case Intrinsic::arm_neon_vqrshiftnsu: {
3765 EVT VT = N->getOperand(1).getValueType();
3767 unsigned VShiftOpc = 0;
3770 case Intrinsic::arm_neon_vshifts:
3771 case Intrinsic::arm_neon_vshiftu:
3772 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
3773 VShiftOpc = ARMISD::VSHL;
3776 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
3777 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
3778 ARMISD::VSHRs : ARMISD::VSHRu);
3783 case Intrinsic::arm_neon_vshiftls:
3784 case Intrinsic::arm_neon_vshiftlu:
3785 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
3787 llvm_unreachable("invalid shift count for vshll intrinsic");
3789 case Intrinsic::arm_neon_vrshifts:
3790 case Intrinsic::arm_neon_vrshiftu:
3791 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
3795 case Intrinsic::arm_neon_vqshifts:
3796 case Intrinsic::arm_neon_vqshiftu:
3797 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
3801 case Intrinsic::arm_neon_vqshiftsu:
3802 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
3804 llvm_unreachable("invalid shift count for vqshlu intrinsic");
3806 case Intrinsic::arm_neon_vshiftn:
3807 case Intrinsic::arm_neon_vrshiftn:
3808 case Intrinsic::arm_neon_vqshiftns:
3809 case Intrinsic::arm_neon_vqshiftnu:
3810 case Intrinsic::arm_neon_vqshiftnsu:
3811 case Intrinsic::arm_neon_vqrshiftns:
3812 case Intrinsic::arm_neon_vqrshiftnu:
3813 case Intrinsic::arm_neon_vqrshiftnsu:
3814 // Narrowing shifts require an immediate right shift.
3815 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
3817 llvm_unreachable("invalid shift count for narrowing vector shift intrinsic");
3820 llvm_unreachable("unhandled vector shift");
3824 case Intrinsic::arm_neon_vshifts:
3825 case Intrinsic::arm_neon_vshiftu:
3826 // Opcode already set above.
3828 case Intrinsic::arm_neon_vshiftls:
3829 case Intrinsic::arm_neon_vshiftlu:
3830 if (Cnt == VT.getVectorElementType().getSizeInBits())
3831 VShiftOpc = ARMISD::VSHLLi;
3833 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
3834 ARMISD::VSHLLs : ARMISD::VSHLLu);
3836 case Intrinsic::arm_neon_vshiftn:
3837 VShiftOpc = ARMISD::VSHRN; break;
3838 case Intrinsic::arm_neon_vrshifts:
3839 VShiftOpc = ARMISD::VRSHRs; break;
3840 case Intrinsic::arm_neon_vrshiftu:
3841 VShiftOpc = ARMISD::VRSHRu; break;
3842 case Intrinsic::arm_neon_vrshiftn:
3843 VShiftOpc = ARMISD::VRSHRN; break;
3844 case Intrinsic::arm_neon_vqshifts:
3845 VShiftOpc = ARMISD::VQSHLs; break;
3846 case Intrinsic::arm_neon_vqshiftu:
3847 VShiftOpc = ARMISD::VQSHLu; break;
3848 case Intrinsic::arm_neon_vqshiftsu:
3849 VShiftOpc = ARMISD::VQSHLsu; break;
3850 case Intrinsic::arm_neon_vqshiftns:
3851 VShiftOpc = ARMISD::VQSHRNs; break;
3852 case Intrinsic::arm_neon_vqshiftnu:
3853 VShiftOpc = ARMISD::VQSHRNu; break;
3854 case Intrinsic::arm_neon_vqshiftnsu:
3855 VShiftOpc = ARMISD::VQSHRNsu; break;
3856 case Intrinsic::arm_neon_vqrshiftns:
3857 VShiftOpc = ARMISD::VQRSHRNs; break;
3858 case Intrinsic::arm_neon_vqrshiftnu:
3859 VShiftOpc = ARMISD::VQRSHRNu; break;
3860 case Intrinsic::arm_neon_vqrshiftnsu:
3861 VShiftOpc = ARMISD::VQRSHRNsu; break;
3864 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
3865 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
3868 case Intrinsic::arm_neon_vshiftins: {
3869 EVT VT = N->getOperand(1).getValueType();
3871 unsigned VShiftOpc = 0;
3873 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
3874 VShiftOpc = ARMISD::VSLI;
3875 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
3876 VShiftOpc = ARMISD::VSRI;
3878 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
3881 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
3882 N->getOperand(1), N->getOperand(2),
3883 DAG.getConstant(Cnt, MVT::i32));
3886 case Intrinsic::arm_neon_vqrshifts:
3887 case Intrinsic::arm_neon_vqrshiftu:
3888 // No immediate versions of these to check for.
3895 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
3896 /// lowers them. As with the vector shift intrinsics, this is done during DAG
3897 /// combining instead of DAG legalizing because the build_vectors for 64-bit
3898 /// vector element shift counts are generally not legal, and it is hard to see
3899 /// their values after they get legalized to loads from a constant pool.
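/// For example, (shl <4 x i32> %x, <3, 3, 3, 3>) becomes (ARMISD::VSHL %x, 3),
/// which can then be selected as a single NEON shift-by-immediate.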
3900 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
3901 const ARMSubtarget *ST) {
3902 EVT VT = N->getValueType(0);
3904 // Nothing to be done for scalar shifts.
3905 if (! VT.isVector())
3908 assert(ST->hasNEON() && "unexpected vector shift");
3911 switch (N->getOpcode()) {
3912 default: llvm_unreachable("unexpected shift opcode");
3915 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
3916 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
3917 DAG.getConstant(Cnt, MVT::i32));
3922 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
3923 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
3924 ARMISD::VSHRs : ARMISD::VSHRu);
3925 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
3926 DAG.getConstant(Cnt, MVT::i32));
3932 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
3933 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
3934 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
3935 const ARMSubtarget *ST) {
3936 SDValue N0 = N->getOperand(0);
3938 // Check for sign- and zero-extensions of vector extract operations of 8-
3939 // and 16-bit vector elements. NEON supports these directly. They are
3940 // handled during DAG combining because type legalization will promote them
3941 // to 32-bit types and it is messy to recognize the operations after that.
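// For example, (sext (extract_vector_elt <8 x i8> %v, 1)) to i32 can become
// (ARMISD::VGETLANEs %v, 1), a single sign-extending move from a vector
// lane, instead of an extract followed by a separate sign extension.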
3942 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3943 SDValue Vec = N0.getOperand(0);
3944 SDValue Lane = N0.getOperand(1);
3945 EVT VT = N->getValueType(0);
3946 EVT EltVT = N0.getValueType();
3947 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3949 if (VT == MVT::i32 &&
3950 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
3951 TLI.isTypeLegal(Vec.getValueType())) {
3954 switch (N->getOpcode()) {
3955 default: llvm_unreachable("unexpected opcode");
3956 case ISD::SIGN_EXTEND:
3957 Opc = ARMISD::VGETLANEs;
3959 case ISD::ZERO_EXTEND:
3960 case ISD::ANY_EXTEND:
3961 Opc = ARMISD::VGETLANEu;
3964 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
3971 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
3972 /// to match f32 max/min patterns to use NEON vmax/vmin instructions.
3973 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
3974 const ARMSubtarget *ST) {
3975 // If the target supports NEON, try to use vmax/vmin instructions for f32
3976 // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
3977 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
3978 // a NaN; only do the transformation when it matches that behavior.
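// For example, "x < y ? x : y" (SETLT) is only turned into vmin when x is
// known not to be NaN: if x were NaN the select would yield y, but
// vmin(NaN, y) yields NaN.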
3980 // For now only do this when using NEON for FP operations; if using VFP, it
3981 // is not obvious that the benefit outweighs the cost of switching to the
3983 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
3984 N->getValueType(0) != MVT::f32)
3987 SDValue CondLHS = N->getOperand(0);
3988 SDValue CondRHS = N->getOperand(1);
3989 SDValue LHS = N->getOperand(2);
3990 SDValue RHS = N->getOperand(3);
3991 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
3993 unsigned Opcode = 0;
3995 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
3996 IsReversed = false; // x CC y ? x : y
3997 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
3998 IsReversed = true; // x CC y ? y : x
4012 // If LHS is NaN, an ordered comparison will be false and the result will
4013 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
4014 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
4015 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
4016 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4018 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
4019 // will return -0, so vmin can only be used for unsafe math or if one of
4020 // the operands is known to be nonzero.
4021 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
4023 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4025 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
4034 // If LHS is NaN, an ordered comparison will be false and the result will
4035 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
4036 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
4037 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
4038 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
4040 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
4041 // will return +0, so vmax can only be used for unsafe math or if one of
4042 // the operands is known to be nonzero.
4043 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
4045 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
4047 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
4053 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
4056 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
4057 DAGCombinerInfo &DCI) const {
4058 switch (N->getOpcode()) {
4060 case ISD::ADD: return PerformADDCombine(N, DCI);
4061 case ISD::SUB: return PerformSUBCombine(N, DCI);
4062 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
4063 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
4066 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
4067 case ISD::SIGN_EXTEND:
4068 case ISD::ZERO_EXTEND:
4069 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
4070 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
4075 bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
4076 if (!Subtarget->hasV6Ops())
4077 // Pre-v6 does not support unaligned mem access.
4080 // v6+ may or may not support unaligned mem access depending on the system
4082 // FIXME: This is pretty conservative. Should we provide a cmdline option to
4083 // control the behaviour?
4084 if (!Subtarget->isTargetDarwin())
4088 switch (VT.getSimpleVT().SimpleTy) {
4095 // FIXME: VLD1 etc with standard alignment is legal.
4099 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
4104 switch (VT.getSimpleVT().SimpleTy) {
4105 default: return false;
4120 if ((V & (Scale - 1)) != 0)
4123 return V == (V & ((1LL << 5) - 1));
4126 static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
4127 const ARMSubtarget *Subtarget) {
4134 switch (VT.getSimpleVT().SimpleTy) {
4135 default: return false;
4140 // + imm12 or - imm8
4142 return V == (V & ((1LL << 8) - 1));
4143 return V == (V & ((1LL << 12) - 1));
4146 // Same as ARM mode. FIXME: NEON?
4147 if (!Subtarget->hasVFP2())
4152 return V == (V & ((1LL << 8) - 1));
4156 /// isLegalAddressImmediate - Return true if the integer value can be used
4157 /// as the offset of the target addressing mode for load / store of the
4159 static bool isLegalAddressImmediate(int64_t V, EVT VT,
4160 const ARMSubtarget *Subtarget) {
4167 if (Subtarget->isThumb1Only())
4168 return isLegalT1AddressImmediate(V, VT);
4169 else if (Subtarget->isThumb2())
4170 return isLegalT2AddressImmediate(V, VT, Subtarget);
4175 switch (VT.getSimpleVT().SimpleTy) {
4176 default: return false;
4181 return V == (V & ((1LL << 12) - 1));
4184 return V == (V & ((1LL << 8) - 1));
4187 if (!Subtarget->hasVFP2()) // FIXME: NEON?
4192 return V == (V & ((1LL << 8) - 1));
4196 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
4198 int Scale = AM.Scale;
4202 switch (VT.getSimpleVT().SimpleTy) {
4203 default: return false;
4212 return Scale == 2 || Scale == 4 || Scale == 8;
4215 if (((unsigned)AM.HasBaseReg + Scale) <= 2)
4219 // Note, we allow "void" uses (basically, uses that aren't loads or
4220 // stores), because arm allows folding a scale into many arithmetic
4221 // operations. This should be made more precise and revisited later.
4223 // Allow r << imm, but the imm has to be a multiple of two.
4224 if (Scale & 1) return false;
4225 return isPowerOf2_32(Scale);
4229 /// isLegalAddressingMode - Return true if the addressing mode represented
4230 /// by AM is legal for this target, for a load/store of the specified type.
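/// For example, an ARM-mode i32 load can use "ldr r0, [r1, r2, lsl #2]"
/// (base plus register scaled by 4), while an i16 load only supports a plain
/// base plus/minus register (a sketch; see the cases below).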
4231 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
4232 const Type *Ty) const {
4233 EVT VT = getValueType(Ty, true);
4234 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
4237 // Can never fold addr of global into load/store.
4242 case 0: // no scale reg, must be "r+i" or "r", or "i".
4245 if (Subtarget->isThumb1Only())
4249 // ARM doesn't support any R+R*scale+imm addr modes.
4256 if (Subtarget->isThumb2())
4257 return isLegalT2ScaledAddressingMode(AM, VT);
4259 int Scale = AM.Scale;
4260 switch (VT.getSimpleVT().SimpleTy) {
4261 default: return false;
4265 if (Scale < 0) Scale = -Scale;
4269 return isPowerOf2_32(Scale & ~1);
4273 if (((unsigned)AM.HasBaseReg + Scale) <= 2)
4278 // Note, we allow "void" uses (basically, uses that aren't loads or
4279 // stores), because arm allows folding a scale into many arithmetic
4280 // operations. This should be made more precise and revisited later.
4282 // Allow r << imm, but the imm has to be a multiple of two.
4283 if (Scale & 1) return false;
4284 return isPowerOf2_32(Scale);
4291 /// isLegalICmpImmediate - Return true if the specified immediate is a legal
4292 /// icmp immediate, that is, the target has icmp instructions which can compare
4293 /// a register against the immediate without having to materialize the
4294 /// immediate into a register.
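/// For example, in ARM mode "cmp r0, #255" is directly encodable, while 257
/// is not a rotated 8-bit immediate and would have to be materialized first.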
4295 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4296 if (!Subtarget->isThumb())
4297 return ARM_AM::getSOImmVal(Imm) != -1;
4298 if (Subtarget->isThumb2())
4299 return ARM_AM::getT2SOImmVal(Imm) != -1;
4300 return Imm >= 0 && Imm <= 255;
4303 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
4304 bool isSEXTLoad, SDValue &Base,
4305 SDValue &Offset, bool &isInc,
4306 SelectionDAG &DAG) {
4307 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
4310 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
4312 Base = Ptr->getOperand(0);
4313 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4314 int RHSC = (int)RHS->getZExtValue();
4315 if (RHSC < 0 && RHSC > -256) {
4316 assert(Ptr->getOpcode() == ISD::ADD);
4318 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4322 isInc = (Ptr->getOpcode() == ISD::ADD);
4323 Offset = Ptr->getOperand(1);
4325 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
4327 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4328 int RHSC = (int)RHS->getZExtValue();
4329 if (RHSC < 0 && RHSC > -0x1000) {
4330 assert(Ptr->getOpcode() == ISD::ADD);
4332 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4333 Base = Ptr->getOperand(0);
4338 if (Ptr->getOpcode() == ISD::ADD) {
4340 ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
4341 if (ShOpcVal != ARM_AM::no_shift) {
4342 Base = Ptr->getOperand(1);
4343 Offset = Ptr->getOperand(0);
4345 Base = Ptr->getOperand(0);
4346 Offset = Ptr->getOperand(1);
4351 isInc = (Ptr->getOpcode() == ISD::ADD);
4352 Base = Ptr->getOperand(0);
4353 Offset = Ptr->getOperand(1);
4357 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
4361 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
4362 bool isSEXTLoad, SDValue &Base,
4363 SDValue &Offset, bool &isInc,
4364 SelectionDAG &DAG) {
4365 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
4368 Base = Ptr->getOperand(0);
4369 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
4370 int RHSC = (int)RHS->getZExtValue();
4371 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
4372 assert(Ptr->getOpcode() == ISD::ADD);
4374 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
4376 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
4377 isInc = Ptr->getOpcode() == ISD::ADD;
4378 Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
4386 /// getPreIndexedAddressParts - returns true by value, and the base pointer,
4387 /// offset pointer, and addressing mode by reference, if the node's address
4388 /// can be legally represented as a pre-indexed load / store address.
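/// For example, "ldr r0, [r1, #4]!" is pre-indexed: it loads from r1+4 and
/// writes the updated address back into r1.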
4390 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
4392 ISD::MemIndexedMode &AM,
4393 SelectionDAG &DAG) const {
4394 if (Subtarget->isThumb1Only())
4399 bool isSEXTLoad = false;
4400 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
4401 Ptr = LD->getBasePtr();
4402 VT = LD->getMemoryVT();
4403 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
4404 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
4405 Ptr = ST->getBasePtr();
4406 VT = ST->getMemoryVT();
4411 bool isLegal = false;
4412 if (Subtarget->isThumb2())
4413 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
4414 Offset, isInc, DAG);
4416 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
4417 Offset, isInc, DAG);
4421 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
4425 /// getPostIndexedAddressParts - returns true by value, and the base pointer,
4426 /// offset pointer, and addressing mode by reference, if this node can be
4427 /// combined with a load / store to form a post-indexed load / store.
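/// For example, "ldr r0, [r1], #4" is post-indexed: it loads from r1 and
/// then increments r1 by 4.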
4428 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
4431 ISD::MemIndexedMode &AM,
4432 SelectionDAG &DAG) const {
4433 if (Subtarget->isThumb1Only())
4438 bool isSEXTLoad = false;
4439 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
4440 VT = LD->getMemoryVT();
4441 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
4442 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
4443 VT = ST->getMemoryVT();
4448 bool isLegal = false;
4449 if (Subtarget->isThumb2())
4450 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
4453 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
4458 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
4462 void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
4466 const SelectionDAG &DAG,
4467 unsigned Depth) const {
4468 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
4469 switch (Op.getOpcode()) {
4471 case ARMISD::CMOV: {
4472 // Bits are known zero/one if known on the LHS and RHS.
4473 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
4474 if (KnownZero == 0 && KnownOne == 0) return;
4476 APInt KnownZeroRHS, KnownOneRHS;
4477 DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
4478 KnownZeroRHS, KnownOneRHS, Depth+1);
4479 KnownZero &= KnownZeroRHS;
4480 KnownOne &= KnownOneRHS;
4486 //===----------------------------------------------------------------------===//
4487 // ARM Inline Assembly Support
4488 //===----------------------------------------------------------------------===//
4490 /// getConstraintType - Given a constraint letter, return the type of
4491 /// constraint it is for this target.
4492 ARMTargetLowering::ConstraintType
4493 ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
4494 if (Constraint.size() == 1) {
4495 switch (Constraint[0]) {
4497 case 'l': return C_RegisterClass;
4498 case 'w': return C_RegisterClass;
4501 return TargetLowering::getConstraintType(Constraint);
4504 std::pair<unsigned, const TargetRegisterClass*>
4505 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4507 if (Constraint.size() == 1) {
4508 // GCC ARM Constraint Letters
4509 switch (Constraint[0]) {
4511 if (Subtarget->isThumb())
4512 return std::make_pair(0U, ARM::tGPRRegisterClass);
4514 return std::make_pair(0U, ARM::GPRRegisterClass);
4516 return std::make_pair(0U, ARM::GPRRegisterClass);
4519 return std::make_pair(0U, ARM::SPRRegisterClass);
4520 if (VT.getSizeInBits() == 64)
4521 return std::make_pair(0U, ARM::DPRRegisterClass);
4522 if (VT.getSizeInBits() == 128)
4523 return std::make_pair(0U, ARM::QPRRegisterClass);
4527 if (StringRef("{cc}").equals_lower(Constraint))
4528 return std::make_pair(0U, ARM::CCRRegisterClass);
4530 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4533 std::vector<unsigned> ARMTargetLowering::
4534 getRegClassForInlineAsmConstraint(const std::string &Constraint,
4536 if (Constraint.size() != 1)
4537 return std::vector<unsigned>();
4539 switch (Constraint[0]) { // GCC ARM Constraint Letters
4542 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
4543 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
4546 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
4547 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
4548 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
4549 ARM::R12, ARM::LR, 0);
4552 return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
4553 ARM::S4, ARM::S5, ARM::S6, ARM::S7,
4554 ARM::S8, ARM::S9, ARM::S10, ARM::S11,
4555 ARM::S12,ARM::S13,ARM::S14,ARM::S15,
4556 ARM::S16,ARM::S17,ARM::S18,ARM::S19,
4557 ARM::S20,ARM::S21,ARM::S22,ARM::S23,
4558 ARM::S24,ARM::S25,ARM::S26,ARM::S27,
4559 ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
4560 if (VT.getSizeInBits() == 64)
4561 return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
4562 ARM::D4, ARM::D5, ARM::D6, ARM::D7,
4563 ARM::D8, ARM::D9, ARM::D10,ARM::D11,
4564 ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
4565 if (VT.getSizeInBits() == 128)
4566 return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
4567 ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
4571 return std::vector<unsigned>();
4574 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
4575 /// vector. If it is invalid, don't add anything to Ops.
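/// For example, for the 'I' constraint in ARM mode, an operand of 255 is
/// added to Ops (a valid so_imm), while 257 is rejected and nothing is added.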
4576 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
4579 std::vector<SDValue>&Ops,
4580 SelectionDAG &DAG) const {
4581 SDValue Result(0, 0);
4583 switch (Constraint) {
4585 case 'I': case 'J': case 'K': case 'L':
4586 case 'M': case 'N': case 'O':
4587 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4591 int64_t CVal64 = C->getSExtValue();
4592 int CVal = (int) CVal64;
4593 // None of these constraints allow values larger than 32 bits. Check
4594 // that the value fits in an int.
4598 switch (Constraint) {
4600 if (Subtarget->isThumb1Only()) {
4601 // This must be a constant between 0 and 255, for ADD
4603 if (CVal >= 0 && CVal <= 255)
4605 } else if (Subtarget->isThumb2()) {
4606 // A constant that can be used as an immediate value in a
4607 // data-processing instruction.
4608 if (ARM_AM::getT2SOImmVal(CVal) != -1)
4611 // A constant that can be used as an immediate value in a
4612 // data-processing instruction.
4613 if (ARM_AM::getSOImmVal(CVal) != -1)
4619 if (Subtarget->isThumb()) { // FIXME thumb2
4620 // This must be a constant between -255 and -1, for negated ADD
4621 // immediates. This can be used in GCC with an "n" modifier that
4622 // prints the negated value, for use with SUB instructions. It is
4623 // not useful otherwise but is implemented for compatibility.
4624 if (CVal >= -255 && CVal <= -1)
4627 // This must be a constant between -4095 and 4095. It is not clear
4628 // what this constraint is intended for. Implemented for
4629 // compatibility with GCC.
4630 if (CVal >= -4095 && CVal <= 4095)
4636 if (Subtarget->isThumb1Only()) {
4637 // A 32-bit value where only one byte has a nonzero value. Exclude
4638 // zero to match GCC. This constraint is used by GCC internally for
4639 // constants that can be loaded with a move/shift combination.
4640 // It is not useful otherwise but is implemented for compatibility.
4641 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
4643 } else if (Subtarget->isThumb2()) {
4644 // A constant whose bitwise inverse can be used as an immediate
4645 // value in a data-processing instruction. This can be used in GCC
4646 // with a "B" modifier that prints the inverted value, for use with
4647 // BIC and MVN instructions. It is not useful otherwise but is
4648 // implemented for compatibility.
4649 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
4652 // A constant whose bitwise inverse can be used as an immediate
4653 // value in a data-processing instruction. This can be used in GCC
4654 // with a "B" modifier that prints the inverted value, for use with
4655 // BIC and MVN instructions. It is not useful otherwise but is
4656 // implemented for compatibility.
4657 if (ARM_AM::getSOImmVal(~CVal) != -1)
4663 if (Subtarget->isThumb1Only()) {
4664 // This must be a constant between -7 and 7,
4665 // for 3-operand ADD/SUB immediate instructions.
4666 if (CVal >= -7 && CVal <= 7)
4668 } else if (Subtarget->isThumb2()) {
4669 // A constant whose negation can be used as an immediate value in a
4670 // data-processing instruction. This can be used in GCC with an "n"
4671 // modifier that prints the negated value, for use with SUB
4672 // instructions. It is not useful otherwise but is implemented for
4674 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
4677 // A constant whose negation can be used as an immediate value in a
4678 // data-processing instruction. This can be used in GCC with an "n"
4679 // modifier that prints the negated value, for use with SUB
4680 // instructions. It is not useful otherwise but is implemented for
4682 if (ARM_AM::getSOImmVal(-CVal) != -1)
4688 if (Subtarget->isThumb()) { // FIXME thumb2
4689 // This must be a multiple of 4 between 0 and 1020, for
4690 // ADD sp + immediate.
4691 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
4694 // A power of two or a constant between 0 and 32. This is used in
4695 // GCC for the shift amount on shifted register operands, but it is
4696 // useful in general for any shift amounts.
4697 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
4703 if (Subtarget->isThumb()) { // FIXME thumb2
4704 // This must be a constant between 0 and 31, for shift amounts.
4705 if (CVal >= 0 && CVal <= 31)
4711 if (Subtarget->isThumb()) { // FIXME thumb2
4712 // This must be a multiple of 4 between -508 and 508, for
4713 // ADD/SUB sp = sp + immediate.
4714 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
4719 Result = DAG.getTargetConstant(CVal, Op.getValueType());
4723 if (Result.getNode()) {
4724 Ops.push_back(Result);
4727 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
4732 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
4733 // The ARM target isn't yet aware of offsets.
4737 int ARM::getVFPf32Imm(const APFloat &FPImm) {
4738 APInt Imm = FPImm.bitcastToAPInt();
4739 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
4740 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127
4741 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits
4743 // We can handle 4 bits of mantissa.
4744 // mantissa = (16+UInt(e:f:g:h))/16.
4745 if (Mantissa & 0x7ffff)
4748 if ((Mantissa & 0xf) != Mantissa)
4751 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
4752 if (Exp < -3 || Exp > 4)
4754 Exp = ((Exp+3) & 0x7) ^ 4;
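// Worked example (illustrative): 1.0f is 0x3f800000, so Sign = 0, Exp = 0,
// and Mantissa = 0; the exponent remaps to ((0+3) & 0x7) ^ 4 = 7, giving an
// encoded immediate of (0 << 7) | (7 << 4) | 0 = 0x70.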
4756 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
4759 int ARM::getVFPf64Imm(const APFloat &FPImm) {
4760 APInt Imm = FPImm.bitcastToAPInt();
4761 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
4762 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023
4763 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
4765 // We can handle 4 bits of mantissa.
4766 // mantissa = (16+UInt(e:f:g:h))/16.
4767 if (Mantissa & 0xffffffffffffLL)
4770 if ((Mantissa & 0xf) != Mantissa)
4773 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
4774 if (Exp < -3 || Exp > 4)
4776 Exp = ((Exp+3) & 0x7) ^ 4;
4778 return ((int)Sign << 7) | (Exp << 4) | Mantissa;
4781 /// isFPImmLegal - Returns true if the target can instruction select the
4782 /// specified FP immediate natively. If false, the legalizer will
4783 /// materialize the FP immediate as a load from a constant pool.
4784 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4785 if (!Subtarget->hasVFP3())
4788 return ARM::getVFPf32Imm(Imm) != -1;
4790 return ARM::getVFPf64Imm(Imm) != -1;