X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=3e53af42b897d74d455dfab0cb8b0d18d39517b5;hp=2ebad8e0e43264e17a49efd1bb520ae27bbb2438;hb=b23c2d9b2c9f4cfcb3781486c3a13d553a45b474;hpb=d5561bb1f0fe5c67af59d79a6961ebf4b3c7b953 diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2ebad8e0e43..3e53af42b89 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-isel" #include "ARMISelLowering.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" @@ -37,18 +36,22 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" #include using namespace llvm; +#define DEBUG_TYPE "arm-isel" + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); @@ -79,7 +82,7 @@ namespace { } // The APCS parameter registers. -static const uint16_t GPRArgRegs[] = { +static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -152,25 +155,26 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget().isTargetMachO()) +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) return new TargetLoweringObjectFileMachO(); - + if (TT.isOSWindows()) + return new TargetLoweringObjectFileCOFF(); return new ARMElfTargetObjectFile(); } ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { Subtarget = &TM.getSubtarget(); - RegInfo = TM.getRegisterInfo(); - Itins = TM.getInstrItineraryData(); + RegInfo = TM.getSubtargetImpl()->getRegisterInfo(); + Itins = TM.getSubtargetImpl()->getInstrItineraryData(); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2() && - Subtarget->hasARMOps()) { + Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) { // Single-precision floating-point arithmetic. setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); @@ -246,173 +250,135 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // These libcalls are not available in 32-bit. - setLibcallName(RTLIB::SHL_I128, 0); - setLibcallName(RTLIB::SRL_I128, 0); - setLibcallName(RTLIB::SRA_I128, 0); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() && !Subtarget->isTargetWindows()) { - // Double-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 2 - setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); - setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); - setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); - setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); - setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); - - // Double-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 3 - setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); - setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); - setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); - setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); - setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); - setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); - setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); - setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); - - // Single-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 4 - setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); - setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); - setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); - setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); - setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); - - // Single-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 5 - setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); - setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); - setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); - setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); - setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); - setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); - setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); - setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); - - // Floating-point to integer conversions. - // RTABI chapter 4.1.2, Table 6 - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); - setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); - setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); - setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); - setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); - - // Conversions between floating types. - // RTABI chapter 4.1.2, Table 7 - setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); - setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); - setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); - - // Integer to floating-point conversions. - // RTABI chapter 4.1.2, Table 8 - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); - setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); - setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); - setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); - setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - - // Long long helper functions - // RTABI chapter 4.2, Table 9 - setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); - setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); - setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); - setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); - - // Integer division functions - // RTABI chapter 4.3.1 - setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); - setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - - // Memory operations - // RTABI chapter 4.3.4 - setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); - setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); - setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); - setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + const ISD::CondCode Cond; + } LibraryCalls[] = { + // Double-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 2 + { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Double-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 3 + { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Single-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 4 + { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Single-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 5 + { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Floating-point to integer conversions. + // RTABI chapter 4.1.2, Table 6 + { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Conversions between floating types. + // RTABI chapter 4.1.2, Table 7 + { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer to floating-point conversions. + // RTABI chapter 4.1.2, Table 8 + { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Long long helper functions + // RTABI chapter 4.2, Table 9 + { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer division functions + // RTABI chapter 4.3.1 + { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Memory operations + // RTABI chapter 4.3.4 + { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); + } + } + + if (Subtarget->isTargetWindows()) { + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + } LibraryCalls[] = { + { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + } } // Use divmod compiler-rt calls for iOS 5.0 and later. @@ -431,8 +397,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::f32, &ARM::SPRRegClass); if (!Subtarget->isFPOnlySP()) addRegisterClass(MVT::f64, &ARM::DPRRegClass); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); } for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -444,6 +408,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); @@ -610,8 +581,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) computeRegisterProperties(); - // ARM does not have f32 extending load. + // ARM does not have floating-point extending loads. setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + // ... or truncating stores + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -631,6 +608,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + setOperationAction(ISD::SADDO, MVT::i32, Custom); + setOperationAction(ISD::UADDO, MVT::i32, Custom); + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + setOperationAction(ISD::USUBO, MVT::i32, Custom); + // i64 operation support. setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -733,7 +715,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setExceptionSelectorRegister(ARM::R1); } - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { @@ -844,13 +830,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } - // Special handling for half-precision FP. + + // v8 adds f64 <-> f16 conversion. Before that it should be expanded. + if (!Subtarget->hasV8Ops()) { + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + } + + // fp16 is a special v7 extension that adds f16 <-> f32 conversions. if (!Subtarget->hasFP16()) { - setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); - setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } } - + // Combine sin / cos into one node or libcall if possible. if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); @@ -913,7 +906,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // and extractions. std::pair ARMTargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = 0; + const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: @@ -950,7 +943,7 @@ ARMTargetLowering::findRepresentativeClass(MVT VT) const{ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case ARMISD::Wrapper: return "ARMISD::Wrapper"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; @@ -1006,6 +999,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; @@ -1136,7 +1131,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Load are scheduled for latency even if there instruction itinerary // is not available. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); if (MCID.getNumDefs() == 0) @@ -1204,40 +1200,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, #include "ARMGenCallingConv.inc" -/// CCAssignFnForNode - Selects the correct CCAssignFn for a the -/// given CallingConvention value. -CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, - bool Return, - bool isVarArg) const { +/// getEffectiveCallingConv - Get the effective calling convention, taking into +/// account presence of floating point hardware and calling convention +/// limitations, such as support for variadic functions. +CallingConv::ID +ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - if (Subtarget->hasVFP2() && !isVarArg) { - if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); - // For AAPCS ABI targets, just use VFP variant of the calling convention. - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - } - // Fallthrough - case CallingConv::C: { - // Use target triple & subtarget features to do actual dispatch. + case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + case CallingConv::GHC: + return CC; + case CallingConv::ARM_AAPCS_VFP: + return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; + case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); - else if (Subtarget->hasVFP2() && + return CallingConv::ARM_APCS; + else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; + case CallingConv::Fast: + if (!Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + return CallingConv::Fast; + return CallingConv::ARM_APCS; + } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; } - case CallingConv::ARM_AAPCS_VFP: - if (!isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - // Fallthrough - case CallingConv::ARM_AAPCS: - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); +} + +/// CCAssignFnForNode - Selects the correct CCAssignFn for the given +/// CallingConvention. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, + bool Return, + bool isVarArg) const { + switch (getEffectiveCallingConv(CC, isVarArg)) { + default: + llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + case CallingConv::Fast: + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); } @@ -1286,6 +1300,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { @@ -1301,6 +1317,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, MVT::i32)); @@ -1351,16 +1369,17 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + unsigned id = Subtarget->isLittle() ? 0 : 1; + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); if (NextVA.isRegLoc()) - RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); else { assert(NextVA.isMemLoc()); - if (StackPtr.getNode() == 0) + if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, Flags)); } @@ -1398,6 +1417,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); + if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { @@ -1542,7 +1564,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, - Ops, array_lengthof(Ops))); + Ops)); } } else if (!isSibCall) { assert(VA.isMemLoc()); @@ -1553,8 +1575,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. @@ -1596,8 +1617,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMFunctionInfo *AFI = MF.getInfo(); if (EnableARMLongCalls) { - assert (getTargetMachine().getRelocationModel() == Reloc::Static - && "long-calls with non-static relocation model!"); + assert((Subtarget->isTargetWindows() || + getTargetMachine().getRelocationModel() == Reloc::Static) && + "long-calls with non-static relocation model!"); // Handle a global address or an external symbol. If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. @@ -1645,6 +1667,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), DAG.getTargetGlobalAddress(GV, dl, getPointerTy())); + } else if (Subtarget->isTargetCOFF()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + unsigned TargetFlags = GV->hasDLLImportStorageClass() + ? ARMII::MO_DLLIMPORT + : ARMII::MO_NO_FLAG; + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0, + TargetFlags); + if (GV->hasDLLImportStorageClass()) + Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(), + Callee), MachinePointerInfo::getGOT(), + false, false, false, 0); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; @@ -1686,7 +1721,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; - bool HasMinSizeAttr = Subtarget->isMinSize(); + bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -1717,7 +1753,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. if (!isTailCall) { const uint32_t *Mask; - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); const ARMBaseRegisterInfo *ARI = static_cast(TRI); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable @@ -1741,10 +1778,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) - return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -1972,7 +2009,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -2049,8 +2087,7 @@ static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); - return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); } SDValue @@ -2074,6 +2111,11 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + bool isLittleEndian = Subtarget->isLittle(); + + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo(); + AFI->setReturnRegsCount(RVLocs.size()); // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; @@ -2100,12 +2142,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + HalfGPRs.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - HalfGPRs.getValue(1), Flag); + HalfGPRs.getValue(isLittleEndian ? 1 : 0), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc @@ -2117,12 +2162,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + DAG.getVTList(MVT::i32, MVT::i32), Arg); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); @@ -2151,8 +2199,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return LowerInterruptReturn(RetOps, dl, DAG); } - return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2314,13 +2361,14 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Entry.Node = Argument; Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); + // FIXME: is there useful debug info available here? - TargetLowering::CallLoweringInfo CLI(Chain, - (Type *) Type::getInt32Ty(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), + DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args), + 0); + std::pair CallResult = LowerCallTo(CLI); return CallResult.first; } @@ -2427,7 +2475,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, // If we have T2 ops, we can materialize the address directly via movt/movw // pair. This is always cheaper. - if (Subtarget->useMovt()) { + if (Subtarget->useMovt(DAG.getMachineFunction())) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. @@ -2449,7 +2497,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, const GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - if (Subtarget->useMovt()) + if (Subtarget->useMovt(DAG.getMachineFunction())) ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register @@ -2466,6 +2514,32 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, return Result; } +SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); + assert(Subtarget->useMovt(DAG.getMachineFunction()) && + "Windows on ARM expects to use movw/movt"); + + const GlobalValue *GV = cast(Op)->getGlobal(); + const ARMII::TOF TargetFlags = + (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG); + EVT PtrVT = getPointerTy(); + SDValue Result; + SDLoc DL(Op); + + ++NumMovwMovt; + + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes. + Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0, + TargetFlags)); + if (GV->hasDLLImportStorageClass()) + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, false, 0); + return Result; +} + SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && @@ -2511,6 +2585,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::arm_rbit: { + assert(Op.getOperand(0).getValueType() == MVT::i32 && + "RBIT intrinsic must have i32 type!"); + return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0)); + } case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -2654,7 +2733,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - + if (!Subtarget->isLittle()) + std::swap (ArgValue, ArgValue2); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } @@ -2678,7 +2758,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; } - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); ArgRegsSize = NumGPRs * 4; // If parameter is split between stack and GPRs... @@ -2803,8 +2886,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return FrameIndex; } else { if (ArgSize == 0) { @@ -2834,8 +2916,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // If there is no regs to be stored, just point address after last // argument passed via stack. int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize); + StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, + CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable, + 0, TotalArgRegsSaveSize); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -3166,11 +3249,96 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } +std::pair +ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, + SDValue &ARMcc) const { + assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); + + SDValue Value, OverflowCmp; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + + // FIXME: We are currently always generating CMPs because we don't support + // generating CMN through the backend. This is not as good as the natural + // CMP case because it causes a register dependency and cannot be folded + // later. + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::SADDO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::UADDO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::SSUBO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + case ISD::USUBO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + } // switch (...) + + return std::make_pair(Value, OverflowCmp); +} + + +SDValue +ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + // We use 0 and 1 as false and true values. + SDValue TVal = DAG.getConstant(1, MVT::i32); + SDValue FVal = DAG.getConstant(0, MVT::i32); + EVT VT = Op.getValueType(); + + SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal, + ARMcc, CCR, OverflowCmp); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); +} + + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); SDLoc dl(Op); + unsigned Opc = Cond.getOpcode(); + + if (Cond.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO)) { + if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + EVT VT = Op.getValueType(); + + return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, + ARMcc, CCR, OverflowCmp); + + } // Convert: // @@ -3472,7 +3640,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; - return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); + return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); } return SDValue(); @@ -3512,11 +3680,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; - SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, MVT::i32); SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; - Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); } return Res; } @@ -3713,7 +3881,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp1, 1).getValue(1); + Tmp1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); // Or in the signbit with integer operations. @@ -3729,7 +3897,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // f64: Or the high part with signbit and then combine two parts. Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp0, 1); + Tmp0); SDValue Lo = Tmp0.getValue(0); SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); @@ -3761,14 +3929,16 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + const ARMBaseRegisterInfo &ARI = + *static_cast(RegInfo); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO()) - ? ARM::R7 : ARM::R11; + unsigned FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -3777,6 +3947,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned ARMTargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + unsigned Reg = StringSwitch(RegName) + .Case("sp", ARM::SP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -3806,8 +3988,15 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + SDValue Cvt; + if (TLI.isBigEndian() && SrcVT.isVector() && + SrcVT.getVectorNumElements() > 1) + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), + DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); + else + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Op); // Merge the pieces into a single i64 value. return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } @@ -3863,7 +4052,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two @@ -3897,7 +4086,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, @@ -4102,7 +4291,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; - Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); @@ -4361,6 +4550,11 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, BitMask <<= 8; ImmMask <<= 1; } + + if (DAG.getTargetLoweringInfo().isBigEndian()) + // swap higher and lower 32 bit word + Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); + // Op=1, Cmode=1110. OpCmode = 0x1e; VT = is128Bits ? MVT::v2i64 : MVT::v1i64; @@ -4859,7 +5053,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(N); Ops.push_back(Op.getOperand(I)); Ops.push_back(DAG.getConstant(I, MVT::i32)); - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); } } return N; @@ -4870,7 +5064,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); @@ -4906,7 +5100,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5213,12 +5407,10 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, if (V2.getNode()->getOpcode() == ISD::UNDEF) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, @@ -5371,7 +5563,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(ShuffleMask[i] & (NumElts-1), MVT::i32))); } - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5563,7 +5755,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // operation legalization where we can't create illegal types. return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), + LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(), LD->isNonTemporal(), LD->getAlignment()); } @@ -5608,7 +5800,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); + MVT::getVectorVT(TruncVT, NumElts), Ops); } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { @@ -5946,12 +6138,12 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { ? "__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, - CallingConv::C, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/false, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, + std::move(Args), 0) + .setDiscardResult(); + std::pair CallResult = LowerCallTo(CLI); SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, @@ -5998,8 +6190,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, }; Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, - DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], - array_lengthof(Ops)); + DAG.getVTList(MVT::i32, MVT::Other), Ops); OutChain = Cycles32.getValue(1); } else { // Intrinsic is defined to return 0 on unsupported platforms. Technically @@ -6022,8 +6213,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: - return Subtarget->isTargetMachO() ? LowerGlobalAddressDarwin(Op, DAG) : - LowerGlobalAddressELF(Op, DAG); + switch (Subtarget->getTargetTriple().getObjectFormat()) { + default: llvm_unreachable("unknown object format"); + case Triple::COFF: + return LowerGlobalAddressWindows(Op, DAG); + case Triple::ELF: + return LowerGlobalAddressELF(Op, DAG); + case Triple::MachO: + return LowerGlobalAddressDarwin(Op, DAG); + } case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -6068,11 +6266,20 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + return LowerXALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + return LowerDYNAMIC_STACKALLOC(Op, DAG); + llvm_unreachable("Don't know how to custom lower this!"); } } @@ -6109,7 +6316,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, void ARMTargetLowering:: SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6224,7 +6432,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *ARMTargetLowering:: EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6558,7 +6767,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // N.B. the order the invoke BBs are processed in doesn't matter here. - const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); + const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector MBBLPads; for (SmallPtrSet::iterator I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { @@ -6741,7 +6950,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; @@ -6755,8 +6965,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnitSize = 0; - const TargetRegisterClass *TRC = 0; - const TargetRegisterClass *VecTRC = 0; + const TargetRegisterClass *TRC = nullptr; + const TargetRegisterClass *VecTRC = nullptr; bool IsThumb1 = Subtarget->isThumb1Only(); bool IsThumb2 = Subtarget->isThumb2(); @@ -6790,7 +7000,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, ? (const TargetRegisterClass *)&ARM::DPairRegClass : UnitSize == 8 ? (const TargetRegisterClass *)&ARM::DPRRegClass - : 0; + : nullptr; unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -6971,10 +7181,77 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, return BB; } +MachineBasicBlock * +ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const TargetMachine &TM = getTargetMachine(); + const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + assert(Subtarget->isTargetWindows() && + "__chkstk is only supported on Windows"); + assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode"); + + // __chkstk takes the number of words to allocate on the stack in R4, and + // returns the stack adjustment in number of bytes in R4. This will not + // clober any other registers (other than the obvious lr). + // + // Although, technically, IP should be considered a register which may be + // clobbered, the call itself will not touch it. Windows on ARM is a pure + // thumb-2 environment, so there is no interworking required. As a result, we + // do not expect a veneer to be emitted by the linker, clobbering IP. + // + // Each module receives its own copy of __chkstk, so no import thunk is + // required, again, ensuring that IP is not clobbered. + // + // Finally, although some linkers may theoretically provide a trampoline for + // out of range calls (which is quite common due to a 32M range limitation of + // branches for Thumb), we can generate the long-call version via + // -mcmodel=large, alleviating the need for the trampoline which may clobber + // IP. + + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + case CodeModel::Large: + case CodeModel::JITDefault: { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); + + BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) + .addExternalSymbol("__chkstk"); + BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(Reg, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + } + } + + AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP).addReg(ARM::R4))); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { @@ -7220,6 +7497,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::COPY_STRUCT_BYVAL_I32: ++NumLoopByVals; return EmitStructByval(MI, BB); + case ARM::WIN__CHKSTK: + return EmitLowered__chkstk(MI, BB); } } @@ -7242,8 +7521,8 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); if (NewOpc) { - const ARMBaseInstrInfo *TII = - static_cast(getTargetMachine().getInstrInfo()); + const ARMBaseInstrInfo *TII = static_cast( + getTargetMachine().getSubtargetImpl()->getInstrInfo()); MCID = &TII->get(NewOpc); assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && @@ -7520,8 +7799,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, llvm_unreachable("Invalid vector element type for padd optimization."); } - SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), - widenType, &Ops[0], Ops.size()); + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops); unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; return DAG.getNode(ExtOp, SDLoc(N), VT, tmp); } @@ -7581,7 +7859,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); - if (AddeNode == NULL) + if (!AddeNode) return SDValue(); // Make sure it is really an ADDE. @@ -7616,9 +7894,9 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Figure out the high and low input values to the MLAL node. SDValue* HiMul = &MULOp; - SDValue* HiAdd = NULL; - SDValue* LoMul = NULL; - SDValue* LowAdd = NULL; + SDValue* HiAdd = nullptr; + SDValue* LoMul = nullptr; + SDValue* LowAdd = nullptr; if (IsLeftOperandMUL) HiAdd = &AddeOp1; @@ -7635,7 +7913,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, LowAdd = &AddcOp0; } - if (LoMul == NULL) + if (!LoMul) return SDValue(); if (LoMul->getNode() != HiMul->getNode()) @@ -7652,8 +7930,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, Ops.push_back(*HiAdd); SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), - DAG.getVTList(MVT::i32, MVT::i32), - &Ops[0], Ops.size()); + DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the MLA node's values. SDValue HiMLALResult(MLALNode.getNode(), 1); @@ -8177,9 +8454,9 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); + if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) + std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); - DCI.RemoveFromWorklist(LD); - DAG.DeleteNode(LD); return Result; } @@ -8244,7 +8521,8 @@ static SDValue PerformSTORECombine(SDNode *N, SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + for (unsigned i = 0; i < NumElems; ++i) + ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); @@ -8290,8 +8568,7 @@ static SDValue PerformSTORECombine(SDNode *N, Increment); Chains.push_back(Ch); } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], - Chains.size()); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } if (!ISD::isNormalStore(St)) @@ -8302,16 +8579,18 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; + bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, - StVal.getNode()->getOperand(0), BasePtr, - St->getPointerInfo(), St->isVolatile(), + StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), + BasePtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, MVT::i32)); - return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), + return DAG.getStore(NewST1.getValue(0), DL, + StVal.getNode()->getOperand(isBigEndian ? 0 : 1), OffsetPtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), std::min(4U, St->getAlignment() / 2)); @@ -8340,7 +8619,7 @@ static SDValue PerformSTORECombine(SDNode *N, return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment(), - St->getTBAAInfo()); + St->getAAInfo()); } /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node @@ -8387,7 +8666,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, DCI.AddToWorklist(V.getNode()); } EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -8470,7 +8749,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Fold obvious case. V = V.getOperand(0); else { - V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); + V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(V.getNode()); } @@ -8666,7 +8945,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Tys[n] = VecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); + SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs+2)); SmallVector Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); @@ -8676,8 +8955,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } MemIntrinsicSDNode *MemInt = cast(N); SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops.data(), Ops.size(), - MemInt->getMemoryVT(), + Ops, MemInt->getMemoryVT(), MemInt->getMemOperand()); // Update the uses. @@ -8746,11 +9024,11 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); + SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, - Ops, 2, VLDMemInt->getMemoryVT(), + Ops, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); // Update the uses. @@ -9348,7 +9626,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); + DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -9431,8 +9709,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned, - bool *Fast) const { +bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); @@ -9486,11 +9766,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && + Fast))) { return MVT::f64; } } @@ -9935,11 +10216,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return true; } -void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { unsigned BitWidth = KnownOne.getBitWidth(); KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { @@ -9955,11 +10236,11 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; @@ -10053,7 +10334,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. @@ -10132,7 +10413,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { - SDValue Result(0, 0); + SDValue Result; // Currently only support length 1 constraints. if (Constraint.length() != 1) return; @@ -10331,16 +10612,41 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); SDLoc dl(Op); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, - 0, getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair CallInfo = LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair CallInfo = LowerCallTo(CLI); return CallInfo.first; } +SDValue +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "unsupported target platform"); + SDLoc DL(Op); + + // Get the inputs. + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + + SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, + DAG.getConstant(2, MVT::i32)); + + SDValue Flag; + Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); + Flag = Chain.getValue(1); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); + + SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); + Chain = NewSP.getValue(1); + + SDValue Ops[2] = { NewSP, Chain }; + return DAG.getMergeValues(Ops, DL); +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -10494,3 +10800,171 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return false; return true; } + +bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { + // Loads and stores less than 64-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit + // anything for those. + bool IsMClass = Subtarget->isMClass(); + if (StoreInst *SI = dyn_cast(Inst)) { + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return Size == 64 && !IsMClass; + } else if (LoadInst *LI = dyn_cast(Inst)) { + return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass; + } + + // For the real atomic operations, we have ldrex/strex up to 32 bits, + // and up to 64 bits on the non-M profiles + unsigned AtomicLimit = IsMClass ? 32 : 64; + return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit; +} + +// This has so far only been implemented for MachO. +bool ARMTargetLowering::useLoadStackGuardNode() const { + return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO; +} + +Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Type *ValTy = cast(Addr->getType())->getElementType(); + bool IsAcquire = + Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd + // intrinsic must return {i32, i32} and we have to recombine them into a + // single i64 here. + if (ValTy->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); + + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); + + Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); + Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); + Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); + Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); + return Builder.CreateOr( + Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); + } + + Type *Tys[] = { Addr->getType() }; + Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateTruncOrBitCast( + Builder.CreateCall(Ldrex, Addr), + cast(Addr->getType())->getElementType()); +} + +Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, + AtomicOrdering Ord) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + bool IsRelease = + Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since the intrinsics must have legal type, the i64 intrinsics take two + // parameters: "i32, i32". We must marshal Val into the appropriate form + // before the call. + if (Val->getType()->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; + Function *Strex = Intrinsic::getDeclaration(M, Int); + Type *Int32Ty = Type::getInt32Ty(M->getContext()); + + Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); + Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + return Builder.CreateCall3(Strex, Lo, Hi, Addr); + } + + Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex; + Type *Tys[] = { Addr->getType() }; + Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateCall2( + Strex, Builder.CreateZExtOrBitCast( + Val, Strex->getFunctionType()->getParamType(0)), + Addr); +} + +enum HABaseType { + HA_UNKNOWN = 0, + HA_FLOAT, + HA_DOUBLE, + HA_VECT64, + HA_VECT128 +}; + +static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, + uint64_t &Members) { + if (const StructType *ST = dyn_cast(Ty)) { + for (unsigned i = 0; i < ST->getNumElements(); ++i) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) + return false; + Members += SubMembers; + } + } else if (const ArrayType *AT = dyn_cast(Ty)) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) + return false; + Members += SubMembers * AT->getNumElements(); + } else if (Ty->isFloatTy()) { + if (Base != HA_UNKNOWN && Base != HA_FLOAT) + return false; + Members = 1; + Base = HA_FLOAT; + } else if (Ty->isDoubleTy()) { + if (Base != HA_UNKNOWN && Base != HA_DOUBLE) + return false; + Members = 1; + Base = HA_DOUBLE; + } else if (const VectorType *VT = dyn_cast(Ty)) { + Members = 1; + switch (Base) { + case HA_FLOAT: + case HA_DOUBLE: + return false; + case HA_VECT64: + return VT->getBitWidth() == 64; + case HA_VECT128: + return VT->getBitWidth() == 128; + case HA_UNKNOWN: + switch (VT->getBitWidth()) { + case 64: + Base = HA_VECT64; + return true; + case 128: + Base = HA_VECT128; + return true; + default: + return false; + } + } + } + + return (Members > 0 && Members <= 4); +} + +/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. +bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { + if (getEffectiveCallingConv(CallConv, isVarArg) != + CallingConv::ARM_AAPCS_VFP) + return false; + + HABaseType Base = HA_UNKNOWN; + uint64_t Members = 0; + bool result = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + return result; +}