X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=ef23b2d22639f337d9a7fb594eb54304807b6c6d;hb=75909261f05aa06bdabf9b37e3172be7be7b40c1;hp=bdd547fe286d83bb98dacd233188615050af3e42;hpb=09da6b55409ec4fbf69fedff8f74552bbad69469;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index bdd547fe286..ef23b2d2263 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-isel" #include "ARMISelLowering.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" @@ -44,12 +43,15 @@ #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" #include using namespace llvm; +#define DEBUG_TYPE "arm-isel" + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); @@ -153,15 +155,16 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget().isTargetMachO()) +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) return new TargetLoweringObjectFileMachO(); - + if (TT.isOSWindows()) + return new TargetLoweringObjectFileCOFF(); return new ARMElfTargetObjectFile(); } ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { Subtarget = &TM.getSubtarget(); RegInfo = TM.getRegisterInfo(); Itins = TM.getInstrItineraryData(); @@ -171,7 +174,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2() && - Subtarget->hasARMOps()) { + Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) { // Single-precision floating-point arithmetic. setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); @@ -247,173 +250,134 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // These libcalls are not available in 32-bit. - setLibcallName(RTLIB::SHL_I128, 0); - setLibcallName(RTLIB::SRL_I128, 0); - setLibcallName(RTLIB::SRA_I128, 0); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() && !Subtarget->isTargetWindows()) { - // Double-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 2 - setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); - setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); - setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); - setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); - setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); - - // Double-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 3 - setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); - setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); - setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); - setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); - setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); - setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); - setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); - setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); - - // Single-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 4 - setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); - setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); - setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); - setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); - setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); - - // Single-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 5 - setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); - setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); - setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); - setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); - setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); - setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); - setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); - setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); - - // Floating-point to integer conversions. - // RTABI chapter 4.1.2, Table 6 - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); - setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); - setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); - setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); - setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); - - // Conversions between floating types. - // RTABI chapter 4.1.2, Table 7 - setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); - setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); - setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); - - // Integer to floating-point conversions. - // RTABI chapter 4.1.2, Table 8 - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); - setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); - setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); - setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); - setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - - // Long long helper functions - // RTABI chapter 4.2, Table 9 - setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); - setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); - setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); - setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); - - // Integer division functions - // RTABI chapter 4.3.1 - setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); - setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - - // Memory operations - // RTABI chapter 4.3.4 - setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); - setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); - setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); - setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + const ISD::CondCode Cond; + } LibraryCalls[] = { + // Double-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 2 + { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Double-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 3 + { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Single-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 4 + { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Single-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 5 + { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Floating-point to integer conversions. + // RTABI chapter 4.1.2, Table 6 + { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Conversions between floating types. + // RTABI chapter 4.1.2, Table 7 + { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer to floating-point conversions. + // RTABI chapter 4.1.2, Table 8 + { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Long long helper functions + // RTABI chapter 4.2, Table 9 + { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer division functions + // RTABI chapter 4.3.1 + { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Memory operations + // RTABI chapter 4.3.4 + { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); + } + } + + if (Subtarget->isTargetWindows()) { + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + } LibraryCalls[] = { + { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + } } // Use divmod compiler-rt calls for iOS 5.0 and later. @@ -445,6 +409,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); @@ -632,6 +603,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + setOperationAction(ISD::SADDO, MVT::i32, Custom); + setOperationAction(ISD::UADDO, MVT::i32, Custom); + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + setOperationAction(ISD::USUBO, MVT::i32, Custom); + // i64 operation support. setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -734,7 +710,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setExceptionSelectorRegister(ARM::R1); } - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { @@ -914,7 +894,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // and extractions. std::pair ARMTargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = 0; + const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: @@ -951,7 +931,7 @@ ARMTargetLowering::findRepresentativeClass(MVT VT) const{ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case ARMISD::Wrapper: return "ARMISD::Wrapper"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; @@ -1007,6 +987,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; @@ -1205,40 +1187,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, #include "ARMGenCallingConv.inc" -/// CCAssignFnForNode - Selects the correct CCAssignFn for a the -/// given CallingConvention value. -CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, - bool Return, - bool isVarArg) const { +/// getEffectiveCallingConv - Get the effective calling convention, taking into +/// account presence of floating point hardware and calling convention +/// limitations, such as support for variadic functions. +CallingConv::ID +ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - if (Subtarget->hasVFP2() && !isVarArg) { - if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); - // For AAPCS ABI targets, just use VFP variant of the calling convention. - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - } - // Fallthrough - case CallingConv::C: { - // Use target triple & subtarget features to do actual dispatch. + case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + case CallingConv::GHC: + return CC; + case CallingConv::ARM_AAPCS_VFP: + return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; + case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); - else if (Subtarget->hasVFP2() && + return CallingConv::ARM_APCS; + else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; + case CallingConv::Fast: + if (!Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + return CallingConv::Fast; + return CallingConv::ARM_APCS; + } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; } - case CallingConv::ARM_AAPCS_VFP: - if (!isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - // Fallthrough - case CallingConv::ARM_AAPCS: - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); +} + +/// CCAssignFnForNode - Selects the correct CCAssignFn for the given +/// CallingConvention. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, + bool Return, + bool isVarArg) const { + switch (getEffectiveCallingConv(CC, isVarArg)) { + default: + llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + case CallingConv::Fast: + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); } @@ -1287,6 +1287,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { @@ -1302,6 +1304,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, MVT::i32)); @@ -1352,16 +1356,17 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + unsigned id = Subtarget->isLittle() ? 0 : 1; + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); if (NextVA.isRegLoc()) - RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); else { assert(NextVA.isMemLoc()); - if (StackPtr.getNode() == 0) + if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, Flags)); } @@ -1399,6 +1404,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); + if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { @@ -1543,7 +1551,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, - Ops, array_lengthof(Ops))); + Ops)); } } else if (!isSibCall) { assert(VA.isMemLoc()); @@ -1554,8 +1562,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. @@ -1597,8 +1604,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMFunctionInfo *AFI = MF.getInfo(); if (EnableARMLongCalls) { - assert (getTargetMachine().getRelocationModel() == Reloc::Static - && "long-calls with non-static relocation model!"); + assert((Subtarget->isTargetWindows() || + getTargetMachine().getRelocationModel() == Reloc::Static) && + "long-calls with non-static relocation model!"); // Handle a global address or an external symbol. If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. @@ -1742,10 +1750,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) - return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -2050,8 +2058,7 @@ static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); - return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); } SDValue @@ -2075,6 +2082,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + bool isLittleEndian = Subtarget->isLittle(); // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; @@ -2101,12 +2109,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + HalfGPRs.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - HalfGPRs.getValue(1), Flag); + HalfGPRs.getValue(isLittleEndian ? 1 : 0), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc @@ -2118,12 +2129,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + DAG.getVTList(MVT::i32, MVT::i32), Arg); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); @@ -2152,8 +2166,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return LowerInterruptReturn(RetOps, dl, DAG); } - return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2315,13 +2328,14 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Entry.Node = Argument; Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); + // FIXME: is there useful debug info available here? - TargetLowering::CallLoweringInfo CLI(Chain, - (Type *) Type::getInt32Ty(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), + DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args), + 0); + std::pair CallResult = LowerCallTo(CLI); return CallResult.first; } @@ -2467,6 +2481,23 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, return Result; } +SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); + assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt"); + + const GlobalValue *GV = cast(Op)->getGlobal(); + EVT PtrVT = getPointerTy(); + SDLoc DL(Op); + + ++NumMovwMovt; + + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes. + return DAG.getNode(ARMISD::Wrapper, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT)); +} + SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && @@ -2512,6 +2543,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::arm_rbit: { + assert(Op.getOperand(0).getValueType() == MVT::i32 && + "RBIT intrinsic must have i32 type!"); + return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0)); + } case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -2655,7 +2691,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - + if (!Subtarget->isLittle()) + std::swap (ArgValue, ArgValue2); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } @@ -2804,8 +2841,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return FrameIndex; } else { if (ArgSize == 0) { @@ -2835,8 +2871,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // If there is no regs to be stored, just point address after last // argument passed via stack. int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize); + StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, + CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable, + 0, TotalArgRegsSaveSize); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -3167,11 +3204,96 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } +std::pair +ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, + SDValue &ARMcc) const { + assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); + + SDValue Value, OverflowCmp; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + + // FIXME: We are currently always generating CMPs because we don't support + // generating CMN through the backend. This is not as good as the natural + // CMP case because it causes a register dependency and cannot be folded + // later. + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::SADDO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::UADDO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::SSUBO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + case ISD::USUBO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + } // switch (...) + + return std::make_pair(Value, OverflowCmp); +} + + +SDValue +ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + // We use 0 and 1 as false and true values. + SDValue TVal = DAG.getConstant(1, MVT::i32); + SDValue FVal = DAG.getConstant(0, MVT::i32); + EVT VT = Op.getValueType(); + + SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal, + ARMcc, CCR, OverflowCmp); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); +} + + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); SDLoc dl(Op); + unsigned Opc = Cond.getOpcode(); + + if (Cond.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO)) { + if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + EVT VT = Op.getValueType(); + + return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, + ARMcc, CCR, OverflowCmp); + + } // Convert: // @@ -3473,7 +3595,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; - return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); + return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); } return SDValue(); @@ -3513,11 +3635,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; - SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, MVT::i32); SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; - Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); } return Res; } @@ -3714,7 +3836,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp1, 1).getValue(1); + Tmp1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); // Or in the signbit with integer operations. @@ -3730,7 +3852,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // f64: Or the high part with signbit and then combine two parts. Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp0, 1); + Tmp0); SDValue Lo = Tmp0.getValue(0); SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); @@ -3762,14 +3884,16 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + const ARMBaseRegisterInfo &ARI = + *static_cast(RegInfo); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO()) - ? ARM::R7 : ARM::R11; + unsigned FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -3778,6 +3902,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned ARMTargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + unsigned Reg = StringSwitch(RegName) + .Case("sp", ARM::SP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -3807,8 +3943,15 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + SDValue Cvt; + if (TLI.isBigEndian() && SrcVT.isVector() && + SrcVT.getVectorNumElements() > 1) + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), + DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); + else + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Op); // Merge the pieces into a single i64 value. return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } @@ -3864,7 +4007,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two @@ -3898,7 +4041,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, @@ -4103,7 +4246,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; - Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); @@ -4362,6 +4505,11 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, BitMask <<= 8; ImmMask <<= 1; } + + if (DAG.getTargetLoweringInfo().isBigEndian()) + // swap higher and lower 32 bit word + Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); + // Op=1, Cmode=1110. OpCmode = 0x1e; VT = is128Bits ? MVT::v2i64 : MVT::v1i64; @@ -4860,7 +5008,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(N); Ops.push_back(Op.getOperand(I)); Ops.push_back(DAG.getConstant(I, MVT::i32)); - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); } } return N; @@ -4871,7 +5019,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); @@ -4907,7 +5055,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5214,12 +5362,10 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, if (V2.getNode()->getOpcode() == ISD::UNDEF) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, @@ -5372,7 +5518,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(ShuffleMask[i] & (NumElts-1), MVT::i32))); } - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5609,7 +5755,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); + MVT::getVectorVT(TruncVT, NumElts), Ops); } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { @@ -5947,12 +6093,12 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { ? "__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, - CallingConv::C, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/false, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, + std::move(Args), 0) + .setDiscardResult(); + std::pair CallResult = LowerCallTo(CLI); SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, @@ -5999,8 +6145,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, }; Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, - DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], - array_lengthof(Ops)); + DAG.getVTList(MVT::i32, MVT::Other), Ops); OutChain = Cycles32.getValue(1); } else { // Intrinsic is defined to return 0 on unsupported platforms. Technically @@ -6023,8 +6168,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: - return Subtarget->isTargetMachO() ? LowerGlobalAddressDarwin(Op, DAG) : - LowerGlobalAddressELF(Op, DAG); + switch (Subtarget->getTargetTriple().getObjectFormat()) { + default: llvm_unreachable("unknown object format"); + case Triple::COFF: + return LowerGlobalAddressWindows(Op, DAG); + case Triple::ELF: + return LowerGlobalAddressELF(Op, DAG); + case Triple::MachO: + return LowerGlobalAddressDarwin(Op, DAG); + } case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -6069,11 +6221,20 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + return LowerXALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + return LowerDYNAMIC_STACKALLOC(Op, DAG); + llvm_unreachable("Don't know how to custom lower this!"); } } @@ -6756,8 +6917,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnitSize = 0; - const TargetRegisterClass *TRC = 0; - const TargetRegisterClass *VecTRC = 0; + const TargetRegisterClass *TRC = nullptr; + const TargetRegisterClass *VecTRC = nullptr; bool IsThumb1 = Subtarget->isThumb1Only(); bool IsThumb2 = Subtarget->isThumb2(); @@ -6791,7 +6952,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, ? (const TargetRegisterClass *)&ARM::DPairRegClass : UnitSize == 8 ? (const TargetRegisterClass *)&ARM::DPRRegClass - : 0; + : nullptr; unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -6972,6 +7133,73 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, return BB; } +MachineBasicBlock * +ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const TargetMachine &TM = getTargetMachine(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + assert(Subtarget->isTargetWindows() && + "__chkstk is only supported on Windows"); + assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode"); + + // __chkstk takes the number of words to allocate on the stack in R4, and + // returns the stack adjustment in number of bytes in R4. This will not + // clober any other registers (other than the obvious lr). + // + // Although, technically, IP should be considered a register which may be + // clobbered, the call itself will not touch it. Windows on ARM is a pure + // thumb-2 environment, so there is no interworking required. As a result, we + // do not expect a veneer to be emitted by the linker, clobbering IP. + // + // Each module receives its own copy of __chkstk, so no import thunk is + // required, again, ensuring that IP is not clobbered. + // + // Finally, although some linkers may theoretically provide a trampoline for + // out of range calls (which is quite common due to a 32M range limitation of + // branches for Thumb), we can generate the long-call version via + // -mcmodel=large, alleviating the need for the trampoline which may clobber + // IP. + + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + case CodeModel::Large: + case CodeModel::JITDefault: { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); + + BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) + .addExternalSymbol("__chkstk"); + BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(Reg, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + } + } + + AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::R4, RegState::Kill))); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -7221,6 +7449,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::COPY_STRUCT_BYVAL_I32: ++NumLoopByVals; return EmitStructByval(MI, BB); + case ARM::WIN__CHKSTK: + return EmitLowered__chkstk(MI, BB); } } @@ -7521,8 +7751,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, llvm_unreachable("Invalid vector element type for padd optimization."); } - SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), - widenType, &Ops[0], Ops.size()); + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops); unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; return DAG.getNode(ExtOp, SDLoc(N), VT, tmp); } @@ -7582,7 +7811,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); - if (AddeNode == NULL) + if (!AddeNode) return SDValue(); // Make sure it is really an ADDE. @@ -7617,9 +7846,9 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Figure out the high and low input values to the MLAL node. SDValue* HiMul = &MULOp; - SDValue* HiAdd = NULL; - SDValue* LoMul = NULL; - SDValue* LowAdd = NULL; + SDValue* HiAdd = nullptr; + SDValue* LoMul = nullptr; + SDValue* LowAdd = nullptr; if (IsLeftOperandMUL) HiAdd = &AddeOp1; @@ -7636,7 +7865,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, LowAdd = &AddcOp0; } - if (LoMul == NULL) + if (!LoMul) return SDValue(); if (LoMul->getNode() != HiMul->getNode()) @@ -7653,8 +7882,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, Ops.push_back(*HiAdd); SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), - DAG.getVTList(MVT::i32, MVT::i32), - &Ops[0], Ops.size()); + DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the MLA node's values. SDValue HiMLALResult(MLALNode.getNode(), 1); @@ -8178,6 +8406,8 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); + if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) + std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); DCI.RemoveFromWorklist(LD); DAG.DeleteNode(LD); @@ -8245,7 +8475,8 @@ static SDValue PerformSTORECombine(SDNode *N, SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + for (unsigned i = 0; i < NumElems; ++i) + ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); @@ -8291,8 +8522,7 @@ static SDValue PerformSTORECombine(SDNode *N, Increment); Chains.push_back(Ch); } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], - Chains.size()); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } if (!ISD::isNormalStore(St)) @@ -8303,16 +8533,18 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; + bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, - StVal.getNode()->getOperand(0), BasePtr, - St->getPointerInfo(), St->isVolatile(), + StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), + BasePtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, MVT::i32)); - return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), + return DAG.getStore(NewST1.getValue(0), DL, + StVal.getNode()->getOperand(isBigEndian ? 0 : 1), OffsetPtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), std::min(4U, St->getAlignment() / 2)); @@ -8388,7 +8620,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, DCI.AddToWorklist(V.getNode()); } EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -8677,8 +8909,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } MemIntrinsicSDNode *MemInt = cast(N); SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops.data(), Ops.size(), - MemInt->getMemoryVT(), + Ops, MemInt->getMemoryVT(), MemInt->getMemOperand()); // Update the uses. @@ -8751,7 +8982,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, - Ops, 2, VLDMemInt->getMemoryVT(), + Ops, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); // Update the uses. @@ -9349,7 +9580,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); + DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -9936,11 +10167,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return true; } -void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { unsigned BitWidth = KnownOne.getBitWidth(); KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { @@ -9956,11 +10187,11 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; @@ -10054,7 +10285,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. @@ -10133,7 +10364,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { - SDValue Result(0, 0); + SDValue Result; // Currently only support length 1 constraints. if (Constraint.length() != 1) return; @@ -10332,16 +10563,41 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); SDLoc dl(Op); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, - 0, getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair CallInfo = LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair CallInfo = LowerCallTo(CLI); return CallInfo.first; } +SDValue +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "unsupported target platform"); + SDLoc DL(Op); + + // Get the inputs. + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + + SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, + DAG.getConstant(2, MVT::i32)); + + SDValue Flag; + Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); + Flag = Chain.getValue(1); + + SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue); + Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); + + SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); + Chain = NewSP.getValue(1); + + SDValue Ops[2] = { NewSP, Chain }; + return DAG.getMergeValues(Ops, DL); +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -10499,14 +10755,20 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { // Loads and stores less than 64-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when - // things go wrong: - if (StoreInst *SI = dyn_cast(Inst)) - return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64; - else if (LoadInst *LI = dyn_cast(Inst)) - return LI->getType()->getPrimitiveSizeInBits() == 64; + // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit + // anything for those. + bool IsMClass = Subtarget->isMClass(); + if (StoreInst *SI = dyn_cast(Inst)) { + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return Size == 64 && !IsMClass; + } else if (LoadInst *LI = dyn_cast(Inst)) { + return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass; + } - // For the real atomic operations, we have ldrex/strex up to 64 bits. - return Inst->getType()->getPrimitiveSizeInBits() <= 64; + // For the real atomic operations, we have ldrex/strex up to 32 bits, + // and up to 64 bits on the non-M profiles + unsigned AtomicLimit = IsMClass ? 32 : 64; + return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit; } Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, @@ -10529,6 +10791,8 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); return Builder.CreateOr( @@ -10562,6 +10826,8 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); return Builder.CreateCall3(Strex, Lo, Hi, Addr); } @@ -10575,3 +10841,76 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Val, Strex->getFunctionType()->getParamType(0)), Addr); } + +enum HABaseType { + HA_UNKNOWN = 0, + HA_FLOAT, + HA_DOUBLE, + HA_VECT64, + HA_VECT128 +}; + +static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, + uint64_t &Members) { + if (const StructType *ST = dyn_cast(Ty)) { + for (unsigned i = 0; i < ST->getNumElements(); ++i) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) + return false; + Members += SubMembers; + } + } else if (const ArrayType *AT = dyn_cast(Ty)) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) + return false; + Members += SubMembers * AT->getNumElements(); + } else if (Ty->isFloatTy()) { + if (Base != HA_UNKNOWN && Base != HA_FLOAT) + return false; + Members = 1; + Base = HA_FLOAT; + } else if (Ty->isDoubleTy()) { + if (Base != HA_UNKNOWN && Base != HA_DOUBLE) + return false; + Members = 1; + Base = HA_DOUBLE; + } else if (const VectorType *VT = dyn_cast(Ty)) { + Members = 1; + switch (Base) { + case HA_FLOAT: + case HA_DOUBLE: + return false; + case HA_VECT64: + return VT->getBitWidth() == 64; + case HA_VECT128: + return VT->getBitWidth() == 128; + case HA_UNKNOWN: + switch (VT->getBitWidth()) { + case 64: + Base = HA_VECT64; + return true; + case 128: + Base = HA_VECT128; + return true; + default: + return false; + } + } + } + + return (Members > 0 && Members <= 4); +} + +/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. +bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { + if (getEffectiveCallingConv(CallConv, isVarArg) != + CallingConv::ARM_AAPCS_VFP) + return false; + + HABaseType Base = HA_UNKNOWN; + uint64_t Members = 0; + bool result = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + return result; +}