X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=3e53af42b897d74d455dfab0cb8b0d18d39517b5;hp=527bc399980a695d44536bd61313369d0ff31a84;hb=77327b8520d8d0b69f45a812c7d354bbead6c1c1;hpb=b87d142ba189d404b7eb60a2f81d27836e093b06 diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 527bc399980..3e53af42b89 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,9 +12,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-isel" #include "ARMISelLowering.h" -#include "ARM.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" @@ -38,29 +36,26 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include using namespace llvm; +#define DEBUG_TYPE "arm-isel" + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); -// This option should go away when tail calls fully work. -static cl::opt -EnableARMTailCalls("arm-tail-calls", cl::Hidden, - cl::desc("Generate tail calls (TEMPORARY OPTION)."), - cl::init(false)); - cl::opt EnableARMLongCalls("arm-long-calls", cl::Hidden, cl::desc("Generate calls via indirect call instructions"), @@ -87,7 +82,7 @@ namespace { } // The APCS parameter registers. -static const uint16_t GPRArgRegs[] = { +static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -156,29 +151,30 @@ void ARMTargetLowering::addDRTypeForNEON(MVT VT) { } void ARMTargetLowering::addQRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM::QPRRegClass); + addRegisterClass(VT, &ARM::DPairRegClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget().isTargetDarwin()) +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) return new TargetLoweringObjectFileMachO(); - + if (TT.isOSWindows()) + return new TargetLoweringObjectFileCOFF(); return new ARMElfTargetObjectFile(); } ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { Subtarget = &TM.getSubtarget(); - RegInfo = TM.getRegisterInfo(); - Itins = TM.getInstrItineraryData(); + RegInfo = TM.getSubtargetImpl()->getRegisterInfo(); + Itins = TM.getSubtargetImpl()->getInstrItineraryData(); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - if (Subtarget->isTargetIOS()) { + if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2() && - Subtarget->hasARMOps()) { + Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) { // Single-precision floating-point arithmetic. 
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); @@ -254,172 +250,135 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // These libcalls are not available in 32-bit. - setLibcallName(RTLIB::SHL_I128, 0); - setLibcallName(RTLIB::SRL_I128, 0); - setLibcallName(RTLIB::SRA_I128, 0); - - if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { - // Double-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 2 - setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); - setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); - setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); - setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); - setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); - - // Double-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 3 - setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); - setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); - setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); - setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); - setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); - setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); - setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); - setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); - - // Single-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 4 - setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); - setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); - setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); - setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); - setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); - - // Single-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 5 - setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); - setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); - setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); - setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); - setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); - setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); - 
setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); - setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); - setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); - - // Floating-point to integer conversions. - // RTABI chapter 4.1.2, Table 6 - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); - setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); - setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); - setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); - setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); - - // Conversions between floating types. - // RTABI chapter 4.1.2, Table 7 - setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); - setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); - setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); - - // Integer to floating-point conversions. 
- // RTABI chapter 4.1.2, Table 8 - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); - setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); - setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); - setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); - setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - - // Long long helper functions - // RTABI chapter 4.2, Table 9 - setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); - setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); - setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); - setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); - - // Integer division functions - // RTABI chapter 4.3.1 - setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); - setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - - // Memory operations - // RTABI chapter 4.3.4 - setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); - setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); - setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); - setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); + + if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() && + !Subtarget->isTargetWindows()) { + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + const ISD::CondCode Cond; + } LibraryCalls[] = { + // 
Double-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 2 + { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Double-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 3 + { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Single-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 4 + { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Single-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 5 + { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Floating-point to integer conversions. + // RTABI chapter 4.1.2, Table 6 + { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Conversions between floating types. + // RTABI chapter 4.1.2, Table 7 + { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer to floating-point conversions. 
+ // RTABI chapter 4.1.2, Table 8 + { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Long long helper functions + // RTABI chapter 4.2, Table 9 + { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer division functions + // RTABI chapter 4.3.1 + { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Memory operations + // RTABI chapter 4.3.4 + { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); + } + } + + if (Subtarget->isTargetWindows()) { + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + } LibraryCalls[] = { + { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + } } // Use divmod compiler-rt calls for iOS 5.0 and later. 
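The hunks above collapse long runs of individual setLibcallName / setLibcallCallingConv / setCmpLibcallCC calls into a single table walked by a loop. A minimal standalone sketch of that table-driven pattern follows; the Libcall/CallConv/CondCode enums and the setName/setCC/setCmpCC hooks are illustrative stand-ins for this sketch, not the real LLVM APIs.

#include <cstdio>

// Stand-ins for the RTLIB / CallingConv / ISD enums used in the patch.
enum class Libcall { ADD_F64, DIV_F64, OEQ_F64 };
enum class CallConv { ARM_AAPCS };
enum class CondCode { SETNE, SETEQ, INVALID };

// Hypothetical registration hooks; in ARMTargetLowering these correspond to
// setLibcallName, setLibcallCallingConv and setCmpLibcallCC.
static void setName(Libcall, const char *Name) { std::printf("name %s\n", Name); }
static void setCC(Libcall, CallConv) {}
static void setCmpCC(Libcall, CondCode) {}

int main() {
  // One row per libcall, mirroring the LibraryCalls[] table in the diff.
  static const struct {
    Libcall Op;
    const char *Name;
    CallConv CC;
    CondCode Cond; // INVALID means "not a comparison helper".
  } LibraryCalls[] = {
      {Libcall::ADD_F64, "__aeabi_dadd", CallConv::ARM_AAPCS, CondCode::INVALID},
      {Libcall::DIV_F64, "__aeabi_ddiv", CallConv::ARM_AAPCS, CondCode::INVALID},
      {Libcall::OEQ_F64, "__aeabi_dcmpeq", CallConv::ARM_AAPCS, CondCode::SETNE},
  };

  // A single loop replaces the hand-written call sequences.
  for (const auto &LC : LibraryCalls) {
    setName(LC.Op, LC.Name);
    setCC(LC.Op, LC.CC);
    if (LC.Cond != CondCode::INVALID)
      setCmpCC(LC.Op, LC.Cond);
  }
}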
@@ -438,8 +397,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::f32, &ARM::SPRRegClass); if (!Subtarget->isFPOnlySP()) addRegisterClass(MVT::f64, &ARM::DPRRegClass); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); } for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -451,6 +408,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); @@ -617,8 +581,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) computeRegisterProperties(); - // ARM does not have f32 extending load. + // ARM does not have floating-point extending loads. setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + // ... or truncating stores + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -638,6 +608,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + setOperationAction(ISD::SADDO, MVT::i32, Custom); + setOperationAction(ISD::UADDO, MVT::i32, Custom); + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + setOperationAction(ISD::USUBO, MVT::i32, Custom); + // i64 operation support. setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -733,39 +708,31 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (!Subtarget->isTargetDarwin()) { - // Non-Darwin platforms may return values in these registers via the + if (!Subtarget->isTargetMachO()) { + // Non-MachO platforms may return values in these registers via the // personality function. setExceptionPointerRegister(ARM::R0); setExceptionSelectorRegister(ARM::R1); } - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { - // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and - // handled normally. + // ATOMIC_FENCE needs custom lowering; the others should have been expanded + // to ldrex/strex loops already. 
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - // Custom lowering for 64-bit ops - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); + // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops()) { // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. setInsertFencesForAtomic(true); } - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); } else { // If there's anything we can use as a barrier, go through custom lowering // for ATOMIC_FENCE. @@ -863,13 +830,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } - // Special handling for half-precision FP. + + // v8 adds f64 <-> f16 conversion. Before that it should be expanded. + if (!Subtarget->hasV8Ops()) { + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + } + + // fp16 is a special v7 extension that adds f16 <-> f32 conversions. if (!Subtarget->hasFP16()) { - setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); - setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); } } - + // Combine sin / cos into one node or libcall if possible. if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); @@ -920,44 +894,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setMinFunctionAlignment(Subtarget->isThumb() ? 
1 : 2); } -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - bool isThumb2, unsigned &LdrOpc, - unsigned &StrOpc) { - static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB}, - {ARM::LDREXH, ARM::t2LDREXH}, - {ARM::LDREX, ARM::t2LDREX}, - {ARM::LDREXD, ARM::t2LDREXD}}; - static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB}, - {ARM::LDAEXH, ARM::t2LDAEXH}, - {ARM::LDAEX, ARM::t2LDAEX}, - {ARM::LDAEXD, ARM::t2LDAEXD}}; - static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB}, - {ARM::STREXH, ARM::t2STREXH}, - {ARM::STREX, ARM::t2STREX}, - {ARM::STREXD, ARM::t2STREXD}}; - static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB}, - {ARM::STLEXH, ARM::t2STLEXH}, - {ARM::STLEX, ARM::t2STLEX}, - {ARM::STLEXD, ARM::t2STLEXD}}; - - const unsigned (*LoadOps)[2], (*StoreOps)[2]; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; - - assert(isPowerOf2_32(Size) && Size <= 8 && - "unsupported size for atomic binary op!"); - - LdrOpc = LoadOps[Log2_32(Size)][isThumb2]; - StrOpc = StoreOps[Log2_32(Size)][isThumb2]; -} - // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently, @@ -970,7 +906,7 @@ static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, // and extractions. std::pair ARMTargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = 0; + const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: @@ -1007,7 +943,7 @@ ARMTargetLowering::findRepresentativeClass(MVT VT) const{ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case ARMISD::Wrapper: return "ARMISD::Wrapper"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; @@ -1063,6 +999,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; @@ -1078,10 +1016,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VSHL: return "ARMISD::VSHL"; case ARMISD::VSHRs: return "ARMISD::VSHRs"; case ARMISD::VSHRu: return "ARMISD::VSHRu"; - case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; - case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; - case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; - case ARMISD::VSHRN: return "ARMISD::VSHRN"; case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; @@ -1197,7 +1131,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Load are scheduled for latency even if there instruction itinerary // is not available. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); if (MCID.getNumDefs() == 0) @@ -1265,40 +1200,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, #include "ARMGenCallingConv.inc" -/// CCAssignFnForNode - Selects the correct CCAssignFn for a the -/// given CallingConvention value. -CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, - bool Return, - bool isVarArg) const { +/// getEffectiveCallingConv - Get the effective calling convention, taking into +/// account presence of floating point hardware and calling convention +/// limitations, such as support for variadic functions. +CallingConv::ID +ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - if (Subtarget->hasVFP2() && !isVarArg) { - if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); - // For AAPCS ABI targets, just use VFP variant of the calling convention. - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - } - // Fallthrough - case CallingConv::C: { - // Use target triple & subtarget features to do actual dispatch. + case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + case CallingConv::GHC: + return CC; + case CallingConv::ARM_AAPCS_VFP: + return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; + case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); - else if (Subtarget->hasVFP2() && + return CallingConv::ARM_APCS; + else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; + case CallingConv::Fast: + if (!Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + return CallingConv::Fast; + return CallingConv::ARM_APCS; + } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; } - case CallingConv::ARM_AAPCS_VFP: - if (!isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - // Fallthrough - case CallingConv::ARM_AAPCS: - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); +} + +/// CCAssignFnForNode - Selects the correct CCAssignFn for the given +/// CallingConvention. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, + bool Return, + bool isVarArg) const { + switch (getEffectiveCallingConv(CC, isVarArg)) { + default: + llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + case CallingConv::Fast: + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? 
RetCC_ARM_APCS : CC_ARM_APCS_GHC); } @@ -1347,6 +1300,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { @@ -1362,6 +1317,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, MVT::i32)); @@ -1412,16 +1369,17 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + unsigned id = Subtarget->isLittle() ? 0 : 1; + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); if (NextVA.isRegLoc()) - RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); else { assert(NextVA.isMemLoc()); - if (StackPtr.getNode() == 0) + if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, Flags)); } @@ -1449,14 +1407,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; bool isSibCall = false; + // Disable tail calls if they're not supported. - if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) + if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls) isTailCall = false; + if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); + if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { @@ -1601,7 +1564,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, - Ops, array_lengthof(Ops))); + Ops)); } } else if (!isSibCall) { assert(VA.isMemLoc()); @@ -1612,8 +1575,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. 
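The getEffectiveCallingConv / CCAssignFnForNode change a few hunks above splits calling-convention handling in two: first fold C, Fast and variadic ARM_AAPCS_VFP down to a concrete ARM convention using subtarget features, then map that effective convention to an assignment function. A reduced sketch of the two-step dispatch, using placeholder feature flags and strings in place of the real CC_ARM_* tables:

#include <cstdio>

enum class CC { C, Fast, ARM_APCS, ARM_AAPCS, ARM_AAPCS_VFP, GHC };

// Placeholders for the ARMSubtarget / TargetOptions queries used in the diff.
struct Features { bool AAPCS_ABI, HasVFP2, Thumb1Only, HardFloat; };

// Step 1: fold C / Fast / variadic ARM_AAPCS_VFP down to a concrete convention.
static CC effectiveCC(CC Conv, bool IsVarArg, const Features &F) {
  bool VFP = F.HasVFP2 && !F.Thumb1Only && !IsVarArg;
  switch (Conv) {
  case CC::ARM_APCS: case CC::ARM_AAPCS: case CC::GHC: return Conv;
  case CC::ARM_AAPCS_VFP: return IsVarArg ? CC::ARM_AAPCS : CC::ARM_AAPCS_VFP;
  case CC::C:
    if (!F.AAPCS_ABI) return CC::ARM_APCS;
    return (VFP && F.HardFloat) ? CC::ARM_AAPCS_VFP : CC::ARM_AAPCS;
  case CC::Fast:
    if (!F.AAPCS_ABI) return VFP ? CC::Fast : CC::ARM_APCS;
    return VFP ? CC::ARM_AAPCS_VFP : CC::ARM_AAPCS;
  }
  return Conv;
}

// Step 2: one switch keyed on the effective convention; the strings stand in
// for the RetCC_*/CC_* assignment functions chosen in CCAssignFnForNode.
static const char *assignFn(CC Conv, bool Return, bool IsVarArg, const Features &F) {
  switch (effectiveCC(Conv, IsVarArg, F)) {
  case CC::ARM_APCS:      return Return ? "RetCC_ARM_APCS" : "CC_ARM_APCS";
  case CC::ARM_AAPCS:     return Return ? "RetCC_ARM_AAPCS" : "CC_ARM_AAPCS";
  case CC::ARM_AAPCS_VFP: return Return ? "RetCC_ARM_AAPCS_VFP" : "CC_ARM_AAPCS_VFP";
  case CC::Fast:          return Return ? "RetFastCC_ARM_APCS" : "FastCC_ARM_APCS";
  case CC::GHC:           return Return ? "RetCC_ARM_APCS" : "CC_ARM_APCS_GHC";
  default:                return "unreachable";
  }
}

int main() {
  Features F{/*AAPCS_ABI=*/true, /*HasVFP2=*/true, /*Thumb1Only=*/false, /*HardFloat=*/true};
  std::printf("%s\n", assignFn(CC::C, /*Return=*/false, /*IsVarArg=*/false, F));
}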
@@ -1655,8 +1617,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMFunctionInfo *AFI = MF.getInfo(); if (EnableARMLongCalls) { - assert (getTargetMachine().getRelocationModel() == Reloc::Static - && "long-calls with non-static relocation model!"); + assert((Subtarget->isTargetWindows() || + getTargetMachine().getRelocationModel() == Reloc::Static) && + "long-calls with non-static relocation model!"); // Handle a global address or an external symbol. If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. @@ -1694,16 +1657,29 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const GlobalValue *GV = G->getGlobal(); isDirect = true; bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); - bool isStub = (isExt && Subtarget->isTargetDarwin()) && + bool isStub = (isExt && Subtarget->isTargetMachO()) && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { - assert(Subtarget->isTargetDarwin() && "WrapperPIC use on non-Darwin?"); + assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), DAG.getTargetGlobalAddress(GV, dl, getPointerTy())); + } else if (Subtarget->isTargetCOFF()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + unsigned TargetFlags = GV->hasDLLImportStorageClass() + ? ARMII::MO_DLLIMPORT + : ARMII::MO_NO_FLAG; + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0, + TargetFlags); + if (GV->hasDLLImportStorageClass()) + Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(), + Callee), MachinePointerInfo::getGOT(), + false, false, false, 0); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; @@ -1714,7 +1690,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { isDirect = true; - bool isStub = Subtarget->isTargetDarwin() && + bool isStub = Subtarget->isTargetMachO() && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; // tBX takes a register source operand. @@ -1745,7 +1721,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; - bool HasMinSizeAttr = Subtarget->isMinSize(); + bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -1776,7 +1753,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. 
if (!isTailCall) { const uint32_t *Mask; - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); const ARMBaseRegisterInfo *ARI = static_cast(TRI); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable @@ -1800,10 +1778,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) - return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -1830,22 +1808,6 @@ ARMTargetLowering::HandleByVal( State->getCallOrPrologue() == Call) && "unhandled ParmContext"); - // For in-prologue parameters handling, we also introduce stack offset - // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal. - // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how - // NSAA should be evaluted (NSAA means "next stacked argument address"). - // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs. - // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs. - unsigned NSAAOffset = State->getNextStackOffset(); - if (State->getCallOrPrologue() != Call) { - for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) { - unsigned RB, RE; - State->getInRegsParamInfo(i, RB, RE); - assert(NSAAOffset >= (RE-RB)*4 && - "Stack offset for byval regs doesn't introduced anymore?"); - NSAAOffset -= (RE-RB)*4; - } - } if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { if (Subtarget->isAAPCS_ABI() && Align > 4) { unsigned AlignInRegs = Align / 4; @@ -1860,6 +1822,7 @@ ARMTargetLowering::HandleByVal( // all remained GPR regs. In that case we can't split parameter, we must // send it to stack. We also must set NCRN to R4, so waste all // remained registers. + const unsigned NSAAOffset = State->getNextStackOffset(); if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { while (State->AllocateReg(GPRArgRegs, 4)) ; @@ -1879,18 +1842,14 @@ ARMTargetLowering::HandleByVal( // allocate remained amount of registers we need. for (unsigned i = reg+1; i != ByValRegEnd; ++i) State->AllocateReg(GPRArgRegs, 4); - // At a call site, a byval parameter that is split between - // registers and memory needs its size truncated here. In a - // function prologue, such byval parameters are reassembled in - // memory, and are not truncated. - if (State->getCallOrPrologue() == Call) { - // Make remained size equal to 0 in case, when - // the whole structure may be stored into registers. - if (size < excess) - size = 0; - else - size -= excess; - } + // A byval parameter that is split between registers and memory needs its + // size truncated here. + // In the case where the entire structure fits in registers, we set the + // size in memory to zero. + if (size < excess) + size = 0; + else + size -= excess; } } } @@ -2050,7 +2009,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. 
MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -2127,8 +2087,7 @@ static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); - return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); } SDValue @@ -2152,6 +2111,11 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + bool isLittleEndian = Subtarget->isLittle(); + + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo(); + AFI->setReturnRegsCount(RVLocs.size()); // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; @@ -2178,12 +2142,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + HalfGPRs.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - HalfGPRs.getValue(1), Flag); + HalfGPRs.getValue(isLittleEndian ? 1 : 0), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc @@ -2195,12 +2162,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + DAG.getVTList(MVT::i32, MVT::i32), Arg); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 
1 : 0), Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); @@ -2229,8 +2199,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return LowerInterruptReturn(RetOps, dl, DAG); } - return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2299,10 +2268,10 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { } bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) + if (!Subtarget->supportsTailCall()) return false; - if (!CI->isTailCall()) + if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls) return false; return !Subtarget->isThumb1Only(); @@ -2392,13 +2361,14 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Entry.Node = Argument; Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); + // FIXME: is there useful debug info available here? - TargetLowering::CallLoweringInfo CLI(Chain, - (Type *) Type::getInt32Ty(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), + DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args), + 0); + std::pair CallResult = LowerCallTo(CLI); return CallResult.first; } @@ -2505,7 +2475,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, // If we have T2 ops, we can materialize the address directly via movt/movw // pair. This is always cheaper. - if (Subtarget->useMovt()) { + if (Subtarget->useMovt(DAG.getMachineFunction())) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. @@ -2527,7 +2497,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, const GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - if (Subtarget->useMovt()) + if (Subtarget->useMovt(DAG.getMachineFunction())) ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register @@ -2544,6 +2514,32 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, return Result; } +SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); + assert(Subtarget->useMovt(DAG.getMachineFunction()) && + "Windows on ARM expects to use movw/movt"); + + const GlobalValue *GV = cast(Op)->getGlobal(); + const ARMII::TOF TargetFlags = + (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG); + EVT PtrVT = getPointerTy(); + SDValue Result; + SDLoc DL(Op); + + ++NumMovwMovt; + + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes. 
+ Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0, + TargetFlags)); + if (GV->hasDLLImportStorageClass()) + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, false, 0); + return Result; +} + SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && @@ -2589,6 +2585,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::arm_rbit: { + assert(Op.getOperand(0).getValueType() == MVT::i32 && + "RBIT intrinsic must have i32 type!"); + return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0)); + } case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -2732,7 +2733,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - + if (!Subtarget->isLittle()) + std::swap (ArgValue, ArgValue2); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } @@ -2756,15 +2758,18 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; } - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); ArgRegsSize = NumGPRs * 4; // If parameter is split between stack and GPRs... - if (NumGPRs && Align == 8 && + if (NumGPRs && Align > 4 && (ArgRegsSize < ArgSize || InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { - // Add padding for part of param recovered from GPRs, so - // its last byte must be at address K*8 - 1. + // Add padding for part of param recovered from GPRs. For example, + // if Align == 8, its last byte must be at address K*8 - 1. // We need to do it, since remained (stack) part of parameter has // stack alignment, and we need to "attach" "GPRs head" without gaps // to it: @@ -2774,8 +2779,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, // ARMFunctionInfo *AFI = MF.getInfo(); unsigned Padding = - ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) - - (ArgRegsSize + AFI->getArgRegsSaveSize()); + OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align); ArgRegsSaveSize = ArgRegsSize + Padding; } else // We don't need to extend regs save size for byval parameters if they @@ -2799,7 +2803,9 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, unsigned OffsetFromOrigArg, unsigned ArgOffset, unsigned ArgSize, - bool ForceMutable) const { + bool ForceMutable, + unsigned ByValStoreOffset, + unsigned TotalArgRegsSaveSize) const { // Currently, two use-cases possible: // Case #1. Non-var-args function, and we meet first byval parameter. @@ -2836,7 +2842,6 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, // Note: once stack area for byval/varargs registers // was initialized, it can't be initialized again. 
if (ArgRegsSaveSize) { - unsigned Padding = ArgRegsSaveSize - ArgRegsSize; if (Padding) { @@ -2845,11 +2850,18 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setStoredByValParamsPadding(Padding); } - int FrameIndex = MFI->CreateFixedObject( - ArgRegsSaveSize, - Padding + ArgOffset, - false); + int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize, + Padding + + ByValStoreOffset - + (int64_t)TotalArgRegsSaveSize, + false); SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); + if (Padding) { + MFI->CreateFixedObject(Padding, + ArgOffset + ByValStoreOffset - + (int64_t)ArgRegsSaveSize, + false); + } SmallVector MemOps; for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; @@ -2874,13 +2886,18 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return FrameIndex; - } else + } else { + if (ArgSize == 0) { + // We cannot allocate a zero-byte object for the first variadic argument, + // so just make up a size. + ArgSize = 4; + } // This will point to the next argument passed via stack. return MFI->CreateFixedObject( - 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable); + ArgSize, ArgOffset, !ForceMutable); + } } // Setup stack frame, the va_list pointer will start from. @@ -2888,6 +2905,7 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, unsigned ArgOffset, + unsigned TotalArgRegsSaveSize, bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -2898,8 +2916,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // If there is no regs to be stored, just point address after last // argument passed via stack. int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, 0, ForceMutable); + StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, + CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable, + 0, TotalArgRegsSaveSize); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -2936,6 +2955,51 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // We also increase this value in case of varargs function. AFI->setArgRegsSaveSize(0); + unsigned ByValStoreOffset = 0; + unsigned TotalArgRegsSaveSize = 0; + unsigned ArgRegsSaveSizeMaxAlign = 4; + + // Calculate the amount of stack space that we need to allocate to store + // byval and variadic arguments that are passed in registers. + // We need to know this before we allocate the first byval or variadic + // argument, as they will be allocated a stack slot below the CFA (Canonical + // Frame Address, the stack pointer at entry to the function). 
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isMemLoc()) { + int index = VA.getValNo(); + if (index != lastInsIndex) { + ISD::ArgFlagsTy Flags = Ins[index].Flags; + if (Flags.isByVal()) { + unsigned ExtraArgRegsSize; + unsigned ExtraArgRegsSaveSize; + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(), + Flags.getByValSize(), + ExtraArgRegsSize, ExtraArgRegsSaveSize); + + TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign) + ArgRegsSaveSizeMaxAlign = Flags.getByValAlign(); + CCInfo.nextInRegsParam(); + } + lastInsIndex = index; + } + } + } + CCInfo.rewindByValRegsInfo(); + lastInsIndex = -1; + if (isVarArg) { + unsigned ExtraArgRegsSize; + unsigned ExtraArgRegsSaveSize; + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, + ExtraArgRegsSize, ExtraArgRegsSaveSize); + TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + } + // If the arg regs save area contains N-byte aligned values, the + // bottom of it must be at least N-byte aligned. + TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign); + TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); @@ -3034,18 +3098,23 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // a tail call. if (Flags.isByVal()) { unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); + + ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); int FrameIndex = StoreByValRegs( CCInfo, DAG, dl, Chain, CurOrigArg, CurByValIndex, Ins[VA.getValNo()].PartOffset, VA.getLocMemOffset(), Flags.getByValSize(), - true /*force mutable frames*/); + true /*force mutable frames*/, + ByValStoreOffset, + TotalArgRegsSaveSize); + ByValStoreOffset += Flags.getByValSize(); + ByValStoreOffset = std::min(ByValStoreOffset, 16U); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); CCInfo.nextInRegsParam(); } else { - unsigned FIOffset = VA.getLocMemOffset() + - AFI->getStoredByValParamsPadding(); + unsigned FIOffset = VA.getLocMemOffset(); int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, FIOffset, true); @@ -3063,7 +3132,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // varargs if (isVarArg) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, - CCInfo.getNextStackOffset()); + CCInfo.getNextStackOffset(), + TotalArgRegsSaveSize); + + AFI->setArgumentStackSize(CCInfo.getNextStackOffset()); return Chain; } @@ -3177,11 +3249,96 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } +std::pair +ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, + SDValue &ARMcc) const { + assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); + + SDValue Value, OverflowCmp; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + + // FIXME: We are currently always generating CMPs because we don't support + // generating CMN through the backend. This is not as good as the natural + // CMP case because it causes a register dependency and cannot be folded + // later. 
+ + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::SADDO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::UADDO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::SSUBO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + case ISD::USUBO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + } // switch (...) + + return std::make_pair(Value, OverflowCmp); +} + + +SDValue +ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + // We use 0 and 1 as false and true values. + SDValue TVal = DAG.getConstant(1, MVT::i32); + SDValue FVal = DAG.getConstant(0, MVT::i32); + EVT VT = Op.getValueType(); + + SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal, + ARMcc, CCR, OverflowCmp); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); +} + + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); SDLoc dl(Op); + unsigned Opc = Cond.getOpcode(); + + if (Cond.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO)) { + if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + EVT VT = Op.getValueType(); + + return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, + ARMcc, CCR, OverflowCmp); + + } // Convert: // @@ -3483,7 +3640,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; - return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); + return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); } return SDValue(); @@ -3523,11 +3680,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; - SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, MVT::i32); SDValue Ops[] = { Res, Dest, ARMcc, CCR, 
Res.getValue(1) }; - Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); } return Res; } @@ -3724,7 +3881,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp1, 1).getValue(1); + Tmp1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); // Or in the signbit with integer operations. @@ -3740,7 +3897,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // f64: Or the high part with signbit and then combine two parts. Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp0, 1); + Tmp0); SDValue Lo = Tmp0.getValue(0); SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); @@ -3772,14 +3929,16 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + const ARMBaseRegisterInfo &ARI = + *static_cast(RegInfo); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) - ? ARM::R7 : ARM::R11; + unsigned FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -3788,6 +3947,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned ARMTargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + unsigned Reg = StringSwitch(RegName) + .Case("sp", ARM::SP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -3817,8 +3988,15 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + SDValue Cvt; + if (TLI.isBigEndian() && SrcVT.isVector() && + SrcVT.getVectorNumElements() > 1) + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), + DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); + else + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Op); // Merge the pieces into a single i64 value. 
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } @@ -3874,7 +4052,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two @@ -3908,7 +4086,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, @@ -4113,7 +4291,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; - Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); @@ -4336,7 +4514,6 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, // Value = 0x0000nnff: Op=x, Cmode=1100. OpCmode = 0xc; Imm = SplatBits >> 8; - SplatBits |= 0xff; break; } @@ -4345,7 +4522,6 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, // Value = 0x00nnffff: Op=x, Cmode=1101. OpCmode = 0xd; Imm = SplatBits >> 16; - SplatBits |= 0xffff; break; } @@ -4374,9 +4550,13 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, BitMask <<= 8; ImmMask <<= 1; } + + if (DAG.getTargetLoweringInfo().isBigEndian()) + // swap higher and lower 32 bit word + Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); + // Op=1, Cmode=1110. OpCmode = 0x1e; - SplatBits = Val; VT = is128Bits ? 
MVT::v2i64 : MVT::v1i64; break; } @@ -4873,7 +5053,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(N); Ops.push_back(Op.getOperand(I)); Ops.push_back(DAG.getConstant(I, MVT::i32)); - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); } } return N; @@ -4884,7 +5064,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); @@ -4920,7 +5100,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5227,12 +5407,10 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, if (V2.getNode()->getOpcode() == ISD::UNDEF) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, @@ -5385,7 +5563,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(ShuffleMask[i] & (NumElts-1), MVT::i32))); } - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5577,7 +5755,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // operation legalization where we can't create illegal types. return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), + LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(), LD->isNonTemporal(), LD->getAlignment()); } @@ -5622,7 +5800,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); + MVT::getVectorVT(TruncVT, NumElts), Ops); } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { @@ -5960,12 +6138,12 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { ? 
"__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, - CallingConv::C, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/false, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, + std::move(Args), 0) + .setDiscardResult(); + std::pair CallResult = LowerCallTo(CLI); SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, @@ -5987,40 +6165,11 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { if (cast(Op)->getOrdering() <= Monotonic) return Op; - // Aquire/Release load/store is not legal for targets without a + // Acquire/Release load/store is not legal for targets without a // dmb or equivalent available. return SDValue(); } -static void -ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl& Results, - SelectionDAG &DAG) { - SDLoc dl(Node); - assert (Node->getValueType(0) == MVT::i64 && - "Only know how to expand i64 atomics"); - AtomicSDNode *AN = cast(Node); - - SmallVector Ops; - Ops.push_back(Node->getOperand(0)); // Chain - Ops.push_back(Node->getOperand(1)); // Ptr - for(unsigned i=2; igetNumOperands(); i++) { - // Low part - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(i), DAG.getIntPtrConstant(0))); - // High part - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(i), DAG.getIntPtrConstant(1))); - } - SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); - SDValue Result = - DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(), - cast(Node)->getMemOperand(), AN->getOrdering(), - AN->getSynchScope()); - SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); - Results.push_back(Result.getValue(2)); -} - static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, @@ -6041,8 +6190,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, }; Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, - DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], - array_lengthof(Ops)); + DAG.getVTList(MVT::i32, MVT::Other), Ops); OutChain = Cycles32.getValue(1); } else { // Intrinsic is defined to return 0 on unsupported platforms. Technically @@ -6065,8 +6213,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: - return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : - LowerGlobalAddressELF(Op, DAG); + switch (Subtarget->getTargetTriple().getObjectFormat()) { + default: llvm_unreachable("unknown object format"); + case Triple::COFF: + return LowerGlobalAddressWindows(Op, DAG); + case Triple::ELF: + return LowerGlobalAddressELF(Op, DAG); + case Triple::MachO: + return LowerGlobalAddressDarwin(Op, DAG); + } case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -6111,11 +6266,20 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + return LowerXALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + return LowerDYNAMIC_STACKALLOC(Op, DAG); + llvm_unreachable("Don't know how to custom lower this!"); } } @@ -6138,22 +6302,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; - case ISD::ATOMIC_STORE: - case ISD::ATOMIC_LOAD: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMAX: - ReplaceATOMIC_OP_64(N, Results, DAG); - return; } if (Res.getNode()) Results.push_back(Res); @@ -6163,544 +6311,13 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, // ARM Scheduler Hooks //===----------------------------------------------------------------------===// -MachineBasicBlock * -ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - AtomicOrdering Ord = static_cast(MI->getOperand(4).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - unsigned scratch = MRI.createVirtualRegister(isThumb2 ? 
- (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); - - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(oldval, &ARM::rGPRRegClass); - MRI.constrainRegClass(newval, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldrex dest, [ptr] - // cmp dest, oldval - // bne exitMBB - BB = loop1MBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) - .addReg(dest).addReg(oldval)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex scratch, newval, [ptr] - // cmp scratch, #0 - // bne loop1MBB - BB = loop2MBB; - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(incr, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrex dest, ptr - // scratch2, dest, incr - // strex scratch, scratch2, ptr - // cmp scratch, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - if (BinOpcode) { - // operand order needs to go the other way for NAND - if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) - AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). - addReg(incr).addReg(dest)).addReg(0); - else - AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). - addReg(dest).addReg(incr)).addReg(0); - } - - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - bool signExtend, - ARMCC::CondCodes Cond) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - unsigned oldval = dest; - AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(incr, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc, extendOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!"); - case 1: - extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; - break; - case 2: - extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; - break; - case 4: - extendOpc = 0; - break; - } - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned scratch2 = MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrex dest, ptr - // (sign extend dest, if required) - // cmp dest, incr - // cmov.cond scratch2, incr, dest - // strex scratch, scratch2, ptr - // cmp scratch, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - - // Sign extend the value, if necessary. - if (signExtend && extendOpc) { - oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass - : &ARM::GPRnopcRegClass); - if (!isThumb2) - MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) - .addReg(dest) - .addImm(0)); - } - - // Build compare and cmov instructions. - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) - .addReg(oldval).addReg(incr)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) - .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); - - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Op1, unsigned Op2, - bool NeedsCarry, bool IsCmpxchg, - bool IsMinMax, ARMCC::CondCodes CC) const { - // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64); - unsigned offset = (isStore ? -2 : 0); - unsigned destlo = MI->getOperand(0).getReg(); - unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(offset+2).getReg(); - unsigned vallo = MI->getOperand(offset+3).getReg(); - unsigned valhi = MI->getOperand(offset+4).getReg(); - unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5); - AtomicOrdering Ord = static_cast(MI->getOperand(OrdIdx).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); - MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(vallo, &ARM::rGPRRegClass); - MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *contBB = 0, *cont2BB = 0; - if (IsCmpxchg || IsMinMax) - contBB = MF->CreateMachineBasicBlock(LLVM_BB); - if (IsCmpxchg) - cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - - MF->insert(It, loopMBB); - if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); - if (IsCmpxchg) MF->insert(It, cont2BB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned storesuccess = MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrexd r2, r3, ptr - // r0, r2, incr - // r1, r3, incr - // strexd storesuccess, r0, r1, ptr - // cmp storesuccess, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - - if (!isStore) { - // Load - if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr)); - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(GPRPair0, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) 
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - } - } - - unsigned StoreLo, StoreHi; - if (IsCmpxchg) { - // Add early exit - for (unsigned i = 0; i < 2; i++) { - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : - ARM::CMPrr)) - .addReg(i == 0 ? destlo : desthi) - .addReg(i == 0 ? vallo : valhi)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(exitMBB); - BB->addSuccessor(i == 0 ? contBB : cont2BB); - BB = (i == 0 ? contBB : cont2BB); - } - - // Copy to physregs for strexd - StoreLo = MI->getOperand(5).getReg(); - StoreHi = MI->getOperand(6).getReg(); - } else if (Op1) { - // Perform binary operation - unsigned tmpRegLo = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo) - .addReg(destlo).addReg(vallo)) - .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); - unsigned tmpRegHi = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) - .addReg(desthi).addReg(valhi)) - .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); - - StoreLo = tmpRegLo; - StoreHi = tmpRegHi; - } else { - // Copy to physregs for strexd - StoreLo = vallo; - StoreHi = valhi; - } - if (IsMinMax) { - // Compare and branch to exit block. - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); - BB->addSuccessor(exitMBB); - BB->addSuccessor(contBB); - BB = contBB; - StoreLo = vallo; - StoreHi = valhi; - } - - // Store - if (isThumb2) { - MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); - MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); - } else { - // Marshal a pair... - unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(UndefPair) - .addReg(StoreLo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) - .addReg(r1) - .addReg(StoreHi) - .addImm(ARM::gsub_1); - - // ...and store it - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(StorePair).addReg(ptr)); - } - // Cmp+jump - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(storesuccess).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const { - - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - unsigned destlo = MI->getOperand(0).getReg(); - unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); - MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - } - unsigned ldrOpc, strOpc; - getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); - - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); - - if (isThumb2) { - MIB.addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr); - - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - MIB.addReg(GPRPair0, RegState::Define).addReg(ptr); - - // Copy GPRPair0 into dest. (This copy will normally be coalesced.) - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - } - AddDefaultPred(MIB); - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. void ARMTargetLowering:: SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6815,7 +6432,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *ARMTargetLowering:: EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -7149,7 +6767,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // N.B. the order the invoke BBs are processed in doesn't matter here. - const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); + const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector MBBLPads; for (SmallPtrSet::iterator I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { @@ -7332,7 +6950,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; @@ -7346,8 +6965,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnitSize = 0; - const TargetRegisterClass *TRC = 0; - const TargetRegisterClass *VecTRC = 0; + const TargetRegisterClass *TRC = nullptr; + const TargetRegisterClass *VecTRC = nullptr; bool IsThumb1 = Subtarget->isThumb1Only(); bool IsThumb2 = Subtarget->isThumb2(); @@ -7381,7 +7000,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, ? (const TargetRegisterClass *)&ARM::DPairRegClass : UnitSize == 8 ? (const TargetRegisterClass *)&ARM::DPRRegClass - : 0; + : nullptr; unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -7449,8 +7068,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // Load an immediate to varEnd. @@ -7563,10 +7181,77 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, return BB; } +MachineBasicBlock * +ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const TargetMachine &TM = getTargetMachine(); + const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + assert(Subtarget->isTargetWindows() && + "__chkstk is only supported on Windows"); + assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode"); + + // __chkstk takes the number of words to allocate on the stack in R4, and + // returns the stack adjustment in number of bytes in R4. This will not + // clobber any other registers (other than the obvious lr). + // + // Although, technically, IP should be considered a register which may be + // clobbered, the call itself will not touch it. Windows on ARM is a pure + // Thumb-2 environment, so there is no interworking required. As a result, we + // do not expect a veneer to be emitted by the linker, clobbering IP. + // + // Each module receives its own copy of __chkstk, so no import thunk is + // required, again, ensuring that IP is not clobbered. + // + // Finally, although some linkers may theoretically provide a trampoline for + // out-of-range calls (which is quite common due to a 32M range limitation of + // branches for Thumb), we can generate the long-call version via + // -mcmodel=large, alleviating the need for the trampoline which may clobber + IP.
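(Illustrative aside, not part of the committed patch.) The rationale above covers the __chkstk call emitted for Windows on ARM; the code-model switch that actually builds the call follows just below. The kind of source that reaches this path is a dynamically sized stack allocation when targeting thumbv7-windows; this is a minimal sketch, and fill is a made-up helper:

extern void fill(char *buf, unsigned n);

unsigned digest(unsigned n) {
  // Size is unknown at compile time, so this becomes DYNAMIC_STACKALLOC and,
  // on Windows/ARM, the WIN__CHKSTK pseudo expanded by EmitLowered__chkstk.
  char *buf = static_cast<char *>(__builtin_alloca(n));
  fill(buf, n);
  return static_cast<unsigned char>(buf[0]);
}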
+ + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + case CodeModel::Large: + case CodeModel::JITDefault: { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); + + BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) + .addExternalSymbol("__chkstk"); + BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(Reg, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + } + } + + AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP).addReg(ARM::R4))); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { @@ -7626,131 +7311,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->eraseFromParent(); return BB; } - case ARM::ATOMIC_LOAD_ADD_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - case ARM::ATOMIC_LOAD_ADD_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - case ARM::ATOMIC_LOAD_ADD_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - - case ARM::ATOMIC_LOAD_AND_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_LOAD_AND_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_LOAD_AND_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - - case ARM::ATOMIC_LOAD_OR_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_OR_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_OR_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - - case ARM::ATOMIC_LOAD_XOR_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_XOR_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_XOR_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - - case ARM::ATOMIC_LOAD_NAND_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - case ARM::ATOMIC_LOAD_NAND_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - case ARM::ATOMIC_LOAD_NAND_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - - case ARM::ATOMIC_LOAD_SUB_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); - case ARM::ATOMIC_LOAD_SUB_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); - case ARM::ATOMIC_LOAD_SUB_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); - - case ARM::ATOMIC_LOAD_MIN_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MIN_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MIN_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); - - case ARM::ATOMIC_LOAD_MAX_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); - case ARM::ATOMIC_LOAD_MAX_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); - case ARM::ATOMIC_LOAD_MAX_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); - - case ARM::ATOMIC_LOAD_UMIN_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMIN_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMIN_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); - - case ARM::ATOMIC_LOAD_UMAX_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); - case ARM::ATOMIC_LOAD_UMAX_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); - case ARM::ATOMIC_LOAD_UMAX_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); - - case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); - case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); - case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); - - case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); - case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); - case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); - - case ARM::ATOMIC_LOAD_I64: - return EmitAtomicLoad64(MI, BB); - - case ARM::ATOMIC_LOAD_ADD_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, - isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, - /*NeedsCarry*/ true); - case ARM::ATOMIC_LOAD_SUB_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true); - case ARM::ATOMIC_LOAD_OR_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, - isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_XOR_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, - isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_AND_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, - isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_STORE_I64: - case ARM::ATOMIC_SWAP_I64: - return EmitAtomicBinary64(MI, BB, 0, 0, false); - case ARM::ATOMIC_CMP_SWAP_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ false, /*IsCmpxchg*/true); - case ARM::ATOMIC_LOAD_MIN_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MAX_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::GE); - case ARM::ATOMIC_LOAD_UMIN_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMAX_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::HS); case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the @@ -7776,8 +7336,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(copy0MBB); @@ -7810,7 +7369,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::BCCi64: case ARM::BCCZi64: { // If there is an unconditional branch to the other successor, remove it. - BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); + BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end()); // Compare both parts that make up the double comparison separately for // equality. @@ -7895,8 +7454,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to sinkMBB. SinkBB->splice(SinkBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); SinkBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(RSBBB); @@ -7939,6 +7497,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::COPY_STRUCT_BYVAL_I32: ++NumLoopByVals; return EmitStructByval(MI, BB); + case ARM::WIN__CHKSTK: + return EmitLowered__chkstk(MI, BB); } } @@ -7961,8 +7521,8 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); if (NewOpc) { - const ARMBaseInstrInfo *TII = - static_cast(getTargetMachine().getInstrInfo()); + const ARMBaseInstrInfo *TII = static_cast( + getTargetMachine().getSubtargetImpl()->getInstrInfo()); MCID = &TII->get(NewOpc); assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && @@ -8229,7 +7789,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Get widened type and narrowed type. MVT widenType; unsigned numElem = VT.getVectorNumElements(); - switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { + + EVT inputLaneType = Vec.getValueType().getVectorElementType(); + switch (inputLaneType.getSimpleVT().SimpleTy) { case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; @@ -8237,9 +7799,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, llvm_unreachable("Invalid vector element type for padd optimization."); } - SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), - widenType, &Ops[0], Ops.size()); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp); + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops); + unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? 
ISD::ANY_EXTEND : ISD::TRUNCATE; + return DAG.getNode(ExtOp, SDLoc(N), VT, tmp); } static SDValue findMUL_LOHI(SDValue V) { @@ -8297,7 +7859,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); - if (AddeNode == NULL) + if (!AddeNode) return SDValue(); // Make sure it is really an ADDE. @@ -8332,9 +7894,9 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Figure out the high and low input values to the MLAL node. SDValue* HiMul = &MULOp; - SDValue* HiAdd = NULL; - SDValue* LoMul = NULL; - SDValue* LowAdd = NULL; + SDValue* HiAdd = nullptr; + SDValue* LoMul = nullptr; + SDValue* LowAdd = nullptr; if (IsLeftOperandMUL) HiAdd = &AddeOp1; @@ -8351,7 +7913,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, LowAdd = &AddcOp0; } - if (LoMul == NULL) + if (!LoMul) return SDValue(); if (LoMul->getNode() != HiMul->getNode()) @@ -8368,8 +7930,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, Ops.push_back(*HiAdd); SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), - DAG.getVTList(MVT::i32, MVT::i32), - &Ops[0], Ops.size()); + DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the MLA node's values. SDValue HiMLALResult(MLALNode.getNode(), 1); @@ -8893,9 +8454,9 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); + if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) + std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); - DCI.RemoveFromWorklist(LD); - DAG.DeleteNode(LD); return Result; } @@ -8960,7 +8521,8 @@ static SDValue PerformSTORECombine(SDNode *N, SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + for (unsigned i = 0; i < NumElems; ++i) + ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); @@ -9006,8 +8568,7 @@ static SDValue PerformSTORECombine(SDNode *N, Increment); Chains.push_back(Ch); } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], - Chains.size()); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } if (!ISD::isNormalStore(St)) @@ -9018,16 +8579,18 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; + bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, - StVal.getNode()->getOperand(0), BasePtr, - St->getPointerInfo(), St->isVolatile(), + StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), + BasePtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, MVT::i32)); - return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), + return DAG.getStore(NewST1.getValue(0), DL, + StVal.getNode()->getOperand(isBigEndian ? 
0 : 1), OffsetPtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), std::min(4U, St->getAlignment() / 2)); @@ -9056,7 +8619,7 @@ static SDValue PerformSTORECombine(SDNode *N, return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment(), - St->getTBAAInfo()); + St->getAAInfo()); } /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node @@ -9103,7 +8666,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, DCI.AddToWorklist(V.getNode()); } EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -9186,7 +8749,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Fold obvious case. V = V.getOperand(0); else { - V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); + V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(V.getNode()); } @@ -9382,7 +8945,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Tys[n] = VecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); + SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs+2)); SmallVector Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); @@ -9392,8 +8955,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } MemIntrinsicSDNode *MemInt = cast(N); SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops.data(), Ops.size(), - MemInt->getMemoryVT(), + Ops, MemInt->getMemoryVT(), MemInt->getMemOperand()); // Update the uses. @@ -9462,11 +9024,11 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); + SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, - Ops, 2, VLDMemInt->getMemoryVT(), + Ops, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); // Update the uses. @@ -9715,9 +9277,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { // loads from a constant pool. 
case Intrinsic::arm_neon_vshifts: case Intrinsic::arm_neon_vshiftu: - case Intrinsic::arm_neon_vshiftls: - case Intrinsic::arm_neon_vshiftlu: - case Intrinsic::arm_neon_vshiftn: case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: case Intrinsic::arm_neon_vrshiftn: @@ -9748,12 +9307,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { } return SDValue(); - case Intrinsic::arm_neon_vshiftls: - case Intrinsic::arm_neon_vshiftlu: - if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) - break; - llvm_unreachable("invalid shift count for vshll intrinsic"); - case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) @@ -9771,7 +9324,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { break; llvm_unreachable("invalid shift count for vqshlu intrinsic"); - case Intrinsic::arm_neon_vshiftn: case Intrinsic::arm_neon_vrshiftn: case Intrinsic::arm_neon_vqshiftns: case Intrinsic::arm_neon_vqshiftnu: @@ -9794,16 +9346,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { case Intrinsic::arm_neon_vshiftu: // Opcode already set above. break; - case Intrinsic::arm_neon_vshiftls: - case Intrinsic::arm_neon_vshiftlu: - if (Cnt == VT.getVectorElementType().getSizeInBits()) - VShiftOpc = ARMISD::VSHLLi; - else - VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? - ARMISD::VSHLLs : ARMISD::VSHLLu); - break; - case Intrinsic::arm_neon_vshiftn: - VShiftOpc = ARMISD::VSHRN; break; case Intrinsic::arm_neon_vrshifts: VShiftOpc = ARMISD::VRSHRs; break; case Intrinsic::arm_neon_vrshiftu: @@ -10084,7 +9626,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); + DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -10167,7 +9709,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { +bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); @@ -10189,7 +9734,7 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const case MVT::v2f64: { // For any little-endian targets with neon, we can support unaligned ld/st // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. 
- // A big-endian target may also explictly support unaligned accesses + // A big-endian target may also explicitly support unaligned accesses if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { if (Fast) *Fast = true; @@ -10221,11 +9766,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && + Fast))) { return MVT::f64; } } @@ -10670,11 +10216,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return true; } -void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { unsigned BitWidth = KnownOne.getBitWidth(); KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { @@ -10690,15 +10236,29 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; } + case ISD::INTRINSIC_W_CHAIN: { + ConstantSDNode *CN = cast(Op->getOperand(1)); + Intrinsic::ID IntID = static_cast(CN->getZExtValue()); + switch (IntID) { + default: return; + case Intrinsic::arm_ldaex: + case Intrinsic::arm_ldrex: { + EVT VT = cast(Op)->getMemoryVT(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + return; + } + } + } } } @@ -10774,7 +10334,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. @@ -10853,7 +10413,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { - SDValue Result(0, 0); + SDValue Result; // Currently only support length 1 constraints. 
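[Editor's note] The new ISD::INTRINSIC_W_CHAIN case records that a narrow exclusive load (ldrex/ldaex of i8 or i16) zero-extends into its 32-bit result, so the high bits are known zero, while the CMOV case keeps only bits known identically on both operands. The snippet below models that reasoning with plain 32-bit masks instead of APInt; it is a sketch of the idea, not the SelectionDAG known-bits interface.

#include <cassert>
#include <cstdint>
#include <iostream>

// Illustrative model of the known-bits reasoning above, using uint32_t masks
// in place of APInt.
struct KnownBits32 {
  uint32_t Zero = 0; // bits known to be 0
  uint32_t One = 0;  // bits known to be 1
};

// A zero-extending exclusive load of MemBits bits into a 32-bit register
// leaves the top (32 - MemBits) bits known zero, e.g. ldrexb -> 0xFFFFFF00.
KnownBits32 knownBitsForNarrowLoad(unsigned MemBits) {
  KnownBits32 K;
  if (MemBits < 32)
    K.Zero = ~((1u << MemBits) - 1u);
  return K;
}

// A two-way select (CMOV) only keeps the bits that are known identically on
// both arms, so the known-zero and known-one sets are intersected.
KnownBits32 mergeSelect(KnownBits32 A, KnownBits32 B) {
  return {A.Zero & B.Zero, A.One & B.One};
}

int main() {
  KnownBits32 Byte = knownBitsForNarrowLoad(8);
  assert(Byte.Zero == 0xFFFFFF00u);
  KnownBits32 Half = knownBitsForNarrowLoad(16);
  KnownBits32 M = mergeSelect(Byte, Half); // only bits 31..16 stay known zero
  assert(M.Zero == 0xFFFF0000u);
  std::cout << "known-bits sketch holds\n";
}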
if (Constraint.length() != 1) return; @@ -11052,16 +10612,41 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); SDLoc dl(Op); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, - 0, getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair CallInfo = LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair CallInfo = LowerCallTo(CLI); return CallInfo.first; } +SDValue +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "unsupported target platform"); + SDLoc DL(Op); + + // Get the inputs. + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + + SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, + DAG.getConstant(2, MVT::i32)); + + SDValue Flag; + Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); + Flag = Chain.getValue(1); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); + + SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); + Chain = NewSP.getValue(1); + + SDValue Ops[2] = { NewSP, Chain }; + return DAG.getMergeValues(Ops, DL); +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -11147,6 +10732,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -11159,6 +10745,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } + case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -11171,6 +10758,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; @@ -11182,6 +10770,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldaexd: case Intrinsic::arm_ldrexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; @@ -11199,3 +10788,183 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } + +/// \brief Returns true if it is beneficial to convert a load of a constant +/// to just the constant itself. +bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned Bits = Ty->getPrimitiveSizeInBits(); + if (Bits == 0 || Bits > 32) + return false; + return true; +} + +bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { + // Loads and stores less than 64-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit + // anything for those. 
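[Editor's note] LowerDYNAMIC_STACKALLOC above hands the allocation size to the Windows stack-probe sequence as a count of 4-byte words in r4 (the SRL by 2 followed by the CopyToReg), then reads the adjusted SP back as the result. The tiny sketch below restates just the size-to-words step; the function name is hypothetical, and the assumption that the byte size is already stack-aligned is mine, not stated by the patch.

#include <cassert>
#include <cstdint>
#include <iostream>

// Sketch of the size computation performed by the Windows dynamic-alloca
// lowering: the stack-probe helper expects the allocation size as a number
// of 4-byte words, hence the shift right by two.
uint32_t allocationSizeInWords(uint32_t SizeInBytes) {
  // Assumes SizeInBytes is already a multiple of the stack alignment, so the
  // shift does not drop any bytes.
  return SizeInBytes >> 2;
}

int main() {
  assert(allocationSizeInWords(64) == 16);
  assert(allocationSizeInWords(4096) == 1024);
  std::cout << "stack-probe word count sketch holds\n";
}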
+ bool IsMClass = Subtarget->isMClass(); + if (StoreInst *SI = dyn_cast(Inst)) { + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return Size == 64 && !IsMClass; + } else if (LoadInst *LI = dyn_cast(Inst)) { + return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass; + } + + // For the real atomic operations, we have ldrex/strex up to 32 bits, + // and up to 64 bits on the non-M profiles + unsigned AtomicLimit = IsMClass ? 32 : 64; + return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit; +} + +// This has so far only been implemented for MachO. +bool ARMTargetLowering::useLoadStackGuardNode() const { + return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO; +} + +Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Type *ValTy = cast(Addr->getType())->getElementType(); + bool IsAcquire = + Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd + // intrinsic must return {i32, i32} and we have to recombine them into a + // single i64 here. + if (ValTy->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); + + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); + + Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); + Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); + Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); + Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); + return Builder.CreateOr( + Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); + } + + Type *Tys[] = { Addr->getType() }; + Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateTruncOrBitCast( + Builder.CreateCall(Ldrex, Addr), + cast(Addr->getType())->getElementType()); +} + +Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, + AtomicOrdering Ord) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + bool IsRelease = + Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since the intrinsics must have legal type, the i64 intrinsics take two + // parameters: "i32, i32". We must marshal Val into the appropriate form + // before the call. + if (Val->getType()->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; + Function *Strex = Intrinsic::getDeclaration(M, Int); + Type *Int32Ty = Type::getInt32Ty(M->getContext()); + + Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); + Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + return Builder.CreateCall3(Strex, Lo, Hi, Addr); + } + + Intrinsic::ID Int = IsRelease ? 
Intrinsic::arm_stlex : Intrinsic::arm_strex; + Type *Tys[] = { Addr->getType() }; + Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateCall2( + Strex, Builder.CreateZExtOrBitCast( + Val, Strex->getFunctionType()->getParamType(0)), + Addr); +} + +enum HABaseType { + HA_UNKNOWN = 0, + HA_FLOAT, + HA_DOUBLE, + HA_VECT64, + HA_VECT128 +}; + +static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, + uint64_t &Members) { + if (const StructType *ST = dyn_cast(Ty)) { + for (unsigned i = 0; i < ST->getNumElements(); ++i) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) + return false; + Members += SubMembers; + } + } else if (const ArrayType *AT = dyn_cast(Ty)) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) + return false; + Members += SubMembers * AT->getNumElements(); + } else if (Ty->isFloatTy()) { + if (Base != HA_UNKNOWN && Base != HA_FLOAT) + return false; + Members = 1; + Base = HA_FLOAT; + } else if (Ty->isDoubleTy()) { + if (Base != HA_UNKNOWN && Base != HA_DOUBLE) + return false; + Members = 1; + Base = HA_DOUBLE; + } else if (const VectorType *VT = dyn_cast(Ty)) { + Members = 1; + switch (Base) { + case HA_FLOAT: + case HA_DOUBLE: + return false; + case HA_VECT64: + return VT->getBitWidth() == 64; + case HA_VECT128: + return VT->getBitWidth() == 128; + case HA_UNKNOWN: + switch (VT->getBitWidth()) { + case 64: + Base = HA_VECT64; + return true; + case 128: + Base = HA_VECT128; + return true; + default: + return false; + } + } + } + + return (Members > 0 && Members <= 4); +} + +/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. +bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { + if (getEffectiveCallingConv(CallConv, isVarArg) != + CallingConv::ARM_AAPCS_VFP) + return false; + + HABaseType Base = HA_UNKNOWN; + uint64_t Members = 0; + bool result = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + return result; +}
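[Editor's note] isHomogeneousAggregate flattens structs and arrays and accepts the type only when every leaf shares one base type (float, double, or a 64/128-bit vector) and there are at most four leaves, which is what AAPCS-VFP requires for passing an aggregate in consecutive VFP registers. The self-contained sketch below reproduces that rule over a deliberately simplified type model covering only float and double leaves; the Ty struct and helper names are invented for illustration and are not llvm::Type.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

enum class Base { Unknown, Float, Double };

// Simplified stand-in for an IR type: a scalar leaf, a struct of members, or
// an array repeating its first member ArrayCount times.
struct Ty {
  bool IsFloat, IsDouble;
  std::vector<Ty> Members;   // non-empty => struct-like aggregate (or array element)
  uint64_t ArrayCount;       // >0 => array whose element type is Members[0]
};

// Flatten the type, counting scalar leaves and checking they all share one
// base type (the real code also handles 64/128-bit NEON vector leaves).
static bool classifyHA(const Ty &T, Base &B, uint64_t &Count) {
  if (T.IsFloat || T.IsDouble) {
    Base Leaf = T.IsFloat ? Base::Float : Base::Double;
    if (B != Base::Unknown && B != Leaf)
      return false;            // mixed float/double is not homogeneous
    B = Leaf;
    ++Count;
    return true;
  }
  if (T.ArrayCount) {
    uint64_t Sub = 0;
    if (!classifyHA(T.Members[0], B, Sub))
      return false;
    Count += Sub * T.ArrayCount;
    return true;
  }
  for (const Ty &M : T.Members)
    if (!classifyHA(M, B, Count))
      return false;
  return true;
}

// An HA must have one to four identically-typed members after flattening.
bool isHomogeneousAggregateSketch(const Ty &T) {
  Base B = Base::Unknown;
  uint64_t Count = 0;
  return classifyHA(T, B, Count) && Count >= 1 && Count <= 4;
}

int main() {
  Ty F{true, false, {}, 0}, D{false, true, {}, 0};
  Ty Pair{false, false, {F, F}, 0};            // {float, float}  -> HA
  Ty Arr3{false, false, {D}, 3};               // double[3]       -> HA
  Ty Mixed{false, false, {F, D}, 0};           // {float, double} -> not HA
  Ty Big{false, false, {F, F, F, F, F}, 0};    // five floats     -> not HA
  assert(isHomogeneousAggregateSketch(Pair));
  assert(isHomogeneousAggregateSketch(Arr3));
  assert(!isHomogeneousAggregateSketch(Mixed));
  assert(!isHomogeneousAggregateSketch(Big));
  std::cout << "HA classification sketch holds\n";
}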