X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=3e53af42b897d74d455dfab0cb8b0d18d39517b5;hp=96942ec8b7dab580fd5dd144399ba75a4b8adf33;hb=77327b8520d8d0b69f45a812c7d354bbead6c1c1;hpb=268c743a3ba44ada364938bc5ff9b1be219df54f diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 96942ec8b7d..3e53af42b89 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,9 +12,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-isel" #include "ARMISelLowering.h" -#include "ARM.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" @@ -38,28 +36,26 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" +#include using namespace llvm; +#define DEBUG_TYPE "arm-isel" + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); -// This option should go away when tail calls fully work. -static cl::opt -EnableARMTailCalls("arm-tail-calls", cl::Hidden, - cl::desc("Generate tail calls (TEMPORARY OPTION)."), - cl::init(false)); - cl::opt EnableARMLongCalls("arm-long-calls", cl::Hidden, cl::desc("Generate calls via indirect call instructions"), @@ -86,7 +82,7 @@ namespace { } // The APCS parameter registers. -static const uint16_t GPRArgRegs[] = { +static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -155,28 +151,30 @@ void ARMTargetLowering::addDRTypeForNEON(MVT VT) { } void ARMTargetLowering::addQRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM::QPRRegClass); + addRegisterClass(VT, &ARM::DPairRegClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget().isTargetDarwin()) +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) return new TargetLoweringObjectFileMachO(); - + if (TT.isOSWindows()) + return new TargetLoweringObjectFileCOFF(); return new ARMElfTargetObjectFile(); } ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { Subtarget = &TM.getSubtarget(); - RegInfo = TM.getRegisterInfo(); - Itins = TM.getInstrItineraryData(); + RegInfo = TM.getSubtargetImpl()->getRegisterInfo(); + Itins = TM.getSubtargetImpl()->getInstrItineraryData(); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - if (Subtarget->isTargetDarwin()) { + if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. - if (Subtarget->isThumb() && Subtarget->hasVFP2()) { + if (Subtarget->isThumb() && Subtarget->hasVFP2() && + Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) { // Single-precision floating-point arithmetic. 
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); @@ -252,172 +250,135 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // These libcalls are not available in 32-bit. - setLibcallName(RTLIB::SHL_I128, 0); - setLibcallName(RTLIB::SRL_I128, 0); - setLibcallName(RTLIB::SRA_I128, 0); - - if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { - // Double-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 2 - setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); - setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); - setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); - setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); - setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); - - // Double-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 3 - setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); - setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); - setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); - setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); - setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); - setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); - setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); - setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); - - // Single-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 4 - setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); - setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); - setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); - setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); - setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); - - // Single-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 5 - setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); - setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); - setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); - setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); - setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); - setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); - 
setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); - setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); - setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); - - // Floating-point to integer conversions. - // RTABI chapter 4.1.2, Table 6 - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); - setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); - setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); - setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); - setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); - - // Conversions between floating types. - // RTABI chapter 4.1.2, Table 7 - setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); - setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); - setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); - - // Integer to floating-point conversions. 
- // RTABI chapter 4.1.2, Table 8 - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); - setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); - setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); - setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); - setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - - // Long long helper functions - // RTABI chapter 4.2, Table 9 - setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); - setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); - setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); - setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); - - // Integer division functions - // RTABI chapter 4.3.1 - setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); - setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - - // Memory operations - // RTABI chapter 4.3.4 - setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); - setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); - setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); - setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); + + if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() && + !Subtarget->isTargetWindows()) { + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + const ISD::CondCode Cond; + } LibraryCalls[] = { + // 
Double-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 2 + { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Double-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 3 + { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Single-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 4 + { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Single-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 5 + { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Floating-point to integer conversions. + // RTABI chapter 4.1.2, Table 6 + { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Conversions between floating types. + // RTABI chapter 4.1.2, Table 7 + { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer to floating-point conversions. 
+ // RTABI chapter 4.1.2, Table 8 + { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Long long helper functions + // RTABI chapter 4.2, Table 9 + { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer division functions + // RTABI chapter 4.3.1 + { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Memory operations + // RTABI chapter 4.3.4 + { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); + } + } + + if (Subtarget->isTargetWindows()) { + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + } LibraryCalls[] = { + { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + } } // Use divmod compiler-rt calls for iOS 5.0 and later. 
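A minimal standalone sketch of what the soft-float tables above amount to at the C++ level. The __aeabi_* prototypes are assumptions taken from RTABI chapter 4.1.2 rather than from this patch, and f64_oeq/f64_add are hypothetical helpers for illustration. The Cond column records how the integer result of a comparison helper must be tested: __aeabi_dcmpeq returns non-zero when its operands are equal, so RTLIB::OEQ_F64 is resolved by testing that result SETNE against zero, while plain arithmetic helpers carry ISD::SETCC_INVALID and need no such test.

extern "C" int __aeabi_dcmpeq(double a, double b);  // assumed: non-zero iff a == b
extern "C" double __aeabi_dadd(double a, double b); // assumed: soft-float f64 add

static int f64_oeq(double a, double b) {
  // mirrors setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE)
  return __aeabi_dcmpeq(a, b) != 0;
}

static double f64_add(double a, double b) {
  // arithmetic helpers need only a name and a calling convention
  return __aeabi_dadd(a, b);
}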
@@ -436,8 +397,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) addRegisterClass(MVT::f32, &ARM::SPRRegClass); if (!Subtarget->isFPOnlySP()) addRegisterClass(MVT::f64, &ARM::DPRRegClass); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); } for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -449,6 +408,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); @@ -565,16 +531,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); - // Custom expand long extensions to vectors. - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); - // NEON does not have single instruction CTPOP for vectors with element // types wider than 8-bits. However, custom lowering can leverage the // v8i8/v16i8 vcnt instruction. @@ -625,8 +581,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) computeRegisterProperties(); - // ARM does not have f32 extending load. + // ARM does not have floating-point extending loads. setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + + // ... or truncating stores + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -646,6 +608,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + setOperationAction(ISD::SADDO, MVT::i32, Custom); + setOperationAction(ISD::UADDO, MVT::i32, Custom); + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + setOperationAction(ISD::USUBO, MVT::i32, Custom); + // i64 operation support. setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -741,45 +708,38 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (!Subtarget->isTargetDarwin()) { - // Non-Darwin platforms may return values in these registers via the + if (!Subtarget->isTargetMachO()) { + // Non-MachO platforms may return values in these registers via the // personality function. 
setExceptionPointerRegister(ARM::R0); setExceptionSelectorRegister(ARM::R1); } - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. - // FIXME: This should be checking for v6k, not just v6. - if (Subtarget->hasDataBarrier() || - (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { - // membarrier needs custom lowering; the rest are legal and handled - // normally. - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - // Custom lowering for 64-bit ops - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); + if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { + // ATOMIC_FENCE needs custom lowering; the others should have been expanded + // to ldrex/strex loops already. + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops()) { // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. setInsertFencesForAtomic(true); } - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); - //setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom); } else { + // If there's anything we can use as a barrier, go through custom lowering + // for ATOMIC_FENCE. + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, + Subtarget->hasAnyDataBarrier() ? Custom : Expand); + // Set them all for expansion, which will force libcalls. - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); @@ -870,10 +830,29 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } - // Special handling for half-precision FP. + + // v8 adds f64 <-> f16 conversion. Before that it should be expanded. + if (!Subtarget->hasV8Ops()) { + setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); + } + + // fp16 is a special v7 extension that adds f16 <-> f32 conversions. if (!Subtarget->hasFP16()) { - setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); - setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); + setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); + } + } + + // Combine sin / cos into one node or libcall if possible. 
+ if (Subtarget->hasSinCos()) { + setLibcallName(RTLIB::SINCOS_F32, "sincosf"); + setLibcallName(RTLIB::SINCOS_F64, "sincos"); + if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { + // For iOS, we don't want to the normal expansion of a libcall to + // sincos. We want to issue a libcall to __sincos_stret. + setOperationAction(ISD::FSINCOS, MVT::f64, Custom); + setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } } @@ -915,44 +894,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - bool isThumb2, unsigned &LdrOpc, - unsigned &StrOpc) { - static const unsigned LoadBares[4][2] = {{ARM::LDREXB, ARM::t2LDREXB}, - {ARM::LDREXH, ARM::t2LDREXH}, - {ARM::LDREX, ARM::t2LDREX}, - {ARM::LDREXD, ARM::t2LDREXD}}; - static const unsigned LoadAcqs[4][2] = {{ARM::LDAEXB, ARM::t2LDAEXB}, - {ARM::LDAEXH, ARM::t2LDAEXH}, - {ARM::LDAEX, ARM::t2LDAEX}, - {ARM::LDAEXD, ARM::t2LDAEXD}}; - static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB}, - {ARM::STREXH, ARM::t2STREXH}, - {ARM::STREX, ARM::t2STREX}, - {ARM::STREXD, ARM::t2STREXD}}; - static const unsigned StoreRels[4][2] = {{ARM::STLEXB, ARM::t2STLEXB}, - {ARM::STLEXH, ARM::t2STLEXH}, - {ARM::STLEX, ARM::t2STLEX}, - {ARM::STLEXD, ARM::t2STLEXD}}; - - const unsigned (*LoadOps)[2], (*StoreOps)[2]; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; - - assert(isPowerOf2_32(Size) && Size <= 8 && - "unsupported size for atomic binary op!"); - - LdrOpc = LoadOps[Log2_32(Size)][isThumb2]; - StrOpc = StoreOps[Log2_32(Size)][isThumb2]; -} - // FIXME: It might make sense to define the representative register class as the // nearest super-register that has a non-null superset. For example, DPR_VFP2 is // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently, @@ -965,7 +906,7 @@ static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, // and extractions. 
std::pair ARMTargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = 0; + const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: @@ -1002,9 +943,8 @@ ARMTargetLowering::findRepresentativeClass(MVT VT) const{ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case ARMISD::Wrapper: return "ARMISD::Wrapper"; - case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; case ARMISD::CALL: return "ARMISD::CALL"; @@ -1015,6 +955,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BR_JT: return "ARMISD::BR_JT"; case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; + case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; case ARMISD::CMN: return "ARMISD::CMN"; @@ -1058,6 +999,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; @@ -1073,10 +1016,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VSHL: return "ARMISD::VSHL"; case ARMISD::VSHRs: return "ARMISD::VSHRs"; case ARMISD::VSHRu: return "ARMISD::VSHRu"; - case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; - case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; - case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; - case ARMISD::VSHRN: return "ARMISD::VSHRN"; case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; @@ -1192,7 +1131,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Load are scheduled for latency even if there instruction itinerary // is not available. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); if (MCID.getNumDefs() == 0) @@ -1260,40 +1200,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, #include "ARMGenCallingConv.inc" -/// CCAssignFnForNode - Selects the correct CCAssignFn for a the -/// given CallingConvention value. -CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, - bool Return, - bool isVarArg) const { +/// getEffectiveCallingConv - Get the effective calling convention, taking into +/// account presence of floating point hardware and calling convention +/// limitations, such as support for variadic functions. +CallingConv::ID +ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - if (Subtarget->hasVFP2() && !isVarArg) { - if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); - // For AAPCS ABI targets, just use VFP variant of the calling convention. - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - } - // Fallthrough - case CallingConv::C: { - // Use target triple & subtarget features to do actual dispatch. 
+ case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + case CallingConv::GHC: + return CC; + case CallingConv::ARM_AAPCS_VFP: + return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; + case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); - else if (Subtarget->hasVFP2() && + return CallingConv::ARM_APCS; + else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; + case CallingConv::Fast: + if (!Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + return CallingConv::Fast; + return CallingConv::ARM_APCS; + } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; } - case CallingConv::ARM_AAPCS_VFP: - if (!isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - // Fallthrough - case CallingConv::ARM_AAPCS: - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); +} + +/// CCAssignFnForNode - Selects the correct CCAssignFn for the given +/// CallingConvention. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, + bool Return, + bool isVarArg) const { + switch (getEffectiveCallingConv(CC, isVarArg)) { + default: + llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + case CallingConv::Fast: + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); } @@ -1342,6 +1300,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { @@ -1357,6 +1317,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, MVT::i32)); @@ -1407,16 +1369,17 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + unsigned id = Subtarget->isLittle() ? 
0 : 1; + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); if (NextVA.isRegLoc()) - RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); else { assert(NextVA.isMemLoc()); - if (StackPtr.getNode() == 0) + if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, Flags)); } @@ -1444,14 +1407,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; bool isSibCall = false; + // Disable tail calls if they're not supported. - if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) + if (!Subtarget->supportsTailCall() || MF.getTarget().Options.DisableTailCalls) isTailCall = false; + if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); + if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { @@ -1569,7 +1537,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), - false, false, false, 0); + false, false, false, + DAG.InferPtrAlignment(AddArg)); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } @@ -1595,7 +1564,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, - Ops, array_lengthof(Ops))); + Ops)); } } else if (!isSibCall) { assert(VA.isMemLoc()); @@ -1606,8 +1575,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. @@ -1649,8 +1617,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMFunctionInfo *AFI = MF.getInfo(); if (EnableARMLongCalls) { - assert (getTargetMachine().getRelocationModel() == Reloc::Static - && "long-calls with non-static relocation model!"); + assert((Subtarget->isTargetWindows() || + getTargetMachine().getRelocationModel() == Reloc::Static) && + "long-calls with non-static relocation model!"); // Handle a global address or an external symbol. If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. 
@@ -1688,25 +1657,29 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const GlobalValue *GV = G->getGlobal(); isDirect = true; bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); - bool isStub = (isExt && Subtarget->isTargetDarwin()) && + bool isStub = (isExt && Subtarget->isTargetMachO()) && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. - if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { - unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Callee = DAG.getNode(ARMISD::PIC_ADD, dl, - getPointerTy(), Callee, PICLabel); + if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); + Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), + DAG.getTargetGlobalAddress(GV, dl, getPointerTy())); + } else if (Subtarget->isTargetCOFF()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + unsigned TargetFlags = GV->hasDLLImportStorageClass() + ? ARMII::MO_DLLIMPORT + : ARMII::MO_NO_FLAG; + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0, + TargetFlags); + if (GV->hasDLLImportStorageClass()) + Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(), + Callee), MachinePointerInfo::getGOT(), + false, false, false, 0); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; @@ -1717,7 +1690,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { isDirect = true; - bool isStub = Subtarget->isTargetDarwin() && + bool isStub = Subtarget->isTargetMachO() && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; // tBX takes a register source operand. @@ -1748,8 +1721,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; - bool HasMinSizeAttr = MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -1778,34 +1751,37 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. 
- const uint32_t *Mask; - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARMBaseRegisterInfo *ARI = static_cast(TRI); - if (isThisReturn) { - // For 'this' returns, use the R0-preserving mask if applicable - Mask = ARI->getThisReturnPreservedMask(CallConv); - if (!Mask) { - // Set isThisReturn to false if the calling convention is not one that - // allows 'returned' to be modeled in this way, so LowerCallResult does - // not try to pass 'this' straight through - isThisReturn = false; + if (!isTailCall) { + const uint32_t *Mask; + const TargetRegisterInfo *TRI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); + const ARMBaseRegisterInfo *ARI = static_cast(TRI); + if (isThisReturn) { + // For 'this' returns, use the R0-preserving mask if applicable + Mask = ARI->getThisReturnPreservedMask(CallConv); + if (!Mask) { + // Set isThisReturn to false if the calling convention is not one that + // allows 'returned' to be modeled in this way, so LowerCallResult does + // not try to pass 'this' straight through + isThisReturn = false; + Mask = ARI->getCallPreservedMask(CallConv); + } + } else Mask = ARI->getCallPreservedMask(CallConv); - } - } else - Mask = ARI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + } if (InFlag.getNode()) Ops.push_back(InFlag); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) - return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -1832,22 +1808,6 @@ ARMTargetLowering::HandleByVal( State->getCallOrPrologue() == Call) && "unhandled ParmContext"); - // For in-prologue parameters handling, we also introduce stack offset - // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal. - // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how - // NSAA should be evaluted (NSAA means "next stacked argument address"). - // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs. - // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs. - unsigned NSAAOffset = State->getNextStackOffset(); - if (State->getCallOrPrologue() != Call) { - for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) { - unsigned RB, RE; - State->getInRegsParamInfo(i, RB, RE); - assert(NSAAOffset >= (RE-RB)*4 && - "Stack offset for byval regs doesn't introduced anymore?"); - NSAAOffset -= (RE-RB)*4; - } - } if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { if (Subtarget->isAAPCS_ABI() && Align > 4) { unsigned AlignInRegs = Align / 4; @@ -1862,6 +1822,7 @@ ARMTargetLowering::HandleByVal( // all remained GPR regs. In that case we can't split parameter, we must // send it to stack. We also must set NCRN to R4, so waste all // remained registers. + const unsigned NSAAOffset = State->getNextStackOffset(); if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { while (State->AllocateReg(GPRArgRegs, 4)) ; @@ -1881,18 +1842,14 @@ ARMTargetLowering::HandleByVal( // allocate remained amount of registers we need. 
for (unsigned i = reg+1; i != ByValRegEnd; ++i) State->AllocateReg(GPRArgRegs, 4); - // At a call site, a byval parameter that is split between - // registers and memory needs its size truncated here. In a - // function prologue, such byval parameters are reassembled in - // memory, and are not truncated. - if (State->getCallOrPrologue() == Call) { - // Make remained size equal to 0 in case, when - // the whole structure may be stored into registers. - if (size < excess) - size = 0; - else - size -= excess; - } + // A byval parameter that is split between registers and memory needs its + // size truncated here. + // In the case where the entire structure fits in registers, we set the + // size in memory to zero. + if (size < excess) + size = 0; + else + size -= excess; } } } @@ -1966,6 +1923,12 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isVarArg && !Outs.empty()) return false; + // Exception-handling functions need a special set of instructions to indicate + // a return to the hardware. Tail-calling another function would probably + // break this. + if (CallerF->hasFnAttribute("interrupt")) + return false; + // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. if (isCalleeStructRet || isCallerStructRet) @@ -2046,7 +2009,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -2094,6 +2058,38 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, isVarArg)); } +static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, + SDLoc DL, SelectionDAG &DAG) { + const MachineFunction &MF = DAG.getMachineFunction(); + const Function *F = MF.getFunction(); + + StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); + + // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset + // version of the "preferred return address". These offsets affect the return + // instruction if this is a return from PL1 without hypervisor extensions. + // IRQ/FIQ: +4 "subs pc, lr, #4" + // SWI: 0 "subs pc, lr, #0" + // ABORT: +4 "subs pc, lr, #4" + // UNDEF: +4/+2 "subs pc, lr, #0" + // UNDEF varies depending on where the exception came from ARM or Thumb + // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. + + int64_t LROffset; + if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || + IntKind == "ABORT") + LROffset = 4; + else if (IntKind == "SWI" || IntKind == "UNDEF") + LROffset = 0; + else + report_fatal_error("Unsupported interrupt attribute. 
If present, value " + "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); + + RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); + + return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); +} + SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -2115,6 +2111,11 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + bool isLittleEndian = Subtarget->isLittle(); + + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo(); + AFI->setReturnRegsCount(RVLocs.size()); // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; @@ -2141,12 +2142,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + HalfGPRs.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - HalfGPRs.getValue(1), Flag); + HalfGPRs.getValue(isLittleEndian ? 1 : 0), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc @@ -2158,12 +2162,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + DAG.getVTList(MVT::i32, MVT::i32), Arg); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); @@ -2179,8 +2186,20 @@ ARMTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, - RetOps.data(), RetOps.size()); + // CPUs which aren't M-class use a special sequence to return from + // exceptions (roughly, any instruction setting pc and cpsr simultaneously, + // though we use "subs pc, lr, #N"). + // + // M-class CPUs actually use a normal return sequence with a special + // (hardware-provided) value in LR, so the normal code path works. 
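For illustration only: a handler that reaches this path is one carrying the "interrupt" IR function attribute. Assuming Clang's ARM spelling __attribute__((interrupt("IRQ"))) maps to "interrupt"="IRQ" (irq_handler below is hypothetical), the epilogue on an A/R-class core becomes "subs pc, lr, #4" via ARMISD::INTRET_FLAG, Thumb1 is rejected with a fatal error, and M-class cores keep the ordinary return sequence.

__attribute__((interrupt("IRQ")))
void irq_handler(void) {
  // acknowledge and service the interrupt; the special return is emitted
  // by the lowering below, not written by hand
}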
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && + !Subtarget->isMClass()) { + if (Subtarget->isThumb1Only()) + report_fatal_error("interrupt attribute is not supported in Thumb1"); + return LowerInterruptReturn(RetOps, dl, DAG); + } + + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2235,7 +2254,8 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { bool HasRet = false; for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); UI != UE; ++UI) { - if (UI->getOpcode() != ARMISD::RET_FLAG) + if (UI->getOpcode() != ARMISD::RET_FLAG && + UI->getOpcode() != ARMISD::INTRET_FLAG) return false; HasRet = true; } @@ -2248,10 +2268,10 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { } bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) + if (!Subtarget->supportsTailCall()) return false; - if (!CI->isTailCall()) + if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls) return false; return !Subtarget->isThumb1Only(); @@ -2341,13 +2361,14 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Entry.Node = Argument; Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); + // FIXME: is there useful debug info available here? - TargetLowering::CallLoweringInfo CLI(Chain, - (Type *) Type::getInt32Ty(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), + DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args), + 0); + std::pair CallResult = LowerCallTo(CLI); return CallResult.first; } @@ -2454,7 +2475,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, // If we have T2 ops, we can materialize the address directly via movt/movw // pair. This is always cheaper. - if (Subtarget->useMovt()) { + if (Subtarget->useMovt(DAG.getMachineFunction())) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. @@ -2476,56 +2497,46 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, const GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - // FIXME: Enable this for static codegen when tool issues are fixed. Also - // update ARMFastISel::ARMMaterializeGV. - if (Subtarget->useMovt() && RelocM != Reloc::Static) { + if (Subtarget->useMovt(DAG.getMachineFunction())) ++NumMovwMovt; - // FIXME: Once remat is capable of dealing with instructions with register - // operands, expand this into two nodes. - if (RelocM == Reloc::Static) - return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT)); - - unsigned Wrapper = (RelocM == Reloc::PIC_) - ? 
ARMISD::WrapperPIC : ARMISD::WrapperDYN; - SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT)); - if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) - Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), - false, false, false, 0); - return Result; - } - - unsigned ARMPCLabelIndex = 0; - SDValue CPAddr; - if (RelocM == Reloc::Static) { - CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); - } else { - ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo(); - ARMPCLabelIndex = AFI->createPICLabelUId(); - unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, - PCAdj); - CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - } - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue Chain = Result.getValue(1); + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into multiple nodes + unsigned Wrapper = + RelocM == Reloc::PIC_ ? ARMISD::WrapperPIC : ARMISD::Wrapper; - if (RelocM == Reloc::PIC_) { - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); - } + SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY); + SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), - false, false, false, 0); + Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, false, 0); + return Result; +} + +SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); + assert(Subtarget->useMovt(DAG.getMachineFunction()) && + "Windows on ARM expects to use movw/movt"); + + const GlobalValue *GV = cast(Op)->getGlobal(); + const ARMII::TOF TargetFlags = + (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG); + EVT PtrVT = getPointerTy(); + SDValue Result; + SDLoc DL(Op); + + ++NumMovwMovt; + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes. + Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0, + TargetFlags)); + if (GV->hasDLLImportStorageClass()) + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, false, 0); return Result; } @@ -2574,6 +2585,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::arm_rbit: { + assert(Op.getOperand(0).getValueType() == MVT::i32 && + "RBIT intrinsic must have i32 type!"); + return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0)); + } case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -2622,7 +2638,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. 
assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && - "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); + "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!"); return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, MVT::i32)); } @@ -2717,7 +2733,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - + if (!Subtarget->isLittle()) + std::swap (ArgValue, ArgValue2); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } @@ -2741,15 +2758,18 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; } - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); ArgRegsSize = NumGPRs * 4; // If parameter is split between stack and GPRs... - if (NumGPRs && Align == 8 && + if (NumGPRs && Align > 4 && (ArgRegsSize < ArgSize || InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { - // Add padding for part of param recovered from GPRs, so - // its last byte must be at address K*8 - 1. + // Add padding for part of param recovered from GPRs. For example, + // if Align == 8, its last byte must be at address K*8 - 1. // We need to do it, since remained (stack) part of parameter has // stack alignment, and we need to "attach" "GPRs head" without gaps // to it: @@ -2759,8 +2779,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, // ARMFunctionInfo *AFI = MF.getInfo(); unsigned Padding = - ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) - - (ArgRegsSize + AFI->getArgRegsSaveSize()); + OffsetToAlignment(ArgRegsSize + AFI->getArgRegsSaveSize(), Align); ArgRegsSaveSize = ArgRegsSize + Padding; } else // We don't need to extend regs save size for byval parameters if they @@ -2784,10 +2803,12 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, unsigned OffsetFromOrigArg, unsigned ArgOffset, unsigned ArgSize, - bool ForceMutable) const { + bool ForceMutable, + unsigned ByValStoreOffset, + unsigned TotalArgRegsSaveSize) const { // Currently, two use-cases possible: - // Case #1. Non var-args function, and we meet first byval parameter. + // Case #1. Non-var-args function, and we meet first byval parameter. // Setup first unallocated register as first byval register; // eat all remained registers // (these two actions are performed by HandleByVal method). @@ -2821,7 +2842,6 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, // Note: once stack area for byval/varargs registers // was initialized, it can't be initialized again. 
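// A minimal sketch of what the OffsetToAlignment call above computes: the
// padding needed to push a size up to the next multiple of Align. For a
// power-of-two Align it equals the expression this patch replaces,
// ((Value + Align - 1) & ~(Align - 1)) - Value. Illustrative only, not part
// of the patch.
static unsigned paddingToAlignment(unsigned Value, unsigned Align) {
  return (Align - (Value % Align)) % Align;
}
// e.g. paddingToAlignment(12, 8) == 4: a 12-byte GPR head is padded so the
// register-save area ends on an 8-byte boundary, as the comment above asks.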
if (ArgRegsSaveSize) { - unsigned Padding = ArgRegsSaveSize - ArgRegsSize; if (Padding) { @@ -2830,11 +2850,18 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setStoredByValParamsPadding(Padding); } - int FrameIndex = MFI->CreateFixedObject( - ArgRegsSaveSize, - Padding + ArgOffset, - false); + int FrameIndex = MFI->CreateFixedObject(ArgRegsSaveSize, + Padding + + ByValStoreOffset - + (int64_t)TotalArgRegsSaveSize, + false); SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); + if (Padding) { + MFI->CreateFixedObject(Padding, + ArgOffset + ByValStoreOffset - + (int64_t)ArgRegsSaveSize, + false); + } SmallVector MemOps; for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; @@ -2859,13 +2886,18 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return FrameIndex; - } else + } else { + if (ArgSize == 0) { + // We cannot allocate a zero-byte object for the first variadic argument, + // so just make up a size. + ArgSize = 4; + } // This will point to the next argument passed via stack. return MFI->CreateFixedObject( - 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable); + ArgSize, ArgOffset, !ForceMutable); + } } // Setup stack frame, the va_list pointer will start from. @@ -2873,6 +2905,7 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, unsigned ArgOffset, + unsigned TotalArgRegsSaveSize, bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -2883,8 +2916,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // If there is no regs to be stored, just point address after last // argument passed via stack. int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, 0, ForceMutable); + StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, + CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable, + 0, TotalArgRegsSaveSize); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -2921,6 +2955,51 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // We also increase this value in case of varargs function. AFI->setArgRegsSaveSize(0); + unsigned ByValStoreOffset = 0; + unsigned TotalArgRegsSaveSize = 0; + unsigned ArgRegsSaveSizeMaxAlign = 4; + + // Calculate the amount of stack space that we need to allocate to store + // byval and variadic arguments that are passed in registers. + // We need to know this before we allocate the first byval or variadic + // argument, as they will be allocated a stack slot below the CFA (Canonical + // Frame Address, the stack pointer at entry to the function). 
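// A rough sketch of how the loop that follows sizes the register-save area
// described in the comment above: each byval (and the variadic tail)
// contributes the bytes of r0-r3 it occupies, the total is rounded up to the
// largest byval alignment seen, and it can never exceed the four 4-byte
// argument registers. Hypothetical helper, illustrative only.
static unsigned sizeArgRegsSaveArea(unsigned SumOfPerArgRegBytes,
                                    unsigned MaxByValAlign) {
  unsigned Total = (SumOfPerArgRegBytes + MaxByValAlign - 1) &
                   ~(MaxByValAlign - 1);   // RoundUpToAlignment
  return Total < 16 ? Total : 16;          // cap at r0-r3 (16 bytes)
}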
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isMemLoc()) { + int index = VA.getValNo(); + if (index != lastInsIndex) { + ISD::ArgFlagsTy Flags = Ins[index].Flags; + if (Flags.isByVal()) { + unsigned ExtraArgRegsSize; + unsigned ExtraArgRegsSaveSize; + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsProceed(), + Flags.getByValSize(), + ExtraArgRegsSize, ExtraArgRegsSaveSize); + + TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + if (Flags.getByValAlign() > ArgRegsSaveSizeMaxAlign) + ArgRegsSaveSizeMaxAlign = Flags.getByValAlign(); + CCInfo.nextInRegsParam(); + } + lastInsIndex = index; + } + } + } + CCInfo.rewindByValRegsInfo(); + lastInsIndex = -1; + if (isVarArg) { + unsigned ExtraArgRegsSize; + unsigned ExtraArgRegsSaveSize; + computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, + ExtraArgRegsSize, ExtraArgRegsSaveSize); + TotalArgRegsSaveSize += ExtraArgRegsSaveSize; + } + // If the arg regs save area contains N-byte aligned values, the + // bottom of it must be at least N-byte aligned. + TotalArgRegsSaveSize = RoundUpToAlignment(TotalArgRegsSaveSize, ArgRegsSaveSizeMaxAlign); + TotalArgRegsSaveSize = std::min(TotalArgRegsSaveSize, 16U); + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); @@ -3019,18 +3098,23 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // a tail call. if (Flags.isByVal()) { unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); + + ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign()); int FrameIndex = StoreByValRegs( CCInfo, DAG, dl, Chain, CurOrigArg, CurByValIndex, Ins[VA.getValNo()].PartOffset, VA.getLocMemOffset(), Flags.getByValSize(), - true /*force mutable frames*/); + true /*force mutable frames*/, + ByValStoreOffset, + TotalArgRegsSaveSize); + ByValStoreOffset += Flags.getByValSize(); + ByValStoreOffset = std::min(ByValStoreOffset, 16U); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); CCInfo.nextInRegsParam(); } else { - unsigned FIOffset = VA.getLocMemOffset() + - AFI->getStoredByValParamsPadding(); + unsigned FIOffset = VA.getLocMemOffset(); int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, FIOffset, true); @@ -3048,7 +3132,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // varargs if (isVarArg) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, - CCInfo.getNextStackOffset()); + CCInfo.getNextStackOffset(), + TotalArgRegsSaveSize); + + AFI->setArgumentStackSize(CCInfo.getNextStackOffset()); return Chain; } @@ -3162,11 +3249,96 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } +std::pair +ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, + SDValue &ARMcc) const { + assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); + + SDValue Value, OverflowCmp; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + + // FIXME: We are currently always generating CMPs because we don't support + // generating CMN through the backend. This is not as good as the natural + // CMP case because it causes a register dependency and cannot be folded + // later. 
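// The switch that follows detects overflow with a CMP whose result is read
// through the HS (unsigned) and VC (signed) condition codes. A minimal
// portable sketch of the same predicates, illustrative only:
#include <cstdint>
static bool uaddOverflows(uint32_t A, uint32_t B) {
  return A + B < A;                        // carry out of the addition
}
static bool saddOverflows(int32_t A, int32_t B) {
  int32_t Sum = (int32_t)((uint32_t)A + (uint32_t)B);
  return ((Sum ^ A) & (Sum ^ B)) < 0;      // result sign differs from both inputs
}
static bool usubOverflows(uint32_t A, uint32_t B) {
  return A < B;                            // borrow out of the subtraction
}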
+ + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::SADDO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::UADDO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::SSUBO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + case ISD::USUBO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + } // switch (...) + + return std::make_pair(Value, OverflowCmp); +} + + +SDValue +ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + // We use 0 and 1 as false and true values. + SDValue TVal = DAG.getConstant(1, MVT::i32); + SDValue FVal = DAG.getConstant(0, MVT::i32); + EVT VT = Op.getValueType(); + + SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal, + ARMcc, CCR, OverflowCmp); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); +} + + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); SDLoc dl(Op); + unsigned Opc = Cond.getOpcode(); + + if (Cond.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO)) { + if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + EVT VT = Op.getValueType(); + + return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, + ARMcc, CCR, OverflowCmp); + + } // Convert: // @@ -3217,7 +3389,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) { if (CC == ISD::SETNE) return ISD::SETEQ; - return ISD::getSetCCSwappedOperands(CC); + return ISD::getSetCCInverse(CC, true); } static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, @@ -3468,7 +3640,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; - return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); + return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); } return SDValue(); @@ -3508,11 +3680,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = 
DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; - SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, MVT::i32); SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; - Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); } return Res; } @@ -3709,7 +3881,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp1, 1).getValue(1); + Tmp1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); // Or in the signbit with integer operations. @@ -3725,7 +3897,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // f64: Or the high part with signbit and then combine two parts. Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp0, 1); + Tmp0); SDValue Lo = Tmp0.getValue(0); SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); @@ -3737,6 +3909,9 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); + if (verifyReturnAddressArgumentIsConstant(Op, DAG)) + return SDValue(); + EVT VT = Op.getValueType(); SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); @@ -3754,14 +3929,16 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + const ARMBaseRegisterInfo &ARI = + *static_cast(RegInfo); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) - ? ARM::R7 : ARM::R11; + unsigned FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -3770,45 +3947,16 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } -/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), -/// and size(DestVec) > 128-bits. -/// This is achieved by doing the one extension from the SrcVec, splitting the -/// result, extending these parts, and then concatenating these into the -/// destination. -static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { - SDValue Op = N->getOperand(0); - EVT SrcVT = Op.getValueType(); - EVT DestVT = N->getValueType(0); - - assert(DestVT.getSizeInBits() > 128 && - "Custom sext/zext expansion needs >128-bit vector."); - // If this is a normal length extension, use the default expansion. 
- if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && - SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) - return SDValue(); - - SDLoc dl(N); - unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); - unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); - unsigned NumElts = SrcVT.getVectorNumElements(); - LLVMContext &Ctx = *DAG.getContext(); - SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; - - EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), - NumElts); - EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), - NumElts/2); - EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), - NumElts/2); - - Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); - SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, - DAG.getIntPtrConstant(0)); - SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, - DAG.getIntPtrConstant(NumElts/2)); - ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); - ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned ARMTargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + unsigned Reg = StringSwitch(RegName) + .Case("sp", ARM::SP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); } /// ExpandBITCAST - If the target supports VFP, this function is called to @@ -3840,8 +3988,15 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + SDValue Cvt; + if (TLI.isBigEndian() && SrcVT.isVector() && + SrcVT.getVectorNumElements() > 1) + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), + DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); + else + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Op); // Merge the pieces into a single i64 value. return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } @@ -3897,7 +4052,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two @@ -3931,7 +4086,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, @@ -4136,7 +4291,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; - Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); @@ -4359,7 +4514,6 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, // Value = 0x0000nnff: Op=x, Cmode=1100. 
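// A minimal sketch of the shift-by-one expansion used in Expand64BitShift
// above: the high word is shifted once and the bit that falls out is rotated
// into the top of the low word, which is what SRL_FLAG/SRA_FLAG followed by
// RRX do. Plain C++ stand-in, illustrative only.
#include <cstdint>
static void lsr64ByOne(uint32_t &Hi, uint32_t &Lo) {
  uint32_t Carry = Hi & 1;               // bit shifted out of the high word
  Hi >>= 1;                              // SRL_FLAG (SRA_FLAG shifts arithmetically)
  Lo = (Lo >> 1) | (Carry << 31);        // RRX: the carry enters bit 31
}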
OpCmode = 0xc; Imm = SplatBits >> 8; - SplatBits |= 0xff; break; } @@ -4368,7 +4522,6 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, // Value = 0x00nnffff: Op=x, Cmode=1101. OpCmode = 0xd; Imm = SplatBits >> 16; - SplatBits |= 0xffff; break; } @@ -4397,9 +4550,13 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, BitMask <<= 8; ImmMask <<= 1; } + + if (DAG.getTargetLoweringInfo().isBigEndian()) + // swap higher and lower 32 bit word + Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); + // Op=1, Cmode=1110. OpCmode = 0x1e; - SplatBits = Val; VT = is128Bits ? MVT::v2i64 : MVT::v1i64; break; } @@ -4896,7 +5053,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(N); Ops.push_back(Op.getOperand(I)); Ops.push_back(DAG.getConstant(I, MVT::i32)); - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); } } return N; @@ -4907,7 +5064,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); @@ -4943,7 +5100,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5250,12 +5407,10 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, if (V2.getNode()->getOpcode() == ISD::UNDEF) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, @@ -5408,7 +5563,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(ShuffleMask[i] & (NumElts-1), MVT::i32))); } - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5600,7 +5755,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // operation legalization where we can't create illegal types. 
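// A minimal sketch of the value the Op=1/Cmode=1110 case above encodes: each
// of the eight immediate bits selects 0x00 or 0xFF for one byte of the 64-bit
// element, so the big-endian nibble swap added above simply exchanges which
// 32-bit half each bit controls. Illustrative only.
#include <cstdint>
static uint64_t expandByteMaskImm(uint8_t Imm) {
  uint64_t Val = 0;
  for (unsigned i = 0; i != 8; ++i)
    if (Imm & (1u << i))
      Val |= 0xFFull << (8 * i);
  return Val;
}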
return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), + LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(), LD->isNonTemporal(), LD->getAlignment()); } @@ -5645,7 +5800,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); + MVT::getVectorVT(TruncVT, NumElts), Ops); } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { @@ -5941,45 +6096,80 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } +SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetDarwin()); + + // For iOS, we want to call an alternative entry point: __sincos_stret, + // return values are passed via sret. + SDLoc dl(Op); + SDValue Arg = Op.getOperand(0); + EVT ArgVT = Arg.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Pair of floats / doubles used to pass the result. + StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + + // Create stack object for sret. + const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy); + const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy); + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy()); + + ArgListTy Args; + ArgListEntry Entry; + + Entry.Node = SRet; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isSRet = true; + Args.push_back(Entry); + + Entry.Node = Arg; + Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + const char *LibcallName = (ArgVT == MVT::f64) + ? "__sincos_stret" : "__sincosf_stret"; + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, + std::move(Args), 0) + .setDiscardResult(); + + std::pair CallResult = LowerCallTo(CLI); + + SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, + MachinePointerInfo(), false, false, false, 0); + + // Address of cos field. + SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet, + DAG.getIntPtrConstant(ArgVT.getStoreSize())); + SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, + MachinePointerInfo(), false, false, false, 0); + + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, + LoadSin.getValue(0), LoadCos.getValue(0)); +} + static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { // Monotonic load/store is legal for all targets if (cast(Op)->getOrdering() <= Monotonic) return Op; - // Aquire/Release load/store is not legal for targets without a + // Acquire/Release load/store is not legal for targets without a // dmb or equivalent available. 
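// A minimal sketch of the contract the FSINCOS lowering above relies on: the
// result pair is written through an sret pointer, sin at offset 0 and cos one
// element later. This stand-in uses <cmath>; the real Darwin entry points are
// __sincos_stret and __sincosf_stret. Illustrative only.
#include <cmath>
struct SinCosPair { double Sin, Cos; };
static void sincosStretLike(SinCosPair *Ret /* sret slot */, double X) {
  Ret->Sin = std::sin(X);                // loaded back from offset 0
  Ret->Cos = std::cos(X);                // loaded back from offset sizeof(double)
}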
return SDValue(); } -static void -ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl& Results, - SelectionDAG &DAG) { - SDLoc dl(Node); - assert (Node->getValueType(0) == MVT::i64 && - "Only know how to expand i64 atomics"); - AtomicSDNode *AN = cast(Node); - - SmallVector Ops; - Ops.push_back(Node->getOperand(0)); // Chain - Ops.push_back(Node->getOperand(1)); // Ptr - for(unsigned i=2; igetNumOperands(); i++) { - // Low part - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(i), DAG.getIntPtrConstant(0))); - // High part - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(i), DAG.getIntPtrConstant(1))); - } - SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); - SDValue Result = - DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(), - cast(Node)->getMemOperand(), AN->getOrdering(), - AN->getSynchScope()); - SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); - Results.push_back(Result.getValue(2)); -} - static void ReplaceREADCYCLECOUNTER(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, @@ -6000,8 +6190,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, }; Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, - DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], - array_lengthof(Ops)); + DAG.getVTList(MVT::i32, MVT::Other), Ops); OutChain = Cycles32.getValue(1); } else { // Intrinsic is defined to return 0 on unsupported platforms. Technically @@ -6024,8 +6213,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: - return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : - LowerGlobalAddressELF(Op, DAG); + switch (Subtarget->getTargetTriple().getObjectFormat()) { + default: llvm_unreachable("unknown object format"); + case Triple::COFF: + return LowerGlobalAddressWindows(Op, DAG); + case Triple::ELF: + return LowerGlobalAddressELF(Op, DAG); + case Triple::MachO: + return LowerGlobalAddressDarwin(Op, DAG); + } case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -6070,10 +6266,20 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + return LowerXALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); + case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + return LowerDYNAMIC_STACKALLOC(Op, DAG); + llvm_unreachable("Don't know how to custom lower this!"); } } @@ -6089,10 +6295,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - Res = ExpandVectorExtension(N, DAG); - break; case ISD::SRL: case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); @@ -6100,22 +6302,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::READCYCLECOUNTER: ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; - case ISD::ATOMIC_STORE: - case ISD::ATOMIC_LOAD: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMAX: - ReplaceATOMIC_OP_64(N, Results, DAG); - return; } if (Res.getNode()) Results.push_back(Res); @@ -6125,550 +6311,19 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, // ARM Scheduler Hooks //===----------------------------------------------------------------------===// -MachineBasicBlock * -ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - AtomicOrdering Ord = static_cast(MI->getOperand(4).getImm()); +/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and +/// registers the function context. +void ARMTargetLowering:: +SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, + MachineBasicBlock *DispatchBB, int FI) const { + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - unsigned scratch = MRI.createVirtualRegister(isThumb2 ? 
- (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); - - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(oldval, &ARM::rGPRRegClass); - MRI.constrainRegClass(newval, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldrex dest, [ptr] - // cmp dest, oldval - // bne exitMBB - BB = loop1MBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) - .addReg(dest).addReg(oldval)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex scratch, newval, [ptr] - // cmp scratch, #0 - // bne loop1MBB - BB = loop2MBB; - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(incr, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrex dest, ptr - // scratch2, dest, incr - // strex scratch, scratch2, ptr - // cmp scratch, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - if (BinOpcode) { - // operand order needs to go the other way for NAND - if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) - AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). - addReg(incr).addReg(dest)).addReg(0); - else - AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). - addReg(dest).addReg(incr)).addReg(0); - } - - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
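// A minimal sketch of the retry structure in the ldrex/strex loop removed
// here, expressed with std::atomic compare-exchange instead of exclusive
// loads and stores. Illustrative only.
#include <atomic>
#include <cstdint>
static uint32_t atomicRmwAdd(std::atomic<uint32_t> &Addr, uint32_t Incr) {
  uint32_t Old = Addr.load();
  while (!Addr.compare_exchange_weak(Old, Old + Incr))
    ;                                    // like "cmp scratch, #0; bne loop"
  return Old;
}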
- - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - bool signExtend, - ARMCC::CondCodes Cond) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - unsigned oldval = dest; - AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(dest, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(incr, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc, extendOpc; - getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!"); - case 1: - extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; - break; - case 2: - extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; - break; - case 4: - extendOpc = 0; - break; - } - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned scratch = MRI.createVirtualRegister(TRC); - unsigned scratch2 = MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrex dest, ptr - // (sign extend dest, if required) - // cmp dest, incr - // cmov.cond scratch2, incr, dest - // strex scratch, scratch2, ptr - // cmp scratch, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (ldrOpc == ARM::t2LDREX) - MIB.addImm(0); - AddDefaultPred(MIB); - - // Sign extend the value, if necessary. - if (signExtend && extendOpc) { - oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass - : &ARM::GPRnopcRegClass); - if (!isThumb2) - MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) - .addReg(dest) - .addImm(0)); - } - - // Build compare and cmov instructions. - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) - .addReg(oldval).addReg(incr)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) - .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); - - MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - if (strOpc == ARM::t2STREX) - MIB.addImm(0); - AddDefaultPred(MIB); - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(scratch).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Op1, unsigned Op2, - bool NeedsCarry, bool IsCmpxchg, - bool IsMinMax, ARMCC::CondCodes CC) const { - // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64); - unsigned offset = (isStore ? -2 : 0); - unsigned destlo = MI->getOperand(0).getReg(); - unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(offset+2).getReg(); - unsigned vallo = MI->getOperand(offset+3).getReg(); - unsigned valhi = MI->getOperand(offset+4).getReg(); - unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5); - AtomicOrdering Ord = static_cast(MI->getOperand(OrdIdx).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); - MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - MRI.constrainRegClass(vallo, &ARM::rGPRRegClass); - MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); - } - - unsigned ldrOpc, strOpc; - getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *contBB = 0, *cont2BB = 0; - if (IsCmpxchg || IsMinMax) - contBB = MF->CreateMachineBasicBlock(LLVM_BB); - if (IsCmpxchg) - cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - - MF->insert(It, loopMBB); - if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); - if (IsCmpxchg) MF->insert(It, cont2BB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - unsigned storesuccess = MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldrexd r2, r3, ptr - // r0, r2, incr - // r1, r3, incr - // strexd storesuccess, r0, r1, ptr - // cmp storesuccess, #0 - // bne- loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - - if (!isStore) { - // Load - if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr)); - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(GPRPair0, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) 
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - } - } - - unsigned StoreLo, StoreHi; - if (IsCmpxchg) { - // Add early exit - for (unsigned i = 0; i < 2; i++) { - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : - ARM::CMPrr)) - .addReg(i == 0 ? destlo : desthi) - .addReg(i == 0 ? vallo : valhi)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - BB->addSuccessor(exitMBB); - BB->addSuccessor(i == 0 ? contBB : cont2BB); - BB = (i == 0 ? contBB : cont2BB); - } - - // Copy to physregs for strexd - StoreLo = MI->getOperand(5).getReg(); - StoreHi = MI->getOperand(6).getReg(); - } else if (Op1) { - // Perform binary operation - unsigned tmpRegLo = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo) - .addReg(destlo).addReg(vallo)) - .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); - unsigned tmpRegHi = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) - .addReg(desthi).addReg(valhi)) - .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); - - StoreLo = tmpRegLo; - StoreHi = tmpRegHi; - } else { - // Copy to physregs for strexd - StoreLo = vallo; - StoreHi = valhi; - } - if (IsMinMax) { - // Compare and branch to exit block. - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); - BB->addSuccessor(exitMBB); - BB->addSuccessor(contBB); - BB = contBB; - StoreLo = vallo; - StoreHi = valhi; - } - - // Store - if (isThumb2) { - MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); - MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); - } else { - // Marshal a pair... - unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(UndefPair) - .addReg(StoreLo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) - .addReg(r1) - .addReg(StoreHi) - .addImm(ARM::gsub_1); - - // ...and store it - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(StorePair).addReg(ptr)); - } - // Cmp+jump - AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) - .addReg(storesuccess).addImm(0)); - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; -} - -MachineBasicBlock * -ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const { - - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - unsigned destlo = MI->getOperand(0).getReg(); - unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - if (isThumb2) { - MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); - MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); - MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); - } - unsigned ldrOpc, strOpc; - getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); - - MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); - - if (isThumb2) { - MIB.addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr); - - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - MIB.addReg(GPRPair0, RegState::Define).addReg(ptr); - - // Copy GPRPair0 into dest. (This copy will normally be coalesced.) - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - } - AddDefaultPred(MIB); - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and -/// registers the function context. -void ARMTargetLowering:: -SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, - MachineBasicBlock *DispatchBB, int FI) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - MachineConstantPool *MCP = MF->getConstantPool(); - ARMFunctionInfo *AFI = MF->getInfo(); - const Function *F = MF->getFunction(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + MachineConstantPool *MCP = MF->getConstantPool(); + ARMFunctionInfo *AFI = MF->getInfo(); + const Function *F = MF->getFunction(); bool isThumb = Subtarget->isThumb(); bool isThumb2 = Subtarget->isThumb2(); @@ -6777,7 +6432,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *ARMTargetLowering:: EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -7111,7 +6767,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // N.B. the order the invoke BBs are processed in doesn't matter here. 
- const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); + const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector MBBLPads; for (SmallPtrSet::iterator I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { @@ -7188,12 +6844,114 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { llvm_unreachable("Expecting a BB with two successors!"); } -MachineBasicBlock *ARMTargetLowering:: -EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { +/// Return the load opcode for a given load size. If load size >= 8, +/// neon opcode will be returned. +static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) { + if (LdSize >= 8) + return LdSize == 16 ? ARM::VLD1q32wb_fixed + : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0; + if (IsThumb1) + return LdSize == 4 ? ARM::tLDRi + : LdSize == 2 ? ARM::tLDRHi + : LdSize == 1 ? ARM::tLDRBi : 0; + if (IsThumb2) + return LdSize == 4 ? ARM::t2LDR_POST + : LdSize == 2 ? ARM::t2LDRH_POST + : LdSize == 1 ? ARM::t2LDRB_POST : 0; + return LdSize == 4 ? ARM::LDR_POST_IMM + : LdSize == 2 ? ARM::LDRH_POST + : LdSize == 1 ? ARM::LDRB_POST_IMM : 0; +} + +/// Return the store opcode for a given store size. If store size >= 8, +/// neon opcode will be returned. +static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) { + if (StSize >= 8) + return StSize == 16 ? ARM::VST1q32wb_fixed + : StSize == 8 ? ARM::VST1d32wb_fixed : 0; + if (IsThumb1) + return StSize == 4 ? ARM::tSTRi + : StSize == 2 ? ARM::tSTRHi + : StSize == 1 ? ARM::tSTRBi : 0; + if (IsThumb2) + return StSize == 4 ? ARM::t2STR_POST + : StSize == 2 ? ARM::t2STRH_POST + : StSize == 1 ? ARM::t2STRB_POST : 0; + return StSize == 4 ? ARM::STR_POST_IMM + : StSize == 2 ? ARM::STRH_POST + : StSize == 1 ? ARM::STRB_POST_IMM : 0; +} + +/// Emit a post-increment load operation with given size. The instructions +/// will be added to BB at Pos. +static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos, + const TargetInstrInfo *TII, DebugLoc dl, + unsigned LdSize, unsigned Data, unsigned AddrIn, + unsigned AddrOut, bool IsThumb1, bool IsThumb2) { + unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2); + assert(LdOpc != 0 && "Should have a load opcode"); + if (LdSize >= 8) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define).addReg(AddrIn) + .addImm(0)); + } else if (IsThumb1) { + // load + update AddrIn + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrIn).addImm(0)); + MachineInstrBuilder MIB = + BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); + MIB = AddDefaultT1CC(MIB); + MIB.addReg(AddrIn).addImm(LdSize); + AddDefaultPred(MIB); + } else if (IsThumb2) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define).addReg(AddrIn) + .addImm(LdSize)); + } else { // arm + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define).addReg(AddrIn) + .addReg(0).addImm(LdSize)); + } +} + +/// Emit a post-increment store operation with given size. The instructions +/// will be added to BB at Pos. 
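// A minimal sketch of the copy strategy EmitStructByval drives below through
// emitPostLd/emitPostSt: copy in the largest unit the alignment allows, then
// finish the remainder a byte at a time (the LDRB/STRB tail). Plain C++
// stand-in, illustrative only.
#include <cstddef>
#include <cstring>
static void copyLikeStructByval(char *Dst, const char *Src, std::size_t Size,
                                std::size_t UnitSize) {
  std::size_t BytesLeft = Size % UnitSize;
  std::size_t LoopSize = Size - BytesLeft;
  for (std::size_t I = 0; I < LoopSize; I += UnitSize)
    std::memcpy(Dst + I, Src + I, UnitSize);     // unit-sized post-increment copies
  for (std::size_t I = LoopSize; I < Size; ++I)
    Dst[I] = Src[I];                             // leftover bytes
}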
+static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos, + const TargetInstrInfo *TII, DebugLoc dl, + unsigned StSize, unsigned Data, unsigned AddrIn, + unsigned AddrOut, bool IsThumb1, bool IsThumb2) { + unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); + assert(StOpc != 0 && "Should have a store opcode"); + if (StSize >= 8) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(AddrIn).addImm(0).addReg(Data)); + } else if (IsThumb1) { + // store + update AddrIn + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data) + .addReg(AddrIn).addImm(0)); + MachineInstrBuilder MIB = + BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); + MIB = AddDefaultT1CC(MIB); + MIB.addReg(AddrIn).addImm(StSize); + AddDefaultPred(MIB); + } else if (IsThumb2) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(Data).addReg(AddrIn).addImm(StSize)); + } else { // arm + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(Data).addReg(AddrIn).addReg(0) + .addImm(StSize)); + } +} + +MachineBasicBlock * +ARMTargetLowering::EmitStructByval(MachineInstr *MI, + MachineBasicBlock *BB) const { // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; @@ -7204,23 +6962,18 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { unsigned Align = MI->getOperand(3).getImm(); DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned ldrOpc, strOpc, UnitSize = 0; + unsigned UnitSize = 0; + const TargetRegisterClass *TRC = nullptr; + const TargetRegisterClass *VecTRC = nullptr; - const TargetRegisterClass *TRC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - const TargetRegisterClass *TRC_Vec = 0; + bool IsThumb1 = Subtarget->isThumb1Only(); + bool IsThumb2 = Subtarget->isThumb2(); if (Align & 1) { - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; UnitSize = 1; } else if (Align & 2) { - ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST; - strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; UnitSize = 2; } else { // Check whether we can use NEON instructions. @@ -7228,27 +6981,27 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { - if ((Align % 16 == 0) && SizeVal >= 16) { - ldrOpc = ARM::VLD1q32wb_fixed; - strOpc = ARM::VST1q32wb_fixed; + if ((Align % 16 == 0) && SizeVal >= 16) UnitSize = 16; - TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; - } - else if ((Align % 8 == 0) && SizeVal >= 8) { - ldrOpc = ARM::VLD1d32wb_fixed; - strOpc = ARM::VST1d32wb_fixed; + else if ((Align % 8 == 0) && SizeVal >= 8) UnitSize = 8; - TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; - } } // Can't use NEON instructions. - if (UnitSize == 0) { - ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - strOpc = isThumb2 ? 
ARM::t2STR_POST : ARM::STR_POST_IMM; + if (UnitSize == 0) UnitSize = 4; - } } + // Select the correct opcode and register class for unit size load/store + bool IsNeon = UnitSize >= 8; + TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass + : (const TargetRegisterClass *)&ARM::GPRRegClass; + if (IsNeon) + VecTRC = UnitSize == 16 + ? (const TargetRegisterClass *)&ARM::DPairRegClass + : UnitSize == 8 + ? (const TargetRegisterClass *)&ARM::DPRRegClass + : nullptr; + unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -7259,34 +7012,13 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { - unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (UnitSize >= 8) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(destIn).addImm(0).addReg(scratch)); - } else if (isThumb2) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addImm(UnitSize)); - } else { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0) - .addImm(UnitSize)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(UnitSize)); - } + unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); + emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut, + IsThumb1, IsThumb2); + emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut, + IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } @@ -7294,30 +7026,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // Handle the leftover bytes with LDRB and STRB. // [scratch, srcOut] = LDRB_POST(srcIn, 1) // [destOut] = STRB_POST(scratch, destIn, 1) - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? 
ARM::t2STRB_POST : ARM::STRB_POST_IMM; for (unsigned i = 0; i < BytesLeft; i++) { - unsigned scratch = MRI.createVirtualRegister(TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } else { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn) - .addReg(0).addImm(1)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } + unsigned scratch = MRI.createVirtualRegister(TRC); + emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut, + IsThumb1, IsThumb2); + emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut, + IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } @@ -7352,23 +7068,21 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // Load an immediate to varEnd. unsigned varEnd = MRI.createVirtualRegister(TRC); - if (isThumb2) { - unsigned VReg1 = varEnd; + if (IsThumb2) { + unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) - VReg1 = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1) - .addImm(LoopSize & 0xFFFF)); + Vtmp = MRI.createVirtualRegister(TRC); + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp) + .addImm(LoopSize & 0xFFFF)); if ((LoopSize & 0xFFFF0000) != 0) AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) - .addReg(VReg1) - .addImm(LoopSize >> 16)); + .addReg(Vtmp).addImm(LoopSize >> 16)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -7380,10 +7094,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { Align = getDataLayout()->getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp)) - .addReg(varEnd, RegState::Define) - .addConstantPoolIndex(Idx) - .addImm(0)); + if (IsThumb1) + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg( + varEnd, RegState::Define).addConstantPoolIndex(Idx)); + else + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg( + varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0)); } BB->addSuccessor(loopMBB); @@ -7412,39 +7128,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) - unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? 
TRC_Vec:TRC); - if (UnitSize >= 8) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(destPhi).addImm(0).addReg(scratch)); - } else if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(scratch).addReg(destPhi) - .addImm(UnitSize)); - } else { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0) - .addImm(UnitSize)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(scratch).addReg(destPhi) - .addReg(0).addImm(UnitSize)); - } + unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); + emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop, + IsThumb1, IsThumb2); + emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop, + IsThumb1, IsThumb2); // Decrement loop variable by UnitSize. - MachineInstrBuilder MIB = BuildMI(BB, dl, - TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); - AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); - MIB->getOperand(5).setReg(ARM::CPSR); - MIB->getOperand(5).setIsDef(true); - - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + if (IsThumb1) { + MachineInstrBuilder MIB = + BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop); + MIB = AddDefaultT1CC(MIB); + MIB.addReg(varPhi).addImm(UnitSize); + AddDefaultPred(MIB); + } else { + MachineInstrBuilder MIB = + BuildMI(*BB, BB->end(), dl, + TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); + AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); + MIB->getOperand(5).setReg(ARM::CPSR); + MIB->getOperand(5).setIsDef(true); + } + BuildMI(*BB, BB->end(), dl, + TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); // loopMBB can loop back to loopMBB or fall through to exitMBB. BB->addSuccessor(loopMBB); @@ -7453,34 +7160,19 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // Add epilogue to handle BytesLeft. BB = exitMBB; MachineInstr *StartOfExit = exitMBB->begin(); - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? 
ARM::t2STRB_POST : ARM::STRB_POST_IMM; // [scratch, srcOut] = LDRB_POST(srcLoop, 1) // [destOut] = STRB_POST(scratch, destLoop, 1) unsigned srcIn = srcLoop; unsigned destIn = destLoop; for (unsigned i = 0; i < BytesLeft; i++) { - unsigned scratch = MRI.createVirtualRegister(TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); - - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addImm(1)); - } else { - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1)); - - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } + unsigned scratch = MRI.createVirtualRegister(TRC); + emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut, + IsThumb1, IsThumb2); + emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut, + IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } @@ -7489,10 +7181,77 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { return BB; } +MachineBasicBlock * +ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const TargetMachine &TM = getTargetMachine(); + const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + assert(Subtarget->isTargetWindows() && + "__chkstk is only supported on Windows"); + assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode"); + + // __chkstk takes the number of words to allocate on the stack in R4, and + // returns the stack adjustment in number of bytes in R4. This will not + // clobber any other registers (other than the obvious lr). + // + // Although, technically, IP should be considered a register which may be + // clobbered, the call itself will not touch it. Windows on ARM is a pure + // thumb-2 environment, so there is no interworking required. As a result, we + // do not expect a veneer to be emitted by the linker, clobbering IP. + // + // Each module receives its own copy of __chkstk, so no import thunk is + // required, again, ensuring that IP is not clobbered. + // + // Finally, although some linkers may theoretically provide a trampoline for + // out of range calls (which is quite common due to a 32M range limitation of + // branches for Thumb), we can generate the long-call version via + // -mcmodel=large, alleviating the need for the trampoline which may clobber + // IP.
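The EmitStructByval rewrite in the hunks above always copies LoopSize = SizeVal - SizeVal % UnitSize bytes with unit-sized post-incrementing loads and stores (unrolled below the inline threshold, as a loop above it) and then finishes the remaining BytesLeft bytes one at a time. The following standalone C++ sketch models only that copy schedule, not the MachineInstr building; it ignores the NoImplicitFloat check, and every name in it is illustrative:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Pick the copy unit roughly the way the expansion above does: from the
    // byval alignment, falling back to 4 bytes when NEON-sized units do not apply.
    static unsigned chooseUnit(unsigned Align, unsigned SizeVal, bool HasNEON) {
      if (Align & 1) return 1;
      if (Align & 2) return 2;
      if (HasNEON && (Align % 16 == 0) && SizeVal >= 16) return 16;
      if (HasNEON && (Align % 8 == 0) && SizeVal >= 8) return 8;
      return 4;
    }

    // Unit-sized main copy followed by a byte tail, mirroring
    // LoopSize = SizeVal - BytesLeft and the LDRB/STRB epilogue.
    static void copyByvalModel(uint8_t *Dst, const uint8_t *Src,
                               unsigned SizeVal, unsigned UnitSize) {
      unsigned BytesLeft = SizeVal % UnitSize;
      unsigned LoopSize = SizeVal - BytesLeft;
      for (unsigned i = 0; i < LoopSize; i += UnitSize)
        std::memcpy(Dst + i, Src + i, UnitSize);   // stands in for emitPostLd/emitPostSt
      for (unsigned i = LoopSize; i < SizeVal; ++i)
        Dst[i] = Src[i];                           // stands in for the byte epilogue
    }

    int main() {
      uint8_t Src[23], Dst[23] = {};
      for (unsigned i = 0; i != sizeof(Src); ++i) Src[i] = uint8_t(i);
      unsigned Unit = chooseUnit(/*Align=*/8, sizeof(Src), /*HasNEON=*/true);
      assert(Unit == 8);
      copyByvalModel(Dst, Src, sizeof(Src), Unit);
      assert(std::memcmp(Dst, Src, sizeof(Src)) == 0);
      return 0;
    }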
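The __chkstk contract described in the comment above is: pass the number of words to allocate in R4, receive the stack adjustment in bytes back in R4, and subtract it from SP (the t2SUBrr emitted after the switch below). A rough standalone C++ model of that arithmetic follows; it assumes the requested size is already a multiple of 4 and that the routine grants exactly what was asked for (both are assumptions of this sketch, not statements about the real runtime), and it does not model the stack probing __chkstk performs:

    #include <cassert>
    #include <cstdint>

    // Illustrative stand-in for __chkstk: word count in, byte adjustment out.
    // Assumed here to return exactly Words * 4; the real routine also probes
    // the guard page(s), which is not modelled.
    static uint32_t chkstkModel(uint32_t Words) {
      return Words * 4;
    }

    // Shape of the dynamic stack allocation path: the byte size is shifted
    // right by 2 to get words (see LowerDYNAMIC_STACKALLOC later in the patch),
    // the "call" happens with R4 in and out, and the WIN__CHKSTK expansion then
    // performs t2SUBrr SP, SP, R4.
    static uint32_t allocaModel(uint32_t SP, uint32_t SizeInBytes) {
      uint32_t Words = SizeInBytes >> 2;
      uint32_t Adjust = chkstkModel(Words);
      return SP - Adjust;
    }

    int main() {
      assert(allocaModel(0x1000, 64) == 0x1000 - 64);
      return 0;
    }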
+ + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + case CodeModel::Large: + case CodeModel::JITDefault: { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); + + BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) + .addExternalSymbol("__chkstk"); + BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(Reg, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + } + } + + AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP).addReg(ARM::R4))); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { @@ -7552,131 +7311,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->eraseFromParent(); return BB; } - case ARM::ATOMIC_LOAD_ADD_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - case ARM::ATOMIC_LOAD_ADD_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - case ARM::ATOMIC_LOAD_ADD_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); - - case ARM::ATOMIC_LOAD_AND_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_LOAD_AND_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_LOAD_AND_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - - case ARM::ATOMIC_LOAD_OR_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_OR_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_OR_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - - case ARM::ATOMIC_LOAD_XOR_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_XOR_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_XOR_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - - case ARM::ATOMIC_LOAD_NAND_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - case ARM::ATOMIC_LOAD_NAND_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - case ARM::ATOMIC_LOAD_NAND_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); - - case ARM::ATOMIC_LOAD_SUB_I8: - return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); - case ARM::ATOMIC_LOAD_SUB_I16: - return EmitAtomicBinary(MI, BB, 2, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); - case ARM::ATOMIC_LOAD_SUB_I32: - return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); - - case ARM::ATOMIC_LOAD_MIN_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MIN_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MIN_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); - - case ARM::ATOMIC_LOAD_MAX_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); - case ARM::ATOMIC_LOAD_MAX_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); - case ARM::ATOMIC_LOAD_MAX_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); - - case ARM::ATOMIC_LOAD_UMIN_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMIN_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMIN_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); - - case ARM::ATOMIC_LOAD_UMAX_I8: - return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); - case ARM::ATOMIC_LOAD_UMAX_I16: - return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); - case ARM::ATOMIC_LOAD_UMAX_I32: - return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); - - case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); - case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); - case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); - - case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); - case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); - case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); - - case ARM::ATOMIC_LOAD_I64: - return EmitAtomicLoad64(MI, BB); - - case ARM::ATOMIC_LOAD_ADD_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, - isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, - /*NeedsCarry*/ true); - case ARM::ATOMIC_LOAD_SUB_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true); - case ARM::ATOMIC_LOAD_OR_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, - isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMIC_LOAD_XOR_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, - isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMIC_LOAD_AND_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, - isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMIC_STORE_I64: - case ARM::ATOMIC_SWAP_I64: - return EmitAtomicBinary64(MI, BB, 0, 0, false); - case ARM::ATOMIC_CMP_SWAP_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ false, /*IsCmpxchg*/true); - case ARM::ATOMIC_LOAD_MIN_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LT); - case ARM::ATOMIC_LOAD_MAX_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::GE); - case ARM::ATOMIC_LOAD_UMIN_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LO); - case ARM::ATOMIC_LOAD_UMAX_I64: - return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, - isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, - /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::HS); case ARM::tMOVCCr_pseudo: { // To "insert" a SELECT_CC instruction, we actually have to insert the @@ -7702,8 +7336,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(copy0MBB); @@ -7736,7 +7369,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::BCCi64: case ARM::BCCZi64: { // If there is an unconditional branch to the other successor, remove it. - BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); + BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end()); // Compare both parts that make up the double comparison separately for // equality. @@ -7821,8 +7454,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Transfer the remainder of BB and its successor edges to sinkMBB. SinkBB->splice(SinkBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); + std::next(MachineBasicBlock::iterator(MI)), BB->end()); SinkBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(RSBBB); @@ -7865,6 +7497,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::COPY_STRUCT_BYVAL_I32: ++NumLoopByVals; return EmitStructByval(MI, BB); + case ARM::WIN__CHKSTK: + return EmitLowered__chkstk(MI, BB); } } @@ -7887,8 +7521,8 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); if (NewOpc) { - const ARMBaseInstrInfo *TII = - static_cast(getTargetMachine().getInstrInfo()); + const ARMBaseInstrInfo *TII = static_cast( + getTargetMachine().getSubtargetImpl()->getInstrInfo()); MCID = &TII->get(NewOpc); assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && @@ -8155,7 +7789,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Get widened type and narrowed type. MVT widenType; unsigned numElem = VT.getVectorNumElements(); - switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { + + EVT inputLaneType = Vec.getValueType().getVectorElementType(); + switch (inputLaneType.getSimpleVT().SimpleTy) { case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break; case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; @@ -8163,9 +7799,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, llvm_unreachable("Invalid vector element type for padd optimization."); } - SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), - widenType, &Ops[0], Ops.size()); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp); + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops); + unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? 
ISD::ANY_EXTEND : ISD::TRUNCATE; + return DAG.getNode(ExtOp, SDLoc(N), VT, tmp); } static SDValue findMUL_LOHI(SDValue V) { @@ -8223,7 +7859,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); - if (AddeNode == NULL) + if (!AddeNode) return SDValue(); // Make sure it is really an ADDE. @@ -8258,9 +7894,9 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Figure out the high and low input values to the MLAL node. SDValue* HiMul = &MULOp; - SDValue* HiAdd = NULL; - SDValue* LoMul = NULL; - SDValue* LowAdd = NULL; + SDValue* HiAdd = nullptr; + SDValue* LoMul = nullptr; + SDValue* LowAdd = nullptr; if (IsLeftOperandMUL) HiAdd = &AddeOp1; @@ -8277,7 +7913,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, LowAdd = &AddcOp0; } - if (LoMul == NULL) + if (!LoMul) return SDValue(); if (LoMul->getNode() != HiMul->getNode()) @@ -8294,8 +7930,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, Ops.push_back(*HiAdd); SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), - DAG.getVTList(MVT::i32, MVT::i32), - &Ops[0], Ops.size()); + DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the MLA node's values. SDValue HiMLALResult(MLALNode.getNode(), 1); @@ -8819,9 +8454,9 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); + if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) + std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); - DCI.RemoveFromWorklist(LD); - DAG.DeleteNode(LD); return Result; } @@ -8886,7 +8521,8 @@ static SDValue PerformSTORECombine(SDNode *N, SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + for (unsigned i = 0; i < NumElems; ++i) + ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); @@ -8932,8 +8568,7 @@ static SDValue PerformSTORECombine(SDNode *N, Increment); Chains.push_back(Ch); } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], - Chains.size()); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } if (!ISD::isNormalStore(St)) @@ -8944,16 +8579,18 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; + bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, - StVal.getNode()->getOperand(0), BasePtr, - St->getPointerInfo(), St->isVolatile(), + StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), + BasePtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, MVT::i32)); - return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), + return DAG.getStore(NewST1.getValue(0), DL, + StVal.getNode()->getOperand(isBigEndian ? 
0 : 1), OffsetPtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), std::min(4U, St->getAlignment() / 2)); @@ -8982,7 +8619,7 @@ static SDValue PerformSTORECombine(SDNode *N, return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment(), - St->getTBAAInfo()); + St->getAAInfo()); } /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node @@ -9029,7 +8666,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, DCI.AddToWorklist(V.getNode()); } EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -9112,7 +8749,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Fold obvious case. V = V.getOperand(0); else { - V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); + V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(V.getNode()); } @@ -9308,7 +8945,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Tys[n] = VecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); + SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs+2)); SmallVector Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); @@ -9318,8 +8955,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } MemIntrinsicSDNode *MemInt = cast(N); SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops.data(), Ops.size(), - MemInt->getMemoryVT(), + Ops, MemInt->getMemoryVT(), MemInt->getMemOperand()); // Update the uses. @@ -9388,11 +9024,11 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); + SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, - Ops, 2, VLDMemInt->getMemoryVT(), + Ops, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); // Update the uses. @@ -9641,9 +9277,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { // loads from a constant pool. 
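The truncating-store combine earlier in this hunk now chooses its shuffle lanes per endianness: on a little-endian target the truncated value of wide element i lives in narrow sub-element i * SizeRatio, while on a big-endian target it is the last sub-element of that group, (i + 1) * SizeRatio - 1. A small standalone check of that index arithmetic (illustrative only):

    #include <cstdio>
    #include <vector>

    // Lane selection used by the truncating-store combine: for each of the
    // NumElems wide elements, pick the narrow sub-element that holds the
    // truncated value.  SizeRatio = WideEltBits / NarrowEltBits.
    static std::vector<int> truncStoreMask(unsigned NumElems, unsigned SizeRatio,
                                           bool IsBigEndian) {
      std::vector<int> Mask(NumElems * SizeRatio, -1);
      for (unsigned i = 0; i < NumElems; ++i)
        Mask[i] = IsBigEndian ? (i + 1) * SizeRatio - 1 : i * SizeRatio;
      return Mask;
    }

    int main() {
      // v4i32 truncated to v4i16 and stored through a v8i16 shuffle: SizeRatio == 2.
      for (int Lane : truncStoreMask(4, 2, /*IsBigEndian=*/false))
        std::printf("%d ", Lane);   // 0 2 4 6 -1 -1 -1 -1
      std::printf("\n");
      for (int Lane : truncStoreMask(4, 2, /*IsBigEndian=*/true))
        std::printf("%d ", Lane);   // 1 3 5 7 -1 -1 -1 -1
      std::printf("\n");
      return 0;
    }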
case Intrinsic::arm_neon_vshifts: case Intrinsic::arm_neon_vshiftu: - case Intrinsic::arm_neon_vshiftls: - case Intrinsic::arm_neon_vshiftlu: - case Intrinsic::arm_neon_vshiftn: case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: case Intrinsic::arm_neon_vrshiftn: @@ -9674,12 +9307,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { } return SDValue(); - case Intrinsic::arm_neon_vshiftls: - case Intrinsic::arm_neon_vshiftlu: - if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) - break; - llvm_unreachable("invalid shift count for vshll intrinsic"); - case Intrinsic::arm_neon_vrshifts: case Intrinsic::arm_neon_vrshiftu: if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) @@ -9697,7 +9324,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { break; llvm_unreachable("invalid shift count for vqshlu intrinsic"); - case Intrinsic::arm_neon_vshiftn: case Intrinsic::arm_neon_vrshiftn: case Intrinsic::arm_neon_vqshiftns: case Intrinsic::arm_neon_vqshiftnu: @@ -9720,16 +9346,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { case Intrinsic::arm_neon_vshiftu: // Opcode already set above. break; - case Intrinsic::arm_neon_vshiftls: - case Intrinsic::arm_neon_vshiftlu: - if (Cnt == VT.getVectorElementType().getSizeInBits()) - VShiftOpc = ARMISD::VSHLLi; - else - VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? - ARMISD::VSHLLs : ARMISD::VSHLLu); - break; - case Intrinsic::arm_neon_vshiftn: - VShiftOpc = ARMISD::VSHRN; break; case Intrinsic::arm_neon_vrshifts: VShiftOpc = ARMISD::VRSHRs; break; case Intrinsic::arm_neon_vrshiftu: @@ -10010,7 +9626,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); + DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -10093,7 +9709,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { +bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); @@ -10115,7 +9734,7 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const case MVT::v2f64: { // For any little-endian targets with neon, we can support unaligned ld/st // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. 
- // A big-endian target may also explictly support unaligned accesses + // A big-endian target may also explicitly support unaligned accesses if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { if (Fast) *Fast = true; @@ -10147,11 +9766,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && + Fast))) { return MVT::f64; } } @@ -10596,11 +10216,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return true; } -void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { unsigned BitWidth = KnownOne.getBitWidth(); KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { @@ -10616,15 +10236,29 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; } + case ISD::INTRINSIC_W_CHAIN: { + ConstantSDNode *CN = cast(Op->getOperand(1)); + Intrinsic::ID IntID = static_cast(CN->getZExtValue()); + switch (IntID) { + default: return; + case Intrinsic::arm_ldaex: + case Intrinsic::arm_ldrex: { + EVT VT = cast(Op)->getMemoryVT(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + return; + } + } + } } } @@ -10700,7 +10334,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. @@ -10742,6 +10376,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'r': return RCPair(0U, &ARM::GPRRegClass); case 'w': + if (VT == MVT::Other) + break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) @@ -10750,6 +10386,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return RCPair(0U, &ARM::QPRRegClass); break; case 'x': + if (VT == MVT::Other) + break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPR_8RegClass); if (VT.getSizeInBits() == 64) @@ -10775,7 +10413,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { - SDValue Result(0, 0); + SDValue Result; // Currently only support length 1 constraints. 
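The computeKnownBitsForTargetNode addition earlier in this hunk records that a narrow ldrex/ldaex result is zero-extended, so APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) marks everything above the loaded width as known zero. For the i32 results these intrinsics produce, the same mask can be written with plain integer arithmetic (illustrative only):

    #include <cassert>
    #include <cstdint>

    // Known-zero mask for a zero-extending exclusive load: MemBits is the width
    // actually read from memory; the result width is fixed at 32 bits here.
    static uint32_t ldrexKnownZeroMask(unsigned MemBits) {
      assert((MemBits == 8 || MemBits == 16 || MemBits == 32) &&
             "widths an i32 ldrex/ldaex can load");
      if (MemBits == 32)
        return 0;                      // nothing extra is known
      return ~uint32_t(0) << MemBits;  // top (32 - MemBits) bits are known zero
    }

    int main() {
      assert(ldrexKnownZeroMask(8)  == 0xFFFFFF00u); // ldrexb / ldaexb
      assert(ldrexKnownZeroMask(16) == 0xFFFF0000u); // ldrexh / ldaexh
      assert(ldrexKnownZeroMask(32) == 0x00000000u); // ldrex  / ldaex
      return 0;
    }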
if (Constraint.length() != 1) return; @@ -10974,16 +10612,41 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); SDLoc dl(Op); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, - 0, getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair CallInfo = LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair CallInfo = LowerCallTo(CLI); return CallInfo.first; } +SDValue +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "unsupported target platform"); + SDLoc DL(Op); + + // Get the inputs. + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + + SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, + DAG.getConstant(2, MVT::i32)); + + SDValue Flag; + Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); + Flag = Chain.getValue(1); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); + + SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); + Chain = NewSP.getValue(1); + + SDValue Ops[2] = { NewSP, Chain }; + return DAG.getMergeValues(Ops, DL); +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -11069,6 +10732,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -11081,6 +10745,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } + case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; @@ -11093,6 +10758,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; @@ -11104,6 +10770,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldaexd: case Intrinsic::arm_ldrexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; @@ -11121,3 +10788,183 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } + +/// \brief Returns true if it is beneficial to convert a load of a constant +/// to just the constant itself. +bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned Bits = Ty->getPrimitiveSizeInBits(); + if (Bits == 0 || Bits > 32) + return false; + return true; +} + +bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { + // Loads and stores less than 64-bits are already atomic; ones above that + // are doomed anyway, so defer to the default libcall and blame the OS when + // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit + // anything for those. 
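The size policy spelled out in the comment above, and implemented by shouldExpandAtomicInIR immediately below, can be restated as a small standalone predicate (a sketch over plain sizes; the real code inspects IR Instructions):

    #include <cassert>

    enum class AtomicKind { Load, Store, RMWOrCmpXchg };

    // Plain loads and stores only need ldrexd/strexd expansion at exactly
    // 64 bits, and only where those instructions exist (not on M-class).
    // Everything else can be expanded up to the exclusive-monitor width:
    // 32 bits on M-class, 64 bits on the other profiles.
    static bool shouldExpandModel(AtomicKind Kind, unsigned SizeInBits,
                                  bool IsMClass) {
      if (Kind == AtomicKind::Load || Kind == AtomicKind::Store)
        return SizeInBits == 64 && !IsMClass;
      unsigned Limit = IsMClass ? 32 : 64;
      return SizeInBits <= Limit;
    }

    int main() {
      assert(!shouldExpandModel(AtomicKind::Load, 32, /*IsMClass=*/false));
      assert( shouldExpandModel(AtomicKind::Load, 64, /*IsMClass=*/false));
      assert(!shouldExpandModel(AtomicKind::Load, 64, /*IsMClass=*/true));
      assert( shouldExpandModel(AtomicKind::RMWOrCmpXchg, 64, /*IsMClass=*/false));
      assert(!shouldExpandModel(AtomicKind::RMWOrCmpXchg, 64, /*IsMClass=*/true));
      return 0;
    }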
+ bool IsMClass = Subtarget->isMClass(); + if (StoreInst *SI = dyn_cast(Inst)) { + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return Size == 64 && !IsMClass; + } else if (LoadInst *LI = dyn_cast(Inst)) { + return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass; + } + + // For the real atomic operations, we have ldrex/strex up to 32 bits, + // and up to 64 bits on the non-M profiles + unsigned AtomicLimit = IsMClass ? 32 : 64; + return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit; +} + +// This has so far only been implemented for MachO. +bool ARMTargetLowering::useLoadStackGuardNode() const { + return Subtarget->getTargetTriple().getObjectFormat() == Triple::MachO; +} + +Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Type *ValTy = cast(Addr->getType())->getElementType(); + bool IsAcquire = + Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd + // intrinsic must return {i32, i32} and we have to recombine them into a + // single i64 here. + if (ValTy->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); + + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); + + Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); + Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); + Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); + Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); + return Builder.CreateOr( + Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); + } + + Type *Tys[] = { Addr->getType() }; + Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; + Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateTruncOrBitCast( + Builder.CreateCall(Ldrex, Addr), + cast(Addr->getType())->getElementType()); +} + +Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, + AtomicOrdering Ord) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + bool IsRelease = + Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; + + // Since the intrinsics must have legal type, the i64 intrinsics take two + // parameters: "i32, i32". We must marshal Val into the appropriate form + // before the call. + if (Val->getType()->getPrimitiveSizeInBits() == 64) { + Intrinsic::ID Int = + IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; + Function *Strex = Intrinsic::getDeclaration(M, Int); + Type *Int32Ty = Type::getInt32Ty(M->getContext()); + + Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); + Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); + Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); + return Builder.CreateCall3(Strex, Lo, Hi, Addr); + } + + Intrinsic::ID Int = IsRelease ? 
Intrinsic::arm_stlex : Intrinsic::arm_strex; + Type *Tys[] = { Addr->getType() }; + Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); + + return Builder.CreateCall2( + Strex, Builder.CreateZExtOrBitCast( + Val, Strex->getFunctionType()->getParamType(0)), + Addr); +} + +enum HABaseType { + HA_UNKNOWN = 0, + HA_FLOAT, + HA_DOUBLE, + HA_VECT64, + HA_VECT128 +}; + +static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, + uint64_t &Members) { + if (const StructType *ST = dyn_cast(Ty)) { + for (unsigned i = 0; i < ST->getNumElements(); ++i) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) + return false; + Members += SubMembers; + } + } else if (const ArrayType *AT = dyn_cast(Ty)) { + uint64_t SubMembers = 0; + if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) + return false; + Members += SubMembers * AT->getNumElements(); + } else if (Ty->isFloatTy()) { + if (Base != HA_UNKNOWN && Base != HA_FLOAT) + return false; + Members = 1; + Base = HA_FLOAT; + } else if (Ty->isDoubleTy()) { + if (Base != HA_UNKNOWN && Base != HA_DOUBLE) + return false; + Members = 1; + Base = HA_DOUBLE; + } else if (const VectorType *VT = dyn_cast(Ty)) { + Members = 1; + switch (Base) { + case HA_FLOAT: + case HA_DOUBLE: + return false; + case HA_VECT64: + return VT->getBitWidth() == 64; + case HA_VECT128: + return VT->getBitWidth() == 128; + case HA_UNKNOWN: + switch (VT->getBitWidth()) { + case 64: + Base = HA_VECT64; + return true; + case 128: + Base = HA_VECT128; + return true; + default: + return false; + } + } + } + + return (Members > 0 && Members <= 4); +} + +/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate. +bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { + if (getEffectiveCallingConv(CallConv, isVarArg) != + CallingConv::ARM_AAPCS_VFP) + return false; + + HABaseType Base = HA_UNKNOWN; + uint64_t Members = 0; + bool result = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + return result; +}
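As a quick illustration of the classification that isHomogeneousAggregate performs for functionArgumentNeedsConsecutiveRegisters, here is how a few made-up C++ argument types would fare under the rules above (the types and the register notes are examples, not taken from the patch):

    // Illustrative only: classification of a few made-up argument types under
    // the homogeneous-aggregate rules implemented above.
    struct S1 { float a, b, c, d; };   // HA: four float members (candidate for s0-s3)
    struct S2 { double d[3]; };        // HA: three double members (candidate for d0-d2)
    struct S3 { float f; double d; };  // not an HA: mixed base types
    struct S4 { float f[5]; };         // not an HA: more than four members
    struct S5 { S1 inner; };           // HA: nested members are flattened, still four floats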