X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTarget%2FARM%2FARMISelLowering.cpp;h=dec7a72bedaaff5cbf7478a2965e951a9c48c6fc;hp=dee71bc3310236bf9a646fec4f5cb06084cca8b8;hb=c25e7581b9b8088910da31702d4ca21c4734c6d7;hpb=fd29e0eb060ea8b4d490860329234d2ae5f5952e diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index dee71bc3310..dec7a72beda 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -22,29 +22,92 @@ #include "ARMTargetMachine.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" +#include "llvm/Function.h" #include "llvm/Instruction.h" #include "llvm/Intrinsics.h" #include "llvm/GlobalValue.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" using namespace llvm; +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + +void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, + MVT PromotedBitwiseVT) { + if (VT != PromotedLdStVT) { + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); + + setOperationAction(ISD::STORE, VT, Promote); + AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); + } + + MVT ElemTy = VT.getVectorElementType(); + if (ElemTy != MVT::i64 && ElemTy != MVT::f64) + setOperationAction(ISD::VSETCC, VT, Custom); + if (ElemTy == MVT::i8 || ElemTy == MVT::i16) + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); + if (VT.isInteger()) { + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); + } + + // Promote all bit-wise operations. + if (VT.isInteger() && VT != PromotedBitwiseVT) { + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); + } +} + +void ARMTargetLowering::addDRTypeForNEON(MVT VT) { + addRegisterClass(VT, ARM::DPRRegisterClass); + addTypeForNEON(VT, MVT::f64, MVT::v2i32); +} + +void ARMTargetLowering::addQRTypeForNEON(MVT VT) { + addRegisterClass(VT, ARM::QPRRegisterClass); + addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); +} + ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) : TargetLowering(TM), ARMPCLabelIndex(0) { Subtarget = &TM.getSubtarget(); if (Subtarget->isTargetDarwin()) { - // Don't have these. - setLibcallName(RTLIB::UINTTOFP_I64_F32, NULL); - setLibcallName(RTLIB::UINTTOFP_I64_F64, NULL); - // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2()) { // Single-precision floating-point arithmetic. @@ -112,8 +175,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Integer to floating-point conversions. // i64 conversions are done via library routines even when generating VFP // instructions, so use the same ones. - // FIXME: There appears to be some naming inconsistency in ARM libgcc: e.g. - // __floatunsidf vs. __floatunssidfvfp. + // FIXME: There appears to be some naming inconsistency in ARM libgcc: + // e.g., __floatunsidf vs. __floatunssidfvfp. setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); @@ -121,36 +184,70 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } - addRegisterClass(MVT::i32, ARM::GPRRegisterClass); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + // These libcalls are not available in 32-bit. + setLibcallName(RTLIB::SHL_I128, 0); + setLibcallName(RTLIB::SRL_I128, 0); + setLibcallName(RTLIB::SRA_I128, 0); + + if (Subtarget->isThumb1Only()) + addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); + else + addRegisterClass(MVT::i32, ARM::GPRRegisterClass); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, ARM::SPRRegisterClass); addRegisterClass(MVT::f64, ARM::DPRRegisterClass); - + setTruncStoreAction(MVT::f64, MVT::f32, Expand); } + + if (Subtarget->hasNEON()) { + addDRTypeForNEON(MVT::v2f32); + addDRTypeForNEON(MVT::v8i8); + addDRTypeForNEON(MVT::v4i16); + addDRTypeForNEON(MVT::v2i32); + addDRTypeForNEON(MVT::v1i64); + + addQRTypeForNEON(MVT::v4f32); + addQRTypeForNEON(MVT::v2f64); + addQRTypeForNEON(MVT::v16i8); + addQRTypeForNEON(MVT::v8i16); + addQRTypeForNEON(MVT::v4i32); + addQRTypeForNEON(MVT::v2i64); + + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); + } + computeRegisterProperties(); // ARM does not have f32 extending load. - setLoadXAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); // ARM does not have i1 sign extending load. - setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); // ARM supports all 4 flavors of integer indexed load / store. - for (unsigned im = (unsigned)ISD::PRE_INC; - im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { - setIndexedLoadAction(im, MVT::i1, Legal); - setIndexedLoadAction(im, MVT::i8, Legal); - setIndexedLoadAction(im, MVT::i16, Legal); - setIndexedLoadAction(im, MVT::i32, Legal); - setIndexedStoreAction(im, MVT::i1, Legal); - setIndexedStoreAction(im, MVT::i8, Legal); - setIndexedStoreAction(im, MVT::i16, Legal); - setIndexedStoreAction(im, MVT::i32, Legal); + if (!Subtarget->isThumb1Only()) { + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::i1, Legal); + setIndexedLoadAction(im, MVT::i8, Legal); + setIndexedLoadAction(im, MVT::i16, Legal); + setIndexedLoadAction(im, MVT::i32, Legal); + setIndexedStoreAction(im, MVT::i1, Legal); + setIndexedStoreAction(im, MVT::i8, Legal); + setIndexedStoreAction(im, MVT::i16, Legal); + setIndexedStoreAction(im, MVT::i32, Legal); + } } // i64 operation support. - if (Subtarget->isThumb()) { + if (Subtarget->isThumb1Only()) { setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); setOperationAction(ISD::MULHS, MVT::i32, Expand); @@ -159,7 +256,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } else { setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); - if (!Subtarget->hasV6Ops()) + if (!Subtarget->isThumb1Only() && !Subtarget->hasV6Ops()) setOperationAction(ISD::MULHS, MVT::i32, Expand); } setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); @@ -170,9 +267,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // ARM does not have ROTL. setOperationAction(ISD::ROTL, MVT::i32, Expand); - setOperationAction(ISD::CTTZ , MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i32, Expand); - if (!Subtarget->hasV5TOps() || Subtarget->isThumb()) + if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); // Only ARMv6 has BSWAP. @@ -186,9 +283,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - + // Support label based line numbers. - setOperationAction(ISD::LOCATION, MVT::Other, Expand); + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); setOperationAction(ISD::RET, MVT::Other, Custom); @@ -197,83 +294,103 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - // Expand mem operations genericly. - setOperationAction(ISD::MEMSET , MVT::Other, Expand); - setOperationAction(ISD::MEMCPY , MVT::Other, Custom); - setOperationAction(ISD::MEMMOVE , MVT::Other, Expand); - // Use the default implementation. - setOperationAction(ISD::VASTART , MVT::Other, Custom); - setOperationAction(ISD::VAARG , MVT::Other, Expand); - setOperationAction(ISD::VACOPY , MVT::Other, Expand); - setOperationAction(ISD::VAEND , MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - if (!Subtarget->hasV6Ops()) { + if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) - // Turn f64->i64 into FMRRD iff target supports vfp2. + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) + // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2. setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::SETCC , MVT::i32, Expand); - setOperationAction(ISD::SETCC , MVT::f32, Expand); - setOperationAction(ISD::SETCC , MVT::f64, Expand); - setOperationAction(ISD::SELECT , MVT::i32, Expand); - setOperationAction(ISD::SELECT , MVT::f32, Expand); - setOperationAction(ISD::SELECT , MVT::f64, Expand); + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f64, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - setOperationAction(ISD::BRCOND , MVT::Other, Expand); - setOperationAction(ISD::BR_CC , MVT::i32, Custom); - setOperationAction(ISD::BR_CC , MVT::f32, Custom); - setOperationAction(ISD::BR_CC , MVT::f64, Custom); - setOperationAction(ISD::BR_JT , MVT::Other, Custom); - - // FP Constants can't be immediates. - setOperationAction(ISD::ConstantFP, MVT::f64, Expand); - setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, Custom); // We don't support sin/cos/fmod/copysign/pow - setOperationAction(ISD::FSIN , MVT::f64, Expand); - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FREM , MVT::f64, Expand); - setOperationAction(ISD::FREM , MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - setOperationAction(ISD::FPOW , MVT::f64, Expand); - setOperationAction(ISD::FPOW , MVT::f32, Expand); - + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + } + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + // int <-> fp are custom expanded into bit_convert + ARMISD ops. - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + } // We have target-specific dag combine patterns for the following nodes: // ARMISD::FMRRD - No need to call setTargetDAGCombine - + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); + setStackPointerRegisterToSaveRestore(ARM::SP); setSchedulingPreference(SchedulingForRegPressure); setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2); + if (!Subtarget->isThumb()) { + // Use branch latency information to determine if-conversion limits. + // FIXME: If-converter should use instruction latency of the branch being + // eliminated to compute the threshold. For ARMv6, the branch "latency" + // varies depending on whether it's dynamically or statically predicted + // and on whether the destination is in the prefetch buffer. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData(); + unsigned Latency= InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass()); + if (Latency > 1) { + setIfCvtBlockSizeLimit(Latency-1); + if (Latency > 2) + setIfCvtDupBlockSizeLimit(Latency-2); + } else { + setIfCvtBlockSizeLimit(10); + setIfCvtDupBlockSizeLimit(2); + } + } + maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type + // Do not enable CodePlacementOpt for now: it currently runs after the + // ARMConstantIslandPass and messes up branch relaxation and placement + // of constant islands. + // benefitFromCodePlacementOpt = true; } - const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; @@ -288,13 +405,13 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; case ARMISD::CMP: return "ARMISD::CMP"; - case ARMISD::CMPNZ: return "ARMISD::CMPNZ"; + case ARMISD::CMPZ: return "ARMISD::CMPZ"; case ARMISD::CMPFP: return "ARMISD::CMPFP"; case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; case ARMISD::CNEG: return "ARMISD::CNEG"; - + case ARMISD::FTOSI: return "ARMISD::FTOSI"; case ARMISD::FTOUI: return "ARMISD::FTOUI"; case ARMISD::SITOF: return "ARMISD::SITOF"; @@ -303,23 +420,57 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; - + case ARMISD::FMRRD: return "ARMISD::FMRRD"; case ARMISD::FMDRR: return "ARMISD::FMDRR"; case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; + + case ARMISD::VCEQ: return "ARMISD::VCEQ"; + case ARMISD::VCGE: return "ARMISD::VCGE"; + case ARMISD::VCGEU: return "ARMISD::VCGEU"; + case ARMISD::VCGT: return "ARMISD::VCGT"; + case ARMISD::VCGTU: return "ARMISD::VCGTU"; + case ARMISD::VTST: return "ARMISD::VTST"; + + case ARMISD::VSHL: return "ARMISD::VSHL"; + case ARMISD::VSHRs: return "ARMISD::VSHRs"; + case ARMISD::VSHRu: return "ARMISD::VSHRu"; + case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; + case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; + case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; + case ARMISD::VSHRN: return "ARMISD::VSHRN"; + case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; + case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; + case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; + case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; + case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; + case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; + case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; + case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; + case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; + case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; + case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; + case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; + case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; + case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; + case ARMISD::VDUPLANEQ: return "ARMISD::VDUPLANEQ"; } } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { + return getTargetMachine().getSubtarget().isThumb() ? 1 : 2; +} + //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// - /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { switch (CC) { - default: assert(0 && "Unknown condition code!"); + default: LLVM_UNREACHABLE("Unknown condition code!"); case ISD::SETNE: return ARMCC::NE; case ISD::SETEQ: return ARMCC::EQ; case ISD::SETGT: return ARMCC::GT; @@ -341,7 +492,7 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, bool Invert = false; CondCode2 = ARMCC::AL; switch (CC) { - default: assert(0 && "Unknown FP condition!"); + default: LLVM_UNREACHABLE("Unknown FP condition!"); case ISD::SETEQ: case ISD::SETOEQ: CondCode = ARMCC::EQ; break; case ISD::SETGT: @@ -366,161 +517,412 @@ static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, return Invert; } -static void -HowToPassArgument(MVT::ValueType ObjectVT, unsigned NumGPRs, - unsigned StackOffset, unsigned &NeededGPRs, - unsigned &NeededStackSize, unsigned &GPRPad, - unsigned &StackPad, unsigned Flags) { - NeededStackSize = 0; - NeededGPRs = 0; - StackPad = 0; - GPRPad = 0; - unsigned align = (Flags >> ISD::ParamFlags::OrigAlignmentOffs); - GPRPad = NumGPRs % ((align + 3)/4); - StackPad = StackOffset % align; - unsigned firstGPR = NumGPRs + GPRPad; - switch (ObjectVT) { - default: assert(0 && "Unhandled argument type!"); - case MVT::i32: - case MVT::f32: - if (firstGPR < 4) - NeededGPRs = 1; - else - NeededStackSize = 4; - break; - case MVT::i64: - case MVT::f64: - if (firstGPR < 3) - NeededGPRs = 2; - else if (firstGPR == 3) { - NeededGPRs = 1; - NeededStackSize = 4; - } else - NeededStackSize = 8; +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +// +// The lower operations present on calling convention works on this order: +// LowerCALL (virt regs --> phys regs, virt regs --> stack) +// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs) +// LowerRET (virt regs --> phys regs) +// LowerCALL (phys regs --> virt regs) +// +//===----------------------------------------------------------------------===// + +#include "ARMGenCallingConv.inc" + +// APCS f64 is in register pairs, possibly split to stack +static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + CCState &State, bool CanFail) { + static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + + // Try to get the first register. + if (unsigned Reg = State.AllocateReg(RegList, 4)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else { + // For the 2nd half of a v2f64, do not fail. + if (CanFail) + return false; + + // Put the whole thing on the stack. + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(8, 4), + LocVT, LocInfo)); + return true; + } + + // Try to get the second register. + if (unsigned Reg = State.AllocateReg(RegList, 4)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(4, 4), + LocVT, LocInfo)); + return true; +} + +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true)) + return false; + if (LocVT == MVT::v2f64 && + !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false)) + return false; + return true; // we handled it +} + +// AAPCS f64 is in aligned register pairs +static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + CCState &State, bool CanFail) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + if (Reg == 0) { + // For the 2nd half of a v2f64, do not just fail. + if (CanFail) + return false; + + // Put the whole thing on the stack. + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(8, 8), + LocVT, LocInfo)); + return true; + } + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + LocVT, LocInfo)); + return true; +} + +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true)) + return false; + if (LocVT == MVT::v2f64 && + !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false)) + return false; + return true; // we handled it +} + +static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + LocVT, LocInfo)); + return true; +} + +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State)) + return false; + if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State)) + return false; + return true; // we handled it +} + +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +/// CCAssignFnForNode - Selects the correct CCAssignFn for a the +/// given CallingConvention value. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC, + bool Return) const { + switch (CC) { + default: + LLVM_UNREACHABLE("Unsupported calling convention"); + case CallingConv::C: + case CallingConv::Fast: + // Use target triple & subtarget features to do actual dispatch. + if (Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && + FloatABIType == FloatABI::Hard) + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + else + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + } else + return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + case CallingConv::ARM_APCS: + return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + } +} + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. The returns a SDNode with the same number of values as the +/// ISD::CALL. +SDNode *ARMTargetLowering:: +LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG) { + + DebugLoc dl = TheCall->getDebugLoc(); + // Assign locations to each value returned by this call. + SmallVector RVLocs; + bool isVarArg = TheCall->isVarArg(); + CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), + RVLocs, DAG.getContext()); + CCInfo.AnalyzeCallResult(TheCall, + CCAssignFnForNode(CallingConv, /* Return*/ true)); + + SmallVector ResultVals; + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + SDValue Val; + if (VA.needsCustom()) { + // Handle f64 or half of a v2f64. + SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Lo.getValue(1); + InFlag = Lo.getValue(2); + VA = RVLocs[++i]; // skip ahead to next loc + SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Hi.getValue(1); + InFlag = Hi.getValue(2); + Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + + if (VA.getLocVT() == MVT::v2f64) { + SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, + DAG.getConstant(0, MVT::i32)); + + VA = RVLocs[++i]; // skip ahead to next loc + Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); + Chain = Lo.getValue(1); + InFlag = Lo.getValue(2); + VA = RVLocs[++i]; // skip ahead to next loc + Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); + Chain = Hi.getValue(1); + InFlag = Hi.getValue(2); + Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, + DAG.getConstant(1, MVT::i32)); + } + } else { + Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + } + + switch (VA.getLocInfo()) { + default: LLVM_UNREACHABLE("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val); + break; + } + + ResultVals.push_back(Val); + } + + // Merge everything together with a MERGE_VALUES node. + ResultVals.push_back(Chain); + return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), + &ResultVals[0], ResultVals.size()).getNode(); +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. +/// Sometimes what we are copying is the end of a larger object, the part that +/// does not fit in registers. +static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + DebugLoc dl) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*AlwaysInline=*/false, NULL, 0, NULL, 0); +} + +/// LowerMemOpCallTo - Store the argument to the stack. +SDValue +ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, + const SDValue &StackPtr, + const CCValAssign &VA, SDValue Chain, + SDValue Arg, ISD::ArgFlagsTy Flags) { + DebugLoc dl = TheCall->getDebugLoc(); + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + if (Flags.isByVal()) { + return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); + } + return DAG.getStore(Chain, dl, Arg, PtrOff, + PseudoSourceValue::getStack(), LocMemOffset); +} + +void ARMTargetLowering::PassF64ArgInRegs(CallSDNode *TheCall, SelectionDAG &DAG, + SDValue Chain, SDValue &Arg, + RegsToPassVector &RegsToPass, + CCValAssign &VA, CCValAssign &NextVA, + SDValue &StackPtr, + SmallVector &MemOpChains, + ISD::ArgFlagsTy Flags) { + DebugLoc dl = TheCall->getDebugLoc(); + + SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Arg); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + + if (NextVA.isRegLoc()) + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); + else { + assert(NextVA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, NextVA, + Chain, fmrrd.getValue(1), Flags)); } } /// LowerCALL - Lowering a ISD::CALL node into a callseq_start <- /// ARMISD:CALL <- callseq_end chain. Also add input and output parameter /// nodes. -SDOperand ARMTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { - MVT::ValueType RetVT= Op.Val->getValueType(0); - SDOperand Chain = Op.getOperand(0); - unsigned CallConv = cast(Op.getOperand(1))->getValue(); - assert((CallConv == CallingConv::C || - CallConv == CallingConv::Fast) && "unknown calling convention"); - SDOperand Callee = Op.getOperand(4); - unsigned NumOps = (Op.getNumOperands() - 5) / 2; - unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot - unsigned NumGPRs = 0; // GPRs used for parameter passing. - - // Count how many bytes are to be pushed on the stack. - unsigned NumBytes = 0; - - // Add up all the space actually used. - for (unsigned i = 0; i < NumOps; ++i) { - unsigned ObjSize; - unsigned ObjGPRs; - unsigned StackPad; - unsigned GPRPad; - MVT::ValueType ObjectVT = Op.getOperand(5+2*i).getValueType(); - unsigned Flags = Op.getConstantOperandVal(5+2*i+1); - HowToPassArgument(ObjectVT, NumGPRs, NumBytes, ObjGPRs, ObjSize, - GPRPad, StackPad, Flags); - NumBytes += ObjSize + StackPad; - NumGPRs += ObjGPRs + GPRPad; - } +SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { + CallSDNode *TheCall = cast(Op.getNode()); + MVT RetVT = TheCall->getRetValType(0); + SDValue Chain = TheCall->getChain(); + unsigned CC = TheCall->getCallingConv(); + bool isVarArg = TheCall->isVarArg(); + SDValue Callee = TheCall->getCallee(); + DebugLoc dl = TheCall->getDebugLoc(); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, DAG.getContext()); + CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC, /* Return*/ false)); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getConstant(NumBytes, MVT::i32)); - - SDOperand StackPtr = DAG.getRegister(ARM::SP, MVT::i32); - - static const unsigned GPRArgRegs[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3 - }; - - NumGPRs = 0; - std::vector > RegsToPass; - std::vector MemOpChains; - for (unsigned i = 0; i != NumOps; ++i) { - SDOperand Arg = Op.getOperand(5+2*i); - unsigned Flags = Op.getConstantOperandVal(5+2*i+1); - MVT::ValueType ArgVT = Arg.getValueType(); - - unsigned ObjSize; - unsigned ObjGPRs; - unsigned GPRPad; - unsigned StackPad; - HowToPassArgument(ArgVT, NumGPRs, ArgOffset, ObjGPRs, - ObjSize, GPRPad, StackPad, Flags); - NumGPRs += GPRPad; - ArgOffset += StackPad; - if (ObjGPRs > 0) { - switch (ArgVT) { - default: assert(0 && "Unexpected ValueType for argument!"); - case MVT::i32: - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Arg)); - break; - case MVT::f32: - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], - DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Arg))); - break; - case MVT::i64: { - SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Arg, - DAG.getConstant(0, getPointerTy())); - SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Arg, - DAG.getConstant(1, getPointerTy())); - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Lo)); - if (ObjGPRs == 2) - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1], Hi)); - else { - SDOperand PtrOff= DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Hi, PtrOff, NULL, 0)); - } - break; - } - case MVT::f64: { - SDOperand Cvt = DAG.getNode(ARMISD::FMRRD, - DAG.getVTList(MVT::i32, MVT::i32), - &Arg, 1); - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs], Cvt)); - if (ObjGPRs == 2) - RegsToPass.push_back(std::make_pair(GPRArgRegs[NumGPRs+1], - Cvt.getValue(1))); - else { - SDOperand PtrOff= DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Cvt.getValue(1), PtrOff, - NULL, 0)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32); + + RegsToPassVector RegsToPass; + SmallVector MemOpChains; + + // Walk the register/memloc assignments, inserting copies/loads. In the case + // of tail call optimization, arguments are handled later. + for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); + i != e; + ++i, ++realArgIdx) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = TheCall->getArg(realArgIdx); + ISD::ArgFlagsTy Flags = TheCall->getArgFlags(realArgIdx); + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: LLVM_UNREACHABLE("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + break; + } + + // f64 and v2f64 are passed in i32 pairs and must be split into pieces + if (VA.needsCustom()) { + if (VA.getLocVT() == MVT::v2f64) { + SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(0, MVT::i32)); + SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(1, MVT::i32)); + + PassF64ArgInRegs(TheCall, DAG, Chain, Op0, RegsToPass, + VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); + + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isRegLoc()) { + PassF64ArgInRegs(TheCall, DAG, Chain, Op1, RegsToPass, + VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); + } else { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, Op1, Flags)); } - break; - } + } else { + PassF64ArgInRegs(TheCall, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], + StackPtr, MemOpChains, Flags); } + } else if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { - assert(ObjSize != 0); - SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); - PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); - } + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); - NumGPRs += ObjGPRs; - ArgOffset += ObjSize; + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, Arg, Flags)); + } } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. - SDOperand InFlag; + SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, - InFlag); + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -541,14 +943,16 @@ SDOperand ARMTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && !isExt; // tBX takes a register source operand. - if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) { + if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPStub, 4); - SDOperand CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 2); - CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), CPAddr, NULL, 0); - SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); - Callee = DAG.getNode(ARMISD::PIC_ADD, getPointerTy(), Callee, PICLabel); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, + getPointerTy(), Callee, PICLabel); } else Callee = DAG.getTargetGlobalAddress(GV, getPointerTy()); } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { @@ -558,21 +962,23 @@ SDOperand ARMTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { isARMFunc = !Subtarget->isThumb() || isStub; // tBX takes a register source operand. const char *Sym = S->getSymbol(); - if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) { + if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex, ARMCP::CPStub, 4); - SDOperand CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 2); - CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), CPAddr, NULL, 0); - SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); - Callee = DAG.getNode(ARMISD::PIC_ADD, getPointerTy(), Callee, PICLabel); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, + getPointerTy(), Callee, PICLabel); } else Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); } // FIXME: handle tail calls differently. unsigned CallOpc; - if (Subtarget->isThumb()) { + if (Subtarget->isThumb1Only()) { if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc)) CallOpc = ARMISD::CALL_NOLINK; else @@ -582,18 +988,13 @@ SDOperand ARMTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) : ARMISD::CALL_NOLINK; } - if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb()) { + if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) { // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK - Chain = DAG.getCopyToReg(Chain, ARM::LR, - DAG.getNode(ISD::UNDEF, MVT::i32), InFlag); + Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag); InFlag = Chain.getValue(1); } - std::vector NodeTys; - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. - - std::vector Ops; + std::vector Ops; Ops.push_back(Chain); Ops.push_back(Callee); @@ -603,141 +1004,153 @@ SDOperand ARMTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) { Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - if (InFlag.Val) + if (InFlag.getNode()) Ops.push_back(InFlag); - Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size()); + // Returns a chain and a flag for retval copy to use. + Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag), + &Ops[0], Ops.size()); InFlag = Chain.getValue(1); - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, MVT::i32), - DAG.getConstant(0, MVT::i32), - InFlag); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); if (RetVT != MVT::Other) InFlag = Chain.getValue(1); - std::vector ResultVals; - NodeTys.clear(); + // Handle result values, copying them out of physregs into vregs that we + // return. + return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), + Op.getResNo()); +} - // If the call has results, copy the values out of the ret val registers. - switch (RetVT) { - default: assert(0 && "Unexpected ret value!"); - case MVT::Other: - break; - case MVT::i32: - Chain = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag).getValue(1); - ResultVals.push_back(Chain.getValue(0)); - if (Op.Val->getValueType(1) == MVT::i32) { - // Returns a i64 value. - Chain = DAG.getCopyFromReg(Chain, ARM::R1, MVT::i32, - Chain.getValue(2)).getValue(1); - ResultVals.push_back(Chain.getValue(0)); - NodeTys.push_back(MVT::i32); - } - NodeTys.push_back(MVT::i32); - break; - case MVT::f32: - Chain = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag).getValue(1); - ResultVals.push_back(DAG.getNode(ISD::BIT_CONVERT, MVT::f32, - Chain.getValue(0))); - NodeTys.push_back(MVT::f32); - break; - case MVT::f64: { - SDOperand Lo = DAG.getCopyFromReg(Chain, ARM::R0, MVT::i32, InFlag); - SDOperand Hi = DAG.getCopyFromReg(Lo, ARM::R1, MVT::i32, Lo.getValue(2)); - ResultVals.push_back(DAG.getNode(ARMISD::FMDRR, MVT::f64, Lo, Hi)); - NodeTys.push_back(MVT::f64); - break; - } +SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { + // The chain is always operand #0 + SDValue Chain = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + + // CCValAssign - represent the assignment of the return value to a location. + SmallVector RVLocs; + unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + + // CCState - Info about the registers and stack slots. + CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs, DAG.getContext()); + + // Analyze return values of ISD::RET. + CCInfo.AnalyzeReturn(Op.getNode(), CCAssignFnForNode(CC, /* Return */ true)); + + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } - NodeTys.push_back(MVT::Other); + SDValue Flag; - if (ResultVals.empty()) - return Chain; + // Copy the result values into the output registers. + for (unsigned i = 0, realRVLocIdx = 0; + i != RVLocs.size(); + ++i, ++realRVLocIdx) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); - ResultVals.push_back(Chain); - SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys, &ResultVals[0], - ResultVals.size()); - return Res.getValue(Op.ResNo); -} + // ISD::RET => ret chain, (regnum1,val1), ... + // So i*2+1 index only the regnums + SDValue Arg = Op.getOperand(realRVLocIdx*2+1); -static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) { - SDOperand Copy; - SDOperand Chain = Op.getOperand(0); - switch(Op.getNumOperands()) { - default: - assert(0 && "Do not know how to return this many arguments!"); - abort(); - case 1: { - SDOperand LR = DAG.getRegister(ARM::LR, MVT::i32); - return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Chain); - } - case 3: - Op = Op.getOperand(1); - if (Op.getValueType() == MVT::f32) { - Op = DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op); - } else if (Op.getValueType() == MVT::f64) { + switch (VA.getLocInfo()) { + default: LLVM_UNREACHABLE("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + break; + } + + if (VA.needsCustom()) { + if (VA.getLocVT() == MVT::v2f64) { + // Extract the first half and return it in two registers. + SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(0, MVT::i32)); + SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Half); + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + HalfGPRs.getValue(1), Flag); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + + // Extract the 2nd half and fall through to handle it as an f64 value. + Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(1, MVT::i32)); + } // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. - Op = DAG.getNode(ARMISD::FMRRD, DAG.getVTList(MVT::i32, MVT::i32), &Op,1); - SDOperand Sign = DAG.getConstant(0, MVT::i32); - return DAG.getNode(ISD::RET, MVT::Other, Chain, Op, Sign, - Op.getValue(1), Sign); - } - Copy = DAG.getCopyToReg(Chain, ARM::R0, Op, SDOperand()); - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) - DAG.getMachineFunction().getRegInfo().addLiveOut(ARM::R0); - break; - case 5: - Copy = DAG.getCopyToReg(Chain, ARM::R1, Op.getOperand(3), SDOperand()); - Copy = DAG.getCopyToReg(Copy, ARM::R0, Op.getOperand(1), Copy.getValue(1)); - // If we haven't noted the R0+R1 are live out, do so now. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - DAG.getMachineFunction().getRegInfo().addLiveOut(ARM::R0); - DAG.getMachineFunction().getRegInfo().addLiveOut(ARM::R1); - } - break; + SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Flag); + } else + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + + // Guarantee that all emitted copies are + // stuck together, avoiding something bad. + Flag = Chain.getValue(1); } - //We must use RET_FLAG instead of BRIND because BRIND doesn't have a flag - return DAG.getNode(ARMISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1)); + SDValue result; + if (Flag.getNode()) + result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + else // Return Void + result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); + + return result; } -// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as +// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target countpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOVi. -static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { - MVT::ValueType PtrVT = Op.getValueType(); +static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { + MVT PtrVT = Op.getValueType(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); ConstantPoolSDNode *CP = cast(Op); - SDOperand Res; + SDValue Res; if (CP->isMachineConstantPoolEntry()) Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlignment()); else Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment()); - return DAG.getNode(ARMISD::Wrapper, MVT::i32, Res); + return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } // Lower ISD::GlobalTLSAddress using the "general dynamic" model -SDOperand +SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) { - MVT::ValueType PtrVT = getPointerTy(); + DebugLoc dl = GA->getDebugLoc(); + MVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, "tlsgd", true); - SDOperand Argument = DAG.getTargetConstantPool(CPV, PtrVT, 2); - Argument = DAG.getNode(ARMISD::Wrapper, MVT::i32, Argument); - Argument = DAG.getLoad(PtrVT, DAG.getEntryNode(), Argument, NULL, 0); - SDOperand Chain = Argument.getValue(1); + SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0); + SDValue Chain = Argument.getValue(1); - SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); - Argument = DAG.getNode(ARMISD::PIC_ADD, PtrVT, Argument, PICLabel); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); // call __tls_get_addr. ArgListTy Args; @@ -745,24 +1158,26 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Entry.Node = Argument; Entry.Ty = (const Type *) Type::Int32Ty; Args.push_back(Entry); - std::pair CallResult = - LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, - CallingConv::C, false, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG); + // FIXME: is there useful debug info available here? + std::pair CallResult = + LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false, + 0, CallingConv::C, false, + DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } // Lower ISD::GlobalTLSAddress using the "initial exec" or // "local exec" model. -SDOperand +SDValue ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG) { + SelectionDAG &DAG) { GlobalValue *GV = GA->getGlobal(); - SDOperand Offset; - SDOperand Chain = DAG.getEntryNode(); - MVT::ValueType PtrVT = getPointerTy(); + DebugLoc dl = GA->getDebugLoc(); + SDValue Offset; + SDValue Chain = DAG.getEntryNode(); + MVT PtrVT = getPointerTy(); // Get the Thread Pointer - SDOperand ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, PtrVT); + SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); if (GV->isDeclaration()){ // initial exec model @@ -770,31 +1185,31 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, "gottpoff", true); - Offset = DAG.getTargetConstantPool(CPV, PtrVT, 2); - Offset = DAG.getNode(ARMISD::Wrapper, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, Chain, Offset, NULL, 0); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); Chain = Offset.getValue(1); - SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); - Offset = DAG.getNode(ARMISD::PIC_ADD, PtrVT, Offset, PICLabel); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); - Offset = DAG.getLoad(PtrVT, Chain, Offset, NULL, 0); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); } else { // local exec model ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff"); - Offset = DAG.getTargetConstantPool(CPV, PtrVT, 2); - Offset = DAG.getNode(ARMISD::Wrapper, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, Chain, Offset, NULL, 0); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); } // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. - return DAG.getNode(ISD::ADD, PtrVT, ThreadPointer, Offset); + return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); } -SDOperand -ARMTargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) { +SDValue +ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); @@ -807,48 +1222,54 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) { return LowerToTLSExecModels(GA, DAG); } -SDOperand ARMTargetLowering::LowerGlobalAddressELF(SDOperand Op, - SelectionDAG &DAG) { - MVT::ValueType PtrVT = getPointerTy(); +SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) { + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); if (RelocM == Reloc::PIC_) { - bool UseGOTOFF = GV->hasInternalLinkage() || GV->hasHiddenVisibility(); + bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT"); - SDOperand CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2); - CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr); - SDOperand Result = DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0); - SDOperand Chain = Result.getValue(1); - SDOperand GOT = DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, PtrVT); - Result = DAG.getNode(ISD::ADD, PtrVT, Result, GOT); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + CPAddr, NULL, 0); + SDValue Chain = Result.getValue(1); + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) - Result = DAG.getLoad(PtrVT, Chain, Result, NULL, 0); + Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); return Result; } else { - SDOperand CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 2); - CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr); - return DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); } } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol /// even in non-static mode. static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) { - return RelocM != Reloc::Static && - (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || - (GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode())); + // If symbol visibility is hidden, the extra load is not needed if + // the symbol is definitely defined in the current translation unit. + bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage())) + return false; + return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker()); } -SDOperand ARMTargetLowering::LowerGlobalAddressDarwin(SDOperand Op, - SelectionDAG &DAG) { - MVT::ValueType PtrVT = getPointerTy(); +SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, + SelectionDAG &DAG) { + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); bool IsIndirect = GVIsIndirectSymbol(GV, RelocM); - SDOperand CPAddr; + SDValue CPAddr; if (RelocM == Reloc::Static) - CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 2); + CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); else { unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); @@ -856,174 +1277,245 @@ SDOperand ARMTargetLowering::LowerGlobalAddressDarwin(SDOperand Op, : ARMCP::CPValue; ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, Kind, PCAdj); - CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2); + CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } - CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDOperand Result = DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0); - SDOperand Chain = Result.getValue(1); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { - SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); - Result = DAG.getNode(ARMISD::PIC_ADD, PtrVT, Result, PICLabel); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } if (IsIndirect) - Result = DAG.getLoad(PtrVT, Chain, Result, NULL, 0); + Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); return Result; } -SDOperand ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDOperand Op, - SelectionDAG &DAG){ +SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, + SelectionDAG &DAG){ assert(Subtarget->isTargetELF() && "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); - MVT::ValueType PtrVT = getPointerTy(); + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, ARMCP::CPValue, PCAdj); - SDOperand CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2); - CPAddr = DAG.getNode(ARMISD::Wrapper, MVT::i32, CPAddr); - SDOperand Result = DAG.getLoad(PtrVT, DAG.getEntryNode(), CPAddr, NULL, 0); - SDOperand PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); - return DAG.getNode(ARMISD::PIC_ADD, PtrVT, Result, PICLabel); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } -static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - unsigned IntNo = cast(Op.getOperand(0))->getValue(); +SDValue +ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { - default: return SDOperand(); // Don't custom lower most intrinsics. + default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::arm_thread_pointer: - return DAG.getNode(ARMISD::THREAD_POINTER, PtrVT); + return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); + case Intrinsic::eh_sjlj_setjmp: + SDValue Res = DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, + Op.getOperand(1)); + return Res; } } -static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG, - unsigned VarArgsFrameIndex) { +static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, + unsigned VarArgsFrameIndex) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + DebugLoc dl = Op.getDebugLoc(); + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); } -static SDOperand LowerFORMAL_ARGUMENT(SDOperand Op, SelectionDAG &DAG, - unsigned ArgNo, unsigned &NumGPRs, - unsigned &ArgOffset) { +SDValue +ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, + SDValue &Root, SelectionDAG &DAG, + DebugLoc dl) { MachineFunction &MF = DAG.getMachineFunction(); - MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType(); - SDOperand Root = Op.getOperand(0); - std::vector ArgValues; - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - - static const unsigned GPRArgRegs[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3 - }; - - unsigned ObjSize; - unsigned ObjGPRs; - unsigned GPRPad; - unsigned StackPad; - unsigned Flags = Op.getConstantOperandVal(ArgNo + 3); - HowToPassArgument(ObjectVT, NumGPRs, ArgOffset, ObjGPRs, - ObjSize, GPRPad, StackPad, Flags); - NumGPRs += GPRPad; - ArgOffset += StackPad; - - SDOperand ArgValue; - if (ObjGPRs == 1) { - unsigned VReg = RegInfo.createVirtualRegister(&ARM::GPRRegClass); - RegInfo.addLiveIn(GPRArgRegs[NumGPRs], VReg); - ArgValue = DAG.getCopyFromReg(Root, VReg, MVT::i32); - if (ObjectVT == MVT::f32) - ArgValue = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, ArgValue); - } else if (ObjGPRs == 2) { - unsigned VReg = RegInfo.createVirtualRegister(&ARM::GPRRegClass); - RegInfo.addLiveIn(GPRArgRegs[NumGPRs], VReg); - ArgValue = DAG.getCopyFromReg(Root, VReg, MVT::i32); - - VReg = RegInfo.createVirtualRegister(&ARM::GPRRegClass); - RegInfo.addLiveIn(GPRArgRegs[NumGPRs+1], VReg); - SDOperand ArgValue2 = DAG.getCopyFromReg(Root, VReg, MVT::i32); - - assert(ObjectVT != MVT::i64 && "i64 should already be lowered"); - ArgValue = DAG.getNode(ARMISD::FMDRR, MVT::f64, ArgValue, ArgValue2); - } - NumGPRs += ObjGPRs; - - if (ObjSize) { - // If the argument is actually used, emit a load from the right stack - // slot. - if (!Op.Val->hasNUsesOfValue(0, ArgNo)) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); - SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); - if (ObjGPRs == 0) - ArgValue = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); - else { - SDOperand ArgValue2 = DAG.getLoad(MVT::i32, Root, FIN, NULL, 0); - assert(ObjectVT != MVT::i64 && "i64 should already be lowered"); - ArgValue = DAG.getNode(ARMISD::FMDRR, MVT::f64, ArgValue, ArgValue2); - } - } else { - // Don't emit a dead load. - ArgValue = DAG.getNode(ISD::UNDEF, ObjectVT); - } + ARMFunctionInfo *AFI = MF.getInfo(); + + TargetRegisterClass *RC; + if (AFI->isThumb1OnlyFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + // Transform the arguments stored in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); - ArgOffset += ObjSize; // Move on to the next argument. + SDValue ArgValue2; + if (NextVA.isMemLoc()) { + unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8; + MachineFrameInfo *MFI = MF.getFrameInfo(); + int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset()); + + // Create load node to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0); + } else { + Reg = MF.addLiveIn(NextVA.getLocReg(), RC); + ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - return ArgValue; + return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2); } -SDOperand -ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { - std::vector ArgValues; - SDOperand Root = Op.getOperand(0); - unsigned ArgOffset = 0; // Frame mechanisms handle retaddr slot - unsigned NumGPRs = 0; // GPRs used for parameter passing. +SDValue +ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + SDValue Root = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + bool isVarArg = cast(Op.getOperand(2))->getZExtValue() != 0; + unsigned CC = MF.getFunction()->getCallingConv(); + ARMFunctionInfo *AFI = MF.getInfo(); + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Op.getNode(), + CCAssignFnForNode(CC, /* Return*/ false)); + + SmallVector ArgValues; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored in registers. + if (VA.isRegLoc()) { + MVT RegVT = VA.getLocVT(); + + SDValue ArgValue; + if (VA.needsCustom()) { + // f64 and vector types are split up into multiple registers or + // combinations of registers and stack slots. + RegVT = MVT::i32; + + if (VA.getLocVT() == MVT::v2f64) { + SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], + Root, DAG, dl); + VA = ArgLocs[++i]; // skip ahead to next loc + SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], + Root, DAG, dl); + ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); + ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, + ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); + ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, + ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); + } else + ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Root, DAG, dl); - unsigned NumArgs = Op.Val->getNumValues()-1; - for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo) - ArgValues.push_back(LowerFORMAL_ARGUMENT(Op, DAG, ArgNo, - NumGPRs, ArgOffset)); + } else { + TargetRegisterClass *RC; + if (FloatABIType == FloatABI::Hard && RegVT == MVT::f32) + RC = ARM::SPRRegisterClass; + else if (FloatABIType == FloatABI::Hard && RegVT == MVT::f64) + RC = ARM::DPRRegisterClass; + else if (AFI->isThumb1OnlyFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + assert((RegVT == MVT::i32 || RegVT == MVT::f32 || + (FloatABIType == FloatABI::Hard && RegVT == MVT::f64)) && + "RegVT not supported by FORMAL_ARGUMENTS Lowering"); + + // Transform the arguments in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); + } - bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; + // If this is an 8 or 16-bit value, it is really passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. + switch (VA.getLocInfo()) { + default: LLVM_UNREACHABLE("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::SExt: + ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::ZExt: + ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + break; + } + + ArgValues.push_back(ArgValue); + + } else { // VA.isRegLoc() + + // sanity check + assert(VA.isMemLoc()); + assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); + + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); + + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0)); + } + } + + // varargs if (isVarArg) { static const unsigned GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - ARMFunctionInfo *AFI = MF.getInfo(); + unsigned NumGPRs = CCInfo.getFirstUnallocated + (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); unsigned VARegSize = (4 - NumGPRs) * 4; unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); + unsigned ArgOffset = 0; if (VARegSaveSize) { // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by deferencing // the result of va_next. AFI->setVarArgsRegSaveSize(VARegSaveSize); + ArgOffset = CCInfo.getNextStackOffset(); VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + VARegSaveSize - VARegSize); - SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - SmallVector MemOps; + SmallVector MemOps; for (; NumGPRs < 4; ++NumGPRs) { - unsigned VReg = RegInfo.createVirtualRegister(&ARM::GPRRegClass); - RegInfo.addLiveIn(GPRArgRegs[NumGPRs], VReg); - SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i32); - SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); + TargetRegisterClass *RC; + if (AFI->isThumb1OnlyFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, + FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); } if (!MemOps.empty()) - Root = DAG.getNode(ISD::TokenFactor, MVT::Other, + Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. @@ -1033,19 +1525,18 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) { ArgValues.push_back(Root); // Return the new list of results. - std::vector RetVT(Op.Val->value_begin(), - Op.Val->value_end()); - return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size()); + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), + &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); } /// isFloatingPointZero - Return true if this is +0.0. -static bool isFloatingPointZero(SDOperand Op) { +static bool isFloatingPointZero(SDValue Op) { if (ConstantFPSDNode *CFP = dyn_cast(Op)) return CFP->getValueAPF().isPosZero(); - else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) { + else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { // Maybe this has already been legalized into the constant pool? if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { - SDOperand WrapperOp = Op.getOperand(1).getOperand(0); + SDValue WrapperOp = Op.getOperand(1).getOperand(0); if (ConstantPoolSDNode *CP = dyn_cast(WrapperOp)) if (ConstantFP *CFP = dyn_cast(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); @@ -1054,45 +1545,46 @@ static bool isFloatingPointZero(SDOperand Op) { return false; } -static bool isLegalCmpImmediate(unsigned C, bool isThumb) { - return ( isThumb && (C & ~255U) == 0) || - (!isThumb && ARM_AM::getSOImmVal(C) != -1); +static bool isLegalCmpImmediate(unsigned C, bool isThumb1Only) { + return ( isThumb1Only && (C & ~255U) == 0) || + (!isThumb1Only && ARM_AM::getSOImmVal(C) != -1); } /// Returns appropriate ARM CMP (cmp) and corresponding condition code for /// the given operands. -static SDOperand getARMCmp(SDOperand LHS, SDOperand RHS, ISD::CondCode CC, - SDOperand &ARMCC, SelectionDAG &DAG, bool isThumb) { - if (ConstantSDNode *RHSC = dyn_cast(RHS.Val)) { - unsigned C = RHSC->getValue(); - if (!isLegalCmpImmediate(C, isThumb)) { +static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, bool isThumb1Only, + DebugLoc dl) { + if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { + unsigned C = RHSC->getZExtValue(); + if (!isLegalCmpImmediate(C, isThumb1Only)) { // Constant does not fit, try adjusting it by one? switch (CC) { default: break; case ISD::SETLT: case ISD::SETGE: - if (isLegalCmpImmediate(C-1, isThumb)) { + if (isLegalCmpImmediate(C-1, isThumb1Only)) { CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETULT: case ISD::SETUGE: - if (C > 0 && isLegalCmpImmediate(C-1, isThumb)) { + if (C > 0 && isLegalCmpImmediate(C-1, isThumb1Only)) { CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETLE: case ISD::SETGT: - if (isLegalCmpImmediate(C+1, isThumb)) { + if (isLegalCmpImmediate(C+1, isThumb1Only)) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; RHS = DAG.getConstant(C+1, MVT::i32); } break; case ISD::SETULE: case ISD::SETUGT: - if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb)) { + if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb1Only)) { CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; RHS = DAG.getConstant(C+1, MVT::i32); } @@ -1109,73 +1601,76 @@ static SDOperand getARMCmp(SDOperand LHS, SDOperand RHS, ISD::CondCode CC, break; case ARMCC::EQ: case ARMCC::NE: - case ARMCC::MI: - case ARMCC::PL: - // Uses only N and Z Flags - CompareType = ARMISD::CMPNZ; + // Uses only Z Flag + CompareType = ARMISD::CMPZ; break; } ARMCC = DAG.getConstant(CondCode, MVT::i32); - return DAG.getNode(CompareType, MVT::Flag, LHS, RHS); + return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); } /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. -static SDOperand getVFPCmp(SDOperand LHS, SDOperand RHS, SelectionDAG &DAG) { - SDOperand Cmp; +static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, + DebugLoc dl) { + SDValue Cmp; if (!isFloatingPointZero(RHS)) - Cmp = DAG.getNode(ARMISD::CMPFP, MVT::Flag, LHS, RHS); + Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); else - Cmp = DAG.getNode(ARMISD::CMPFPw0, MVT::Flag, LHS); - return DAG.getNode(ARMISD::FMSTAT, MVT::Flag, Cmp); + Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); + return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); } -static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { - MVT::ValueType VT = Op.getValueType(); - SDOperand LHS = Op.getOperand(0); - SDOperand RHS = Op.getOperand(1); +static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + MVT VT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(4))->get(); - SDOperand TrueVal = Op.getOperand(2); - SDOperand FalseVal = Op.getOperand(3); + SDValue TrueVal = Op.getOperand(2); + SDValue FalseVal = Op.getOperand(3); + DebugLoc dl = Op.getDebugLoc(); if (LHS.getValueType() == MVT::i32) { - SDOperand ARMCC; - SDOperand CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDOperand Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb()); - return DAG.getNode(ARMISD::CMOV, VT, FalseVal, TrueVal, ARMCC, CCR, Cmp); + SDValue ARMCC; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp); } ARMCC::CondCodes CondCode, CondCode2; if (FPCCToARMCC(CC, CondCode, CondCode2)) std::swap(TrueVal, FalseVal); - SDOperand ARMCC = DAG.getConstant(CondCode, MVT::i32); - SDOperand CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDOperand Cmp = getVFPCmp(LHS, RHS, DAG); - SDOperand Result = DAG.getNode(ARMISD::CMOV, VT, FalseVal, TrueVal, + SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR, Cmp); if (CondCode2 != ARMCC::AL) { - SDOperand ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); + SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. - SDOperand Cmp2 = getVFPCmp(LHS, RHS, DAG); - Result = DAG.getNode(ARMISD::CMOV, VT, Result, TrueVal, ARMCC2, CCR, Cmp2); + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); + Result = DAG.getNode(ARMISD::CMOV, dl, VT, + Result, TrueVal, ARMCC2, CCR, Cmp2); } return Result; } -static SDOperand LowerBR_CC(SDOperand Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { - SDOperand Chain = Op.getOperand(0); +static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); - SDOperand LHS = Op.getOperand(2); - SDOperand RHS = Op.getOperand(3); - SDOperand Dest = Op.getOperand(4); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + DebugLoc dl = Op.getDebugLoc(); if (LHS.getValueType() == MVT::i32) { - SDOperand ARMCC; - SDOperand CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDOperand Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb()); - return DAG.getNode(ARMISD::BRCOND, MVT::Other, Chain, Dest, ARMCC, CCR,Cmp); + SDValue ARMCC; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, + Chain, Dest, ARMCC, CCR,Cmp); } assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); @@ -1183,90 +1678,121 @@ static SDOperand LowerBR_CC(SDOperand Op, SelectionDAG &DAG, if (FPCCToARMCC(CC, CondCode, CondCode2)) // Swap the LHS/RHS of the comparison if needed. std::swap(LHS, RHS); - - SDOperand Cmp = getVFPCmp(LHS, RHS, DAG); - SDOperand ARMCC = DAG.getConstant(CondCode, MVT::i32); - SDOperand CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); - SDOperand Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; - SDOperand Res = DAG.getNode(ARMISD::BRCOND, VTList, Ops, 5); + SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); if (CondCode2 != ARMCC::AL) { ARMCC = DAG.getConstant(CondCode2, MVT::i32); - SDOperand Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) }; - Res = DAG.getNode(ARMISD::BRCOND, VTList, Ops, 5); + SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) }; + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); } return Res; } -SDOperand ARMTargetLowering::LowerBR_JT(SDOperand Op, SelectionDAG &DAG) { - SDOperand Chain = Op.getOperand(0); - SDOperand Table = Op.getOperand(1); - SDOperand Index = Op.getOperand(2); +SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { + SDValue Chain = Op.getOperand(0); + SDValue Table = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); - MVT::ValueType PTy = getPointerTy(); + MVT PTy = getPointerTy(); JumpTableSDNode *JT = cast(Table); ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo(); - SDOperand UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); - SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); - Table = DAG.getNode(ARMISD::WrapperJT, MVT::i32, JTI, UId); - Index = DAG.getNode(ISD::MUL, PTy, Index, DAG.getConstant(4, PTy)); - SDOperand Addr = DAG.getNode(ISD::ADD, PTy, Index, Table); + SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; - Addr = DAG.getLoad(isPIC ? (MVT::ValueType)MVT::i32 : PTy, + Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl, Chain, Addr, NULL, 0); Chain = Addr.getValue(1); if (isPIC) - Addr = DAG.getNode(ISD::ADD, PTy, Addr, Table); - return DAG.getNode(ARMISD::BR_JT, MVT::Other, Chain, Addr, JTI, UId); + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } -static SDOperand LowerFP_TO_INT(SDOperand Op, SelectionDAG &DAG) { +static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); unsigned Opc = Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI; - Op = DAG.getNode(Opc, MVT::f32, Op.getOperand(0)); - return DAG.getNode(ISD::BIT_CONVERT, MVT::i32, Op); + Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); } -static SDOperand LowerINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { - MVT::ValueType VT = Op.getValueType(); +static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); unsigned Opc = Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF; - Op = DAG.getNode(ISD::BIT_CONVERT, MVT::f32, Op.getOperand(0)); - return DAG.getNode(Opc, VT, Op); + Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(Opc, dl, VT, Op); } -static SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) { +static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // Implement fcopysign with a fabs and a conditional fneg. - SDOperand Tmp0 = Op.getOperand(0); - SDOperand Tmp1 = Op.getOperand(1); - MVT::ValueType VT = Op.getValueType(); - MVT::ValueType SrcVT = Tmp1.getValueType(); - SDOperand AbsVal = DAG.getNode(ISD::FABS, VT, Tmp0); - SDOperand Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG); - SDOperand ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); - SDOperand CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::CNEG, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); -} - -SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, - SDOperand Dest, - SDOperand Source, - unsigned Size, - unsigned Align, - SelectionDAG &DAG) { + SDValue Tmp0 = Op.getOperand(0); + SDValue Tmp1 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + MVT VT = Op.getValueType(); + MVT SrcVT = Tmp1.getValueType(); + SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); + SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); + SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); +} + +SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) + ? ARM::R7 : ARM::R11; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + return FrameAddr; +} + +SDValue +ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff){ // Do repeated 4-byte loads and stores. To be improved. - assert((Align & 3) == 0 && "Expected 4-byte aligned addresses!"); - unsigned BytesLeft = Size & 3; - unsigned NumMemOps = Size >> 2; + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferrably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) + return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; unsigned EmittedNumMemOps = 0; - unsigned SrcOff = 0, DstOff = 0; - MVT::ValueType VT = MVT::i32; + MVT VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; const unsigned MAX_LOADS_IN_LDM = 6; - SDOperand TFOps[MAX_LOADS_IN_LDM]; - SDOperand Loads[MAX_LOADS_IN_LDM]; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the // same number of stores. The loads and stores will get combined into @@ -1274,29 +1800,29 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, while (EmittedNumMemOps < NumMemOps) { for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - Loads[i] = DAG.getLoad(VT, Chain, - DAG.getNode(ISD::ADD, MVT::i32, Source, + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - NULL, 0); + SrcSV, SrcSVOff + SrcOff); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - TFOps[i] = DAG.getStore(Chain, Loads[i], - DAG.getNode(ISD::ADD, MVT::i32, Dest, + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - NULL, 0); + DstSV, DstSVOff + DstOff); DstOff += VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); EmittedNumMemOps += i; } - if (BytesLeft == 0) + if (BytesLeft == 0) return Chain; // Issue loads / stores for the trailing (1 - 3) bytes. @@ -1311,16 +1837,16 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, VTSize = 1; } - Loads[i] = DAG.getLoad(VT, Chain, - DAG.getNode(ISD::ADD, MVT::i32, Source, + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - NULL, 0); + SrcSV, SrcSVOff + SrcOff); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; BytesLeft -= VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); i = 0; BytesLeft = BytesLeftSave; @@ -1333,65 +1859,408 @@ SDOperand ARMTargetLowering::LowerMEMCPYInline(SDOperand Chain, VTSize = 1; } - TFOps[i] = DAG.getStore(Chain, Loads[i], - DAG.getNode(ISD::ADD, MVT::i32, Dest, + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - NULL, 0); + DstSV, DstSVOff + DstOff); ++i; DstOff += VTSize; BytesLeft -= VTSize; } - return DAG.getNode(ISD::TokenFactor, MVT::Other, &TFOps[0], i); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); } -static SDNode *ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { +static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { + SDValue Op = N->getOperand(0); + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0) == MVT::f64) { + // Turn i64->f64 into FMDRR. + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, + DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + } + // Turn f64->i64 into FMRRD. - assert(N->getValueType(0) == MVT::i64 && - N->getOperand(0).getValueType() == MVT::f64); - - SDOperand Op = N->getOperand(0); - SDOperand Cvt = DAG.getNode(ARMISD::FMRRD, DAG.getVTList(MVT::i32, MVT::i32), - &Op, 1); - + SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + // Merge the pieces into a single i64 value. - return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Cvt, Cvt.getValue(1)).Val; + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } -static SDNode *ExpandSRx(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { - assert(N->getValueType(0) == MVT::i64 && +/// getZeroVector - Returns a vector of specified type with all zero elements. +/// +static SDValue getZeroVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { + assert(VT.isVector() && "Expected a vector type"); + + // Zero vectors are used to represent vector negation and in those cases + // will be implemented with the NEON VNEG instruction. However, VNEG does + // not support i64 elements, so sometimes the zero vectors will need to be + // explicitly constructed. For those cases, and potentially other uses in + // the future, always build zero vectors as <4 x i32> or <2 x i32> bitcasted + // to their dest type. This ensures they get CSE'd. + SDValue Vec; + SDValue Cst = DAG.getTargetConstant(0, MVT::i32); + if (VT.getSizeInBits() == 64) + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); + else + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); +} + +/// getOnesVector - Returns a vector of specified type with all bits set. +/// +static SDValue getOnesVector(MVT VT, SelectionDAG &DAG, DebugLoc dl) { + assert(VT.isVector() && "Expected a vector type"); + + // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest + // type. This ensures they get CSE'd. + SDValue Vec; + SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); + if (VT.getSizeInBits() == 64) + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst); + else + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); +} + +static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + MVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + // Lower vector shifts on NEON to use VSHL. + if (VT.isVector()) { + assert(ST->hasNEON() && "unexpected vector shift"); + + // Left shifts translate directly to the vshiftu intrinsic. + if (N->getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), + N->getOperand(0), N->getOperand(1)); + + assert((N->getOpcode() == ISD::SRA || + N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); + + // NEON uses the same intrinsics for both left and right shifts. For + // right shifts, the shift amounts are negative, so negate the vector of + // shift amounts. + MVT ShiftVT = N->getOperand(1).getValueType(); + SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, + getZeroVector(ShiftVT, DAG, dl), + N->getOperand(1)); + Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? + Intrinsic::arm_neon_vshifts : + Intrinsic::arm_neon_vshiftu); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(vshiftInt, MVT::i32), + N->getOperand(0), NegatedCount); + } + + assert(VT == MVT::i64 && (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && "Unknown shift to lower!"); - + // We only lower SRA, SRL of 1 here, all others use generic lowering. if (!isa(N->getOperand(1)) || - cast(N->getOperand(1))->getValue() != 1) - return 0; - + cast(N->getOperand(1))->getZExtValue() != 1) + return SDValue(); + // If we are in thumb mode, we don't have RRX. - if (ST->isThumb()) return 0; - + if (ST->isThumb1Only()) return SDValue(); + // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. - SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, N->getOperand(0), + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(0, MVT::i32)); - SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, N->getOperand(0), + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(1, MVT::i32)); - + // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; - Hi = DAG.getNode(Opc, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); - + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); + // The low part is an ARMISD::RRX operand, which shifts the carry in. - Lo = DAG.getNode(ARMISD::RRX, MVT::i32, Lo, Hi.getValue(1)); - + Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); + // Merge the pieces into a single i64 value. - return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi).Val; + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); +} + +static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { + SDValue TmpOp0, TmpOp1; + bool Invert = false; + bool Swap = false; + unsigned Opc = 0; + + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue CC = Op.getOperand(2); + MVT VT = Op.getValueType(); + ISD::CondCode SetCCOpcode = cast(CC)->get(); + DebugLoc dl = Op.getDebugLoc(); + + if (Op.getOperand(1).getValueType().isFloatingPoint()) { + switch (SetCCOpcode) { + default: LLVM_UNREACHABLE("Illegal FP comparison"); break; + case ISD::SETUNE: + case ISD::SETNE: Invert = true; // Fallthrough + case ISD::SETOEQ: + case ISD::SETEQ: Opc = ARMISD::VCEQ; break; + case ISD::SETOLT: + case ISD::SETLT: Swap = true; // Fallthrough + case ISD::SETOGT: + case ISD::SETGT: Opc = ARMISD::VCGT; break; + case ISD::SETOLE: + case ISD::SETLE: Swap = true; // Fallthrough + case ISD::SETOGE: + case ISD::SETGE: Opc = ARMISD::VCGE; break; + case ISD::SETUGE: Swap = true; // Fallthrough + case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; + case ISD::SETUGT: Swap = true; // Fallthrough + case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; + case ISD::SETUEQ: Invert = true; // Fallthrough + case ISD::SETONE: + // Expand this to (OLT | OGT). + TmpOp0 = Op0; + TmpOp1 = Op1; + Opc = ISD::OR; + Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); + Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); + break; + case ISD::SETUO: Invert = true; // Fallthrough + case ISD::SETO: + // Expand this to (OLT | OGE). + TmpOp0 = Op0; + TmpOp1 = Op1; + Opc = ISD::OR; + Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); + Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); + break; + } + } else { + // Integer comparisons. + switch (SetCCOpcode) { + default: LLVM_UNREACHABLE("Illegal integer comparison"); break; + case ISD::SETNE: Invert = true; + case ISD::SETEQ: Opc = ARMISD::VCEQ; break; + case ISD::SETLT: Swap = true; + case ISD::SETGT: Opc = ARMISD::VCGT; break; + case ISD::SETLE: Swap = true; + case ISD::SETGE: Opc = ARMISD::VCGE; break; + case ISD::SETULT: Swap = true; + case ISD::SETUGT: Opc = ARMISD::VCGTU; break; + case ISD::SETULE: Swap = true; + case ISD::SETUGE: Opc = ARMISD::VCGEU; break; + } + + // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). + if (Opc == ARMISD::VCEQ) { + + SDValue AndOp; + if (ISD::isBuildVectorAllZeros(Op1.getNode())) + AndOp = Op0; + else if (ISD::isBuildVectorAllZeros(Op0.getNode())) + AndOp = Op1; + + // Ignore bitconvert. + if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) + AndOp = AndOp.getOperand(0); + + if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { + Opc = ARMISD::VTST; + Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); + Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); + Invert = !Invert; + } + } + } + + if (Swap) + std::swap(Op0, Op1); + + SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); + + if (Invert) + Result = DAG.getNOT(dl, Result, VT); + + return Result; +} + +/// isVMOVSplat - Check if the specified splat value corresponds to an immediate +/// VMOV instruction, and if so, return the constant being splatted. +static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef, + unsigned SplatBitSize, SelectionDAG &DAG) { + switch (SplatBitSize) { + case 8: + // Any 1-byte value is OK. + assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); + return DAG.getTargetConstant(SplatBits, MVT::i8); + + case 16: + // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. + if ((SplatBits & ~0xff) == 0 || + (SplatBits & ~0xff00) == 0) + return DAG.getTargetConstant(SplatBits, MVT::i16); + break; + + case 32: + // NEON's 32-bit VMOV supports splat values where: + // * only one byte is nonzero, or + // * the least significant byte is 0xff and the second byte is nonzero, or + // * the least significant 2 bytes are 0xff and the third is nonzero. + if ((SplatBits & ~0xff) == 0 || + (SplatBits & ~0xff00) == 0 || + (SplatBits & ~0xff0000) == 0 || + (SplatBits & ~0xff000000) == 0) + return DAG.getTargetConstant(SplatBits, MVT::i32); + + if ((SplatBits & ~0xffff) == 0 && + ((SplatBits | SplatUndef) & 0xff) == 0xff) + return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32); + + if ((SplatBits & ~0xffffff) == 0 && + ((SplatBits | SplatUndef) & 0xffff) == 0xffff) + return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32); + + // Note: there are a few 32-bit splat values (specifically: 00ffff00, + // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not + // VMOV.I32. A (very) minor optimization would be to replicate the value + // and fall through here to test for a valid 64-bit splat. But, then the + // caller would also need to check and handle the change in size. + break; + + case 64: { + // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. + uint64_t BitMask = 0xff; + uint64_t Val = 0; + for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { + if (((SplatBits | SplatUndef) & BitMask) == BitMask) + Val |= BitMask; + else if ((SplatBits & BitMask) != 0) + return SDValue(); + BitMask <<= 8; + } + return DAG.getTargetConstant(Val, MVT::i64); + } + + default: + LLVM_UNREACHABLE("unexpected size for isVMOVSplat"); + break; + } + + return SDValue(); +} + +/// getVMOVImm - If this is a build_vector of constants which can be +/// formed by using a VMOV instruction of the specified element size, +/// return the constant being splatted. The ByteSize field indicates the +/// number of bytes of each element [1248]. +SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { + BuildVectorSDNode *BVN = dyn_cast(N); + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, + HasAnyUndefs, ByteSize * 8)) + return SDValue(); + + if (SplatBitSize > ByteSize * 8) + return SDValue(); + + return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), + SplatBitSize, DAG); +} + +static SDValue BuildSplat(SDValue Val, MVT VT, SelectionDAG &DAG, DebugLoc dl) { + // Canonicalize all-zeros and all-ones vectors. + ConstantSDNode *ConstVal = dyn_cast(Val.getNode()); + if (ConstVal->isNullValue()) + return getZeroVector(VT, DAG, dl); + if (ConstVal->isAllOnesValue()) + return getOnesVector(VT, DAG, dl); + + MVT CanonicalVT; + if (VT.is64BitVector()) { + switch (Val.getValueType().getSizeInBits()) { + case 8: CanonicalVT = MVT::v8i8; break; + case 16: CanonicalVT = MVT::v4i16; break; + case 32: CanonicalVT = MVT::v2i32; break; + case 64: CanonicalVT = MVT::v1i64; break; + default: LLVM_UNREACHABLE("unexpected splat element type"); break; + } + } else { + assert(VT.is128BitVector() && "unknown splat vector size"); + switch (Val.getValueType().getSizeInBits()) { + case 8: CanonicalVT = MVT::v16i8; break; + case 16: CanonicalVT = MVT::v8i16; break; + case 32: CanonicalVT = MVT::v4i32; break; + case 64: CanonicalVT = MVT::v2i64; break; + default: LLVM_UNREACHABLE("unexpected splat element type"); break; + } + } + + // Build a canonical splat for this value. + SmallVector Ops; + Ops.assign(CanonicalVT.getVectorNumElements(), Val); + SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0], + Ops.size()); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res); +} + +// If this is a case we can't handle, return null and let the default +// expansion code take care of it. +static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { + BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); + assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + DebugLoc dl = Op.getDebugLoc(); + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, DAG); + if (Val.getNode()) + return BuildSplat(Val, Op.getValueType(), DAG, dl); + } + + return SDValue(); +} + +static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + return Op; +} + +static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { + return Op; +} + +static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + assert((VT == MVT::i8 || VT == MVT::i16) && + "unexpected type for custom-lowering vector extract"); + SDValue Vec = Op.getOperand(0); + SDValue Lane = Op.getOperand(1); + Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); + Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } +static SDValue LowerCONCAT_VECTORS(SDValue Op) { + if (Op.getValueType().is128BitVector() && Op.getNumOperands() == 2) + return Op; + return SDValue(); +} -SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { - default: assert(0 && "Don't know how to custom lower this!"); abort(); + default: LLVM_UNREACHABLE("Don't know how to custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : @@ -1410,32 +2279,44 @@ SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); case ISD::RETURNADDR: break; - case ISD::FRAMEADDR: break; + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - - - // FIXME: Remove these when LegalizeDAGTypes lands. - case ISD::BIT_CONVERT: return SDOperand(ExpandBIT_CONVERT(Op.Val, DAG), 0); + case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); + case ISD::SHL: case ISD::SRL: - case ISD::SRA: return SDOperand(ExpandSRx(Op.Val, DAG,Subtarget),0); + case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); + case ISD::VSETCC: return LowerVSETCC(Op, DAG); + case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op); } - return SDOperand(); + return SDValue(); } - -/// ExpandOperationResult - Provide custom lowering hooks for expanding -/// operations. -SDNode *ARMTargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) { +/// ReplaceNodeResults - Replace the results of node with an illegal result +/// type with new values built out of custom code. +void ARMTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl&Results, + SelectionDAG &DAG) { switch (N->getOpcode()) { - default: assert(0 && "Don't know how to custom expand this!"); abort(); - case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(N, DAG); + default: + LLVM_UNREACHABLE("Don't know how to custom expand this!"); + return; + case ISD::BIT_CONVERT: + Results.push_back(ExpandBIT_CONVERT(N, DAG)); + return; case ISD::SRL: - case ISD::SRA: return ExpandSRx(N, DAG, Subtarget); + case ISD::SRA: { + SDValue Res = LowerShift(N, DAG, Subtarget); + if (Res.getNode()) + Results.push_back(Res); + return; + } } } - //===----------------------------------------------------------------------===// // ARM Scheduler Hooks @@ -1443,8 +2324,9 @@ SDNode *ARMTargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) { MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); case ARM::tMOVCCr: { @@ -1453,7 +2335,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - ilist::iterator It = BB; + MachineFunction::iterator It = BB; ++It; // thisMBB: @@ -1463,13 +2345,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // bCC copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; - MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); - BuildMI(BB, TII->get(ARM::tBcc)).addMBB(sinkMBB) - .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); MachineFunction *F = BB->getParent(); - F->getBasicBlockList().insert(It, copy0MBB); - F->getBasicBlockList().insert(It, sinkMBB); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) + .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), @@ -1494,11 +2376,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, TII->get(ARM::PHI), MI->getOperand(0).getReg()) + BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - delete MI; // The pseudo instruction is gone now. + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return BB; } } @@ -1508,41 +2390,433 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // ARM Optimization Hooks //===----------------------------------------------------------------------===// +static +SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT VT = N->getValueType(0); + unsigned Opc = N->getOpcode(); + bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; + SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); + SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); + ISD::CondCode CC = ISD::SETCC_INVALID; + + if (isSlctCC) { + CC = cast(Slct.getOperand(4))->get(); + } else { + SDValue CCOp = Slct.getOperand(0); + if (CCOp.getOpcode() == ISD::SETCC) + CC = cast(CCOp.getOperand(2))->get(); + } + + bool DoXform = false; + bool InvCC = false; + assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && + "Bad input!"); + + if (LHS.getOpcode() == ISD::Constant && + cast(LHS)->isNullValue()) { + DoXform = true; + } else if (CC != ISD::SETCC_INVALID && + RHS.getOpcode() == ISD::Constant && + cast(RHS)->isNullValue()) { + std::swap(LHS, RHS); + SDValue Op0 = Slct.getOperand(0); + MVT OpVT = isSlctCC ? Op0.getValueType() : + Op0.getOperand(0).getValueType(); + bool isInt = OpVT.isInteger(); + CC = ISD::getSetCCInverse(CC, isInt); + + if (!TLI.isCondCodeLegal(CC, OpVT)) + return SDValue(); // Inverse operator isn't legal. + + DoXform = true; + InvCC = true; + } + + if (DoXform) { + SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); + if (isSlctCC) + return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, + Slct.getOperand(0), Slct.getOperand(1), CC); + SDValue CCOp = Slct.getOperand(0); + if (InvCC) + CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), + CCOp.getOperand(0), CCOp.getOperand(1), CC); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + CCOp, OtherOp, Result); + } + return SDValue(); +} + +/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. +static SDValue PerformADDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // added by evan in r37685 with no testcase. + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) + if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N0, N1, DCI); + if (Result.getNode()) return Result; + } + if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + +/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. +static SDValue PerformSUBCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // added by evan in r37685 with no testcase. + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) + if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + + /// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD. -static SDOperand PerformFMRRDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { +static SDValue PerformFMRRDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { // fmrrd(fmdrr x, y) -> x,y - SDOperand InDouble = N->getOperand(0); + SDValue InDouble = N->getOperand(0); if (InDouble.getOpcode() == ARMISD::FMDRR) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); - return SDOperand(); + return SDValue(); +} + +/// getVShiftImm - Check if this is a valid build_vector for the immediate +/// operand of a vector shift operation, where all the elements of the +/// build_vector must have the same constant integer value. +static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { + // Ignore bit_converts. + while (Op.getOpcode() == ISD::BIT_CONVERT) + Op = Op.getOperand(0); + BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, + HasAnyUndefs, ElementBits) || + SplatBitSize > ElementBits) + return false; + Cnt = SplatBits.getSExtValue(); + return true; +} + +/// isVShiftLImm - Check if this is a valid build_vector for the immediate +/// operand of a vector shift left operation. That value must be in the range: +/// 0 <= Value < ElementBits for a left shift; or +/// 0 <= Value <= ElementBits for a long left shift. +static bool isVShiftLImm(SDValue Op, MVT VT, bool isLong, int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (! getVShiftImm(Op, ElementBits, Cnt)) + return false; + return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); } -SDOperand ARMTargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { +/// isVShiftRImm - Check if this is a valid build_vector for the immediate +/// operand of a vector shift right operation. For a shift opcode, the value +/// is positive, but for an intrinsic the value count must be negative. The +/// absolute value must be in the range: +/// 1 <= |Value| <= ElementBits for a right shift; or +/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. +static bool isVShiftRImm(SDValue Op, MVT VT, bool isNarrow, bool isIntrinsic, + int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (! getVShiftImm(Op, ElementBits, Cnt)) + return false; + if (isIntrinsic) + Cnt = -Cnt; + return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); +} + +/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. +static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { + unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + // Don't do anything for most intrinsics. + break; + + // Vector shifts: check for immediate versions and lower them. + // Note: This is done during DAG combining instead of DAG legalizing because + // the build_vectors for 64-bit vector element shift counts are generally + // not legal, and it is hard to see their values after they get legalized to + // loads from a constant pool. + case Intrinsic::arm_neon_vshifts: + case Intrinsic::arm_neon_vshiftu: + case Intrinsic::arm_neon_vshiftls: + case Intrinsic::arm_neon_vshiftlu: + case Intrinsic::arm_neon_vshiftn: + case Intrinsic::arm_neon_vrshifts: + case Intrinsic::arm_neon_vrshiftu: + case Intrinsic::arm_neon_vrshiftn: + case Intrinsic::arm_neon_vqshifts: + case Intrinsic::arm_neon_vqshiftu: + case Intrinsic::arm_neon_vqshiftsu: + case Intrinsic::arm_neon_vqshiftns: + case Intrinsic::arm_neon_vqshiftnu: + case Intrinsic::arm_neon_vqshiftnsu: + case Intrinsic::arm_neon_vqrshiftns: + case Intrinsic::arm_neon_vqrshiftnu: + case Intrinsic::arm_neon_vqrshiftnsu: { + MVT VT = N->getOperand(1).getValueType(); + int64_t Cnt; + unsigned VShiftOpc = 0; + + switch (IntNo) { + case Intrinsic::arm_neon_vshifts: + case Intrinsic::arm_neon_vshiftu: + if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { + VShiftOpc = ARMISD::VSHL; + break; + } + if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { + VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? + ARMISD::VSHRs : ARMISD::VSHRu); + break; + } + return SDValue(); + + case Intrinsic::arm_neon_vshiftls: + case Intrinsic::arm_neon_vshiftlu: + if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) + break; + LLVM_UNREACHABLE("invalid shift count for vshll intrinsic"); + + case Intrinsic::arm_neon_vrshifts: + case Intrinsic::arm_neon_vrshiftu: + if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) + break; + return SDValue(); + + case Intrinsic::arm_neon_vqshifts: + case Intrinsic::arm_neon_vqshiftu: + if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) + break; + return SDValue(); + + case Intrinsic::arm_neon_vqshiftsu: + if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) + break; + LLVM_UNREACHABLE("invalid shift count for vqshlu intrinsic"); + + case Intrinsic::arm_neon_vshiftn: + case Intrinsic::arm_neon_vrshiftn: + case Intrinsic::arm_neon_vqshiftns: + case Intrinsic::arm_neon_vqshiftnu: + case Intrinsic::arm_neon_vqshiftnsu: + case Intrinsic::arm_neon_vqrshiftns: + case Intrinsic::arm_neon_vqrshiftnu: + case Intrinsic::arm_neon_vqrshiftnsu: + // Narrowing shifts require an immediate right shift. + if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) + break; + LLVM_UNREACHABLE("invalid shift count for narrowing vector shift intrinsic"); + + default: + LLVM_UNREACHABLE("unhandled vector shift"); + } + + switch (IntNo) { + case Intrinsic::arm_neon_vshifts: + case Intrinsic::arm_neon_vshiftu: + // Opcode already set above. + break; + case Intrinsic::arm_neon_vshiftls: + case Intrinsic::arm_neon_vshiftlu: + if (Cnt == VT.getVectorElementType().getSizeInBits()) + VShiftOpc = ARMISD::VSHLLi; + else + VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? + ARMISD::VSHLLs : ARMISD::VSHLLu); + break; + case Intrinsic::arm_neon_vshiftn: + VShiftOpc = ARMISD::VSHRN; break; + case Intrinsic::arm_neon_vrshifts: + VShiftOpc = ARMISD::VRSHRs; break; + case Intrinsic::arm_neon_vrshiftu: + VShiftOpc = ARMISD::VRSHRu; break; + case Intrinsic::arm_neon_vrshiftn: + VShiftOpc = ARMISD::VRSHRN; break; + case Intrinsic::arm_neon_vqshifts: + VShiftOpc = ARMISD::VQSHLs; break; + case Intrinsic::arm_neon_vqshiftu: + VShiftOpc = ARMISD::VQSHLu; break; + case Intrinsic::arm_neon_vqshiftsu: + VShiftOpc = ARMISD::VQSHLsu; break; + case Intrinsic::arm_neon_vqshiftns: + VShiftOpc = ARMISD::VQSHRNs; break; + case Intrinsic::arm_neon_vqshiftnu: + VShiftOpc = ARMISD::VQSHRNu; break; + case Intrinsic::arm_neon_vqshiftnsu: + VShiftOpc = ARMISD::VQSHRNsu; break; + case Intrinsic::arm_neon_vqrshiftns: + VShiftOpc = ARMISD::VQRSHRNs; break; + case Intrinsic::arm_neon_vqrshiftnu: + VShiftOpc = ARMISD::VQRSHRNu; break; + case Intrinsic::arm_neon_vqrshiftnsu: + VShiftOpc = ARMISD::VQRSHRNsu; break; + } + + return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), + N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); + } + + case Intrinsic::arm_neon_vshiftins: { + MVT VT = N->getOperand(1).getValueType(); + int64_t Cnt; + unsigned VShiftOpc = 0; + + if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) + VShiftOpc = ARMISD::VSLI; + else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) + VShiftOpc = ARMISD::VSRI; + else { + LLVM_UNREACHABLE("invalid shift count for vsli/vsri intrinsic"); + } + + return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + DAG.getConstant(Cnt, MVT::i32)); + } + + case Intrinsic::arm_neon_vqrshifts: + case Intrinsic::arm_neon_vqrshiftu: + // No immediate versions of these to check for. + break; + } + + return SDValue(); +} + +/// PerformShiftCombine - Checks for immediate versions of vector shifts and +/// lowers them. As with the vector shift intrinsics, this is done during DAG +/// combining instead of DAG legalizing because the build_vectors for 64-bit +/// vector element shift counts are generally not legal, and it is hard to see +/// their values after they get legalized to loads from a constant pool. +static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + MVT VT = N->getValueType(0); + + // Nothing to be done for scalar shifts. + if (! VT.isVector()) + return SDValue(); + + assert(ST->hasNEON() && "unexpected vector shift"); + int64_t Cnt; + + switch (N->getOpcode()) { + default: LLVM_UNREACHABLE("unexpected shift opcode"); + + case ISD::SHL: + if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) + return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), + DAG.getConstant(Cnt, MVT::i32)); + break; + + case ISD::SRA: + case ISD::SRL: + if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { + unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? + ARMISD::VSHRs : ARMISD::VSHRu); + return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), + DAG.getConstant(Cnt, MVT::i32)); + } + } + return SDValue(); +} + +/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, +/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. +static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDValue N0 = N->getOperand(0); + + // Check for sign- and zero-extensions of vector extract operations of 8- + // and 16-bit vector elements. NEON supports these directly. They are + // handled during DAG combining because type legalization will promote them + // to 32-bit types and it is messy to recognize the operations after that. + if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue Vec = N0.getOperand(0); + SDValue Lane = N0.getOperand(1); + MVT VT = N->getValueType(0); + MVT EltVT = N0.getValueType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (VT == MVT::i32 && + (EltVT == MVT::i8 || EltVT == MVT::i16) && + TLI.isTypeLegal(Vec.getValueType())) { + + unsigned Opc = 0; + switch (N->getOpcode()) { + default: LLVM_UNREACHABLE("unexpected opcode"); + case ISD::SIGN_EXTEND: + Opc = ARMISD::VGETLANEs; + break; + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Opc = ARMISD::VGETLANEu; + break; + } + return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); + } + } + + return SDValue(); +} + +SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::ADD: return PerformADDCombine(N, DCI); + case ISD::SUB: return PerformSUBCombine(N, DCI); case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI); + case ISD::INTRINSIC_WO_CHAIN: + return PerformIntrinsicCombine(N, DCI.DAG); + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + return PerformShiftCombine(N, DCI.DAG, Subtarget); + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + return PerformExtendCombine(N, DCI.DAG, Subtarget); } - - return SDOperand(); + return SDValue(); } - /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. -static bool isLegalAddressImmediate(int64_t V, MVT::ValueType VT, +static bool isLegalAddressImmediate(int64_t V, MVT VT, const ARMSubtarget *Subtarget) { if (V == 0) return true; - if (Subtarget->isThumb()) { + if (!VT.isSimple()) + return false; + + if (Subtarget->isThumb()) { // FIXME for thumb2 if (V < 0) return false; unsigned Scale = 1; - switch (VT) { + switch (VT.getSimpleVT()) { default: return false; case MVT::i1: case MVT::i8: @@ -1561,21 +2835,21 @@ static bool isLegalAddressImmediate(int64_t V, MVT::ValueType VT, if ((V & (Scale - 1)) != 0) return false; V /= Scale; - return V == V & ((1LL << 5) - 1); + return V == (V & ((1LL << 5) - 1)); } if (V < 0) V = - V; - switch (VT) { + switch (VT.getSimpleVT()) { default: return false; case MVT::i1: case MVT::i8: case MVT::i32: // +- imm12 - return V == V & ((1LL << 12) - 1); + return V == (V & ((1LL << 12) - 1)); case MVT::i16: // +- imm8 - return V == V & ((1LL << 8) - 1); + return V == (V & ((1LL << 8) - 1)); case MVT::f32: case MVT::f64: if (!Subtarget->hasVFP2()) @@ -1583,35 +2857,39 @@ static bool isLegalAddressImmediate(int64_t V, MVT::ValueType VT, if ((V & 3) != 0) return false; V >>= 2; - return V == V & ((1LL << 8) - 1); + return V == (V & ((1LL << 8) - 1)); } } /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, +bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { - if (!isLegalAddressImmediate(AM.BaseOffs, getValueType(Ty), Subtarget)) + MVT VT = getValueType(Ty, true); + if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; - + // Can never fold addr of global into load/store. - if (AM.BaseGV) + if (AM.BaseGV) return false; - + switch (AM.Scale) { case 0: // no scale reg, must be "r+i" or "r", or "i". break; case 1: - if (Subtarget->isThumb()) + if (Subtarget->isThumb()) // FIXME for thumb2 return false; // FALL THROUGH. default: // ARM doesn't support any R+R*scale+imm addr modes. if (AM.BaseOffs) return false; - + + if (!VT.isSimple()) + return false; + int Scale = AM.Scale; - switch (getValueType(Ty)) { + switch (VT.getSimpleVT()) { default: return false; case MVT::i1: case MVT::i8: @@ -1630,12 +2908,12 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, if (((unsigned)AM.HasBaseReg + Scale) <= 2) return true; return false; - + case MVT::isVoid: // Note, we allow "void" uses (basically, uses that aren't loads or // stores), because arm allows folding a scale into many arithmetic // operations. This should be made more precise and revisited later. - + // Allow r << imm, but the imm has to be a multiple of two. if (AM.Scale & 1) return false; return isPowerOf2_32(AM.Scale); @@ -1645,11 +2923,10 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } - -static bool getIndexedAddressParts(SDNode *Ptr, MVT::ValueType VT, - bool isSEXTLoad, SDOperand &Base, - SDOperand &Offset, bool &isInc, - SelectionDAG &DAG) { +static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) return false; @@ -1657,8 +2934,9 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT::ValueType VT, // AddressingMode 3 Base = Ptr->getOperand(0); if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { - int RHSC = (int)RHS->getValue(); + int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -256) { + assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); return true; @@ -1670,8 +2948,9 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT::ValueType VT, } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { // AddressingMode 2 if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { - int RHSC = (int)RHS->getValue(); + int RHSC = (int)RHS->getZExtValue(); if (RHSC < 0 && RHSC > -0x1000) { + assert(Ptr->getOpcode() == ISD::ADD); isInc = false; Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); Base = Ptr->getOperand(0); @@ -1702,19 +2981,44 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT::ValueType VT, return false; } +static bool getT2IndexedAddressParts(SDNode *Ptr, MVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) + return false; + + Base = Ptr->getOperand(0); + if (ConstantSDNode *RHS = dyn_cast(Ptr->getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -0x100) { // 8 bits. + assert(Ptr->getOpcode() == ISD::ADD); + isInc = false; + Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); + return true; + } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. + isInc = Ptr->getOpcode() == ISD::ADD; + Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); + return true; + } + } + + return false; +} + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. bool -ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base, - SDOperand &Offset, +ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, ISD::MemIndexedMode &AM, - SelectionDAG &DAG) { - if (Subtarget->isThumb()) + SelectionDAG &DAG) const { + if (Subtarget->isThumb1Only()) return false; - MVT::ValueType VT; - SDOperand Ptr; + MVT VT; + SDValue Ptr; bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); @@ -1727,28 +3031,33 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base, return false; bool isInc; - bool isLegal = getIndexedAddressParts(Ptr.Val, VT, isSEXTLoad, Base, Offset, - isInc, DAG); - if (isLegal) { - AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; - return true; - } - return false; + bool isLegal = false; + if (Subtarget->isThumb() && Subtarget->hasThumb2()) + isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + else + isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, + Offset, isInc, DAG); + if (!isLegal) + return false; + + AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; + return true; } /// getPostIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDOperand &Base, - SDOperand &Offset, + SDValue &Base, + SDValue &Offset, ISD::MemIndexedMode &AM, - SelectionDAG &DAG) { - if (Subtarget->isThumb()) + SelectionDAG &DAG) const { + if (Subtarget->isThumb1Only()) return false; - MVT::ValueType VT; - SDOperand Ptr; + MVT VT; + SDValue Ptr; bool isSEXTLoad = false; if (LoadSDNode *LD = dyn_cast(N)) { VT = LD->getMemoryVT(); @@ -1759,18 +3068,23 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return false; bool isInc; - bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + bool isLegal = false; + if (Subtarget->isThumb() && Subtarget->hasThumb2()) + isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, isInc, DAG); - if (isLegal) { - AM = isInc ? ISD::POST_INC : ISD::POST_DEC; - return true; - } - return false; + else + isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (!isLegal) + return false; + + AM = isInc ? ISD::POST_INC : ISD::POST_DEC; + return true; } -void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, - APInt Mask, - APInt &KnownZero, +void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { @@ -1810,15 +3124,17 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } -std::pair +std::pair ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT::ValueType VT) const { + MVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'l': - // FIXME: in thumb mode, 'l' is only low-regs. - // FALL THROUGH. + if (Subtarget->isThumb1Only()) + return std::make_pair(0U, ARM::tGPRRegisterClass); + else + return std::make_pair(0U, ARM::GPRRegisterClass); case 'r': return std::make_pair(0U, ARM::GPRRegisterClass); case 'w': @@ -1834,13 +3150,16 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, std::vector ARMTargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - MVT::ValueType VT) const { + MVT VT) const { if (Constraint.size() != 1) return std::vector(); switch (Constraint[0]) { // GCC ARM Constraint Letters default: break; case 'l': + return make_vector(ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + 0); case 'r': return make_vector(ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7, @@ -1866,3 +3185,160 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, return std::vector(); } + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. +void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, + char Constraint, + bool hasMemory, + std::vector&Ops, + SelectionDAG &DAG) const { + SDValue Result(0, 0); + + switch (Constraint) { + default: break; + case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': + ConstantSDNode *C = dyn_cast(Op); + if (!C) + return; + + int64_t CVal64 = C->getSExtValue(); + int CVal = (int) CVal64; + // None of these constraints allow values larger than 32 bits. Check + // that the value fits in an int. + if (CVal != CVal64) + return; + + switch (Constraint) { + case 'I': + if (Subtarget->isThumb1Only()) { + // This must be a constant between 0 and 255, for ADD + // immediates. + if (CVal >= 0 && CVal <= 255) + break; + } else if (Subtarget->isThumb2()) { + // A constant that can be used as an immediate value in a + // data-processing instruction. + if (ARM_AM::getT2SOImmVal(CVal) != -1) + break; + } else { + // A constant that can be used as an immediate value in a + // data-processing instruction. + if (ARM_AM::getSOImmVal(CVal) != -1) + break; + } + return; + + case 'J': + if (Subtarget->isThumb()) { // FIXME thumb2 + // This must be a constant between -255 and -1, for negated ADD + // immediates. This can be used in GCC with an "n" modifier that + // prints the negated value, for use with SUB instructions. It is + // not useful otherwise but is implemented for compatibility. + if (CVal >= -255 && CVal <= -1) + break; + } else { + // This must be a constant between -4095 and 4095. It is not clear + // what this constraint is intended for. Implemented for + // compatibility with GCC. + if (CVal >= -4095 && CVal <= 4095) + break; + } + return; + + case 'K': + if (Subtarget->isThumb1Only()) { + // A 32-bit value where only one byte has a nonzero value. Exclude + // zero to match GCC. This constraint is used by GCC internally for + // constants that can be loaded with a move/shift combination. + // It is not useful otherwise but is implemented for compatibility. + if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) + break; + } else if (Subtarget->isThumb2()) { + // A constant whose bitwise inverse can be used as an immediate + // value in a data-processing instruction. This can be used in GCC + // with a "B" modifier that prints the inverted value, for use with + // BIC and MVN instructions. It is not useful otherwise but is + // implemented for compatibility. + if (ARM_AM::getT2SOImmVal(~CVal) != -1) + break; + } else { + // A constant whose bitwise inverse can be used as an immediate + // value in a data-processing instruction. This can be used in GCC + // with a "B" modifier that prints the inverted value, for use with + // BIC and MVN instructions. It is not useful otherwise but is + // implemented for compatibility. + if (ARM_AM::getSOImmVal(~CVal) != -1) + break; + } + return; + + case 'L': + if (Subtarget->isThumb1Only()) { + // This must be a constant between -7 and 7, + // for 3-operand ADD/SUB immediate instructions. + if (CVal >= -7 && CVal < 7) + break; + } else if (Subtarget->isThumb2()) { + // A constant whose negation can be used as an immediate value in a + // data-processing instruction. This can be used in GCC with an "n" + // modifier that prints the negated value, for use with SUB + // instructions. It is not useful otherwise but is implemented for + // compatibility. + if (ARM_AM::getT2SOImmVal(-CVal) != -1) + break; + } else { + // A constant whose negation can be used as an immediate value in a + // data-processing instruction. This can be used in GCC with an "n" + // modifier that prints the negated value, for use with SUB + // instructions. It is not useful otherwise but is implemented for + // compatibility. + if (ARM_AM::getSOImmVal(-CVal) != -1) + break; + } + return; + + case 'M': + if (Subtarget->isThumb()) { // FIXME thumb2 + // This must be a multiple of 4 between 0 and 1020, for + // ADD sp + immediate. + if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) + break; + } else { + // A power of two or a constant between 0 and 32. This is used in + // GCC for the shift amount on shifted register operands, but it is + // useful in general for any shift amounts. + if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) + break; + } + return; + + case 'N': + if (Subtarget->isThumb()) { // FIXME thumb2 + // This must be a constant between 0 and 31, for shift amounts. + if (CVal >= 0 && CVal <= 31) + break; + } + return; + + case 'O': + if (Subtarget->isThumb()) { // FIXME thumb2 + // This must be a multiple of 4 between -508 and 508, for + // ADD/SUB sp = sp + immediate. + if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) + break; + } + return; + } + Result = DAG.getTargetConstant(CVal, Op.getValueType()); + break; + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, + Ops, DAG); +}