X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FTarget%2FARM%2FARMSubtarget.cpp;h=bb6ae28065bd3fb6af64300009384314ca5469e9;hb=001f3417071d4d6b08cc0dcd1dc03f5f90fe7623;hp=699c11d7c385bba1225af21cc80ef5309f05d240;hpb=c559ba72517d912ed4cd45ee6097b240c3234086;p=oota-llvm.git diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 699c11d7c38..bb6ae28065b 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -15,21 +15,22 @@ #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" -#include "ARMMachineFunctionInfo.h" #include "ARMTargetMachine.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -39,38 +40,10 @@ using namespace llvm; #define GET_SUBTARGETINFO_CTOR #include "ARMGenSubtargetInfo.inc" -static cl::opt -ReserveR9("arm-reserve-r9", cl::Hidden, - cl::desc("Reserve R9, making it unavailable as GPR")); - -static cl::opt -ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden); - static cl::opt UseFusedMulOps("arm-use-mulops", cl::init(true), cl::Hidden); -namespace { -enum AlignMode { - DefaultAlign, - StrictAlign, - NoStrictAlign -}; -} - -static cl::opt -Align(cl::desc("Load/store alignment support"), - cl::Hidden, cl::init(DefaultAlign), - cl::values( - clEnumValN(DefaultAlign, "arm-default-align", - "Generate unaligned accesses only on hardware/OS " - "combinations that are known to support them"), - clEnumValN(StrictAlign, "arm-strict-align", - "Disallow all unaligned memory accesses"), - clEnumValN(NoStrictAlign, "arm-no-strict-align", - "Allow unaligned memory accesses"), - clEnumValEnd)); - enum ITMode { DefaultIT, RestrictedIT, @@ -88,55 +61,11 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), "Allow IT blocks based on ARMv7"), clEnumValEnd)); -static std::string computeDataLayout(ARMSubtarget &ST) { - std::string Ret = ""; - - if (ST.isLittle()) - // Little endian. - Ret += "e"; - else - // Big endian. - Ret += "E"; - - Ret += DataLayout::getManglingComponent(ST.getTargetTriple()); - - // Pointers are 32 bits and aligned to 32 bits. - Ret += "-p:32:32"; - - // ABIs other than APCS have 64 bit integers with natural alignment. - if (!ST.isAPCS_ABI()) - Ret += "-i64:64"; - - // We have 64 bits floats. The APCS ABI requires them to be aligned to 32 - // bits, others to 64 bits. We always try to align to 64 bits. - if (ST.isAPCS_ABI()) - Ret += "-f64:32:64"; - - // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others - // to 64. We always ty to give them natural alignment. - if (ST.isAPCS_ABI()) - Ret += "-v64:32:64-v128:32:128"; - else - Ret += "-v128:64:128"; - - // Try to align aggregates to 32 bits (the default is 64 bits, which has no - // particular hardware support on 32-bit ARM). - Ret += "-a:0:32"; - - // Integer registers are 32 bits. - Ret += "-n32"; - - // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit - // aligned everywhere else. - if (ST.isTargetNaCl()) - Ret += "-S128"; - else if (ST.isAAPCS_ABI()) - Ret += "-S64"; - else - Ret += "-S32"; - - return Ret; -} +/// ForceFastISel - Use the fast-isel, even for subtargets where it is not +/// currently supported (for testing only). +static cl::opt +ForceFastISel("arm-force-fast-isel", + cl::init(false), cl::Hidden); /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. @@ -147,23 +76,30 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, return *this; } -ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, const ARMBaseTargetMachine &TM, - bool IsLittle) +ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, + StringRef FS) { + ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS); + if (STI.isThumb1Only()) + return (ARMFrameLowering *)new Thumb1FrameLowering(STI); + + return new ARMFrameLowering(STI); +} + +ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, + const ARMBaseTargetMachine &TM, bool IsLittle) : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), - ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), - TargetTriple(TT), Options(TM.Options), TM(TM), - DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), - TSInfo(DL), + ARMProcClass(None), ARMArch(ARMv4t), stackAlignment(4), CPUString(CPU), + IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM), + FrameLowering(initializeFrameLowering(CPU, FS)), + // At this point initializeSubtargetDependencies has been called so + // we can query directly. InstrInfo(isThumb1Only() ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) : !isThumb() ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), - TLInfo(TM), - FrameLowering(!isThumb1Only() - ? new ARMFrameLowering(*this) - : (ARMFrameLowering *)new Thumb1FrameLowering(*this)) {} + TLInfo(TM, *this) {} void ARMSubtarget::initializeEnvironment() { HasV4TOps = false; @@ -171,9 +107,12 @@ void ARMSubtarget::initializeEnvironment() { HasV5TEOps = false; HasV6Ops = false; HasV6MOps = false; + HasV6KOps = false; HasV6T2Ops = false; HasV7Ops = false; HasV8Ops = false; + HasV8_1aOps = false; + HasV8_2aOps = false; HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; @@ -185,12 +124,14 @@ void ARMSubtarget::initializeEnvironment() { HasVMLxForwarding = false; SlowFPBrcc = false; InThumbMode = false; + UseSoftFloat = false; HasThumb2 = false; NoARM = false; - IsR9Reserved = ReserveR9; - UseMovt = false; + ReserveR9 = false; + NoMovt = false; SupportsTailCall = false; HasFP16 = false; + HasFullFP16 = false; HasD16 = false; HasHardwareDivide = false; HasHardwareDivideInARM = false; @@ -208,29 +149,45 @@ void ARMSubtarget::initializeEnvironment() { HasCrypto = false; HasCRC = false; HasZeroCycleZeroing = false; - AllowsUnalignedMem = false; - Thumb2DSP = false; + StrictAlign = false; + HasDSP = false; UseNaClTrap = false; + GenLongCalls = false; UnsafeFPMath = false; + + // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this + // directly from it, but we can try to make sure they're consistent when both + // available. + UseSjLjEH = isTargetDarwin() && !isTargetWatchOS(); + assert((!TM.getMCAsmInfo() || + (TM.getMCAsmInfo()->getExceptionHandlingType() == + ExceptionHandling::SjLj) == UseSjLjEH) && + "inconsistent sjlj choice between CodeGen and MC"); } void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (CPUString.empty()) { - if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s")) - // Default to the Swift CPU when targeting armv7s/thumbv7s. - CPUString = "swift"; - else - CPUString = "generic"; + CPUString = "generic"; + + if (isTargetDarwin()) { + StringRef ArchName = TargetTriple.getArchName(); + if (ArchName.endswith("v7s")) + // Default to the Swift CPU when targeting armv7s/thumbv7s. + CPUString = "swift"; + else if (ArchName.endswith("v7k")) + // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k. + // ARMv7k does not use SjLj exception handling. + CPUString = "cortex-a7"; + } } // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied // based on the architecture version. - std::string ArchFS = - ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString); + std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple, CPUString); if (!FS.empty()) { if (!ArchFS.empty()) - ArchFS = ArchFS + "," + FS.str(); + ArchFS = (Twine(ArchFS) + "," + FS).str(); else ArchFS = FS; } @@ -252,48 +209,35 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isAAPCS_ABI()) stackAlignment = 8; - if (isTargetNaCl()) + if (isTargetNaCl() || isAAPCS16_ABI()) stackAlignment = 16; - UseMovt = hasV6T2Ops() && ArmUseMOVT; - - if (isTargetMachO()) { - IsR9Reserved = ReserveR9 || !HasV6Ops; - SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0); - } else { - IsR9Reserved = ReserveR9; - SupportsTailCall = !isThumb1Only(); - } - - if (Align == DefaultAlign) { - // Assume pre-ARMv6 doesn't support unaligned accesses. - // - // ARMv6 may or may not support unaligned accesses depending on the - // SCTLR.U bit, which is architecture-specific. We assume ARMv6 - // Darwin and NetBSD targets support unaligned accesses, and others don't. - // - // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit - // which raises an alignment fault on unaligned accesses. Linux - // defaults this bit to 0 and handles it as a system-wide (not - // per-process) setting. It is therefore safe to assume that ARMv7+ - // Linux targets support unaligned accesses. The same goes for NaCl. - // - // The above behavior is consistent with GCC. - AllowsUnalignedMem = - (hasV7Ops() && (isTargetLinux() || isTargetNaCl() || - isTargetNetBSD())) || - (hasV6Ops() && (isTargetMachO() || isTargetNetBSD())); - } else { - AllowsUnalignedMem = !(Align == StrictAlign); - } - - // No v6M core supports unaligned memory access (v6M ARM ARM A3.2) - if (isV6M()) - AllowsUnalignedMem = false; + // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: + // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as + // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation + // support in the assembler and linker to be used. This would need to be + // fixed to fully support tail calls in Thumb1. + // + // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take + // LR. This means if we need to reload LR, it takes an extra instructions, + // which outweighs the value of the tail call; but here we don't know yet + // whether LR is going to be used. Probably the right approach is to + // generate the tail call here and turn it back into CALL/RET in + // emitEpilogue if LR is used. + + // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, + // but we need to make sure there are enough registers; the only valid + // registers are the 4 used for parameters. We don't currently do this + // case. + + SupportsTailCall = !isThumb1Only(); + + if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0)) + SupportsTailCall = false; switch (IT) { case DefaultIT: - RestrictIT = hasV8Ops() ? true : false; + RestrictIT = hasV8Ops(); break; case RestrictedIT: RestrictIT = true; @@ -304,8 +248,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { } // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. - uint64_t Bits = getFeatureBits(); - if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters + const FeatureBitset &Bits = getFeatureBits(); + if ((Bits[ARM::ProcA5] || Bits[ARM::ProcA8]) && // Where this matters (Options.UnsafeFPMath || isTargetDarwin())) UseNEONForSinglePrecisionFP = true; } @@ -316,9 +260,15 @@ bool ARMSubtarget::isAPCS_ABI() const { } bool ARMSubtarget::isAAPCS_ABI() const { assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); - return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS; + return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS || + TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; +} +bool ARMSubtarget::isAAPCS16_ABI() const { + assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); + return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; } + /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. bool ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, @@ -326,7 +276,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, if (RelocM == Reloc::Static) return false; - bool isDecl = GV->isDeclarationForLinker(); + bool isDef = GV->isStrongDefinitionForLinker(); if (!isTargetMachO()) { // Extra load is needed for all externally visible. @@ -334,34 +284,22 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, return false; return true; } else { - if (RelocM == Reloc::PIC_) { - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (!isDecl && !GV->isWeakForLinker()) - return false; - - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return true; + // If this is a strong reference to a definition, it is definitely not + // through a stub. + if (isDef) + return false; + + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return true; + if (RelocM == Reloc::PIC_) { // If symbol visibility is hidden, we have a stub for common symbol // references and external declarations. - if (isDecl || GV->hasCommonLinkage()) + if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) // Hidden $non_lazy_ptr reference. return true; - - return false; - } else { - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (!isDecl && !GV->isWeakForLinker()) - return false; - - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return true; } } @@ -373,11 +311,23 @@ unsigned ARMSubtarget::getMispredictionPenalty() const { } bool ARMSubtarget::hasSinCos() const { - return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0); + return isTargetWatchOS() || + (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0)); +} + +bool ARMSubtarget::enableMachineScheduler() const { + // Enable the MachineScheduler before register allocation for out-of-order + // architectures where we do not use the PostRA scheduler anymore (for now + // restricted to swift). + return getSchedModel().isOutOfOrder() && isSwift(); } // This overrides the PostRAScheduler bit in the SchedModel for any CPU. -bool ARMSubtarget::enablePostMachineScheduler() const { +bool ARMSubtarget::enablePostRAScheduler() const { + // No need for PostRA scheduling on out of order CPUs (for now restricted to + // swift). + if (getSchedModel().isOutOfOrder() && isSwift()) + return false; return (!isThumb() || hasThumb2()); } @@ -385,11 +335,32 @@ bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier() && !isThumb1Only(); } +bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const { + // For general targets, the prologue can grow when VFPs are allocated with + // stride 4 (more vpush instructions). But WatchOS uses a compact unwind + // format which it's more important to get right. + return isTargetWatchOS() || (isSwift() && !MF.getFunction()->optForMinSize()); +} + bool ARMSubtarget::useMovt(const MachineFunction &MF) const { // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit // immediates as it is inherently position independent, and may be out of // range otherwise. - return UseMovt && (isTargetWindows() || - !MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::MinSize)); + return !NoMovt && hasV6T2Ops() && + (isTargetWindows() || !MF.getFunction()->optForMinSize()); +} + +bool ARMSubtarget::useFastISel() const { + // Enable fast-isel for any target, for testing only. + if (ForceFastISel) + return true; + + // Limit fast-isel to the targets that are or have been tested. + if (!hasV6Ops()) + return false; + + // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. + return TM.Options.EnableFastISel && + ((isTargetMachO() && !isThumb1Only()) || + (isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb())); }