From 308458a98b4a98986b1ceff93f8988bd9d59bc73 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 23 Feb 2015 19:28:45 +0000 Subject: [PATCH] Rewrite the global merge pass to be subprogram agnostic for now. It was previously using the subtarget to get values for the global offset without actually checking each function as it was generating code. Go ahead and solidify the current behavior and make the existing FIXMEs more prominent. As a note the ARM backend previously had a thumb1 and non-thumb1 set of defaults. Only the former was tested so I've changed the behavior to only use that for now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230245 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 6 ---- include/llvm/Transforms/Scalar.h | 2 +- lib/CodeGen/GlobalMerge.cpp | 37 ++++++++++----------- lib/Target/AArch64/AArch64ISelLowering.cpp | 7 ---- lib/Target/AArch64/AArch64ISelLowering.h | 4 --- lib/Target/AArch64/AArch64TargetMachine.cpp | 5 ++- lib/Target/ARM/ARMISelLowering.cpp | 6 ---- lib/Target/ARM/ARMISelLowering.h | 4 --- lib/Target/ARM/ARMTargetMachine.cpp | 7 +++- 9 files changed, 29 insertions(+), 49 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index cd499ba5cb0..d5ff0584422 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -959,12 +959,6 @@ public: return false; } - /// Returns the maximal possible offset which can be used for loads / stores - /// from the global. - virtual unsigned getMaximalGlobalOffset() const { - return 0; - } - /// Returns true if a cast between SrcAS and DestAS is a noop. virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { return false; diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 108201f072f..558b81e023c 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -145,7 +145,7 @@ Pass *createLICMPass(); // Pass *createLoopStrengthReducePass(); -Pass *createGlobalMergePass(const TargetMachine *TM = nullptr); +Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset); //===----------------------------------------------------------------------===// // diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 6da9766379e..32543b7723a 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -90,10 +90,16 @@ EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, cl::desc("Enable global merge pass on external linkage"), cl::init(false)); -STATISTIC(NumMerged , "Number of globals merged"); +STATISTIC(NumMerged, "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { const TargetMachine *TM; + const DataLayout *DL; + // FIXME: Infer the maximum possible offset depending on the actual users + // (these max offsets are different for the users inside Thumb or ARM + // functions), see the code that passes in the offset in the ARM backend + // for more information. + unsigned MaxOffset; bool doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const; @@ -117,8 +123,10 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - explicit GlobalMerge(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM) { + explicit GlobalMerge(const TargetMachine *TM = nullptr, + unsigned MaximalOffset = 0) + : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()), + MaxOffset(MaximalOffset) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -138,22 +146,16 @@ namespace { } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables", - false, false) +INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables", + false, false) +INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables", + false, false) bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const { - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); - const DataLayout *DL = TM->getDataLayout(); - - // FIXME: Infer the maximum possible offset depending on the actual users - // (these max offsets are different for the users inside Thumb or ARM - // functions) - unsigned MaxOffset = TLI->getMaximalGlobalOffset(); - // FIXME: Find better heuristics std::stable_sort(Globals.begin(), Globals.end(), - [DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + [this](const GlobalVariable *GV1, const GlobalVariable *GV2) { Type *Ty1 = cast(GV1->getType())->getElementType(); Type *Ty2 = cast(GV2->getType())->getElementType(); @@ -282,9 +284,6 @@ bool GlobalMerge::doInitialization(Module &M) { DenseMap > Globals, ConstGlobals, BSSGlobals; - const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering(); - const DataLayout *DL = TM->getDataLayout(); - unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; setMustKeepGlobalVariables(M); @@ -357,6 +356,6 @@ bool GlobalMerge::doFinalization(Module &M) { return false; } -Pass *llvm::createGlobalMergePass(const TargetMachine *TM) { - return new GlobalMerge(TM); +Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset) { + return new GlobalMerge(TM, Offset); } diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index fb31d7d3376..f16ec9d0062 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -730,13 +730,6 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i64; } -unsigned AArch64TargetLowering::getMaximalGlobalOffset() const { - // FIXME: On AArch64, this depends on the type. - // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). - // and the offset has to be a multiple of the related size in bytes. - return 4095; -} - FastISel * AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index db15538e43b..0a84294948f 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -246,10 +246,6 @@ public: /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; - /// getMaximalGlobalOffset - Returns the maximal possible offset which can - /// be used for loads / stores from the global. - unsigned getMaximalGlobalOffset() const override; - /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Addrspacecasts are always noops. diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 53360428050..d73d0b3f8b7 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -236,8 +236,11 @@ bool AArch64PassConfig::addPreISel() { // get a chance to be merged if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant) addPass(createAArch64PromoteConstantPass()); + // FIXME: On AArch64, this depends on the type. + // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). + // and the offset has to be a multiple of the related size in bytes. if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createGlobalMergePass(TM)); + addPass(createGlobalMergePass(TM, 4095)); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a66d9ab00f4..7620cd0a463 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1170,12 +1170,6 @@ ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, return ARM::createFastISel(funcInfo, libInfo); } -/// getMaximalGlobalOffset - Returns the maximal possible offset which can -/// be used for loads / stores from the global. -unsigned ARMTargetLowering::getMaximalGlobalOffset() const { - return (Subtarget->isThumb1Only() ? 127 : 4095); -} - Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { unsigned NumVals = N->getNumValues(); if (!NumVals) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 2544be08234..12513370077 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -353,10 +353,6 @@ namespace llvm { /// specified value type. const TargetRegisterClass *getRegClassFor(MVT VT) const override; - /// getMaximalGlobalOffset - Returns the maximal possible offset which can - /// be used for loads / stores from the global. - unsigned getMaximalGlobalOffset() const override; - /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Addrspacecasts are always noops. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 11fbb5c3e30..a97a058f2aa 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -326,7 +326,12 @@ void ARMPassConfig::addIRPasses() { bool ARMPassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None) - addPass(createGlobalMergePass(TM)); + // FIXME: This is using the thumb1 only constant value for + // maximal global offset for merging globals. We may want + // to look into using the old value for non-thumb1 code of + // 4095 based on the TargetMachine, but this starts to become + // tricky when doing code gen per function. + addPass(createGlobalMergePass(TM, 127)); return false; } -- 2.34.1