From: Devang Patel Date: Mon, 17 Oct 2011 17:17:43 +0000 (+0000) Subject: svn mv Target/ARM/ARMGlobalMerge.cpp Transforms/Scalar/GlobalMerge.cpp X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=827454e6e28cfed93db990b03b720ef7c23e6917;p=oota-llvm.git svn mv Target/ARM/ARMGlobalMerge.cpp Transforms/Scalar/GlobalMerge.cpp There is no reason to have simple IR level pass in lib/Target. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142200 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index c91fbf8de81..3a926dbbc7c 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -136,6 +136,7 @@ void initializeLoopRotatePass(PassRegistry&); void initializeLoopSimplifyPass(PassRegistry&); void initializeLoopSplitterPass(PassRegistry&); void initializeLoopStrengthReducePass(PassRegistry&); +void initializeGlobalMergePass(PassRegistry&); void initializeLoopUnrollPass(PassRegistry&); void initializeLoopUnswitchPass(PassRegistry&); void initializeLoopIdiomRecognizePass(PassRegistry&); diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index b1536f906d8..5c0e9c66fba 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -112,6 +112,8 @@ Pass *createLICMPass(); // Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0); +Pass *createGlobalMergePass(const TargetLowering *TLI = 0); + //===----------------------------------------------------------------------===// // // LoopUnswitch - This pass is a simple loop unswitching pass. 
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp deleted file mode 100644 index 5f863ea241c..00000000000 --- a/lib/Target/ARM/ARMGlobalMerge.cpp +++ /dev/null @@ -1,219 +0,0 @@ -//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This pass merges globals with internal linkage into one. This way all the -// globals which were merged into a biggest one can be addressed using offsets -// from the same base pointer (no need for separate base pointer for each of the -// global). Such a transformation can significantly reduce the register pressure -// when many globals are involved. -// -// For example, consider the code which touches several global variables at -// once: -// -// static int foo[N], bar[N], baz[N]; -// -// for (i = 0; i < N; ++i) { -// foo[i] = bar[i] * baz[i]; -// } -// -// On ARM the addresses of 3 arrays should be kept in the registers, thus -// this code has quite large register pressure (loop body): -// -// ldr r1, [r5], #4 -// ldr r2, [r6], #4 -// mul r1, r2, r1 -// str r1, [r0], #4 -// -// Pass converts the code to something like: -// -// static struct { -// int foo[N]; -// int bar[N]; -// int baz[N]; -// } merged; -// -// for (i = 0; i < N; ++i) { -// merged.foo[i] = merged.bar[i] * merged.baz[i]; -// } -// -// and in ARM code this becomes: -// -// ldr r0, [r5, #40] -// ldr r1, [r5, #80] -// mul r0, r1, r0 -// str r0, [r5], #4 -// -// note that we saved 2 registers here almostly "for free". 
-// ===---------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm-global-merge" -#include "ARM.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Attributes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -using namespace llvm; - -namespace { - class ARMGlobalMerge : public FunctionPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// target type sizes. - const TargetLowering *TLI; - - bool doMerge(SmallVectorImpl &Globals, - Module &M, bool isConst) const; - - public: - static char ID; // Pass identification, replacement for typeid. - explicit ARMGlobalMerge(const TargetLowering *tli) - : FunctionPass(ID), TLI(tli) {} - - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - const char *getPassName() const { - return "Merge internal globals"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - FunctionPass::getAnalysisUsage(AU); - } - - struct GlobalCmp { - const TargetData *TD; - - GlobalCmp(const TargetData *td) : TD(td) { } - - bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) { - Type *Ty1 = cast(GV1->getType())->getElementType(); - Type *Ty2 = cast(GV2->getType())->getElementType(); - - return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2)); - } - }; - }; -} // end anonymous namespace - -char ARMGlobalMerge::ID = 0; - -bool ARMGlobalMerge::doMerge(SmallVectorImpl &Globals, - Module &M, bool isConst) const { - const TargetData *TD = TLI->getTargetData(); - - // FIXME: Infer the maximum possible offset depending on the actual users - // (these max offsets are different 
for the users inside Thumb or ARM - // functions) - unsigned MaxOffset = TLI->getMaximalGlobalOffset(); - - // FIXME: Find better heuristics - std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD)); - - Type *Int32Ty = Type::getInt32Ty(M.getContext()); - - for (size_t i = 0, e = Globals.size(); i != e; ) { - size_t j = 0; - uint64_t MergedSize = 0; - std::vector Tys; - std::vector Inits; - for (j = i; j != e; ++j) { - Type *Ty = Globals[j]->getType()->getElementType(); - MergedSize += TD->getTypeAllocSize(Ty); - if (MergedSize > MaxOffset) { - break; - } - Tys.push_back(Ty); - Inits.push_back(Globals[j]->getInitializer()); - } - - StructType *MergedTy = StructType::get(M.getContext(), Tys); - Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); - GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, - GlobalValue::InternalLinkage, - MergedInit, "_MergedGlobals"); - for (size_t k = i; k < j; ++k) { - Constant *Idx[2] = { - ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, k-i) - }; - Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); - Globals[k]->replaceAllUsesWith(GEP); - Globals[k]->eraseFromParent(); - } - i = j; - } - - return true; -} - - -bool ARMGlobalMerge::doInitialization(Module &M) { - SmallVector Globals, ConstGlobals, BSSGlobals; - const TargetData *TD = TLI->getTargetData(); - unsigned MaxOffset = TLI->getMaximalGlobalOffset(); - bool Changed = false; - - // Grab all non-const globals. - for (Module::global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { - // Merge is safe for "normal" internal globals only - if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) - continue; - - // Ignore fancy-aligned globals for now. - unsigned Alignment = I->getAlignment(); - Type *Ty = I->getType()->getElementType(); - if (Alignment > TD->getABITypeAlignment(Ty)) - continue; - - // Ignore all 'special' globals. 
- if (I->getName().startswith("llvm.") || - I->getName().startswith(".llvm.")) - continue; - - if (TD->getTypeAllocSize(Ty) < MaxOffset) { - const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering(); - if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal()) - BSSGlobals.push_back(I); - else if (I->isConstant()) - ConstGlobals.push_back(I); - else - Globals.push_back(I); - } - } - - if (Globals.size() > 1) - Changed |= doMerge(Globals, M, false); - if (BSSGlobals.size() > 1) - Changed |= doMerge(BSSGlobals, M, false); - - // FIXME: This currently breaks the EH processing due to way how the - // typeinfo detection works. We might want to detect the TIs and ignore - // them in the future. - // if (ConstGlobals.size() > 1) - // Changed |= doMerge(ConstGlobals, M, true); - - return Changed; -} - -bool ARMGlobalMerge::runOnFunction(Function &F) { - return false; -} - -FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) { - return new ARMGlobalMerge(tli); -} diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 96b1e89b0df..cf1432d64f2 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; static cl::opt @@ -97,7 +98,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { if (OptLevel != CodeGenOpt::None && EnableGlobalMerge) - PM.add(createARMGlobalMergePass(getTargetLowering())); + PM.add(createGlobalMergePass(getTargetLowering())); return false; } diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp new file mode 100644 index 00000000000..0772b487293 --- /dev/null +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -0,0 
+1,226 @@
+//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals which were merged into one bigger global can be addressed using
+// offsets from the same base pointer (no need for a separate base pointer for
+// each global). Such a transformation can significantly reduce the register
+// pressure when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+//    foo[i] = bar[i] * baz[i];
+// }
+//
+//  On ARM the addresses of 3 arrays should be kept in the registers, thus
+//  this code has quite large register pressure (loop body):
+//
+//  ldr     r1, [r5], #4
+//  ldr     r2, [r6], #4
+//  mul     r1, r2, r1
+//  str     r1, [r0], #4
+//
+//  Pass converts the code to something like:
+//
+//  static struct {
+//    int foo[N];
+//    int bar[N];
+//    int baz[N];
+//  } merged;
+//
+//  for (i = 0; i < N; ++i) {
+//    merged.foo[i] = merged.bar[i] * merged.baz[i];
+//  }
+//
+//  and in ARM code this becomes:
+//
+//  ldr     r0, [r5, #40]
+//  ldr     r1, [r5, #80]
+//  mul     r0, r1, r0
+//  str     r0, [r5], #4
+//
+//  note that we saved 2 registers here almost "for free".
+// ===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "global-merge"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMerged, "Number of globals merged");
+
+namespace {
+  /// GlobalMerge - Packs eligible internal globals of a module into
+  /// "_MergedGlobals" structs.  All of the work is done once per module in
+  /// doInitialization(); runOnFunction() never changes anything.
+  class GlobalMerge : public FunctionPass {
+    /// TLI - Keep a pointer of a TargetLowering to consult for determining
+    /// target type sizes.  It is null when the pass is constructed without a
+    /// target (the constructor argument defaults to 0); the pass is then a
+    /// no-op.
+    const TargetLowering *TLI;
+
+    /// doMerge - Merge the candidates in \p Globals into one or more struct
+    /// globals created in \p M; \p isConst becomes the constant flag of each
+    /// created struct.  Returns true if at least one global was rewritten.
+    /// Requires a non-null TLI (doInitialization checks this before calling).
+    bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+                 Module &M, bool isConst) const;
+
+  public:
+    static char ID;             // Pass identification, replacement for typeid.
+    explicit GlobalMerge(const TargetLowering *tli = 0)
+      : FunctionPass(ID), TLI(tli) {
+      initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool doInitialization(Module &M);
+    virtual bool runOnFunction(Function &F);
+
+    const char *getPassName() const {
+      return "Merge internal globals";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      FunctionPass::getAnalysisUsage(AU);
+    }
+
+    /// GlobalCmp - Strict weak ordering of globals by the allocation size of
+    /// their value type, smallest first.
+    struct GlobalCmp {
+      const TargetData *TD;
+
+      GlobalCmp(const TargetData *td) : TD(td) { }
+
+      bool operator()(const GlobalVariable *GV1,
+                      const GlobalVariable *GV2) const {
+        Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+        Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+        return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+      }
+    };
+  };
+} // end anonymous namespace
+
+char GlobalMerge::ID = 0;
+INITIALIZE_PASS(GlobalMerge, "global-merge",
+                "Global Merge", false, false)
+
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+                          Module &M, bool isConst) const {
+  const TargetData *TD = TLI->getTargetData();
+
+  // FIXME: Infer the maximum possible offset depending on the actual users
+  // (these max offsets are different for the users inside Thumb or ARM
+  // functions)
+  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+  // FIXME: Find better heuristics
+  std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+  Type *Int32Ty = Type::getInt32Ty(M.getContext());
+  bool Changed = false;
+
+  // Walk the sorted list, greedily taking the longest run [i, j) whose summed
+  // allocation size stays within MaxOffset.
+  for (size_t i = 0, e = Globals.size(); i != e; ) {
+    size_t j = 0;
+    // NOTE(review): this sums getTypeAllocSize() of the members and does not
+    // look at the struct layout, so presumably inter-member padding is not
+    // counted against MaxOffset - confirm whether that matters for targets.
+    uint64_t MergedSize = 0;
+    std::vector<Type*> Tys;
+    std::vector<Constant*> Inits;
+    for (j = i; j != e; ++j) {
+      Type *Ty = Globals[j]->getType()->getElementType();
+      MergedSize += TD->getTypeAllocSize(Ty);
+      if (MergedSize > MaxOffset) {
+        break;
+      }
+      Tys.push_back(Ty);
+      Inits.push_back(Globals[j]->getInitializer());
+    }
+
+    // Wrapping zero or one global in a struct gains no shared base pointer,
+    // so leave such a run alone.  Always advance by at least one element so
+    // the loop terminates even if the first candidate does not fit.
+    if (Tys.size() < 2) {
+      i = (j > i) ? j : i + 1;
+      continue;
+    }
+
+    StructType *MergedTy = StructType::get(M.getContext(), Tys);
+    Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+    GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
+                                                  GlobalValue::InternalLinkage,
+                                                  MergedInit, "_MergedGlobals");
+    // Redirect every user of the old global to the matching struct member
+    // (GEP indices {0, member index}) and drop the old global.
+    for (size_t k = i; k < j; ++k) {
+      Constant *Idx[2] = {
+        ConstantInt::get(Int32Ty, 0),
+        ConstantInt::get(Int32Ty, k-i)
+      };
+      Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
+      Globals[k]->replaceAllUsesWith(GEP);
+      Globals[k]->eraseFromParent();
+      NumMerged++;
+    }
+    Changed = true;
+    i = j;
+  }
+
+  return Changed;
+}
+
+
+/// doInitialization - Collect the merge candidates of \p M, bucketed into
+/// BSS-local, constant and other globals, and merge the non-constant buckets.
+/// Returns true if the module was modified.
+bool GlobalMerge::doInitialization(Module &M) {
+  // The pass can be constructed without a TargetLowering (the constructor
+  // argument defaults to 0).  There is then no TargetData or offset limit to
+  // consult, so leave the module untouched instead of dereferencing null.
+  if (!TLI)
+    return false;
+
+  SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
+  const TargetData *TD = TLI->getTargetData();
+  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+  bool Changed = false;
+
+  // Grab all non-const globals.
+  for (Module::global_iterator I = M.global_begin(),
+         E = M.global_end(); I != E; ++I) {
+    // Merge is safe for "normal" internal globals only
+    if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+      continue;
+
+    // Ignore fancy-aligned globals for now.
+    unsigned Alignment = I->getAlignment();
+    Type *Ty = I->getType()->getElementType();
+    if (Alignment > TD->getABITypeAlignment(Ty))
+      continue;
+
+    // Ignore all 'special' globals.
+    if (I->getName().startswith("llvm.") ||
+        I->getName().startswith(".llvm."))
+      continue;
+
+    // Only globals strictly smaller than the offset limit are candidates.
+    if (TD->getTypeAllocSize(Ty) < MaxOffset) {
+      const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
+      if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
+        BSSGlobals.push_back(I);
+      else if (I->isConstant())
+        ConstGlobals.push_back(I);
+      else
+        Globals.push_back(I);
+    }
+  }
+
+  if (Globals.size() > 1)
+    Changed |= doMerge(Globals, M, false);
+  if (BSSGlobals.size() > 1)
+    Changed |= doMerge(BSSGlobals, M, false);
+
+  // FIXME: This currently breaks the EH processing due to way how the
+  // typeinfo detection works. We might want to detect the TIs and ignore
+  // them in the future.
+  // if (ConstGlobals.size() > 1)
+  //  Changed |= doMerge(ConstGlobals, M, true);
+
+  return Changed;
+}
+
+/// runOnFunction - Nothing to do per function; merging is done per module in
+/// doInitialization().
+bool GlobalMerge::runOnFunction(Function &F) {
+  return false;
+}
+
+/// createGlobalMergePass - Create a GlobalMerge pass that consults \p tli for
+/// target data and the maximal global offset.
+Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
+  return new GlobalMerge(tli);
+}