X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FTransforms%2FIPO%2FPassManagerBuilder.cpp;h=936ca1364ada88399ae175aea400c33738282fc9;hp=3b21d3f0cb134b1ea04389bcf03f6fdace19c39b;hb=59537e6802c57669892d26149f9018cd51ed8c11;hpb=0048c06a9dae97dd08ae20545c9a6ffaf03aabf7 diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 3b21d3f0cb1..936ca1364ad 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -22,7 +22,12 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" @@ -89,12 +94,16 @@ static cl::opt EnableLoopDistribute( "enable-loop-distribute", cl::init(false), cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass")); +static cl::opt EnableNonLTOGlobalsModRef( + "enable-non-lto-gmr", cl::init(false), cl::Hidden, + cl::desc( + "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; - DisableTailCalls = false; DisableUnitAtATime = false; DisableUnrollLoops = false; BBVectorize = RunBBVectorization; @@ -106,6 +115,7 @@ PassManagerBuilder::PassManagerBuilder() { VerifyInput = false; VerifyOutput = false; MergeFunctions = false; + PrepareForLTO = false; } PassManagerBuilder::~PassManagerBuilder() { @@ -213,6 +223,12 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } + if (EnableNonLTOGlobalsModRef) + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsModRefPass()); + // Start of CallGraph SCC passes. if (!DisableUnitAtATime) MPM.add(createPruneEHPass()); // Remove dead EH info @@ -228,7 +244,7 @@ void PassManagerBuilder::populateModulePassManager( // Start of function pass. // Break up aggregate allocas, using SSAUpdater. if (UseNewSROA) - MPM.add(createSROAPass(/*RequiresDomTree*/ false)); + MPM.add(createSROAPass()); else MPM.add(createScalarReplAggregatesPass(-1, false)); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies @@ -238,8 +254,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); - if (!DisableTailCalls) - MPM.add(createTailCallEliminationPass()); // Eliminate tail calls + MPM.add(createTailCallEliminationPass()); // Eliminate tail calls MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions // Rotate Loop - disable header duplication at -Oz @@ -316,13 +331,33 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (EnableNonLTOGlobalsModRef) + // We add a fresh GlobalsModRef run at this point. This is particularly + // useful as the above will have inlined, DCE'ed, and function-attr + // propagated everything. We should at this point have a reasonably minimal + // and richly annotated call graph. By computing aliasing and mod/ref + // information for all local globals here, the late loop passes and notably + // the vectorizer will be able to use them to help recognize vectorizable + // memory operations. + // + // Note that this relies on a bug in the pass manager which preserves + // a module analysis into a function pass pipeline (and throughout it) so + // long as the first function pass doesn't invalidate the module analysis. + // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for + // this to work. Fortunately, it is trivial to preserve AliasAnalysis + // (doing nothing preserves it as it is required to be conservatively + // correct in the face of IR changes). + MPM.add(createGlobalsModRefPass()); + if (RunFloat2Int) MPM.add(createFloat2IntPass()); + addExtensionsToPM(EP_VectorizerStart, MPM); + // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies - // on the rotated form. - MPM.add(createLoopRotatePass()); + // on the rotated form. Disable header duplication at -Oz. + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. @@ -385,10 +420,6 @@ void PassManagerBuilder::populateModulePassManager( // LoopUnroll may generate some redundency to cleanup. MPM.add(createInstructionCombiningPass()); - // This is a barrier pass to avoid combine LICM pass and loop unroll pass - // within same loop pass manager. - MPM.add(createInstructionSimplifierPass()); - // Runtime unrolling will introduce runtime check in loop prologue. If the // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the @@ -407,6 +438,17 @@ void PassManagerBuilder::populateModulePassManager( // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. if (OptLevel > 1) { + if (!PrepareForLTO) { + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. Note if they + // are unreferenced they will be removed by GlobalDCE below, so + // this only impacts referenced available externally globals. + // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead. + MPM.add(createEliminateAvailableExternallyPass()); + } MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. MPM.add(createConstantMergePass()); // Merge dup global constants } @@ -518,6 +560,9 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( // Delete basic blocks, which optimization passes may have killed. PM.add(createCFGSimplificationPass()); + // Drop bodies of available externally objects to improve GlobalDCE. + PM.add(createEliminateAvailableExternallyPass()); + // Now that we have optimized the program, discard unreachable functions. PM.add(createGlobalDCEPass());