Add a flag to defer vectorization into a phase after the inliner and its
[oota-llvm.git] / lib / Transforms / IPO / PassManagerBuilder.cpp
index 9d88608a41c4597aec4c807403892e9c6ef02480..1917cc81ddaa7959ffbcff37e4495e8792cf1fb0 100644 (file)
@@ -32,6 +32,11 @@ static cl::opt<bool>
 RunLoopVectorization("vectorize-loops",
                      cl::desc("Run the Loop vectorization passes"));
 
+static cl::opt<bool>
+LateVectorization("late-vectorize", cl::init(false), cl::Hidden,
+                  cl::desc("Run the vectorization pasess late in the pass "
+                           "pipeline (after the inliner)"));
+
 static cl::opt<bool>
 RunSLPVectorization("vectorize-slp",
                     cl::desc("Run the SLP vectorization passes"));
@@ -59,6 +64,7 @@ PassManagerBuilder::PassManagerBuilder() {
     BBVectorize = RunBBVectorization;
     SLPVectorize = RunSLPVectorization;
     LoopVectorize = RunLoopVectorization;
+    LateVectorize = LateVectorization;
 }
 
 PassManagerBuilder::~PassManagerBuilder() {
@@ -189,8 +195,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
   MPM.add(createLoopDeletionPass());          // Delete dead loops
 
-  if (LoopVectorize && OptLevel > 1 && SizeLevel < 2)
-    MPM.add(createLoopVectorizePass());
+  if (!LateVectorize && LoopVectorize && OptLevel > 1 && SizeLevel < 2)
+      MPM.add(createLoopVectorizePass());
 
   if (!DisableUnrollLoops)
     MPM.add(createLoopUnrollPass());          // Unroll small loops
@@ -210,26 +216,70 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
 
   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
 
-  if (SLPVectorize)
-    MPM.add(createSLPVectorizerPass());     // Vectorize parallel scalar chains.
-
-  if (BBVectorize) {
-    MPM.add(createBBVectorizePass());
-    MPM.add(createInstructionCombiningPass());
-    if (OptLevel > 1 && UseGVNAfterVectorization)
-      MPM.add(createGVNPass());                   // Remove redundancies
-    else
-      MPM.add(createEarlyCSEPass());              // Catch trivial redundancies
-
-    // BBVectorize may have significantly shortened a loop body; unroll again.
-    if (!DisableUnrollLoops)
-      MPM.add(createLoopUnrollPass());
+  if (!LateVectorize) {
+    if (SLPVectorize)
+      MPM.add(createSLPVectorizerPass());   // Vectorize parallel scalar chains.
+
+    if (BBVectorize) {
+      MPM.add(createBBVectorizePass());
+      MPM.add(createInstructionCombiningPass());
+      if (OptLevel > 1 && UseGVNAfterVectorization)
+        MPM.add(createGVNPass());           // Remove redundancies
+      else
+        MPM.add(createEarlyCSEPass());      // Catch trivial redundancies
+
+      // BBVectorize may have significantly shortened a loop body; unroll again.
+      if (!DisableUnrollLoops)
+        MPM.add(createLoopUnrollPass());
+    }
   }
 
   MPM.add(createAggressiveDCEPass());         // Delete dead instructions
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createInstructionCombiningPass());  // Clean up after everything.
 
+  // As an experimental mode, run any vectorization passes in a separate
+  // pipeline from the CGSCC pass manager that runs iteratively with the
+  // inliner.
+  if (LateVectorize) {
+    // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
+    // pass manager that we are specifically trying to avoid. To prevent this
+    // we must insert a no-op module pass to reset the pass manager.
+    MPM.add(createBarrierNoopPass());
+
+    // Add the various vectorization passes and relevant cleanup passes for
+    // them since we are no longer in the middle of the main scalar pipeline.
+    if (LoopVectorize && OptLevel > 1 && SizeLevel < 2) {
+      MPM.add(createLoopVectorizePass());
+
+      if (!DisableUnrollLoops)
+        MPM.add(createLoopUnrollPass());    // Unroll small loops
+
+      // FIXME: Is this necessary/useful? Should we also do SimplifyCFG?
+      MPM.add(createInstructionCombiningPass());
+    }
+
+    if (SLPVectorize) {
+      MPM.add(createSLPVectorizerPass());   // Vectorize parallel scalar chains.
+
+      // FIXME: Is this necessary/useful? Should we also do SimplifyCFG?
+      MPM.add(createInstructionCombiningPass());
+    }
+
+    if (BBVectorize) {
+      MPM.add(createBBVectorizePass());
+      MPM.add(createInstructionCombiningPass());
+      if (OptLevel > 1 && UseGVNAfterVectorization)
+        MPM.add(createGVNPass());           // Remove redundancies
+      else
+        MPM.add(createEarlyCSEPass());      // Catch trivial redundancies
+
+      // BBVectorize may have significantly shortened a loop body; unroll again.
+      if (!DisableUnrollLoops)
+        MPM.add(createLoopUnrollPass());
+    }
+  }
+
   if (!DisableUnitAtATime) {
     // FIXME: We shouldn't bother with this anymore.
     MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes