LoopVectorizer: When -Os is used, vectorize only loops that don't require a tail loop...

author Nadav Rotem <nrotem@apple.com>

Wed, 12 Dec 2012 01:11:46 +0000 (01:11 +0000)

committer Nadav Rotem <nrotem@apple.com>

Wed, 12 Dec 2012 01:11:46 +0000 (01:11 +0000)
author Nadav Rotem <nrotem@apple.com>
Wed, 12 Dec 2012 01:11:46 +0000 (01:11 +0000)
committer Nadav Rotem <nrotem@apple.com>
Wed, 12 Dec 2012 01:11:46 +0000 (01:11 +0000)
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h

index baf8550edc89af690de445db12bb3f61addf2e9c..48d7c40642d04c883c6f3ebe4a2b583b3ea4a487 100644 (file)
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -156,7 +156,7 @@ namespace {
        (void) llvm::createCorrelatedValuePropagationPass();
        (void) llvm::createMemDepPrinter();
        (void) llvm::createInstructionSimplifierPass();
-      (void) llvm::createLoopVectorizePass();
+      (void) llvm::createLoopVectorizePass(0);
        (void) llvm::createBBVectorizePass();
  
        (void)new llvm::IntervalPartition();
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h

index 1ba4d22d5f5a5bb6fb776c2be648dad053b8ecc4..81864f32eff3b97fb0e8e1f657665930262ab4d4 100644 (file)
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -111,7 +111,7 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
  //
  // LoopVectorize - Create a loop vectorization pass.
  //
-Pass *createLoopVectorizePass();
+Pass *createLoopVectorizePass(bool OptForSize);
  
  //===----------------------------------------------------------------------===//
  /// @brief Vectorize the BasicBlock.
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp

index a9a9f2eeceb97ef01cb87fde04cd0f9c9df10ff2..0862786127b3f7841ef0d38fb846146af2fa1031 100644 (file)
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -189,7 +189,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
    MPM.add(createLoopDeletionPass());          // Delete dead loops
  
    if (LoopVectorize && OptLevel > 1)
-    MPM.add(createLoopVectorizePass());
+    MPM.add(createLoopVectorizePass(SizeLevel));
  
    if (!DisableUnrollLoops)
      MPM.add(createLoopUnrollPass());          // Unroll small loops
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index baa655cd59a237a663e09b7b66fe24f047ba66c8..d796716cf7a6a0b68ac4db3adb701b2de9842d6b 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -51,9 +51,12 @@ namespace {
  
  /// The LoopVectorize Pass.
  struct LoopVectorize : public LoopPass {
-  static char ID; // Pass identification, replacement for typeid
+  /// Pass identification, replacement for typeid
+  static char ID;
+  /// Optimize for size. Do not generate tail loops.
+  bool OptForSize;
  
-  LoopVectorize() : LoopPass(ID) {
+  explicit LoopVectorize(bool OptSz = false) : LoopPass(ID), OptForSize(OptSz) {
      initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
    }
  
@@ -85,23 +88,17 @@ struct LoopVectorize : public LoopPass {
      }
  
      // Select the preffered vectorization factor.
-    unsigned VF = 1;
-    if (VectorizationFactor == 0) {
-      const VectorTargetTransformInfo *VTTI = 0;
-      if (TTI)
-        VTTI = TTI->getVectorTargetTransformInfo();
-      // Use the cost model.
-      LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
-      VF = CM.findBestVectorizationFactor();
-
-      if (VF == 1) {
-        DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
-        return false;
-      }
-
-    } else {
-      // Use the user command flag.
-      VF = VectorizationFactor;
+    const VectorTargetTransformInfo *VTTI = 0;
+    if (TTI)
+      VTTI = TTI->getVectorTargetTransformInfo();
+    // Use the cost model.
+    LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
+    unsigned VF = CM.selectVectorizationFactor(OptForSize,
+                                                 VectorizationFactor);
+
+    if (VF == 1) {
+      DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
+      return false;
      }
  
      DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF << ") in "<<
@@ -1886,7 +1883,48 @@ bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
  }
  
  unsigned
-LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
+LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
+                                                        unsigned UserVF) {
+  if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
+    DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n");
+    return 1;
+  }
+
+  // Find the trip count.
+  unsigned TC = SE->getSmallConstantTripCount(TheLoop, TheLoop->getLoopLatch());
+  DEBUG(dbgs() << "LV: Found trip count:"<<TC<<"\n");
+
+  unsigned VF = MaxVectorSize;
+
+  // If we optimize the program for size, avoid creating the tail loop.
+  if (OptForSize) {
+    // If we are unable to calculate the trip count then don't try to vectorize.
+    if (TC < 2) {
+      DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+      return 1;
+    }
+
+    // Find the maximum SIMD width that can fit within the trip count.
+    VF = TC % MaxVectorSize;
+
+    if (VF == 0)
+      VF = MaxVectorSize;
+
+    // If the trip count that we found modulo the vectorization factor is not
+    // zero then we require a tail.
+    if (VF < 2) {
+      DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n");
+      return 1;
+    }
+  }
+
+  if (UserVF != 0) {
+    assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
+    DEBUG(dbgs() << "LV: Using user VF "<<UserVF<<".\n");
+
+    return UserVF;
+  }
+
    if (!VTTI) {
      DEBUG(dbgs() << "LV: No vector target information. Not vectorizing. \n");
      return 1;
@@ -2121,8 +2159,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
  
  namespace llvm {
-  Pass *createLoopVectorizePass() {
-    return new LoopVectorize();
+  Pass *createLoopVectorizePass(bool OptForSize = false) {
+    return new LoopVectorize(OptForSize);
    }
  }
  
diff --git a/lib/Transforms/Vectorize/LoopVectorize.h b/lib/Transforms/Vectorize/LoopVectorize.h

index 9d6d80e22b3e7c02c06aea7920dc49128d36224a..e5a525960599ac266cea673d05a215bd17ce64ea 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.h
+++ b/lib/Transforms/Vectorize/LoopVectorize.h
@@ -420,10 +420,11 @@ public:
                               const VectorTargetTransformInfo *Vtti):
    TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
  
-  /// Returns the most profitable vectorization factor for the loop that is
-  /// smaller or equal to the VF argument. This method checks every power
-  /// of two up to VF.
-  unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize);
+  /// Returns the most profitable vectorization factor in powers of two.
+  /// This method checks every power of two up to VF. If UserVF is not ZERO
+  /// then this vectorization factor will be selected if vectorization is
+  /// possible.
+  unsigned selectVectorizationFactor(bool OptForSize, unsigned UserVF);
  
  private:
    /// Returns the expected execution cost. The unit of the cost does
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp

index 3fb36cadea088627ee757ce0f0f123219b1902ef..cf7d4ee8b2e1d3118beb4be1908dd5d64091353a 100644 (file)
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,4 +1,4 @@
-//===-- Vectorize.cpp -----------------------------------------------------===//
+   //===-- Vectorize.cpp -----------------------------------------------------===//
  //
  //                     The LLVM Compiler Infrastructure
  //
@@ -39,5 +39,5 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
  }
  
  void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createLoopVectorizePass());
+  unwrap(PM)->add(createLoopVectorizePass(0));
  }
author	Nadav Rotem <nrotem@apple.com>
	Wed, 12 Dec 2012 01:11:46 +0000 (01:11 +0000)
committer	Nadav Rotem <nrotem@apple.com>
	Wed, 12 Dec 2012 01:11:46 +0000 (01:11 +0000)
include/llvm/LinkAllPasses.h		patch \| blob \| history
include/llvm/Transforms/Vectorize.h		patch \| blob \| history
lib/Transforms/IPO/PassManagerBuilder.cpp		patch \| blob \| history
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
lib/Transforms/Vectorize/LoopVectorize.h		patch \| blob \| history
lib/Transforms/Vectorize/Vectorize.cpp		patch \| blob \| history