"small-loop-cost", cl::init(20), cl::Hidden,
cl::desc("The cost of a loop that is considered 'small' by the unroller."));
+static cl::opt<bool> LoopVectorizeWithBlockFrequency(
+ "loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden,
+ cl::desc("Enable the use of the block frequency analysis to access PGO "
+ "heuristics minimizing code growth in cold regions and being more "
+ "aggressive in hot regions."));
+
// Runtime unroll loops for load/store throughput.
static cl::opt<bool> EnableLoadStoreRuntimeUnroll(
"enable-loadstore-runtime-unroll", cl::init(false), cl::Hidden,
// Compute the weighted frequency of this loop being executed and see if it
// is less than 20% of the function entry baseline frequency. Note that we
// always have a canonical loop here because we think we *can* vectoriez.
- BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
- if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq)
- OptForSize = true;
+ // FIXME: This is hidden behind a flag due to pervasive problems with
+ // exactly what block frequency models.
+ if (LoopVectorizeWithBlockFrequency) {
+ BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
+ if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq)
+ OptForSize = true;
+ }
// Check the function attributes to see if implicit floats are allowed.a
// FIXME: This check doesn't seem possibly correct -- what if the loop is
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"