#define LLVM_ANALYSIS_CODEMETRICS_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/CallSite.h"
namespace llvm {
+class AssumptionTracker;
class BasicBlock;
+class Loop;
class Function;
class Instruction;
class DataLayout;
NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {}
/// \brief Add information about a block to the current state.
- void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI);
+ void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const Value*> &EphValues);
+
+ /// \brief Collect a loop's ephemeral values (those used only by an assume
+ /// or similar intrinsic in the loop).
+ static void collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
+ SmallPtrSetImpl<const Value*> &EphValues);
+
+ /// \brief Collect a function's ephemeral values (those used only by an
+ /// assume or similar intrinsic in the function).
+ static void collectEphemeralValues(const Function *F, AssumptionTracker *AT,
+ SmallPtrSetImpl<const Value*> &EphValues);
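+
+ /// Typical usage, sketched from the callers updated in this patch (L, AT,
+ /// BB, TTI, and Metrics stand for whatever loop, AssumptionTracker, block,
+ /// TargetTransformInfo, and CodeMetrics object the caller already has):
+ ///   SmallPtrSet<const Value *, 32> EphValues;
+ ///   CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+ ///   Metrics.analyzeBasicBlock(BB, TTI, EphValues);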
};
}
#include <climits>
namespace llvm {
+class AssumptionTracker;
class CallSite;
class DataLayout;
class Function;
/// \brief Cost analyzer used by the inliner.
class InlineCostAnalysis : public CallGraphSCCPass {
const TargetTransformInfo *TTI;
+ AssumptionTracker *AT;
public:
static char ID;
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "code-metrics"
using namespace llvm;
+static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallPtrSet<const Value *, 32> Visited;
+
+ // Make sure that all of the items in WorkSet are in our EphValues set.
+ EphValues.insert(WorkSet.begin(), WorkSet.end());
+
+ // Note: We don't speculate PHIs here, so we'll miss instruction chains kept
+ // alive only by ephemeral values.
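+ //
+ // Illustrative example (not from this patch): given
+ //   %c = icmp sgt i32 %x, 0
+ //   call void @llvm.assume(i1 %c)
+ // the icmp's only user is the assume call, so %c is ephemeral, and the walk
+ // below then visits %x, which is ephemeral too if the icmp is its only user.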
+
+ while (!WorkSet.empty()) {
+ const Value *V = WorkSet.pop_back_val();
+ if (!Visited.insert(V))
+ continue;
+
+ // If all uses of this value are ephemeral, then so is this value.
+ bool FoundNEUse = false;
+ for (const User *I : V->users())
+ if (!EphValues.count(I)) {
+ FoundNEUse = true;
+ break;
+ }
+
+ if (FoundNEUse)
+ continue;
+
+ EphValues.insert(V);
+ DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
+
+ if (const User *U = dyn_cast<User>(V))
+ for (const Value *J : U->operands()) {
+ if (isSafeToSpeculativelyExecute(J))
+ WorkSet.push_back(J);
+ }
+ }
+}
+
+// Find all ephemeral values.
+void CodeMetrics::collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallVector<const Value *, 16> WorkSet;
+
+ for (auto &I : AT->assumptions(L->getHeader()->getParent())) {
+ // Filter out call sites outside of the loop so we don't do a function's
+ // worth of work for each of its loops (and, in the common case, ephemeral
+ // values in the loop are likely due to @llvm.assume calls in the loop).
+ if (!L->contains(I->getParent()))
+ continue;
+
+ WorkSet.push_back(I);
+ }
+
+ completeEphemeralValues(WorkSet, EphValues);
+}
+
+void CodeMetrics::collectEphemeralValues(const Function *F, AssumptionTracker *AT,
+ SmallPtrSetImpl<const Value*> &EphValues) {
+ SmallVector<const Value *, 16> WorkSet;
+
+ for (auto &I : AT->assumptions(const_cast<Function*>(F)))
+ WorkSet.push_back(I);
+
+ completeEphemeralValues(WorkSet, EphValues);
+}
+
/// analyzeBasicBlock - Fill in the current structure with information gleaned
/// from the specified block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ SmallPtrSetImpl<const Value*> &EphValues) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
II != E; ++II) {
+ // Skip ephemeral values.
+ if (EphValues.count(II))
+ continue;
+
// Special handling for calls.
if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
ImmutableCallSite CS(cast<Instruction>(II));
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
+ /// The cache of @llvm.assume intrinsics.
+ AssumptionTracker *AT;
+
// The called function.
Function &F;
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
// Custom analysis routines.
- bool analyzeBlock(BasicBlock *BB);
+ bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
public:
CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
- Function &Callee, int Threshold)
- : DL(DL), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
+ AssumptionTracker *AT, Function &Callee, int Threshold)
+ : DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(DL, TTI, *F, InlineConstants::IndirectCallThreshold);
+ CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
-bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
+ SmallPtrSetImpl<const Value *> &EphValues) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// FIXME: Currently, the number of instructions in a function regardless of
// our ability to simplify them during inline to constants or dead code,
if (isa<DbgInfoIntrinsic>(I))
continue;
+ // Skip ephemeral values.
+ if (EphValues.count(I))
+ continue;
+
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
NumAllocaArgs = SROAArgValues.size();
+ // FIXME: If a caller has multiple calls to a callee, we end up recomputing
+ // the ephemeral values multiple times (and they're completely determined by
+ // the callee, so this is purely duplicate work).
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(&F, AT, EphValues);
+
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
// particular call site in order to get more accurate cost estimates. This
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail out.
- if (!analyzeBlock(BB)) {
+ if (!analyzeBlock(BB, EphValues)) {
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
HasIndirectBr)
return false;
INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetTransformInfo>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
TTI = &getAnalysis<TargetTransformInfo>();
+ AT = &getAnalysis<AssumptionTracker>();
return false;
}
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(Callee->getDataLayout(), *TTI, *Callee, Threshold);
+ CallAnalyzer CA(Callee->getDataLayout(), *TTI, AT, *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
// LCSSA form makes instruction renaming easier.
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionTracker>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
unsigned MaxHeaderSize;
LoopInfo *LI;
const TargetTransformInfo *TTI;
+ AssumptionTracker *AT;
};
}
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
+ AT = &getAnalysis<AssumptionTracker>();
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
// Check size of original header and reject loop if it is very big or we can't
// duplicate blocks inside it.
{
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+
CodeMetrics Metrics;
- Metrics.analyzeBasicBlock(OrigHeader, *TTI);
+ Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
<< " instructions: "; L->dump());
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable,
- const TargetTransformInfo &TTI) {
+ const TargetTransformInfo &TTI,
+ AssumptionTracker *AT) {
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I, TTI);
+ Metrics.analyzeBasicBlock(*I, TTI, EphValues);
NumCalls = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
unsigned NumInlineCandidates;
bool notDuplicatable;
unsigned LoopSize =
- ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI);
+ ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, AT);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
uint64_t UnrolledSize = (uint64_t)LoopSize * Count;
if (notDuplicatable) {
// Analyze loop. Check its size and determine whether it is possible to
// unswitch it. Returns true if we can unswitch this loop.
- bool countLoop(const Loop *L, const TargetTransformInfo &TTI);
+ bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
+ AssumptionTracker *AT);
// Clean all data related to given loop.
void forgetLoop(const Loop *L);
// Analyze loop. Check its size and determine whether it is possible to
// unswitch it. Returns true if we can unswitch this loop.
-bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
+bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
+ AssumptionTracker *AT) {
LoopPropsMapIt PropsIt;
bool Inserted;
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, AT, EphValues);
+
// FIXME: This is overly conservative because it does not take into
// consideration code simplification opportunities and code that can
// be shared by the resultant unswitched loops.
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
- Metrics.analyzeBasicBlock(*I, TTI);
+ Metrics.analyzeBasicBlock(*I, TTI, EphValues);
Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
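// (Worked example with illustrative numbers, not from this change: with
// MaxSize == 100 and a loop of 60 instructions across 4 blocks,
// SizeEstimation is min(60, 4 * 5) == 20, so CanBeUnswitchedCount == 5.)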
// We have probably reached the quota of branches for this loop. If so,
// stop unswitching.
- if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>()))
+ if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>(),
+ AT))
return false;
// Loop over all of the basic blocks in the loop. If we find an interior
--- /dev/null
+; RUN: opt -S -Oz %s | FileCheck %s
+
+@a = global i32 4
+
+define i1 @inner() {
+ %a1 = load volatile i32* @a
+ %x1 = add i32 %a1, %a1
+ %c = icmp eq i32 %x1, 0
+
+ ; Here are enough instructions to prevent inlining, but because they are used
+ ; only by the @llvm.assume intrinsic, they're free (and, thus, inlining will
+ ; still happen).
+ %a2 = mul i32 %a1, %a1
+ %a3 = sub i32 %a1, 5
+ %a4 = udiv i32 %a3, -13
+ %a5 = mul i32 %a4, %a4
+ %a6 = add i32 %a5, %x1
+ %ca = icmp sgt i32 %a6, -7
+ tail call void @llvm.assume(i1 %ca)
+
+ ret i1 %c
+}
+
+; @inner() should be inlined for -Oz.
+; CHECK-NOT: call i1 @inner
+define i1 @outer() optsize {
+ %r = call i1 @inner()
+ ret i1 %r
+}
+
+declare void @llvm.assume(i1) nounwind
+
--- /dev/null
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s
+
+; Make sure this loop is completely unrolled...
+; CHECK-LABEL: @test1
+; CHECK: for.body:
+; CHECK-NOT: for.end:
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+
+ ; This loop will be completely unrolled, even with these extra instructions,
+ ; but only because they're ephemeral (and, thus, free).
+ %1 = add nsw i32 %0, 2
+ %2 = add nsw i32 %1, 4
+ %3 = add nsw i32 %2, 4
+ %4 = add nsw i32 %3, 4
+ %5 = add nsw i32 %4, 4
+ %6 = add nsw i32 %5, 4
+ %7 = add nsw i32 %6, 4
+ %8 = add nsw i32 %7, 4
+ %9 = add nsw i32 %8, 4
+ %10 = add nsw i32 %9, 4
+ %ca = icmp sgt i32 %10, -7
+ call void @llvm.assume(i1 %ca)
+
+ %add = add nsw i32 %0, %sum.01
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 5
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
+
+declare void @llvm.assume(i1) nounwind
+