Add a new pass "inductive range check elimination"

author Sanjoy Das <sanjoy@playingwithpointers.com>

Fri, 16 Jan 2015 01:03:22 +0000 (01:03 +0000)

committer Sanjoy Das <sanjoy@playingwithpointers.com>

Fri, 16 Jan 2015 01:03:22 +0000 (01:03 +0000)
author Sanjoy Das <sanjoy@playingwithpointers.com>
Fri, 16 Jan 2015 01:03:22 +0000 (01:03 +0000)
committer Sanjoy Das <sanjoy@playingwithpointers.com>
Fri, 16 Jan 2015 01:03:22 +0000 (01:03 +0000)
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h

index d449930cc634ab406ecfd733dfccca6b604e2d66..255fc43ec350c6d5d53f55051a4d80ec2b504656 100644 (file)
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -142,6 +142,7 @@ void initializeIPCPPass(PassRegistry&);
  void initializeIPSCCPPass(PassRegistry&);
  void initializeIVUsersPass(PassRegistry&);
  void initializeIfConverterPass(PassRegistry&);
+void initializeInductiveRangeCheckEliminationPass(PassRegistry&);
  void initializeIndVarSimplifyPass(PassRegistry&);
  void initializeInlineCostAnalysisPass(PassRegistry&);
  void initializeInstCombinerPass(PassRegistry&);
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h

index 2e8feab6d29d81ad573df3a8c72982b2d29dc4ea..bce4c0679a100f35e522e5697369267d5f4e0247 100644 (file)
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -86,6 +86,7 @@ namespace {
        (void) llvm::createGlobalsModRefPass();
        (void) llvm::createIPConstantPropagationPass();
        (void) llvm::createIPSCCPPass();
+      (void) llvm::createInductiveRangeCheckEliminationPass();
        (void) llvm::createIndVarSimplifyPass();
        (void) llvm::createInstructionCombiningPass();
        (void) llvm::createInternalizePass();
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h

index 5dcd899487598ad1d127031ab416d6d1495dacbc..83158596a214c9fbcd9422d83140ff8ffe080223 100644 (file)
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -96,6 +96,13 @@ FunctionPass *createScalarReplAggregatesPass(signed Threshold = -1,
                                               signed ArrayElementThreshold = -1,
                                               signed ScalarLoadThreshold = -1);
  
+//===----------------------------------------------------------------------===//
+//
+// InductiveRangeCheckElimination - Transform loops to elide range checks on
+// linear functions of the induction variable.
+//
+Pass *createInductiveRangeCheckEliminationPass();
+
  //===----------------------------------------------------------------------===//
  //
  // InductionVariableSimplify - Transform induction variables in a program to all
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt

index b3ee11ed67cd61b1911fa8638e9bb024eb568658..3511bb16d4e1283a2eefc89264960b574b6e42e2 100644 (file)
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMScalarOpts
    EarlyCSE.cpp
    FlattenCFGPass.cpp
    GVN.cpp
+  InductiveRangeCheckElimination.cpp
    IndVarSimplify.cpp
    JumpThreading.cpp
    LICM.cpp
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp

new file mode 100644 (file)

index 0000000..f169e9d
--- /dev/null
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -0,0 +1,1210 @@
+//===-- InductiveRangeCheckElimination.cpp - ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// The InductiveRangeCheckElimination pass splits a loop's iteration space into
+// three disjoint ranges.  It does that in a way such that the loop running in
+// the middle range provably does not need range checks. As an example, it will
+// convert
+//
+//   len = < known positive >
+//   for (i = 0; i < n; i++) {
+//     if (0 <= i && i < len) {
+//       do_something();
+//     } else {
+//       throw_out_of_bounds();
+//     }
+//   }
+//
+// to
+//
+//   len = < known positive >
+//   limit = smin(n, len)
+//   // no first segment
+//   for (i = 0; i < limit; i++) {
+//     if (0 <= i && i < len) { // this check is fully redundant
+//       do_something();
+//     } else {
+//       throw_out_of_bounds();
+//     }
+//   }
+//   for (i = limit; i < n; i++) {
+//     if (0 <= i && i < len) {
+//       do_something();
+//     } else {
+//       throw_out_of_bounds();
+//     }
+//   }
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Optional.h"
+
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/Verifier.h"
+
+#include "llvm/Support/Debug.h"
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+
+#include "llvm/Pass.h"
+
+#include <array>
+
+using namespace llvm;
+
+cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden,
+                                 cl::init(64));
+
+cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden,
+                                cl::init(false));
+
+#define DEBUG_TYPE "irce"
+
+namespace {
+
+/// An inductive range check is a conditional branch in a loop with
+///
+///  1. a very cold successor (i.e. the branch jumps to that successor very
+///     rarely)
+///
+///  and
+///
+///  2. a condition that is provably true for some range of values taken by the
+///     containing loop's induction variable.
+///
+/// Currently all inductive range checks are branches conditional on an
+/// expression of the form
+///
+///   0 <= (Offset + Scale * I) < Length
+///
+/// where `I' is the canonical induction variable of a loop to which Offset and
+/// Scale are loop invariant, and Length is >= 0.  Currently the 'false' branch
+/// is considered cold, looking at profiling data to verify that is a TODO.
+
+class InductiveRangeCheck {
+  const SCEV *Offset;
+  const SCEV *Scale;
+  Value *Length;
+  BranchInst *Branch;
+
+  InductiveRangeCheck() :
+    Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { }
+
+public:
+  const SCEV *getOffset() const { return Offset; }
+  const SCEV *getScale() const { return Scale; }
+  Value *getLength() const { return Length; }
+
+  void print(raw_ostream &OS) const {
+    OS << "InductiveRangeCheck:\n";
+    OS << "  Offset: ";
+    Offset->print(OS);
+    OS << "  Scale: ";
+    Scale->print(OS);
+    OS << "  Length: ";
+    Length->print(OS);
+    OS << "  Branch: ";
+    getBranch()->print(OS);
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  void dump() {
+    print(dbgs());
+  }
+#endif
+
+  BranchInst *getBranch() const { return Branch; }
+
+  /// Represents an integer range [Range.first, Range.second).  If Range.second
+  /// < Range.first, then the value denotes the empty range.
+  typedef std::pair<Value *, Value *> Range;
+  typedef SpecificBumpPtrAllocator<InductiveRangeCheck> AllocatorTy;
+
+  /// This is the value the condition of the branch needs to evaluate to for the
+  /// branch to take the hot successor (see (1) above).
+  bool getPassingDirection() { return true; }
+
+  /// Computes a range for the induction variable in which the range check is
+  /// redundant and can be constant-folded away.
+  Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE,
+                                            IRBuilder<> &B) const;
+
+  /// Create an inductive range check out of BI if possible, else return
+  /// nullptr.
+  static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI,
+                                     Loop *L, ScalarEvolution &SE);
+};
+
+class InductiveRangeCheckElimination : public LoopPass {
+  InductiveRangeCheck::AllocatorTy Allocator;
+
+public:
+  static char ID;
+  InductiveRangeCheckElimination() : LoopPass(ID) {
+    initializeInductiveRangeCheckEliminationPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LoopInfo>();
+    AU.addRequiredID(LoopSimplifyID);
+    AU.addRequiredID(LCSSAID);
+    AU.addRequired<ScalarEvolution>();
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+};
+
+char InductiveRangeCheckElimination::ID = 0;
+}
+
+INITIALIZE_PASS(InductiveRangeCheckElimination, "irce",
+                "Inductive range check elimination", false, false)
+
+static bool IsLowerBoundCheck(Value *Check, Value *&IndexV) {
+  using namespace llvm::PatternMatch;
+
+  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+  Value *LHS = nullptr, *RHS = nullptr;
+
+  if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
+    return false;
+
+  switch (Pred) {
+  default:
+    return false;
+
+  case ICmpInst::ICMP_SLE:
+    std::swap(LHS, RHS);
+  // fallthrough
+  case ICmpInst::ICMP_SGE:
+    if (!match(RHS, m_ConstantInt<0>()))
+      return false;
+    IndexV = LHS;
+    return true;
+
+  case ICmpInst::ICMP_SLT:
+    std::swap(LHS, RHS);
+  // fallthrough
+  case ICmpInst::ICMP_SGT:
+    if (!match(RHS, m_ConstantInt<-1>()))
+      return false;
+    IndexV = LHS;
+    return true;
+  }
+}
+
+static bool IsUpperBoundCheck(Value *Check, Value *Index, Value *&UpperLimit) {
+  using namespace llvm::PatternMatch;
+
+  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+  Value *LHS = nullptr, *RHS = nullptr;
+
+  if (!match(Check, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
+    return false;
+
+  switch (Pred) {
+  default:
+    return false;
+
+  case ICmpInst::ICMP_SGT:
+    std::swap(LHS, RHS);
+  // fallthrough
+  case ICmpInst::ICMP_SLT:
+    if (LHS != Index)
+      return false;
+    UpperLimit = RHS;
+    return true;
+
+  case ICmpInst::ICMP_UGT:
+    std::swap(LHS, RHS);
+  // fallthrough
+  case ICmpInst::ICMP_ULT:
+    if (LHS != Index)
+      return false;
+    UpperLimit = RHS;
+    return true;
+  }
+}
+
+/// Split a condition into something semantically equivalent to (0 <= I <
+/// Limit), both comparisons signed and Limit loop invariant on L and
+/// non-negative.  On success, return true and set Index to I and UpperLimit to
+/// Limit.  Return false on failure (we may still write to UpperLimit and Index
+/// on failure).  It does not try to interpret I as a loop index.
+///
+static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE,
+                                     Value *Condition, const SCEV *&Index,
+                                     Value *&UpperLimit) {
+
+  // TODO: currently this catches some silly cases like comparing "%idx slt 1".
+  // Our transformations are still correct, but less likely to be profitable in
+  // those cases.  We have to come up with some heuristics that pick out the
+  // range checks that are more profitable to clone a loop for.  This function
+  // in general can be made more robust.
+
+  using namespace llvm::PatternMatch;
+
+  Value *A = nullptr;
+  Value *B = nullptr;
+  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+
+  // In these early checks we assume that the matched UpperLimit is
+  // non-negative.  We'll verify that fact later, before returning true.
+
+  if (match(Condition, m_And(m_Value(A), m_Value(B)))) {
+    Value *IndexV = nullptr;
+    Value *ExpectedUpperBoundCheck = nullptr;
+
+    if (IsLowerBoundCheck(A, IndexV))
+      ExpectedUpperBoundCheck = B;
+    else if (IsLowerBoundCheck(B, IndexV))
+      ExpectedUpperBoundCheck = A;
+    else
+      return false;
+
+    if (!IsUpperBoundCheck(ExpectedUpperBoundCheck, IndexV, UpperLimit))
+      return false;
+
+    Index = SE.getSCEV(IndexV);
+
+    if (isa<SCEVCouldNotCompute>(Index))
+      return false;
+
+  } else if (match(Condition, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
+    switch (Pred) {
+    default:
+      return false;
+
+    case ICmpInst::ICMP_SGT:
+      std::swap(A, B);
+    // fall through
+    case ICmpInst::ICMP_SLT:
+      UpperLimit = B;
+      Index = SE.getSCEV(A);
+      if (isa<SCEVCouldNotCompute>(Index) || !SE.isKnownNonNegative(Index))
+        return false;
+      break;
+
+    case ICmpInst::ICMP_UGT:
+      std::swap(A, B);
+    // fall through
+    case ICmpInst::ICMP_ULT:
+      UpperLimit = B;
+      Index = SE.getSCEV(A);
+      if (isa<SCEVCouldNotCompute>(Index))
+        return false;
+      break;
+    }
+  } else {
+    return false;
+  }
+
+  const SCEV *UpperLimitSCEV = SE.getSCEV(UpperLimit);
+  if (isa<SCEVCouldNotCompute>(UpperLimitSCEV) ||
+      !SE.isKnownNonNegative(UpperLimitSCEV))
+    return false;
+
+  if (SE.getLoopDisposition(UpperLimitSCEV, L) !=
+      ScalarEvolution::LoopInvariant) {
+    DEBUG(dbgs() << " in function: " << L->getHeader()->getParent()->getName()
+                 << " ";
+          dbgs() << " UpperLimit is not loop invariant: "
+                 << UpperLimit->getName() << "\n";);
+    return false;
+  }
+
+  return true;
+}
+
+InductiveRangeCheck *
+InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI,
+                            Loop *L, ScalarEvolution &SE) {
+
+  if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
+    return nullptr;
+
+  Value *Length = nullptr;
+  const SCEV *IndexSCEV = nullptr;
+
+  if (!SplitRangeCheckCondition(L, SE, BI->getCondition(), IndexSCEV, Length))
+    return nullptr;
+
+  assert(IndexSCEV && Length && "contract with SplitRangeCheckCondition!");
+
+  const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV);
+  bool IsAffineIndex =
+      IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();
+
+  if (!IsAffineIndex)
+    return nullptr;
+
+  InductiveRangeCheck *IRC = new (A.Allocate()) InductiveRangeCheck;
+  IRC->Length = Length;
+  IRC->Offset = IndexAddRec->getStart();
+  IRC->Scale = IndexAddRec->getStepRecurrence(SE);
+  IRC->Branch = BI;
+  return IRC;
+}
+
+static Value *MaybeSimplify(Value *V) {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    if (Value *Simplified = SimplifyInstruction(I))
+      return Simplified;
+  return V;
+}
+
+static Value *ConstructSMinOf(Value *X, Value *Y, IRBuilder<> &B) {
+  return MaybeSimplify(B.CreateSelect(B.CreateICmpSLT(X, Y), X, Y));
+}
+
+static Value *ConstructSMaxOf(Value *X, Value *Y, IRBuilder<> &B) {
+  return MaybeSimplify(B.CreateSelect(B.CreateICmpSGT(X, Y), X, Y));
+}
+
+namespace {
+
+/// This class is used to constrain loops to run within a given iteration space.
+/// The algorithm this class implements is given a Loop and a range [Begin,
+/// End).  The algorithm then tries to break out a "main loop" out of the loop
+/// it is given in a way that the "main loop" runs with the induction variable
+/// in a subset of [Begin, End).  The algorithm emits appropriate pre and post
+/// loops to run any remaining iterations.  The pre loop runs any iterations in
+/// which the induction variable is < Begin, and the post loop runs any
+/// iterations in which the induction variable is >= End.
+///
+class LoopConstrainer {
+
+  // Keeps track of the structure of a loop.  This is similar to llvm::Loop,
+  // except that it is more lightweight and can track the state of a loop
+  // through changing and potentially invalid IR.  This structure also
+  // formalizes the kinds of loops we can deal with -- ones that have a single
+  // latch that is also an exiting block *and* have a canonical induction
+  // variable.
+  struct LoopStructure {
+    const char *Tag;
+
+    BasicBlock *Header;
+    BasicBlock *Latch;
+
+    // `Latch's terminator instruction is `LatchBr', and its `LatchBrExitIdx'th
+    // successor is `LatchExit', the exit block of the loop.
+    BranchInst *LatchBr;
+    BasicBlock *LatchExit;
+    unsigned LatchBrExitIdx;
+
+    // The canonical induction variable.  Its value is `CIVStart` on the 0th
+    // iteration and `CIVNext` for all iterations after that.
+    PHINode *CIV;
+    Value *CIVStart;
+    Value *CIVNext;
+
+    LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr),
+                      LatchBr(nullptr), LatchExit(nullptr),
+                      LatchBrExitIdx(-1), CIV(nullptr),
+                      CIVStart(nullptr), CIVNext(nullptr) { }
+
+    template <typename M> LoopStructure map(M Map) const {
+      LoopStructure Result;
+      Result.Tag = Tag;
+      Result.Header = cast<BasicBlock>(Map(Header));
+      Result.Latch = cast<BasicBlock>(Map(Latch));
+      Result.LatchBr = cast<BranchInst>(Map(LatchBr));
+      Result.LatchExit = cast<BasicBlock>(Map(LatchExit));
+      Result.LatchBrExitIdx = LatchBrExitIdx;
+      Result.CIV = cast<PHINode>(Map(CIV));
+      Result.CIVNext = Map(CIVNext);
+      Result.CIVStart = Map(CIVStart);
+      return Result;
+    }
+  };
+
+  // The representation of a clone of the original loop we started out with.
+  struct ClonedLoop {
+    // The cloned blocks
+    std::vector<BasicBlock *> Blocks;
+
+    // `Map` maps values in the clonee into values in the cloned version
+    ValueToValueMapTy Map;
+
+    // An instance of `LoopStructure` for the cloned loop
+    LoopStructure Structure;
+  };
+
+  // Result of rewriting the range of a loop.  See changeIterationSpaceEnd for
+  // more details on what these fields mean.
+  struct RewrittenRangeInfo {
+    BasicBlock *PseudoExit;
+    BasicBlock *ExitSelector;
+    std::vector<PHINode *> PHIValuesAtPseudoExit;
+
+    RewrittenRangeInfo() : PseudoExit(nullptr), ExitSelector(nullptr) { }
+  };
+
+  // Calculated subranges we restrict the iteration space of the main loop to.
+  // See the implementation of `calculateSubRanges' for more details on how
+  // these fields are computed.  `ExitPreLoopAt' is `None' if we don't need a
+  // pre loop.  `ExitMainLoopAt' is `None' if we don't need a post loop.
+  struct SubRanges {
+    Optional<Value *> ExitPreLoopAt;
+    Optional<Value *> ExitMainLoopAt;
+  };
+
+  // A utility function that does a `replaceUsesOfWith' on the incoming block
+  // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's
+  // incoming block list with `ReplaceBy'.
+  static void replacePHIBlock(PHINode *PN, BasicBlock *Block,
+                              BasicBlock *ReplaceBy);
+
+  // Try to "parse" `OriginalLoop' and populate the various out parameters.
+  // Returns true on success, false on failure.
+  //
+  bool recognizeLoop(LoopStructure &LoopStructureOut,
+                     const SCEV *&LatchCountOut, BasicBlock *&PreHeaderOut,
+                     const char *&FailureReasonOut) const;
+
+  // Compute a safe set of limits for the main loop to run in -- effectively the
+  // intersection of `Range' and the iteration space of the original loop.
+  // Return the header count (1 + the latch taken count) in `HeaderCount'.
+  //
+  SubRanges calculateSubRanges(Value *&HeaderCount) const;
+
+  // Clone `OriginalLoop' and return the result in CLResult.  The IR after
+  // running `cloneLoop' is well formed except for the PHI nodes in CLResult --
+  // the PHI nodes say that there is an incoming edge from `OriginalPreheader`
+  // but there is no such edge.
+  //
+  void cloneLoop(ClonedLoop &CLResult, const char *Tag) const;
+
+  // Rewrite the iteration space of the loop denoted by (LS, Preheader). The
+  // iteration space of the rewritten loop ends at ExitLoopAt.  The start of the
+  // iteration space is not changed.  `ExitLoopAt' is assumed to be slt
+  // `OriginalHeaderCount'.
+  //
+  // If there are iterations left to execute, control is made to jump to
+  // `ContinuationBlock', otherwise it takes the normal loop exit.  The
+  // returned `RewrittenRangeInfo' object is populated as follows:
+  //
+  //  .PseudoExit is a basic block that unconditionally branches to
+  //      `ContinuationBlock'.
+  //
+  //  .ExitSelector is a basic block that decides, on exit from the loop,
+  //      whether to branch to the "true" exit or to `PseudoExit'.
+  //
+  //  .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value
+  //      for each PHINode in the loop header on taking the pseudo exit.
+  //
+  // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate
+  // preheader because it is made to branch to the loop header only
+  // conditionally.
+  //
+  RewrittenRangeInfo
+  changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader,
+                          Value *ExitLoopAt,
+                          BasicBlock *ContinuationBlock) const;
+
+  // The loop denoted by `LS' has `OldPreheader' as its preheader.  This
+  // function creates a new preheader for `LS' and returns it.
+  //
+  BasicBlock *createPreheader(const LoopConstrainer::LoopStructure &LS,
+                              BasicBlock *OldPreheader, const char *Tag) const;
+
+  // `ContinuationBlockAndPreheader' was the continuation block for some call to
+  // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'.
+  // This function rewrites the PHI nodes in `LS.Header' to start with the
+  // correct value.
+  void rewriteIncomingValuesForPHIs(
+      LoopConstrainer::LoopStructure &LS,
+      BasicBlock *ContinuationBlockAndPreheader,
+      const LoopConstrainer::RewrittenRangeInfo &RRI) const;
+
+  // Even though we do not preserve any passes at this time, we at least need to
+  // keep the parent loop structure consistent.  The `LPPassManager' seems to
+  // verify this after running a loop pass.  This function adds the list of
+  // blocks denoted by the iterator range [BlocksBegin, BlocksEnd) to this
+  // loop's parent loop if required.
+  template<typename IteratorTy>
+  void addToParentLoopIfNeeded(IteratorTy BlocksBegin, IteratorTy BlocksEnd);
+
+  // Some global state.
+  Function &F;
+  LLVMContext &Ctx;
+  ScalarEvolution &SE;
+
+  // Information about the original loop we started out with.
+  Loop &OriginalLoop;
+  LoopInfo &OriginalLoopInfo;
+  const SCEV *LatchTakenCount;
+  BasicBlock *OriginalPreheader;
+  Value *OriginalHeaderCount;
+
+  // The preheader of the main loop.  This may or may not be different from
+  // `OriginalPreheader'.
+  BasicBlock *MainLoopPreheader;
+
+  // The range we need to run the main loop in.
+  InductiveRangeCheck::Range Range;
+
+  // The structure of the main loop (see comment at the beginning of this class
+  // for a definition)
+  LoopStructure MainLoopStructure;
+
+public:
+  LoopConstrainer(Loop &L, LoopInfo &LI, ScalarEvolution &SE,
+                  InductiveRangeCheck::Range R)
+    : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), SE(SE),
+      OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
+      OriginalPreheader(nullptr), OriginalHeaderCount(nullptr),
+      MainLoopPreheader(nullptr), Range(R) { }
+
+  // Entry point for the algorithm.  Returns true on success.
+  bool run();
+};
+
+}
+
+void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block,
+                                      BasicBlock *ReplaceBy) {
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+    if (PN->getIncomingBlock(i) == Block)
+      PN->setIncomingBlock(i, ReplaceBy);
+}
+
+bool LoopConstrainer::recognizeLoop(LoopStructure &LoopStructureOut,
+                                    const SCEV *&LatchCountOut,
+                                    BasicBlock *&PreheaderOut,
+                                    const char *&FailureReason) const {
+  using namespace llvm::PatternMatch;
+
+  assert(OriginalLoop.isLoopSimplifyForm() &&
+         "should follow from addRequired<>");
+
+  BasicBlock *Latch = OriginalLoop.getLoopLatch();
+  if (!OriginalLoop.isLoopExiting(Latch)) {
+    FailureReason = "no loop latch";
+    return false;
+  }
+
+  PHINode *CIV = OriginalLoop.getCanonicalInductionVariable();
+  if (!CIV) {
+    FailureReason = "no CIV";
+    return false;
+  }
+
+  BasicBlock *Header = OriginalLoop.getHeader();
+  BasicBlock *Preheader = OriginalLoop.getLoopPreheader();
+  if (!Preheader) {
+    FailureReason = "no preheader";
+    return false;
+  }
+
+  Value *CIVNext = CIV->getIncomingValueForBlock(Latch);
+  Value *CIVStart = CIV->getIncomingValueForBlock(Preheader);
+
+  const SCEV *LatchCount = SE.getExitCount(&OriginalLoop, Latch);
+  if (isa<SCEVCouldNotCompute>(LatchCount)) {
+    FailureReason = "could not compute latch count";
+    return false;
+  }
+
+  // While SCEV does most of the analysis for us, we still have to
+  // modify the latch; and currently we can only deal with certain
+  // kinds of latches.  This can be made more sophisticated as needed.
+
+  BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin());
+
+  if (!LatchBr || LatchBr->isUnconditional()) {
+    FailureReason = "latch terminator not conditional branch";
+    return false;
+  }
+
+  // Currently we only support a latch condition of the form:
+  //
+  //  %condition = icmp slt %civNext, %limit
+  //  br i1 %condition, label %header, label %exit
+
+  if (LatchBr->getSuccessor(0) != Header) {
+    FailureReason = "unknown latch form (header not first successor)";
+    return false;
+  }
+
+  Value *CIVComparedTo = nullptr;
+  ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
+  if (!(match(LatchBr->getCondition(),
+              m_ICmp(Pred, m_Specific(CIVNext), m_Value(CIVComparedTo))) &&
+        Pred == ICmpInst::ICMP_SLT)) {
+    FailureReason = "unknown latch form (not slt)";
+    return false;
+  }
+
+  const SCEV *CIVComparedToSCEV = SE.getSCEV(CIVComparedTo);
+  if (isa<SCEVCouldNotCompute>(CIVComparedToSCEV)) {
+    FailureReason = "could not relate CIV to latch expression";
+    return false;
+  }
+
+  const SCEV *ShouldBeOne = SE.getMinusSCEV(CIVComparedToSCEV, LatchCount);
+  const SCEVConstant *SCEVOne = dyn_cast<SCEVConstant>(ShouldBeOne);
+  if (!SCEVOne || SCEVOne->getValue()->getValue() != 1) {
+    FailureReason = "unexpected header count in latch";
+    return false;
+  }
+
+  unsigned LatchBrExitIdx = 1;
+  BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
+
+  assert(SE.getLoopDisposition(LatchCount, &OriginalLoop) ==
+             ScalarEvolution::LoopInvariant &&
+         "loop variant exit count doesn't make sense!");
+
+  assert(!OriginalLoop.contains(LatchExit) && "expected an exit block!");
+
+  LoopStructureOut.Tag = "main";
+  LoopStructureOut.Header = Header;
+  LoopStructureOut.Latch = Latch;
+  LoopStructureOut.LatchBr = LatchBr;
+  LoopStructureOut.LatchExit = LatchExit;
+  LoopStructureOut.LatchBrExitIdx = LatchBrExitIdx;
+  LoopStructureOut.CIV = CIV;
+  LoopStructureOut.CIVNext = CIVNext;
+  LoopStructureOut.CIVStart = CIVStart;
+
+  LatchCountOut = LatchCount;
+  PreheaderOut = Preheader;
+  FailureReason = nullptr;
+
+  return true;
+}
+
+LoopConstrainer::SubRanges
+LoopConstrainer::calculateSubRanges(Value *&HeaderCountOut) const {
+  IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
+
+  SCEVExpander Expander(SE, "irce");
+  Instruction *InsertPt = OriginalPreheader->getTerminator();
+
+  Value *LatchCountV =
+      MaybeSimplify(Expander.expandCodeFor(LatchTakenCount, Ty, InsertPt));
+
+  IRBuilder<> B(InsertPt);
+
+  LoopConstrainer::SubRanges Result;
+
+  // I think we can be more aggressive here and make this nuw / nsw if the
+  // addition that feeds into the icmp for the latch's terminating branch is nuw
+  // / nsw.  In any case, a wrapping 2's complement addition is safe.
+  ConstantInt *One = ConstantInt::get(Ty, 1);
+  HeaderCountOut = MaybeSimplify(B.CreateAdd(LatchCountV, One, "header.count"));
+
+  const SCEV *RangeBegin = SE.getSCEV(Range.first);
+  const SCEV *RangeEnd = SE.getSCEV(Range.second);
+  const SCEV *HeaderCountSCEV = SE.getSCEV(HeaderCountOut);
+  const SCEV *Zero = SE.getConstant(Ty, 0);
+
+  // In some cases we can prove that we don't need a pre or post loop
+
+  bool ProvablyNoPreloop =
+      SE.isKnownPredicate(ICmpInst::ICMP_SLE, RangeBegin, Zero);
+  if (!ProvablyNoPreloop)
+    Result.ExitPreLoopAt = ConstructSMinOf(HeaderCountOut, Range.first, B);
+
+  bool ProvablyNoPostLoop =
+      SE.isKnownPredicate(ICmpInst::ICMP_SLE, HeaderCountSCEV, RangeEnd);
+  if (!ProvablyNoPostLoop)
+    Result.ExitMainLoopAt = ConstructSMinOf(HeaderCountOut, Range.second, B);
+
+  return Result;
+}
+
+void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
+                                const char *Tag) const {
+  for (BasicBlock *BB : OriginalLoop.getBlocks()) {
+    BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F);
+    Result.Blocks.push_back(Clone);
+    Result.Map[BB] = Clone;
+  }
+
+  auto GetClonedValue = [&Result](Value *V) {
+    assert(V && "null values not in domain!");
+    auto It = Result.Map.find(V);
+    if (It == Result.Map.end())
+      return V;
+    return static_cast<Value *>(It->second);
+  };
+
+  Result.Structure = MainLoopStructure.map(GetClonedValue);
+  Result.Structure.Tag = Tag;
+
+  for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) {
+    BasicBlock *ClonedBB = Result.Blocks[i];
+    BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i];
+
+    assert(Result.Map[OriginalBB] == ClonedBB && "invariant!");
+
+    for (Instruction &I : *ClonedBB)
+      RemapInstruction(&I, Result.Map,
+                       RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);
+
+    // Exit blocks will now have one more predecessor and their PHI nodes need
+    // to be edited to reflect that.  No phi nodes need to be introduced because
+    // the loop is in LCSSA.
+
+    for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB);
+         SBBI != SBBE; ++SBBI) {
+
+      if (OriginalLoop.contains(*SBBI))
+        continue; // not an exit block
+
+      for (Instruction &I : **SBBI) {
+        if (!isa<PHINode>(&I))
+          break;
+
+        PHINode *PN = cast<PHINode>(&I);
+        Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB);
+        PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB);
+      }
+    }
+  }
+}
+
+/// Rewrites the loop described by \p LS so that it stops iterating once its
+/// induction variable reaches \p ExitLoopAt, and arranges for control to reach
+/// \p ContinuationBlock whenever iterations of the original iteration space
+/// remain.
+///
+/// Two blocks are created after `LS.Latch': an exit selector, which becomes the
+/// latch's exit successor and picks between the original latch exit and the
+/// pseudo exit, and a pseudo exit, which branches to \p ContinuationBlock.  The
+/// terminator of \p Preheader is replaced by a conditional branch that skips
+/// the loop entirely (going to the pseudo exit) when `LS.CIVStart' is not
+/// signed-less-than \p ExitLoopAt.
+///
+/// The returned RewrittenRangeInfo holds the two new blocks and, in header PHI
+/// order, the PHI nodes created in the pseudo exit.
+LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
+    const LoopStructure &LS, BasicBlock *Preheader, Value *ExitLoopAt,
+    BasicBlock *ContinuationBlock) const {
+
+  // We start with a loop with a single latch:
+  //
+  //    +--------------------+
+  //    |                    |
+  //    |     preheader      |
+  //    |                    |
+  //    +--------+-----------+
+  //             |      ----------------\
+  //             |     /                |
+  //    +--------v----v------+          |
+  //    |                    |          |
+  //    |      header        |          |
+  //    |                    |          |
+  //    +--------------------+          |
+  //                                    |
+  //            .....                   |
+  //                                    |
+  //    +--------------------+          |
+  //    |                    |          |
+  //    |       latch        >----------/
+  //    |                    |
+  //    +-------v------------+
+  //            |
+  //            |
+  //            |   +--------------------+
+  //            |   |                    |
+  //            +--->   original exit    |
+  //                |                    |
+  //                +--------------------+
+  //
+  // We change the control flow to look like
+  //
+  //
+  //    +--------------------+
+  //    |                    |
+  //    |     preheader      >-------------------------+
+  //    |                    |                         |
+  //    +--------v-----------+                         |
+  //             |    /-------------+                  |
+  //             |   /              |                  |
+  //    +--------v--v--------+      |                  |
+  //    |                    |      |                  |
+  //    |      header        |      |   +--------+     |
+  //    |                    |      |   |        |     |
+  //    +--------------------+      |   |  +-----v-----v-----------+
+  //                                |   |  |                       |
+  //                                |   |  |     .pseudo.exit      |
+  //                                |   |  |                       |
+  //                                |   |  +-----------v-----------+
+  //                                |   |              |
+  //            .....               |   |              |
+  //                                |   |     +--------v-------------+
+  //    +--------------------+      |   |     |                      |
+  //    |                    |      |   |     |   ContinuationBlock  |
+  //    |       latch        >------+   |     |                      |
+  //    |                    |          |     +----------------------+
+  //    +---------v----------+          |
+  //              |                     |
+  //              |                     |
+  //              |     +---------------^-----+
+  //              |     |                     |
+  //              +----->    .exit.selector   |
+  //                    |                     |
+  //                    +----------v----------+
+  //                               |
+  //     +--------------------+    |
+  //     |                    |    |
+  //     |   original exit    <----+
+  //     |                    |
+  //     +--------------------+
+  //
+
+  RewrittenRangeInfo RRI;
+
+  // Both new blocks are placed right after the latch in the function's block
+  // list and are named after the loop's tag.
+  auto BBInsertLocation = std::next(Function::iterator(LS.Latch));
+  RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
+                                        &F, BBInsertLocation);
+  RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
+                                      BBInsertLocation);
+
+  // The last instruction of the preheader must be a branch; it is replaced by
+  // the conditional branch built below.
+  BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin());
+
+  IRBuilder<> B(PreheaderJump);
+
+  // EnterLoopCond - is it okay to start executing this `LS'?
+  Value *EnterLoopCond = B.CreateICmpSLT(LS.CIVStart, ExitLoopAt);
+  B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit);
+  PreheaderJump->eraseFromParent();
+
+  // Only latches whose exit edge is successor 1 are supported for now.
+  assert(LS.LatchBrExitIdx == 1 && "generalize this as needed!");
+
+  B.SetInsertPoint(LS.LatchBr);
+
+  // ContinueCond - is it okay to execute the next iteration in `LS'?
+  Value *ContinueCond = B.CreateICmpSLT(LS.CIVNext, ExitLoopAt);
+
+  LS.LatchBr->setCondition(ContinueCond);
+  assert(LS.LatchBr->getSuccessor(LS.LatchBrExitIdx) == LS.LatchExit &&
+         "invariant!");
+  LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector);
+
+  B.SetInsertPoint(RRI.ExitSelector);
+
+  // IterationsLeft - are there any more iterations left, given the original
+  // upper bound on the induction variable?  If not, we branch to the "real"
+  // exit.
+  Value *IterationsLeft = B.CreateICmpSLT(LS.CIVNext, OriginalHeaderCount);
+  B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit);
+
+  // The pseudo exit falls through to the continuation block; the PHI nodes
+  // created below are inserted in front of this branch.
+  BranchInst *BranchToContinuation =
+      BranchInst::Create(ContinuationBlock, RRI.PseudoExit);
+
+  // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
+  // each of the PHI nodes in the loop header.  This feeds into the initial
+  // value of the same PHI nodes if/when we continue execution.
+  for (Instruction &I : *LS.Header) {
+    if (!isa<PHINode>(&I))
+      break;
+
+    PHINode *PN = cast<PHINode>(&I);
+
+    PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy",
+                                      BranchToContinuation);
+
+    // Arriving from the preheader means the loop never ran, so the initial
+    // value is used; arriving from the exit selector means the value the latch
+    // would have fed back into the header is used.
+    NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader);
+    NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch),
+                        RRI.ExitSelector);
+    RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
+  }
+
+  // The latch exit now has a branch from `RRI.ExitSelector' instead of
+  // `LS.Latch'.  The PHI nodes need to be updated to reflect that.
+  for (Instruction &I : *LS.LatchExit) {
+    if (PHINode *PN = dyn_cast<PHINode>(&I))
+      replacePHIBlock(PN, LS.Latch, RRI.ExitSelector);
+    else
+      break;
+  }
+
+  return RRI;
+}
+
+/// For every PHI node at the top of `LS.Header', replaces the value flowing in
+/// from \p ContinuationBlock with the next entry of
+/// `RRI.PHIValuesAtPseudoExit' (entries are consumed in header PHI order).
+/// Afterwards `LS.CIVStart' is refreshed from the induction variable's
+/// incoming value for \p ContinuationBlock.
+void LoopConstrainer::rewriteIncomingValuesForPHIs(
+    LoopConstrainer::LoopStructure &LS, BasicBlock *ContinuationBlock,
+    const LoopConstrainer::RewrittenRangeInfo &RRI) const {
+
+  unsigned NextCopyIdx = 0;
+  for (Instruction &Inst : *LS.Header) {
+    // PHI nodes are grouped at the start of the block; stop at the first
+    // non-PHI instruction.
+    PHINode *HeaderPHI = dyn_cast<PHINode>(&Inst);
+    if (!HeaderPHI)
+      break;
+
+    const unsigned NumIncoming = HeaderPHI->getNumIncomingValues();
+    for (unsigned Idx = 0; Idx < NumIncoming; ++Idx) {
+      if (HeaderPHI->getIncomingBlock(Idx) != ContinuationBlock)
+        continue;
+
+      HeaderPHI->setIncomingValue(Idx, RRI.PHIValuesAtPseudoExit[NextCopyIdx]);
+      ++NextCopyIdx;
+    }
+  }
+
+  LS.CIVStart = LS.CIV->getIncomingValueForBlock(ContinuationBlock);
+}
+
+/// Creates a new block named \p Tag in `F', inserted before `LS.Header', whose
+/// only instruction is an unconditional branch to `LS.Header'.  Header PHI
+/// entries that named \p OldPreheader as their incoming block are retargeted
+/// to the new block.  The terminator of \p OldPreheader is not touched here;
+/// callers are responsible for routing control flow into the new block.
+///
+/// Returns the newly created block.
+BasicBlock *
+LoopConstrainer::createPreheader(const LoopConstrainer::LoopStructure &LS,
+                                 BasicBlock *OldPreheader,
+                                 const char *Tag) const {
+
+  BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
+  BranchInst::Create(LS.Header, Preheader);
+
+  for (Instruction &I : *LS.Header) {
+    // PHI nodes are grouped at the start of the block; stop at the first
+    // non-PHI instruction.
+    PHINode *PN = dyn_cast<PHINode>(&I);
+    if (!PN)
+      break;
+
+    // One call per PHI node, matching how changeIterationSpaceEnd uses the
+    // same helper.  The previous per-incoming-value loop never used its index
+    // and simply repeated this identical call.
+    replacePHIBlock(PN, OldPreheader, Preheader);
+  }
+
+  return Preheader;
+}
+
+/// Registers every basic block in [\p Begin, \p End) with the parent of
+/// `OriginalLoop'.  Does nothing when `OriginalLoop' has no parent loop.
+template<typename IteratorTy>
+void LoopConstrainer::addToParentLoopIfNeeded(IteratorTy Begin,
+                                              IteratorTy End) {
+  if (Loop *Enclosing = OriginalLoop.getParentLoop()) {
+    auto &LIBase = OriginalLoopInfo.getBase();
+    for (IteratorTy It = Begin; It != End; ++It)
+      Enclosing->addBasicBlockToLoop(*It, LIBase);
+  }
+}
+
+/// Drives the whole transformation on `OriginalLoop'.  Returns false if
+/// recognizeLoop rejects the loop and true otherwise.
+///
+/// The steps are: recognize the loop, compute the sub-ranges of the iteration
+/// space, clone the loop once for each of the pre-loop and post-loop that is
+/// needed, chain the loops together through changeIterationSpaceEnd, and
+/// finally register all new blocks with the parent loop, if there is one.
+bool LoopConstrainer::run() {
+  BasicBlock *Preheader = nullptr;
+  const char *CouldNotProceedBecause = nullptr;
+  if (!recognizeLoop(MainLoopStructure, LatchTakenCount, Preheader,
+                     CouldNotProceedBecause)) {
+    DEBUG(dbgs() << "irce: could not recognize loop, " << CouldNotProceedBecause
+                 << "\n";);
+    return false;
+  }
+
+  // Until a pre-loop is inserted, the main loop is entered straight from the
+  // original preheader.
+  OriginalPreheader = Preheader;
+  MainLoopPreheader = Preheader;
+
+  SubRanges SR = calculateSubRanges(OriginalHeaderCount);
+
+  // It would have been better to make `PreLoop' and `PostLoop'
+  // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy
+  // constructor.
+  ClonedLoop PreLoop, PostLoop;
+  bool NeedsPreLoop = SR.ExitPreLoopAt.hasValue();
+  bool NeedsPostLoop = SR.ExitMainLoopAt.hasValue();
+
+  // We clone these ahead of time so that we don't have to deal with changing
+  // and temporarily invalid IR as we transform the loops.
+  if (NeedsPreLoop)
+    cloneLoop(PreLoop, "preloop");
+  if (NeedsPostLoop)
+    cloneLoop(PostLoop, "postloop");
+
+  RewrittenRangeInfo PreLoopRRI;
+
+  // Enter the pre-loop from the original preheader, give the main loop a fresh
+  // preheader that serves as the pre-loop's continuation block, and feed the
+  // PHI values collected at the pre-loop's pseudo exit into the main loop's
+  // header PHIs.
+  if (NeedsPreLoop) {
+    Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header,
+                                                  PreLoop.Structure.Header);
+
+    MainLoopPreheader =
+        createPreheader(MainLoopStructure, Preheader, "mainloop");
+    PreLoopRRI =
+        changeIterationSpaceEnd(PreLoop.Structure, Preheader,
+                                SR.ExitPreLoopAt.getValue(), MainLoopPreheader);
+    rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader,
+                                 PreLoopRRI);
+  }
+
+  BasicBlock *PostLoopPreheader = nullptr;
+  RewrittenRangeInfo PostLoopRRI;
+
+  // The same wiring one stage later: the main loop is cut short at
+  // `SR.ExitMainLoopAt' and continues into the post-loop through a fresh
+  // preheader.
+  if (NeedsPostLoop) {
+    PostLoopPreheader =
+        createPreheader(PostLoop.Structure, Preheader, "postloop");
+    PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader,
+                                          SR.ExitMainLoopAt.getValue(),
+                                          PostLoopPreheader);
+    rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader,
+                                 PostLoopRRI);
+  }
+
+  // Gather the blocks created by this function so they can be registered with
+  // the parent loop.
+  SmallVector<BasicBlock *, 6> NewBlocks;
+  NewBlocks.push_back(PostLoopPreheader);
+  NewBlocks.push_back(PreLoopRRI.PseudoExit);
+  NewBlocks.push_back(PreLoopRRI.ExitSelector);
+  NewBlocks.push_back(PostLoopRRI.PseudoExit);
+  NewBlocks.push_back(PostLoopRRI.ExitSelector);
+  if (MainLoopPreheader != Preheader)
+    NewBlocks.push_back(MainLoopPreheader);
+
+  // Some of the above may be nullptr, filter them out before passing to
+  // addToParentLoopIfNeeded.
+  auto NewBlocksEnd = std::remove(NewBlocks.begin(), NewBlocks.end(), nullptr);
+
+  typedef SmallVector<BasicBlock *, 6>::iterator SmallVectItTy;
+  typedef std::vector<BasicBlock *>::iterator StdVectItTy;
+
+  // The blocks of the cloned loops are registered with the parent loop too.
+  addToParentLoopIfNeeded<SmallVectItTy>(NewBlocks.begin(), NewBlocksEnd);
+  addToParentLoopIfNeeded<StdVectItTy>(PreLoop.Blocks.begin(),
+                                       PreLoop.Blocks.end());
+  addToParentLoopIfNeeded<StdVectItTy>(PostLoop.Blocks.begin(),
+                                       PostLoop.Blocks.end());
+
+  return true;
+}
+
+/// Derives the interval of induction-variable values for which this range
+/// check is known to pass, emitting the instructions that compute its bounds
+/// through \p B.  Yields None when no such interval can be computed.
+Optional<InductiveRangeCheck::Range>
+InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
+                                               IRBuilder<> &B) const {
+
+  // Only checks of the shape
+  //
+  //   0 <= Offset + 1 * CIV < L, with L >= 0
+  //
+  // are handled.  They are satisfied by -Offset <= CIV < (L - Offset), where
+  // all additions and subtractions wrap (two's complement) and all comparisons
+  // are signed.
+  //
+  // Why this holds:
+  //
+  //   Assume some CIV satisfies -Offset <= CIV < (L - Offset).  Then
+  //   -Offset <= (-Offset + L).  Because L >= 0, a signed overflow in
+  //   (-Offset + L) would make it smaller than (-Offset), contradicting the
+  //   previous inequality; so (-Offset + L) cannot have overflowed.
+  //
+  //   Therefore CIV = t + (-Offset) for some t in [0, L), which gives
+  //   (CIV + Offset) = t and hence 0 <= (CIV + Offset) < L.
+  //
+  // The L >= 0 precondition is essential: in an i8 world, Offset = 127,
+  // CIV = 126 and L = -2 is a counterexample.
+
+  const SCEVConstant *ScaleC = dyn_cast<SCEVConstant>(getScale());
+  const bool HasUnitScale = ScaleC && ScaleC->getValue()->getValue() == 1;
+  if (!HasUnitScale) {
+    DEBUG(dbgs() << "irce: could not compute safe iteration space for:\n";
+          print(dbgs()));
+    return None;
+  }
+
+  // Materialize the offset at the builder's insertion point.
+  Value *ExpandedOffset = SCEVExpander(SE, "safe.itr.space").expandCodeFor(
+      getOffset(), getOffset()->getType(), B.GetInsertPoint());
+  Value *Offset = MaybeSimplify(ExpandedOffset);
+
+  // Lower bound: -Offset.
+  Value *NegatedOffset = B.CreateNeg(Offset);
+  Value *RangeBegin = MaybeSimplify(NegatedOffset);
+
+  // Upper bound: L - Offset.
+  Value *LengthMinusOffset = B.CreateSub(getLength(), Offset);
+  Value *RangeEnd = MaybeSimplify(LengthMinusOffset);
+
+  return std::make_pair(RangeBegin, RangeEnd);
+}
+
+/// Intersects \p R1 with \p R2.  When \p R1 holds no value, \p R2 is returned
+/// unchanged; otherwise the result is the signed maximum of the two lower
+/// bounds paired with the signed minimum of the two upper bounds, both built
+/// through \p B.
+static InductiveRangeCheck::Range
+IntersectRange(const Optional<InductiveRangeCheck::Range> &R1,
+               const InductiveRangeCheck::Range &R2, IRBuilder<> &B) {
+  if (R1.hasValue()) {
+    auto &Known = R1.getValue();
+
+    // The lower bound is constructed before the upper bound.
+    Value *Lower = ConstructSMaxOf(Known.first, R2.first, B);
+    Value *Upper = ConstructSMinOf(Known.second, R2.second, B);
+    return std::make_pair(Lower, Upper);
+  }
+
+  return R2;
+}
+
+/// Attempts to eliminate the inductive range checks found in \p L.
+///
+/// The loop is skipped (returning false) when it has at least
+/// `LoopSizeCutoff' blocks, has no preheader, contains no recognizable range
+/// check, or when no safe iteration space could be computed for any check.
+/// Otherwise the intersection of the safe iteration spaces is handed to a
+/// LoopConstrainer; if that succeeds, the branch condition of every range
+/// check that contributed to the intersection is folded to the constant that
+/// makes the check pass.
+///
+/// \p LPM is unused.  Returns true whenever IR may have been modified.
+bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
+  if (L->getBlocks().size() >= LoopSizeCutoff) {
+    DEBUG(dbgs() << "irce: giving up constraining loop, too large\n";);
+    return false;
+  }
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader) {
+    DEBUG(dbgs() << "irce: loop has no preheader, leaving\n");
+    return false;
+  }
+
+  LLVMContext &Context = Preheader->getContext();
+  InductiveRangeCheck::AllocatorTy IRCAlloc;
+  SmallVector<InductiveRangeCheck *, 16> RangeChecks;
+  ScalarEvolution &SE = getAnalysis<ScalarEvolution>();
+
+  // Every block of the loop that ends in a branch is a candidate range check.
+  for (auto BBI : L->getBlocks())
+    if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
+      if (InductiveRangeCheck *IRC =
+              InductiveRangeCheck::create(IRCAlloc, TBI, L, SE))
+        RangeChecks.push_back(IRC);
+
+  if (RangeChecks.empty())
+    return false;
+
+  DEBUG(dbgs() << "irce: looking at loop "; L->print(dbgs());
+        dbgs() << "irce: loop has " << RangeChecks.size()
+               << " inductive range checks: \n";
+        for (InductiveRangeCheck *IRC : RangeChecks)
+          IRC->print(dbgs());
+    );
+
+  Optional<InductiveRangeCheck::Range> SafeIterRange;
+  Instruction *ExprInsertPt = Preheader->getTerminator();
+
+  SmallVector<InductiveRangeCheck *, 4> RangeChecksToEliminate;
+
+  // The bounds of each safe iteration space are emitted in front of the
+  // preheader's terminator.  Checks for which no space can be computed are
+  // left in place and do not narrow the intersection.
+  IRBuilder<> B(ExprInsertPt);
+  for (InductiveRangeCheck *IRC : RangeChecks) {
+    auto Result = IRC->computeSafeIterationSpace(SE, B);
+    if (Result.hasValue()) {
+      SafeIterRange = IntersectRange(SafeIterRange, Result.getValue(), B);
+      RangeChecksToEliminate.push_back(IRC);
+    }
+  }
+
+  if (!SafeIterRange.hasValue())
+    return false;
+
+  LoopConstrainer LC(*L, getAnalysis<LoopInfo>(), SE, SafeIterRange.getValue());
+
+  if (LC.run()) {
+    auto PrintConstrainedLoopInfo = [L]() {
+      dbgs() << "irce: in function ";
+      dbgs() << L->getHeader()->getParent()->getName() << ": ";
+      dbgs() << "constrained ";
+      L->print(dbgs());
+    };
+
+    DEBUG(PrintConstrainedLoopInfo());
+
+    if (PrintChangedLoops)
+      PrintConstrainedLoopInfo();
+
+    // Optimize away the now-redundant range checks.
+
+    for (InductiveRangeCheck *IRC : RangeChecksToEliminate) {
+      ConstantInt *FoldedRangeCheck = IRC->getPassingDirection()
+                                          ? ConstantInt::getTrue(Context)
+                                          : ConstantInt::getFalse(Context);
+      IRC->getBranch()->setCondition(FoldedRangeCheck);
+    }
+  }
+
+  // At least one safe iteration space was computed to get here, so
+  // instructions may already have been emitted into the preheader even if the
+  // constrainer declined to transform the loop.  Report a modification
+  // conservatively rather than claim the IR is untouched.
+  return true;
+}
+
+/// Factory for the pass: returns a newly allocated
+/// InductiveRangeCheckElimination instance.
+Pass *llvm::createInductiveRangeCheckEliminationPass() {
+  Pass *NewPass = new InductiveRangeCheckElimination;
+  return NewPass;
+}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp

index a16e9e29a1f128bae40e3bbdc97c8345667a0f8b..e8e162db931205ef1f4266d6283f8586c12a9ab5 100644 (file)
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -40,6 +40,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
    initializeGVNPass(Registry);
    initializeEarlyCSEPass(Registry);
    initializeFlattenCFGPassPass(Registry);
+  initializeInductiveRangeCheckEliminationPass(Registry);
    initializeIndVarSimplifyPass(Registry);
    initializeJumpThreadingPass(Registry);
    initializeLICMPass(Registry);
diff --git a/test/Transforms/IRCE/multiple-access-no-preloop.ll b/test/Transforms/IRCE/multiple-access-no-preloop.ll

new file mode 100644 (file)

index 0000000..56b7b7b
--- /dev/null
+++ b/test/Transforms/IRCE/multiple-access-no-preloop.ll
@@ -0,0 +1,59 @@
+; RUN: opt -irce -S < %s | FileCheck %s
+
+; Test input: a loop whose index %idx starts at 0 and whose latch continues
+; while %idx.next < %n (signed).  Each iteration performs two stores, one to
+; %arr_a[%idx] and one to %arr_b[%idx], each guarded by its own signed
+; comparison of %idx against a length (%len.a, %len.b) loaded with
+; !range !0 metadata.  A failing comparison leaves through %out.of.bounds.
+define void @multiple_access_no_preloop(
+    i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) {
+
+ entry:
+  %len.a = load i32* %a_len_ptr, !range !0
+  %len.b = load i32* %b_len_ptr, !range !0
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds.b ]
+  %idx.next = add i32 %idx, 1
+  %abc.a = icmp slt i32 %idx, %len.a
+  br i1 %abc.a, label %in.bounds.a, label %out.of.bounds
+
+ in.bounds.a:
+  %addr.a = getelementptr i32* %arr_a, i32 %idx
+  store i32 0, i32* %addr.a
+  %abc.b = icmp slt i32 %idx, %len.b
+  br i1 %abc.b, label %in.bounds.b, label %out.of.bounds
+
+ in.bounds.b:
+  %addr.b = getelementptr i32* %arr_b, i32 %idx
+  store i32 -1, i32* %addr.b
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+ out.of.bounds:
+  ret void
+
+ exit:
+  ret void
+}
+
+; CHECK-LABEL: multiple_access_no_preloop
+
+; CHECK-LABEL: loop.preheader:
+; CHECK: [[smaller_len_cmp:[^ ]+]] = icmp slt i32 %len.a, %len.b
+; CHECK: [[smaller_len:[^ ]+]] = select i1 [[smaller_len_cmp]], i32 %len.a, i32 %len.b
+; CHECK: [[upper_bound_cmp:[^ ]+]] = icmp slt i32 %n, [[smaller_len]]
+; CHECK: [[upper_bound:[^ ]+]] = select i1 [[upper_bound_cmp]], i32 %n, i32 [[smaller_len]]
+
+; CHECK-LABEL: loop:
+; CHECK: br i1 true, label %in.bounds.a, label %out.of.bounds
+
+; CHECK-LABEL: in.bounds.a:
+; CHECK: br i1 true, label %in.bounds.b, label %out.of.bounds
+
+; CHECK-LABEL: in.bounds.b:
+; CHECK: [[main_loop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[upper_bound]]
+; CHECK: br i1 [[main_loop_cond]], label %loop, label %main.exit.selector
+
+; CHECK-LABEL: in.bounds.b.postloop:
+; CHECK: %next.postloop = icmp slt i32 %idx.next.postloop, %n
+; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
+
+!0 = !{i32 0, i32 2147483647}
diff --git a/test/Transforms/IRCE/single-access-no-preloop.ll b/test/Transforms/IRCE/single-access-no-preloop.ll

new file mode 100644 (file)

index 0000000..cf073b3
--- /dev/null
+++ b/test/Transforms/IRCE/single-access-no-preloop.ll
@@ -0,0 +1,110 @@
+; RUN: opt -irce -S < %s | FileCheck %s
+
+; Test input: a loop whose index %idx starts at 0 and whose latch continues
+; while %idx.next < %n (signed).  Each iteration stores to %arr[%idx], guarded
+; by the signed comparison %idx < %len, where %len is loaded with !range !0
+; metadata.  A failing comparison leaves through %out.of.bounds.
+define void @single_access_no_preloop_no_offset(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
+ entry:
+  %len = load i32* %a_len_ptr, !range !0
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %abc = icmp slt i32 %idx, %len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+ in.bounds:
+  %addr = getelementptr i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+ out.of.bounds:
+  ret void
+
+ exit:
+  ret void
+}
+
+; CHECK-LABEL: single_access_no_preloop
+
+; CHECK-LABEL: loop:
+; CHECK: br i1 true, label %in.bounds, label %out.of.bounds
+
+; CHECK-LABEL: main.exit.selector:
+; CHECK-NEXT: [[continue:%[^ ]+]] = icmp slt i32 %idx.next, %n
+; CHECK-NEXT: br i1 [[continue]], label %main.pseudo.exit, label %exit.loopexit
+
+; CHECK-LABEL: main.pseudo.exit:
+; CHECK-NEXT: %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ]
+; CHECK-NEXT: br label %postloop
+
+; CHECK-LABEL: postloop:
+; CHECK-NEXT: br label %loop.postloop
+
+; CHECK-LABEL: loop.postloop:
+; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.next.postloop, %in.bounds.postloop ], [ %idx.copy, %postloop ]
+; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 1
+; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %len
+; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds
+
+; CHECK-LABEL: in.bounds.postloop:
+; CHECK-NEXT: %addr.postloop = getelementptr i32* %arr, i32 %idx.postloop
+; CHECK-NEXT: store i32 0, i32* %addr.postloop
+; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, %n
+; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
+
+
+; Test input: like the loop above, except that the index being compared
+; against %len and used for the store is %idx + 4 (%idx.for.abc) rather than
+; %idx itself.
+define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
+ entry:
+  %len = load i32* %a_len_ptr, !range !0
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %idx.for.abc = add i32 %idx, 4
+  %abc = icmp slt i32 %idx.for.abc, %len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+ in.bounds:
+  %addr = getelementptr i32* %arr, i32 %idx.for.abc
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+ out.of.bounds:
+  ret void
+
+ exit:
+  ret void
+}
+
+; CHECK-LABEL: single_access_no_preloop_with_offset
+
+; CHECK-LABEL: loop.preheader:
+; CHECK: [[safe_range_end:[^ ]+]] = sub i32 %len, 4
+; CHECK: [[exit_main_loop_at_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]]
+; CHECK: [[exit_main_loop_at:[^ ]+]] = select i1 [[exit_main_loop_at_cmp]], i32 %n, i32 [[safe_range_end]]
+; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at]]
+; CHECK: br i1 [[enter_main_loop]], label %loop, label %main.pseudo.exit
+
+; CHECK-LABEL: loop:
+; CHECK: br i1 true, label %in.bounds, label %out.of.bounds
+
+; CHECK-LABEL: in.bounds:
+; CHECK: [[continue_main_loop:[^ ]+]] = icmp slt i32 %idx.next, [[exit_main_loop_at]]
+; CHECK: br i1 [[continue_main_loop]], label %loop, label %main.exit.selector
+
+; CHECK-LABEL: main.pseudo.exit:
+; CHECK:  %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next, %main.exit.selector ]
+; CHECK:  br label %postloop
+
+; CHECK-LABEL: loop.postloop:
+; CHECK: %idx.postloop = phi i32 [ %idx.next.postloop, %in.bounds.postloop ], [ %idx.copy, %postloop ]
+
+; CHECK-LABEL: in.bounds.postloop:
+; CHECK: %next.postloop = icmp slt i32 %idx.next.postloop, %n
+; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
+
+!0 = !{i32 0, i32 2147483647}
diff --git a/test/Transforms/IRCE/single-access-with-preloop.ll b/test/Transforms/IRCE/single-access-with-preloop.ll

new file mode 100644 (file)

index 0000000..6775d33
--- /dev/null
+++ b/test/Transforms/IRCE/single-access-with-preloop.ll
@@ -0,0 +1,59 @@
+; RUN: opt -irce -S < %s | FileCheck %s
+
+; Test input: a loop whose index %idx starts at 0 and whose latch continues
+; while %idx.next < %n (signed).  The accessed index is %idx + %offset
+; (%array.idx), where %offset is a function argument, and it is guarded on
+; both sides: %array.idx >= 0 and %array.idx < %len, combined with `and'.
+define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 %offset) {
+ entry:
+  %len = load i32* %a_len_ptr, !range !0
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %array.idx = add i32 %idx, %offset
+  %abc.high = icmp slt i32 %array.idx, %len
+  %abc.low = icmp sge i32 %array.idx, 0
+  %abc = and i1 %abc.low, %abc.high
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+ in.bounds:
+  %addr = getelementptr i32* %arr, i32 %array.idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+ out.of.bounds:
+  ret void
+
+ exit:
+  ret void
+}
+
+; CHECK-LABEL: loop.preheader:
+; CHECK: [[safe_start:[^ ]+]] = sub i32 0, %offset
+; CHECK: [[safe_end:[^ ]+]] = sub i32 %len, %offset
+; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp slt i32 %n, [[safe_start]]
+; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 %n, i32 [[safe_start]]
+; CHECK: [[exit_mainloop_at_cond:[^ ]+]] = icmp slt i32 %n, [[safe_end]]
+; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cond]], i32 %n, i32 [[safe_end]]
+
+; CHECK-LABEL: in.bounds:
+; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]]
+; CHECK: br i1 [[continue_mainloop_cond]], label %loop, label %main.exit.selector
+
+; CHECK-LABEL: main.exit.selector:
+; CHECK: [[mainloop_its_left:[^ ]+]] = icmp slt i32 %idx.next, %n
+; CHECK: br i1 [[mainloop_its_left]], label %main.pseudo.exit, label %exit.loopexit
+
+; CHECK-LABEL: in.bounds.preloop:
+; CHECK: [[continue_preloop_cond:[^ ]+]] = icmp slt i32 %idx.next.preloop, [[exit_preloop_at]]
+; CHECK: br i1 [[continue_preloop_cond]], label %loop.preloop, label %preloop.exit.selector
+
+; CHECK-LABEL: preloop.exit.selector:
+; CHECK: [[preloop_its_left:[^ ]+]] = icmp slt i32 %idx.next.preloop, %n
+; CHECK: br i1 [[preloop_its_left]], label %preloop.pseudo.exit, label %exit.loopexit
+
+; CHECK-LABEL: in.bounds.postloop:
+; CHECK: %next.postloop = icmp slt i32 %idx.next.postloop, %n
+; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
+
+!0 = !{i32 0, i32 2147483647}
diff --git a/test/Transforms/IRCE/unhandled.ll b/test/Transforms/IRCE/unhandled.ll

new file mode 100644 (file)

index 0000000..3531c48
--- /dev/null
+++ b/test/Transforms/IRCE/unhandled.ll
@@ -0,0 +1,37 @@
+; RUN: opt -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s
+
+; Demonstrates that we don't currently handle the general expression
+; `A * I + B'.
+
+; Test input: the accessed index is %idx * %scale + %offset (%array.idx), with
+; both %scale and %offset being function arguments.  It is guarded on both
+; sides (%array.idx >= 0 and %array.idx < %len).
+define void @general_affine_expressions(i32 *%arr, i32 *%a_len_ptr, i32 %n,
+                                        i32 %scale, i32 %offset) {
+; CHECK-NOT: constrained Loop at depth
+ entry:
+  %len = load i32* %a_len_ptr, !range !0
+  %first.itr.check = icmp sgt i32 %n, 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %idx.mul = mul i32 %idx, %scale
+  %array.idx = add i32 %idx.mul, %offset
+  %abc.high = icmp slt i32 %array.idx, %len
+  %abc.low = icmp sge i32 %array.idx, 0
+  %abc = and i1 %abc.low, %abc.high
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+ in.bounds:
+  %addr = getelementptr i32* %arr, i32 %array.idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, %n
+  br i1 %next, label %loop, label %exit
+
+ out.of.bounds:
+  ret void
+
+ exit:
+  ret void
+}
+
+!0 = !{i32 0, i32 2147483647}
diff --git a/test/Transforms/IRCE/with-parent-loops.ll b/test/Transforms/IRCE/with-parent-loops.ll

new file mode 100644 (file)

index 0000000..25dfb13
--- /dev/null
+++ b/test/Transforms/IRCE/with-parent-loops.ll
@@ -0,0 +1,344 @@
+; RUN: opt -verify-loop-info -irce-print-changed-loops -irce < %s 2>&1 | FileCheck %s
+
+; This test checks that we update the LoopInfo correctly in the presence
+; of parent, grandparent, sibling, cousin and uncle loops.
+
+; Function Attrs: alwaysinline
+define void @inner_loop(i32* %arr, i32* %a_len_ptr, i32 %n) #0 {  ; base case: a single top-level loop with one upper-bound test
+; CHECK: irce: in function inner_loop: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
+
+entry:
+  %len = load i32* %a_len_ptr, !range !0  ; array length, annotated with !range !0
+  %first.itr.check = icmp sgt i32 %n, 0  ; the loop is entered only when n > 0
+  br i1 %first.itr.check, label %loop, label %exit
+
+loop:                                             ; preds = %in.bounds, %entry
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ]
+  %idx.next = add i32 %idx, 1
+  %abc = icmp slt i32 %idx, %len  ; bounds test: idx < len
+  br i1 %abc, label %in.bounds, label %out.of.bounds
+
+in.bounds:                                        ; preds = %loop
+  %addr = getelementptr i32* %arr, i32 %idx
+  store i32 0, i32* %addr
+  %next = icmp slt i32 %idx.next, %n  ; latch condition: keep looping while idx.next < n
+  br i1 %next, label %loop, label %exit
+
+out.of.bounds:                                    ; preds = %loop
+  ret void  ; a failed bounds test returns from the function
+
+exit:                                             ; preds = %in.bounds, %entry
+  ret void
+}
+
+; Function Attrs: alwaysinline
+define void @with_parent(i32* %arr, i32* %a_len_ptr, i32 %n, i32 %parent.count) #0 {  ; range-tested loop %loop.i nested inside counting loop %loop
+; CHECK: irce: in function with_parent: constrained Loop at depth 2 containing: %loop.i<header><exiting>,%in.bounds.i<latch><exiting>
+
+entry:
+  br label %loop
+
+loop:                                             ; preds = %inner_loop.exit, %entry
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %inner_loop.exit ]
+  %idx.next = add i32 %idx, 1
+  %next = icmp ult i32 %idx.next, %parent.count  ; outer-loop continue condition; consumed by the branch in %inner_loop.exit
+  %len.i = load i32* %a_len_ptr, !range !0  ; length is reloaded on every outer iteration
+  %first.itr.check.i = icmp sgt i32 %n, 0  ; the inner loop is entered only when n > 0
+  br i1 %first.itr.check.i, label %loop.i, label %exit.i
+
+loop.i:                                           ; preds = %in.bounds.i, %loop
+  %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ]
+  %idx.next.i = add i32 %idx.i, 1
+  %abc.i = icmp slt i32 %idx.i, %len.i  ; bounds test: idx.i < len.i
+  br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i
+
+in.bounds.i:                                      ; preds = %loop.i
+  %addr.i = getelementptr i32* %arr, i32 %idx.i
+  store i32 0, i32* %addr.i
+  %next.i = icmp slt i32 %idx.next.i, %n  ; inner latch condition: idx.next.i < n
+  br i1 %next.i, label %loop.i, label %exit.i
+
+out.of.bounds.i:                                  ; preds = %loop.i
+  br label %inner_loop.exit  ; a failed bounds test leaves only the inner loop
+
+exit.i:                                           ; preds = %in.bounds.i, %loop
+  br label %inner_loop.exit
+
+inner_loop.exit:                                  ; preds = %exit.i, %out.of.bounds.i
+  br i1 %next, label %loop, label %exit  ; outer-loop latch
+
+exit:                                             ; preds = %inner_loop.exit
+  ret void
+}
+
+; Function Attrs: alwaysinline
+define void @with_grandparent(i32* %arr, i32* %a_len_ptr, i32 %n, i32 %parent.count, i32 %grandparent.count) #0 {  ; three-deep nest: %loop > %loop.i > range-tested %loop.i.i
+; CHECK: irce: in function with_grandparent: constrained Loop at depth 3 containing: %loop.i.i<header><exiting>,%in.bounds.i.i<latch><exiting>
+
+entry:
+  br label %loop
+
+loop:                                             ; preds = %with_parent.exit, %entry
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %with_parent.exit ]
+  %idx.next = add i32 %idx, 1
+  %next = icmp ult i32 %idx.next, %grandparent.count  ; outermost continue condition; consumed in %with_parent.exit
+  br label %loop.i
+
+loop.i:                                           ; preds = %inner_loop.exit.i, %loop
+  %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %inner_loop.exit.i ]
+  %idx.next.i = add i32 %idx.i, 1
+  %next.i = icmp ult i32 %idx.next.i, %parent.count  ; middle-loop continue condition; consumed in %inner_loop.exit.i
+  %len.i.i = load i32* %a_len_ptr, !range !0  ; length is reloaded on every middle-loop iteration
+  %first.itr.check.i.i = icmp sgt i32 %n, 0  ; the innermost loop is entered only when n > 0
+  br i1 %first.itr.check.i.i, label %loop.i.i, label %exit.i.i
+
+loop.i.i:                                         ; preds = %in.bounds.i.i, %loop.i
+  %idx.i.i = phi i32 [ 0, %loop.i ], [ %idx.next.i.i, %in.bounds.i.i ]
+  %idx.next.i.i = add i32 %idx.i.i, 1
+  %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i  ; bounds test: idx.i.i < len.i.i
+  br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i
+
+in.bounds.i.i:                                    ; preds = %loop.i.i
+  %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i
+  store i32 0, i32* %addr.i.i
+  %next.i.i = icmp slt i32 %idx.next.i.i, %n  ; innermost latch condition: idx.next.i.i < n
+  br i1 %next.i.i, label %loop.i.i, label %exit.i.i
+
+out.of.bounds.i.i:                                ; preds = %loop.i.i
+  br label %inner_loop.exit.i  ; a failed bounds test leaves only the innermost loop
+
+exit.i.i:                                         ; preds = %in.bounds.i.i, %loop.i
+  br label %inner_loop.exit.i
+
+inner_loop.exit.i:                                ; preds = %exit.i.i, %out.of.bounds.i.i
+  br i1 %next.i, label %loop.i, label %with_parent.exit  ; middle-loop latch
+
+with_parent.exit:                                 ; preds = %inner_loop.exit.i
+  br i1 %next, label %loop, label %exit  ; outermost-loop latch
+
+exit:                                             ; preds = %with_parent.exit
+  ret void
+}
+
+; Function Attrs: alwaysinline
+define void @with_sibling(i32* %arr, i32* %a_len_ptr, i32 %n, i32 %parent.count) #0 {  ; two range-tested loops (%loop.i, %loop.i6) run back to back inside %loop
+; CHECK: irce: in function with_sibling: constrained Loop at depth 2 containing: %loop.i<header><exiting>,%in.bounds.i<latch><exiting>
+; CHECK: irce: in function with_sibling: constrained Loop at depth 2 containing: %loop.i6<header><exiting>,%in.bounds.i9<latch><exiting>
+
+entry:
+  br label %loop
+
+loop:                                             ; preds = %inner_loop.exit12, %entry
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %inner_loop.exit12 ]
+  %idx.next = add i32 %idx, 1
+  %next = icmp ult i32 %idx.next, %parent.count  ; outer-loop continue condition; consumed in %inner_loop.exit12
+  %len.i = load i32* %a_len_ptr, !range !0  ; length used by the first inner loop
+  %first.itr.check.i = icmp sgt i32 %n, 0  ; the first inner loop is entered only when n > 0
+  br i1 %first.itr.check.i, label %loop.i, label %exit.i
+
+loop.i:                                           ; preds = %in.bounds.i, %loop
+  %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ]
+  %idx.next.i = add i32 %idx.i, 1
+  %abc.i = icmp slt i32 %idx.i, %len.i  ; bounds test of the first sibling: idx.i < len.i
+  br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i
+
+in.bounds.i:                                      ; preds = %loop.i
+  %addr.i = getelementptr i32* %arr, i32 %idx.i
+  store i32 0, i32* %addr.i
+  %next.i = icmp slt i32 %idx.next.i, %n  ; latch condition of the first sibling
+  br i1 %next.i, label %loop.i, label %exit.i
+
+out.of.bounds.i:                                  ; preds = %loop.i
+  br label %inner_loop.exit  ; a failed bounds test leaves only the first inner loop
+
+exit.i:                                           ; preds = %in.bounds.i, %loop
+  br label %inner_loop.exit
+
+inner_loop.exit:                                  ; preds = %exit.i, %out.of.bounds.i
+  %len.i1 = load i32* %a_len_ptr, !range !0  ; length is loaded again for the second inner loop
+  %first.itr.check.i2 = icmp sgt i32 %n, 0  ; the second inner loop is entered only when n > 0
+  br i1 %first.itr.check.i2, label %loop.i6, label %exit.i11
+
+loop.i6:                                          ; preds = %in.bounds.i9, %inner_loop.exit
+  %idx.i3 = phi i32 [ 0, %inner_loop.exit ], [ %idx.next.i4, %in.bounds.i9 ]
+  %idx.next.i4 = add i32 %idx.i3, 1
+  %abc.i5 = icmp slt i32 %idx.i3, %len.i1  ; bounds test of the second sibling: idx.i3 < len.i1
+  br i1 %abc.i5, label %in.bounds.i9, label %out.of.bounds.i10
+
+in.bounds.i9:                                     ; preds = %loop.i6
+  %addr.i7 = getelementptr i32* %arr, i32 %idx.i3
+  store i32 0, i32* %addr.i7
+  %next.i8 = icmp slt i32 %idx.next.i4, %n  ; latch condition of the second sibling
+  br i1 %next.i8, label %loop.i6, label %exit.i11
+
+out.of.bounds.i10:                                ; preds = %loop.i6
+  br label %inner_loop.exit12  ; a failed bounds test leaves only the second inner loop
+
+exit.i11:                                         ; preds = %in.bounds.i9, %inner_loop.exit
+  br label %inner_loop.exit12
+
+inner_loop.exit12:                                ; preds = %exit.i11, %out.of.bounds.i10
+  br i1 %next, label %loop, label %exit  ; outer-loop latch
+
+exit:                                             ; preds = %inner_loop.exit12
+  ret void
+}
+
+; Function Attrs: alwaysinline
+define void @with_cousin(i32* %arr, i32* %a_len_ptr, i32 %n, i32 %parent.count, i32 %grandparent.count) #0 {  ; %loop holds two depth-2 loops (%loop.i, %loop.i6), each with its own range-tested depth-3 loop
+; CHECK: irce: in function with_cousin: constrained Loop at depth 3 containing: %loop.i.i<header><exiting>,%in.bounds.i.i<latch><exiting>
+; CHECK: irce: in function with_cousin: constrained Loop at depth 3 containing: %loop.i.i10<header><exiting>,%in.bounds.i.i13<latch><exiting>
+
+entry:
+  br label %loop
+
+loop:                                             ; preds = %with_parent.exit17, %entry
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %with_parent.exit17 ]
+  %idx.next = add i32 %idx, 1
+  %next = icmp ult i32 %idx.next, %grandparent.count  ; outermost continue condition; consumed in %with_parent.exit17
+  br label %loop.i
+
+loop.i:                                           ; preds = %inner_loop.exit.i, %loop
+  %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %inner_loop.exit.i ]
+  %idx.next.i = add i32 %idx.i, 1
+  %next.i = icmp ult i32 %idx.next.i, %parent.count  ; continue condition of the first depth-2 loop
+  %len.i.i = load i32* %a_len_ptr, !range !0  ; length used by the first depth-3 loop
+  %first.itr.check.i.i = icmp sgt i32 %n, 0  ; the first depth-3 loop is entered only when n > 0
+  br i1 %first.itr.check.i.i, label %loop.i.i, label %exit.i.i
+
+loop.i.i:                                         ; preds = %in.bounds.i.i, %loop.i
+  %idx.i.i = phi i32 [ 0, %loop.i ], [ %idx.next.i.i, %in.bounds.i.i ]
+  %idx.next.i.i = add i32 %idx.i.i, 1
+  %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i  ; bounds test of the first cousin: idx.i.i < len.i.i
+  br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i
+
+in.bounds.i.i:                                    ; preds = %loop.i.i
+  %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i
+  store i32 0, i32* %addr.i.i
+  %next.i.i = icmp slt i32 %idx.next.i.i, %n  ; latch condition of the first cousin
+  br i1 %next.i.i, label %loop.i.i, label %exit.i.i
+
+out.of.bounds.i.i:                                ; preds = %loop.i.i
+  br label %inner_loop.exit.i  ; a failed bounds test leaves only the first depth-3 loop
+
+exit.i.i:                                         ; preds = %in.bounds.i.i, %loop.i
+  br label %inner_loop.exit.i
+
+inner_loop.exit.i:                                ; preds = %exit.i.i, %out.of.bounds.i.i
+  br i1 %next.i, label %loop.i, label %with_parent.exit  ; latch of the first depth-2 loop
+
+with_parent.exit:                                 ; preds = %inner_loop.exit.i
+  br label %loop.i6  ; fall through into the second depth-2 loop
+
+loop.i6:                                          ; preds = %inner_loop.exit.i16, %with_parent.exit
+  %idx.i1 = phi i32 [ 0, %with_parent.exit ], [ %idx.next.i2, %inner_loop.exit.i16 ]
+  %idx.next.i2 = add i32 %idx.i1, 1
+  %next.i3 = icmp ult i32 %idx.next.i2, %parent.count  ; continue condition of the second depth-2 loop
+  %len.i.i4 = load i32* %a_len_ptr, !range !0  ; length used by the second depth-3 loop
+  %first.itr.check.i.i5 = icmp sgt i32 %n, 0  ; the second depth-3 loop is entered only when n > 0
+  br i1 %first.itr.check.i.i5, label %loop.i.i10, label %exit.i.i15
+
+loop.i.i10:                                       ; preds = %in.bounds.i.i13, %loop.i6
+  %idx.i.i7 = phi i32 [ 0, %loop.i6 ], [ %idx.next.i.i8, %in.bounds.i.i13 ]
+  %idx.next.i.i8 = add i32 %idx.i.i7, 1
+  %abc.i.i9 = icmp slt i32 %idx.i.i7, %len.i.i4  ; bounds test of the second cousin: idx.i.i7 < len.i.i4
+  br i1 %abc.i.i9, label %in.bounds.i.i13, label %out.of.bounds.i.i14
+
+in.bounds.i.i13:                                  ; preds = %loop.i.i10
+  %addr.i.i11 = getelementptr i32* %arr, i32 %idx.i.i7
+  store i32 0, i32* %addr.i.i11
+  %next.i.i12 = icmp slt i32 %idx.next.i.i8, %n  ; latch condition of the second cousin
+  br i1 %next.i.i12, label %loop.i.i10, label %exit.i.i15
+
+out.of.bounds.i.i14:                              ; preds = %loop.i.i10
+  br label %inner_loop.exit.i16  ; a failed bounds test leaves only the second depth-3 loop
+
+exit.i.i15:                                       ; preds = %in.bounds.i.i13, %loop.i6
+  br label %inner_loop.exit.i16
+
+inner_loop.exit.i16:                              ; preds = %exit.i.i15, %out.of.bounds.i.i14
+  br i1 %next.i3, label %loop.i6, label %with_parent.exit17  ; latch of the second depth-2 loop
+
+with_parent.exit17:                               ; preds = %inner_loop.exit.i16
+  br i1 %next, label %loop, label %exit  ; outermost-loop latch
+
+exit:                                             ; preds = %with_parent.exit17
+  ret void
+}
+
+; Function Attrs: alwaysinline
+define void @with_uncle(i32* %arr, i32* %a_len_ptr, i32 %n, i32 %parent.count, i32 %grandparent.count) #0 {  ; %loop holds range-tested %loop.i (depth 2) followed by %loop.i4, which nests range-tested %loop.i.i (depth 3)
+; CHECK: irce: in function with_uncle: constrained Loop at depth 2 containing: %loop.i<header><exiting>,%in.bounds.i<latch><exiting>
+; CHECK: irce: in function with_uncle: constrained Loop at depth 3 containing: %loop.i.i<header><exiting>,%in.bounds.i.i<latch><exiting>
+
+entry:
+  br label %loop
+
+loop:                                             ; preds = %with_parent.exit, %entry
+  %idx = phi i32 [ 0, %entry ], [ %idx.next, %with_parent.exit ]
+  %idx.next = add i32 %idx, 1
+  %next = icmp ult i32 %idx.next, %grandparent.count  ; outermost continue condition; consumed in %with_parent.exit
+  %len.i = load i32* %a_len_ptr, !range !0  ; length used by the depth-2 range-tested loop
+  %first.itr.check.i = icmp sgt i32 %n, 0  ; %loop.i is entered only when n > 0
+  br i1 %first.itr.check.i, label %loop.i, label %exit.i
+
+loop.i:                                           ; preds = %in.bounds.i, %loop
+  %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ]
+  %idx.next.i = add i32 %idx.i, 1
+  %abc.i = icmp slt i32 %idx.i, %len.i  ; bounds test of the "uncle" loop: idx.i < len.i
+  br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i
+
+in.bounds.i:                                      ; preds = %loop.i
+  %addr.i = getelementptr i32* %arr, i32 %idx.i
+  store i32 0, i32* %addr.i
+  %next.i = icmp slt i32 %idx.next.i, %n  ; latch condition of the "uncle" loop
+  br i1 %next.i, label %loop.i, label %exit.i
+
+out.of.bounds.i:                                  ; preds = %loop.i
+  br label %inner_loop.exit  ; a failed bounds test leaves only %loop.i
+
+exit.i:                                           ; preds = %in.bounds.i, %loop
+  br label %inner_loop.exit
+
+inner_loop.exit:                                  ; preds = %exit.i, %out.of.bounds.i
+  br label %loop.i4  ; fall through into the second depth-2 loop
+
+loop.i4:                                          ; preds = %inner_loop.exit.i, %inner_loop.exit
+  %idx.i1 = phi i32 [ 0, %inner_loop.exit ], [ %idx.next.i2, %inner_loop.exit.i ]
+  %idx.next.i2 = add i32 %idx.i1, 1
+  %next.i3 = icmp ult i32 %idx.next.i2, %parent.count  ; continue condition of %loop.i4; consumed in %inner_loop.exit.i
+  %len.i.i = load i32* %a_len_ptr, !range !0  ; length used by the depth-3 range-tested loop
+  %first.itr.check.i.i = icmp sgt i32 %n, 0  ; %loop.i.i is entered only when n > 0
+  br i1 %first.itr.check.i.i, label %loop.i.i, label %exit.i.i
+
+loop.i.i:                                         ; preds = %in.bounds.i.i, %loop.i4
+  %idx.i.i = phi i32 [ 0, %loop.i4 ], [ %idx.next.i.i, %in.bounds.i.i ]
+  %idx.next.i.i = add i32 %idx.i.i, 1
+  %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i  ; bounds test of the "nephew" loop: idx.i.i < len.i.i
+  br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i
+
+in.bounds.i.i:                                    ; preds = %loop.i.i
+  %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i
+  store i32 0, i32* %addr.i.i
+  %next.i.i = icmp slt i32 %idx.next.i.i, %n  ; latch condition of the "nephew" loop
+  br i1 %next.i.i, label %loop.i.i, label %exit.i.i
+
+out.of.bounds.i.i:                                ; preds = %loop.i.i
+  br label %inner_loop.exit.i  ; a failed bounds test leaves only %loop.i.i
+
+exit.i.i:                                         ; preds = %in.bounds.i.i, %loop.i4
+  br label %inner_loop.exit.i
+
+inner_loop.exit.i:                                ; preds = %exit.i.i, %out.of.bounds.i.i
+  br i1 %next.i3, label %loop.i4, label %with_parent.exit  ; latch of %loop.i4
+
+with_parent.exit:                                 ; preds = %inner_loop.exit.i
+  br i1 %next, label %loop, label %exit  ; outermost-loop latch
+
+exit:                                             ; preds = %with_parent.exit
+  ret void
+}
+
+attributes #0 = { alwaysinline }  ; attribute group referenced by every function defined in this test
+
+!0 = !{i32 0, i32 2147483647}  ; operand of the !range on the length loads: value lies in [0, 2147483647)
author	Sanjoy Das <sanjoy@playingwithpointers.com>
	Fri, 16 Jan 2015 01:03:22 +0000 (01:03 +0000)
committer	Sanjoy Das <sanjoy@playingwithpointers.com>
	Fri, 16 Jan 2015 01:03:22 +0000 (01:03 +0000)
include/llvm/InitializePasses.h		patch \| blob \| history
include/llvm/LinkAllPasses.h		patch \| blob \| history
include/llvm/Transforms/Scalar.h		patch \| blob \| history
lib/Transforms/Scalar/CMakeLists.txt		patch \| blob \| history
lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp	[new file with mode: 0644]	patch \| blob
lib/Transforms/Scalar/Scalar.cpp		patch \| blob \| history
test/Transforms/IRCE/multiple-access-no-preloop.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/IRCE/single-access-no-preloop.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/IRCE/single-access-with-preloop.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/IRCE/unhandled.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/IRCE/with-parent-loops.ll	[new file with mode: 0644]	patch \| blob