[RewriteStatepointsForGC] Make base pointer inference deterministic

[oota-llvm.git] / lib / Transforms / Scalar / RewriteStatepointsForGC.cpp
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp

index 0d793233f45304f14e4640a1a4a2010aca59ea46..031f40e2caa6c0a11adc2700080893b2ce22d0d9 100644 (file)
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -21,6 +21,7 @@
  #include "llvm/ADT/DenseSet.h"
  #include "llvm/ADT/SetVector.h"
  #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/MapVector.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/CallSite.h"
  #include "llvm/IR/Dominators.h"
@@ -435,7 +436,7 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
  
    // Due to inheritance, this must be _after_ the global variable and undef
    // checks
-  if (Constant *Con = dyn_cast<Constant>(I)) {
+  if (isa<Constant>(I)) {
      assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
             "order of checks wrong!");
      // Note: Finding a constant base for something marked for relocation
@@ -446,7 +447,7 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
      // off a potentially null value and have proven it null.  We also use
      // null pointers in dead paths of relocation phis (which we might later
      // want to find a base pointer for).
-    assert(isa<ConstantPointerNull>(Con) &&
+    assert(isa<ConstantPointerNull>(I) &&
             "null is the only case which makes sense");
      return BaseDefiningValueResult(I, true);
    }
@@ -661,7 +662,6 @@ static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  #endif
  
  namespace {
-typedef DenseMap<Value *, BDVState> ConflictStateMapTy;
  // Values of type BDVState form a lattice, and this is a helper
  // class that implementes the meet operation.  The meat of the meet
  // operation is implemented in MeetBDVStates::pureMeet
@@ -761,14 +761,17 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
  
    // Once populated, will contain a mapping from each potentially non-base BDV
    // to a lattice value (described above) which corresponds to that BDV.
-  ConflictStateMapTy states;
-  // Recursively fill in all phis & selects reachable from the initial one
-  // for which we don't already know a definite base value for
+  // We use the order of insertion (DFS over the def/use graph) to provide a
+  // stable deterministic ordering for visiting DenseMaps (which are unordered)
+  // below.  This is important for deterministic compilation.
+  MapVector<Value *, BDVState> states;
+
+  // Recursively fill in all base defining values reachable from the initial
+  // one for which we don't already know a definite base value for
    /* scope */ {
-    DenseSet<Value *> Visited;
      SmallVector<Value*, 16> Worklist;
      Worklist.push_back(def);
-    Visited.insert(def);
+    states.insert(std::make_pair(def, BDVState()));
      while (!Worklist.empty()) {
        Value *Current = Worklist.pop_back_val();
        assert(!isKnownBaseResult(Current) && "why did it get added?");
@@ -781,7 +784,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
            return;
          assert(isExpectedBDVType(Base) && "the only non-base values "
                 "we see should be base defining values");
-        if (Visited.insert(Base).second)
+        if (states.insert(std::make_pair(Base, BDVState())).second)
            Worklist.push_back(Base);
        };
        if (PHINode *Phi = dyn_cast<PHINode>(Current)) {
@@ -799,12 +802,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
          llvm_unreachable("unimplemented instruction case");
        }
      }
-    // The frontier of visited instructions are the ones we might need to
-    // duplicate, so fill in the starting state for the optimistic algorithm
-    // that follows.
-    for (Value *BDV : Visited) {
-      states[BDV] = BDVState();
-    }
    }
  
  #ifndef NDEBUG
@@ -830,7 +827,10 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
      size_t oldSize = states.size();
  #endif
      progress = false;
-    // We're only changing keys in this loop, thus safe to keep iterators
+    // We're only changing values in this loop, thus safe to keep iterators.
+    // Since this is computing a fixed point, the order of visit does not
+    // effect the result.  TODO: We could use a worklist here and make this run
+    // much faster.
      for (auto Pair : states) {
        Value *v = Pair.first;
        assert(!isKnownBaseResult(v) && "why did it get added?");
@@ -856,7 +856,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
          calculateMeet.meetWith(getStateForInput(EE->getVectorOperand()));
        }
  
-
        BDVState oldState = states[v];
        BDVState newState = calculateMeet.getResult();
        if (oldState != newState) {
@@ -877,20 +876,10 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
  #endif
    
    // Insert Phis for all conflicts
-  // We want to keep naming deterministic in the loop that follows, so
-  // sort the keys before iteration.  This is useful in allowing us to
-  // write stable tests. Note that there is no invalidation issue here.
-  SmallVector<Value *, 16> Keys;
-  Keys.reserve(states.size());
-  for (auto Pair : states) {
-    Value *V = Pair.first;
-    Keys.push_back(V);
-  }
-  std::sort(Keys.begin(), Keys.end(), order_by_name);
    // TODO: adjust naming patterns to avoid this order of iteration dependency
-  for (Value *V : Keys) {
-    Instruction *I = cast<Instruction>(V);
-    BDVState State = states[I];
+  for (auto Pair : states) {
+    Instruction *I = cast<Instruction>(Pair.first);
+    BDVState State = Pair.second;
      assert(!isKnownBaseResult(I) && "why did it get added?");
      assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
  
@@ -946,7 +935,37 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
      states[I] = BDVState(BDVState::Conflict, BaseInst);
    }
  
-  // Fixup all the inputs of the new PHIs
+  // Returns a instruction which produces the base pointer for a given
+  // instruction.  The instruction is assumed to be an input to one of the BDVs
+  // seen in the inference algorithm above.  As such, we must either already
+  // know it's base defining value is a base, or have inserted a new
+  // instruction to propagate the base of it's BDV and have entered that newly
+  // introduced instruction into the state table.  In either case, we are
+  // assured to be able to determine an instruction which produces it's base
+  // pointer. 
+  auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
+    Value *BDV = findBaseOrBDV(Input, cache);
+    Value *Base = nullptr;
+    if (isKnownBaseResult(BDV)) {
+      Base = BDV;
+    } else {
+      // Either conflict or base.
+      assert(states.count(BDV));
+      Base = states[BDV].getBase();
+    }
+    assert(Base && "can't be null");
+    // The cast is needed since base traversal may strip away bitcasts
+    if (Base->getType() != Input->getType() &&
+        InsertPt) {
+      Base = new BitCastInst(Base, Input->getType(), "cast",
+                             InsertPt);
+    }
+    return Base;
+  };
+
+  // Fixup all the inputs of the new PHIs.  Visit order needs to be
+  // deterministic and predictable because we're naming newly created
+  // instructions.
    for (auto Pair : states) {
      Instruction *v = cast<Instruction>(Pair.first);
      BDVState state = Pair.second;
@@ -976,15 +995,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
          if (blockIndex != -1) {
            Value *oldBase = basephi->getIncomingValue(blockIndex);
            basephi->addIncoming(oldBase, InBB);
+          
  #ifndef NDEBUG
-          Value *base = findBaseOrBDV(InVal, cache);
-          if (!isKnownBaseResult(base)) {
-            // Either conflict or base.
-            assert(states.count(base));
-            base = states[base].getBase();
-            assert(base != nullptr && "unknown BDVState!");
-          }
-
+          Value *Base = getBaseForInput(InVal, nullptr);
            // In essence this assert states: the only way two
            // values incoming from the same basic block may be
            // different is by being different bitcasts of the same
@@ -992,65 +1005,36 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
            // findBaseOrBDV to return an llvm::Value of the correct
            // type (and still remain pure).  This will remove the
            // need to add bitcasts.
-          assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
+          assert(Base->stripPointerCasts() == oldBase->stripPointerCasts() &&
                   "sanity -- findBaseOrBDV should be pure!");
  #endif
            continue;
          }
  
-        // Find either the defining value for the PHI or the normal base for
-        // a non-phi node
-        Value *base = findBaseOrBDV(InVal, cache);
-        if (!isKnownBaseResult(base)) {
-          // Either conflict or base.
-          assert(states.count(base));
-          base = states[base].getBase();
-          assert(base != nullptr && "unknown BDVState!");
-        }
-        assert(base && "can't be null");
-        // Must use original input BB since base may not be Instruction
-        // The cast is needed since base traversal may strip away bitcasts
-        if (base->getType() != basephi->getType()) {
-          base = new BitCastInst(base, basephi->getType(), "cast",
-                                 InBB->getTerminator());
-        }
-        basephi->addIncoming(base, InBB);
+        // Find the instruction which produces the base for each input.  We may
+        // need to insert a bitcast in the incoming block.
+        // TODO: Need to split critical edges if insertion is needed
+        Value *Base = getBaseForInput(InVal, InBB->getTerminator());
+        basephi->addIncoming(Base, InBB);
        }
        assert(basephi->getNumIncomingValues() == NumPHIValues);
-    } else if (SelectInst *basesel = dyn_cast<SelectInst>(state.getBase())) {
-      SelectInst *sel = cast<SelectInst>(v);
+    } else if (SelectInst *BaseSel = dyn_cast<SelectInst>(state.getBase())) {
+      SelectInst *Sel = cast<SelectInst>(v);
        // Operand 1 & 2 are true, false path respectively. TODO: refactor to
        // something more safe and less hacky.
        for (int i = 1; i <= 2; i++) {
-        Value *InVal = sel->getOperand(i);
-        // Find either the defining value for the PHI or the normal base for
-        // a non-phi node
-        Value *base = findBaseOrBDV(InVal, cache);
-        if (!isKnownBaseResult(base)) {
-          // Either conflict or base.
-          assert(states.count(base));
-          base = states[base].getBase();
-          assert(base != nullptr && "unknown BDVState!");
-        }
-        assert(base && "can't be null");
-        // Must use original input BB since base may not be Instruction
-        // The cast is needed since base traversal may strip away bitcasts
-        if (base->getType() != basesel->getType()) {
-          base = new BitCastInst(base, basesel->getType(), "cast", basesel);
-        }
-        basesel->setOperand(i, base);
+        Value *InVal = Sel->getOperand(i);
+        // Find the instruction which produces the base for each input.  We may
+        // need to insert a bitcast.
+        Value *Base = getBaseForInput(InVal, BaseSel);
+        BaseSel->setOperand(i, Base);
        }
      } else {
        auto *BaseEE = cast<ExtractElementInst>(state.getBase());
        Value *InVal = cast<ExtractElementInst>(v)->getVectorOperand();
-      Value *Base = findBaseOrBDV(InVal, cache);
-      if (!isKnownBaseResult(Base)) {
-        // Either conflict or base.
-        assert(states.count(Base));
-        Base = states[Base].getBase();
-        assert(Base != nullptr && "unknown BDVState!");
-      }
-      assert(Base && "can't be null");
+      // Find the instruction which produces the base for each input.  We may
+      // need to insert a bitcast.
+      Value *Base = getBaseForInput(InVal, BaseEE);
        BaseEE->setOperand(0, Base);
      }
    }
@@ -1066,18 +1050,18 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
    // Keys we sorted above for this purpose.  Note that we are papering over a
    // bigger problem with the algorithm above - it's visit order is not
    // deterministic.  A larger change is needed to fix this.
-  for (auto Key : Keys) {
-    Value *V = Key;
-    auto State = states[Key];
+  for (auto Pair : states) {
+    auto *BDV = Pair.first;
+    auto State = Pair.second;
      Value *Base = State.getBase();
-    assert(V && Base);
-    assert(!isKnownBaseResult(V) && "why did it get added?");
+    assert(BDV && Base);
+    assert(!isKnownBaseResult(BDV) && "why did it get added?");
      assert(isKnownBaseResult(Base) &&
             "must be something we 'know' is a base pointer");
      if (!State.isConflict())
        continue;
  
-    ReverseMap[Base] = V;
+    ReverseMap[Base] = BDV;
      if (auto *BaseI = dyn_cast<Instruction>(Base)) {
        NewInsts.insert(BaseI);
        Worklist.insert(BaseI);
@@ -1120,26 +1104,26 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
    // Cache all of our results so we can cheaply reuse them
    // NOTE: This is actually two caches: one of the base defining value
    // relation and one of the base pointer relation!  FIXME
-  for (auto item : states) {
-    Value *v = item.first;
-    Value *base = item.second.getBase();
-    assert(v && base);
+  for (auto Pair : states) {
+    auto *BDV = Pair.first;
+    Value *base = Pair.second.getBase();
+    assert(BDV && base);
  
      std::string fromstr =
-      cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "")
+      cache.count(BDV) ? (cache[BDV]->hasName() ? cache[BDV]->getName() : "")
                       : "none";
      DEBUG(dbgs() << "Updating base value cache"
-          << " for: " << (v->hasName() ? v->getName() : "")
+          << " for: " << (BDV->hasName() ? BDV->getName() : "")
            << " from: " << fromstr
            << " to: " << (base->hasName() ? base->getName() : "") << "\n");
  
-    if (cache.count(v)) {
+    if (cache.count(BDV)) {
        // Once we transition from the BDV relation being store in the cache to
        // the base relation being stored, it must be stable
-      assert((!isKnownBaseResult(cache[v]) || cache[v] == base) &&
+      assert((!isKnownBaseResult(cache[BDV]) || cache[BDV] == base) &&
               "base relation should be stable");
      }
-    cache[v] = base;
+    cache[BDV] = base;
    }
    assert(cache.find(def) != cache.end());
    return cache[def];