[RewriteStatepointsForGC] Move an expensive debugging check to XDEBUG

[oota-llvm.git] / lib / Transforms / Scalar / RewriteStatepointsForGC.cpp
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp

index 052a149ae26a37f78c8dfd41b7450455cf593b53..1f2597c74261ef445e42c86bb0ca2e6eb5a3280f 100644 (file)
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -17,6 +17,7 @@
  #include "llvm/ADT/SetOperations.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/CallSite.h"
  #include "llvm/IR/Dominators.h"
@@ -49,11 +50,20 @@ static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
  // Print the liveset found at the insert location
  static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                    cl::init(false));
-static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size",
-                                      cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
+                                      cl::init(false));
  // Print out the base pointers for debugging
-static cl::opt<bool> PrintBasePointers("spp-print-base-pointers",
-                                       cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
+                                       cl::init(false));
+
+#ifdef XDEBUG
+static bool ClobberNonLive = true;
+#else
+static bool ClobberNonLive = false;
+#endif
+static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
+                                                  cl::location(ClobberNonLive),
+                                                  cl::Hidden);
  
  namespace {
  struct RewriteStatepointsForGC : public FunctionPass {
@@ -85,6 +95,22 @@ INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
                      "Make relocations explicit at statepoints", false, false)
  
  namespace {
+struct GCPtrLivenessData {
+  /// Values defined in this block.
+  DenseMap<BasicBlock *, DenseSet<Value *>> KillSet;
+  /// Values used in this block (and thus live); does not include values
+  /// killed within this block.
+  DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet;
+
+  /// Values live into this basic block (i.e. used by any
+  /// instruction in this basic block or ones reachable from here)
+  DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn;
+
+  /// Values live out of this basic block (i.e. live into
+  /// any successor block)
+  DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut;
+};
+
  // The type of the internal cache used inside the findBasePointers family
  // of functions.  From the callers perspective, this is an opaque type and
  // should not be inspected.
@@ -95,31 +121,39 @@ namespace {
  // Generally, after the execution of a full findBasePointer call, only the
  // base relation will remain.  Internally, we add a mixture of the two
  // types, then update all the second type to the first type
-typedef std::map<Value *, Value *> DefiningValueMapTy;
+typedef DenseMap<Value *, Value *> DefiningValueMapTy;
+typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
  
  struct PartiallyConstructedSafepointRecord {
    /// The set of values known to be live accross this safepoint
-  std::set<llvm::Value *> liveset;
+  StatepointLiveSetTy liveset;
  
    /// Mapping from live pointers to a base-defining-value
-  std::map<llvm::Value *, llvm::Value *> base_pairs;
+  DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
  
    /// Any new values which were added to the IR during base pointer analysis
    /// for this safepoint
-  std::set<llvm::Value *> newInsertedDefs;
-
-  /// The bounds of the inserted code for the safepoint
-  std::pair<Instruction *, Instruction *> safepoint;
+  DenseSet<llvm::Value *> NewInsertedDefs;
  
-  // Instruction to which exceptional gc relocates are attached
-  // Makes it easier to iterate through them during relocationViaAlloca.
-  Instruction *exceptional_relocates_token;
+  /// The *new* gc.statepoint instruction itself.  This produces the token
+  /// that normal path gc.relocates and the gc.result are tied to.
+  Instruction *StatepointToken;
  
-  /// The result of the safepointing call (or nullptr)
-  Value *result;
+  /// Instruction to which exceptional gc relocates are attached
+  /// Makes it easier to iterate through them during relocationViaAlloca.
+  Instruction *UnwindToken;
  };
  }
  
+/// Compute the live-in set for every basic block in the function
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+                                GCPtrLivenessData &Data);
+
+/// Given results from the dataflow liveness computation, find the set of live
+/// Values at a particular instruction.
+static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
+                              StatepointLiveSetTy &out);
+
  // TODO: Once we can get to the GCStrategy, this becomes
  // Optional<bool> isGCManagedPointer(const Value *V) const override {
  
@@ -132,130 +166,46 @@ static bool isGCPointerType(const Type *T) {
    return false;
  }
  
-/// Return true if the Value is a gc reference type which is potentially used
-/// after the instruction 'loc'.  This is only used with the edge reachability
-/// liveness code.  Note: It is assumed the V dominates loc.
-static bool isLiveGCReferenceAt(Value &V, Instruction *loc, DominatorTree &DT,
-                                LoopInfo *LI) {
-  if (!isGCPointerType(V.getType()))
-    return false;
-
-  if (V.use_empty())
-    return false;
-
-  // Given assumption that V dominates loc, this may be live
-  return true;
+// Return true if this type is one which a) is a gc pointer or contains a GC
+// pointer and b) is of a type this code expects to encounter as a live value.
+// (The insertion code will assert that a type which matches (a) and not (b)
+// is not encountered.)
+static bool isHandledGCPointerType(Type *T) {
+  // We fully support gc pointers
+  if (isGCPointerType(T))
+    return true;
+  // We partially support vectors of gc pointers. The code will assert if it
+  // can't handle something.
+  if (auto VT = dyn_cast<VectorType>(T))
+    if (isGCPointerType(VT->getElementType()))
+      return true;
+  return false;
  }
  
  #ifndef NDEBUG
-static bool isAggWhichContainsGCPtrType(Type *Ty) {
+/// Returns true if this type contains a gc pointer whether we know how to
+/// handle that type or not.
+static bool containsGCPtrType(Type *Ty) {
+  if (isGCPointerType(Ty))
+    return true;
    if (VectorType *VT = dyn_cast<VectorType>(Ty))
      return isGCPointerType(VT->getScalarType());
-  else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
-    return isGCPointerType(AT->getElementType()) ||
-           isAggWhichContainsGCPtrType(AT->getElementType());
-  } else if (StructType *ST = dyn_cast<StructType>(Ty)) {
-    bool UnsupportedType = false;
-    for (Type *SubType : ST->subtypes())
-      UnsupportedType |=
-          isGCPointerType(SubType) || isAggWhichContainsGCPtrType(SubType);
-    return UnsupportedType;
-  } else
-    return false;
+  if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
+    return containsGCPtrType(AT->getElementType());
+  if (StructType *ST = dyn_cast<StructType>(Ty))
+    return std::any_of(
+        ST->subtypes().begin(), ST->subtypes().end(),
+        [](Type *SubType) { return containsGCPtrType(SubType); });
+  return false;
  }
-#endif
-
-// Conservatively identifies any definitions which might be live at the
-// given instruction. The  analysis is performed immediately before the
-// given instruction. Values defined by that instruction are not considered
-// live.  Values used by that instruction are considered live.
-//
-// preconditions: valid IR graph, term is either a terminator instruction or
-// a call instruction, pred is the basic block of term, DT, LI are valid
-//
-// side effects: none, does not mutate IR
-//
-//  postconditions: populates liveValues as discussed above
-static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred,
-                                   DominatorTree &DT, LoopInfo *LI,
-                                   std::set<llvm::Value *> &liveValues) {
-  liveValues.clear();
-
-  assert(isa<CallInst>(term) || isa<InvokeInst>(term) || term->isTerminator());
-
-  Function *F = pred->getParent();
-
-  auto is_live_gc_reference =
-      [&](Value &V) { return isLiveGCReferenceAt(V, term, DT, LI); };
-
-  // Are there any gc pointer arguments live over this point?  This needs to be
-  // special cased since arguments aren't defined in basic blocks.
-  for (Argument &arg : F->args()) {
-    assert(!isAggWhichContainsGCPtrType(arg.getType()) &&
-           "support for FCA unimplemented");
-
-    if (is_live_gc_reference(arg)) {
-      liveValues.insert(&arg);
-    }
-  }
-
-  // Walk through all dominating blocks - the ones which can contain
-  // definitions used in this block - and check to see if any of the values
-  // they define are used in locations potentially reachable from the
-  // interesting instruction.
-  BasicBlock *BBI = pred;
-  while (true) {
-    if (TraceLSP) {
-      errs() << "[LSP] Looking at dominating block " << pred->getName() << "\n";
-    }
-    assert(DT.dominates(BBI, pred));
-    assert(isPotentiallyReachable(BBI, pred, &DT) &&
-           "dominated block must be reachable");
-
-    // Walk through the instructions in dominating blocks and keep any
-    // that have a use potentially reachable from the block we're
-    // considering putting the safepoint in
-    for (Instruction &inst : *BBI) {
-      if (TraceLSP) {
-        errs() << "[LSP] Looking at instruction ";
-        inst.dump();
-      }
  
-      if (pred == BBI && (&inst) == term) {
-        if (TraceLSP) {
-          errs() << "[LSP] stopped because we encountered the safepoint "
-                    "instruction.\n";
-        }
-
-        // If we're in the block which defines the interesting instruction,
-        // we don't want to include any values as live which are defined
-        // _after_ the interesting line or as part of the line itself
-        // i.e. "term" is the call instruction for a call safepoint, the
-        // results of the call should not be considered live in that stackmap
-        break;
-      }
-
-      assert(!isAggWhichContainsGCPtrType(inst.getType()) &&
-             "support for FCA unimplemented");
-
-      if (is_live_gc_reference(inst)) {
-        if (TraceLSP) {
-          errs() << "[LSP] found live value for this safepoint ";
-          inst.dump();
-          term->dump();
-        }
-        liveValues.insert(&inst);
-      }
-    }
-    if (!DT.getNode(BBI)->getIDom()) {
-      assert(BBI == &F->getEntryBlock() &&
-             "failed to find a dominator for something other than "
-             "the entry block");
-      break;
-    }
-    BBI = DT.getNode(BBI)->getIDom()->getBlock();
-  }
+// Returns true if this is a type which a) is a gc pointer or contains a GC
+// pointer and b) is of a type which the code doesn't expect (i.e. first class
+// aggregates).  Used to trip assertions.
+static bool isUnhandledGCPointerType(Type *Ty) {
+  return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
  }
+#endif
  
  static bool order_by_name(llvm::Value *a, llvm::Value *b) {
    if (a->hasName() && b->hasName()) {
@@ -270,22 +220,23 @@ static bool order_by_name(llvm::Value *a, llvm::Value *b) {
    }
  }
  
-/// Find the initial live set. Note that due to base pointer
-/// insertion, the live set may be incomplete.
-static void
-analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS,
-                          PartiallyConstructedSafepointRecord &result) {
+// Conservatively identifies any definitions which might be live at the
+// given instruction. The  analysis is performed immediately before the
+// given instruction. Values defined by that instruction are not considered
+// live.  Values used by that instruction are considered live.
+static void analyzeParsePointLiveness(
+    DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
+    const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
    Instruction *inst = CS.getInstruction();
  
-  BasicBlock *BB = inst->getParent();
-  std::set<Value *> liveset;
-  findLiveGCValuesAtInst(inst, BB, DT, nullptr, liveset);
+  StatepointLiveSetTy liveset;
+  findLiveSetAtInst(inst, OriginalLivenessData, liveset);
  
    if (PrintLiveSet) {
      // Note: This output is used by several of the test cases
      // The order of elemtns in a set is not stable, put them in a vec and sort
      // by name
-    std::vector<Value *> temp;
+    SmallVector<Value *, 64> temp;
      temp.insert(temp.end(), liveset.begin(), liveset.end());
      std::sort(temp.begin(), temp.end(), order_by_name);
      errs() << "Live Variables:\n";
@@ -301,10 +252,49 @@ analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS,
    result.liveset = liveset;
  }
  
-/// True iff this value is the null pointer constant (of any pointer type)
-static bool isNullConstant(Value *V) {
-  return isa<Constant>(V) && isa<PointerType>(V->getType()) &&
-         cast<Constant>(V)->isNullValue();
+/// If we can trivially determine that this vector contains only base pointers,
+/// return the base instruction.
+static Value *findBaseOfVector(Value *I) {
+  assert(I->getType()->isVectorTy() &&
+         cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
+         "Illegal to ask for the base pointer of a non-pointer type");
+
+  // Each case parallels findBaseDefiningValue below, see that code for
+  // detailed motivation.
+
+  if (isa<Argument>(I))
+    // An incoming argument to the function is a base pointer
+    return I;
+
+  // We shouldn't see the address of a global as a vector value?
+  assert(!isa<GlobalVariable>(I) &&
+         "unexpected global variable found in base of vector");
+
+  // inlining could possibly introduce phi node that contains
+  // undef if callee has multiple returns
+  if (isa<UndefValue>(I))
+    // utterly meaningless, but useful for dealing with partially optimized
+    // code.
+    return I;
+
+  // Due to inheritance, this must be _after_ the global variable and undef
+  // checks
+  if (Constant *Con = dyn_cast<Constant>(I)) {
+    assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
+           "order of checks wrong!");
+    assert(Con->isNullValue() && "null is the only case which makes sense");
+    return Con;
+  }
+
+  if (isa<LoadInst>(I))
+    return I;
+
+  // Note: This code is currently rather incomplete.  We are essentially only
+  // handling cases where the vector element is trivially a base pointer.  We
+  // need to update the entire base pointer construction algorithm to know how
+  // to track vector elements and potentially scalarize, but the case which
+  // would motivate the work hasn't shown up in real workloads yet.
+  llvm_unreachable("no base found for vector element");
  }
  
  /// Helper function for findBasePointer - Will return a value which either a)
@@ -314,52 +304,36 @@ static Value *findBaseDefiningValue(Value *I) {
    assert(I->getType()->isPointerTy() &&
           "Illegal to ask for the base pointer of a non-pointer type");
  
-  // There are instructions which can never return gc pointer values.  Sanity
-  // check
-  // that this is actually true.
-  assert(!isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
-         !isa<ShuffleVectorInst>(I) && "Vector types are not gc pointers");
-  assert((!isa<Instruction>(I) || isa<InvokeInst>(I) ||
-          !cast<Instruction>(I)->isTerminator()) &&
-         "With the exception of invoke terminators don't define values");
-  assert(!isa<StoreInst>(I) && !isa<FenceInst>(I) &&
-         "Can't be definitions to start with");
-  assert(!isa<ICmpInst>(I) && !isa<FCmpInst>(I) &&
-         "Comparisons don't give ops");
-  // There's a bunch of instructions which just don't make sense to apply to
-  // a pointer.  The only valid reason for this would be pointer bit
-  // twiddling which we're just not going to support.
-  assert((!isa<Instruction>(I) || !cast<Instruction>(I)->isBinaryOp()) &&
-         "Binary ops on pointer values are meaningless.  Unless your "
-         "bit-twiddling which we don't support");
-
-  if (Argument *Arg = dyn_cast<Argument>(I)) {
+  // This case is a bit of a hack - it only handles extracts from vectors which
+  // trivially contain only base pointers.  See the note inside findBaseOfVector
+  // for how to improve this.
+  if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
+    Value *VectorOperand = EEI->getVectorOperand();
+    Value *VectorBase = findBaseOfVector(VectorOperand);
+    (void)VectorBase;
+    assert(VectorBase && "extract element not known to be a trivial base");
+    return EEI;
+  }
+
+  if (isa<Argument>(I))
      // An incoming argument to the function is a base pointer
      // We should have never reached here if this argument isn't an gc value
-    assert(Arg->getType()->isPointerTy() &&
-           "Base for pointer must be another pointer");
-    return Arg;
-  }
+    return I;
  
-  if (GlobalVariable *global = dyn_cast<GlobalVariable>(I)) {
+  if (isa<GlobalVariable>(I))
      // base case
-    assert(global->getType()->isPointerTy() &&
-           "Base for pointer must be another pointer");
-    return global;
-  }
+    return I;
  
    // inlining could possibly introduce phi node that contains
    // undef if callee has multiple returns
-  if (UndefValue *undef = dyn_cast<UndefValue>(I)) {
-    assert(undef->getType()->isPointerTy() &&
-           "Base for pointer must be another pointer");
-    return undef; // utterly meaningless, but useful for dealing with
-                  // partially optimized code.
-  }
+  if (isa<UndefValue>(I))
+    // utterly meaningless, but useful for dealing with
+    // partially optimized code.
+    return I;
  
    // Due to inheritance, this must be _after_ the global variable and undef
    // checks
-  if (Constant *con = dyn_cast<Constant>(I)) {
+  if (Constant *Con = dyn_cast<Constant>(I)) {
      assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
             "order of checks wrong!");
      // Note: Finding a constant base for something marked for relocation
@@ -370,52 +344,30 @@ static Value *findBaseDefiningValue(Value *I) {
      // off a potentially null value and have proven it null.  We also use
      // null pointers in dead paths of relocation phis (which we might later
      // want to find a base pointer for).
-    assert(con->getType()->isPointerTy() &&
-           "Base for pointer must be another pointer");
-    assert(con->isNullValue() && "null is the only case which makes sense");
-    return con;
+    assert(isa<ConstantPointerNull>(Con) &&
+           "null is the only case which makes sense");
+    return Con;
    }
  
    if (CastInst *CI = dyn_cast<CastInst>(I)) {
-    Value *def = CI->stripPointerCasts();
-    assert(def->getType()->isPointerTy() &&
-           "Base for pointer must be another pointer");
-    if (isa<CastInst>(def)) {
-      // If we find a cast instruction here, it means we've found a cast
-      // which is not simply a pointer cast (i.e. an inttoptr).  We don't
-      // know how to handle int->ptr conversion.
-      llvm_unreachable("Can not find the base pointers for an inttoptr cast");
-    }
-    assert(!isa<CastInst>(def) && "shouldn't find another cast here");
-    return findBaseDefiningValue(def);
+    Value *Def = CI->stripPointerCasts();
+    // If we find a cast instruction here, it means we've found a cast which is
+    // not simply a pointer cast (i.e. an inttoptr).  We don't know how to
+    // handle int->ptr conversion.
+    assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
+    return findBaseDefiningValue(Def);
    }
  
-  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    if (LI->getType()->isPointerTy()) {
-      Value *Op = LI->getOperand(0);
-      (void)Op;
-      // Has to be a pointer to an gc object, or possibly an array of such?
-      assert(Op->getType()->isPointerTy());
-      return LI; // The value loaded is an gc base itself
-    }
-  }
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
-    Value *Op = GEP->getOperand(0);
-    if (Op->getType()->isPointerTy()) {
-      return findBaseDefiningValue(Op); // The base of this GEP is the base
-    }
-  }
+  if (isa<LoadInst>(I))
+    return I; // The value loaded is an gc base itself
  
-  if (AllocaInst *alloc = dyn_cast<AllocaInst>(I)) {
-    // An alloca represents a conceptual stack slot.  It's the slot itself
-    // that the GC needs to know about, not the value in the slot.
-    assert(alloc->getType()->isPointerTy() &&
-           "Base for pointer must be another pointer");
-    return alloc;
-  }
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+    // The base of this GEP is the base
+    return findBaseDefiningValue(GEP->getPointerOperand());
  
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
+    case Intrinsic::experimental_gc_result_ptr:
      default:
        // fall through to general call handling
        break;
@@ -423,11 +375,6 @@ static Value *findBaseDefiningValue(Value *I) {
      case Intrinsic::experimental_gc_result_float:
      case Intrinsic::experimental_gc_result_int:
        llvm_unreachable("these don't produce pointers");
-    case Intrinsic::experimental_gc_result_ptr:
-      // This is just a special case of the CallInst check below to handle a
-      // statepoint with deopt args which hasn't been rewritten for GC yet.
-      // TODO: Assert that the statepoint isn't rewritten yet.
-      return II;
      case Intrinsic::experimental_gc_relocate: {
        // Rerunning safepoint insertion after safepoints are already
        // inserted is not supported.  It could probably be made to work,
@@ -445,41 +392,27 @@ static Value *findBaseDefiningValue(Value *I) {
    // We assume that functions in the source language only return base
    // pointers.  This should probably be generalized via attributes to support
    // both source language and internal functions.
-  if (CallInst *call = dyn_cast<CallInst>(I)) {
-    assert(call->getType()->isPointerTy() &&
-           "Base for pointer must be another pointer");
-    return call;
-  }
-  if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) {
-    assert(invoke->getType()->isPointerTy() &&
-           "Base for pointer must be another pointer");
-    return invoke;
-  }
+  if (isa<CallInst>(I) || isa<InvokeInst>(I))
+    return I;
  
    // I have absolutely no idea how to implement this part yet.  It's not
    // neccessarily hard, I just haven't really looked at it yet.
    assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
  
-  if (AtomicCmpXchgInst *cas = dyn_cast<AtomicCmpXchgInst>(I)) {
+  if (isa<AtomicCmpXchgInst>(I))
      // A CAS is effectively a atomic store and load combined under a
      // predicate.  From the perspective of base pointers, we just treat it
-    // like a load.  We loaded a pointer from a address in memory, that value
-    // had better be a valid base pointer.
-    return cas->getPointerOperand();
-  }
-  if (AtomicRMWInst *atomic = dyn_cast<AtomicRMWInst>(I)) {
-    assert(AtomicRMWInst::Xchg == atomic->getOperation() &&
-           "All others are binary ops which don't apply to base pointers");
-    // semantically, a load, store pair.  Treat it the same as a standard load
-    return atomic->getPointerOperand();
-  }
+    // like a load.
+    return I;
+
+  assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
+                                   "binary ops which don't apply to pointers");
  
    // The aggregate ops.  Aggregates can either be in the heap or on the
    // stack, but in either case, this is simply a field load.  As a result,
    // this is a defining definition of the base just like a load is.
-  if (ExtractValueInst *ev = dyn_cast<ExtractValueInst>(I)) {
-    return ev;
-  }
+  if (isa<ExtractValueInst>(I))
+    return I;
  
    // We should never see an insert vector since that would require we be
    // tracing back a struct value not a pointer value.
@@ -490,28 +423,21 @@ static Value *findBaseDefiningValue(Value *I) {
    // return a value which dynamically selects from amoung several base
    // derived pointers (each with it's own base potentially).  It's the job of
    // the caller to resolve these.
-  if (SelectInst *select = dyn_cast<SelectInst>(I)) {
-    return select;
-  }
-  if (PHINode *phi = dyn_cast<PHINode>(I)) {
-    return phi;
-  }
-
-  errs() << "unknown type: " << *I << "\n";
-  llvm_unreachable("unknown type");
-  return nullptr;
+  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
+         "missing instruction case in findBaseDefiningValing");
+  return I;
  }
  
  /// Returns the base defining value for this value.
-static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) {
-  Value *&Cached = cache[I];
+static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
+  Value *&Cached = Cache[I];
    if (!Cached) {
      Cached = findBaseDefiningValue(I);
    }
-  assert(cache[I] != nullptr);
+  assert(Cache[I] != nullptr);
  
    if (TraceLSP) {
-    errs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
+    dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName()
             << "\n";
    }
    return Cached;
@@ -519,25 +445,26 @@ static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &cache) {
  
  /// Return a base pointer for this value if known.  Otherwise, return it's
  /// base defining value.
-static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &cache) {
-  Value *def = findBaseDefiningValueCached(I, cache);
-  auto Found = cache.find(def);
-  if (Found != cache.end()) {
+static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
+  Value *Def = findBaseDefiningValueCached(I, Cache);
+  auto Found = Cache.find(Def);
+  if (Found != Cache.end()) {
      // Either a base-of relation, or a self reference.  Caller must check.
      return Found->second;
    }
    // Only a BDV available
-  return def;
+  return Def;
  }
  
  /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
  /// is it known to be a base pointer?  Or do we need to continue searching.
-static bool isKnownBaseResult(Value *v) {
-  if (!isa<PHINode>(v) && !isa<SelectInst>(v)) {
+static bool isKnownBaseResult(Value *V) {
+  if (!isa<PHINode>(V) && !isa<SelectInst>(V)) {
      // no recursion possible
      return true;
    }
-  if (cast<Instruction>(v)->getMetadata("is_base_value")) {
+  if (isa<Instruction>(V) &&
+      cast<Instruction>(V)->getMetadata("is_base_value")) {
      // This is a previously inserted base phi or select.  We know
      // that this is a base value.
      return true;
@@ -558,9 +485,6 @@ public:
    }
    PhiState(Value *b) : status(Base), base(b) {}
    PhiState() : status(Unknown), base(nullptr) {}
-  PhiState(const PhiState &other) : status(other.status), base(other.base) {
-    assert(status != Base || base);
-  }
  
    Status getStatus() const { return status; }
    Value *getBase() const { return base; }
@@ -585,13 +509,14 @@ private:
    Value *base; // non null only if status == base
  };
  
+typedef DenseMap<Value *, PhiState> ConflictStateMapTy;
  // Values of type PhiState form a lattice, and this is a helper
  // class that implementes the meet operation.  The meat of the meet
  // operation is implemented in MeetPhiStates::pureMeet
  class MeetPhiStates {
  public:
    // phiStates is a mapping from PHINodes and SelectInst's to PhiStates.
-  explicit MeetPhiStates(const std::map<Value *, PhiState> &phiStates)
+  explicit MeetPhiStates(const ConflictStateMapTy &phiStates)
        : phiStates(phiStates) {}
  
    // Destructively meet the current result with the base V.  V can
@@ -609,7 +534,7 @@ public:
    PhiState getResult() const { return currentResult; }
  
  private:
-  const std::map<Value *, PhiState> &phiStates;
+  const ConflictStateMapTy &phiStates;
    PhiState currentResult;
  
    /// Return a phi state for a base defining value.  We'll generate a new
@@ -635,23 +560,23 @@ private:
  
      case PhiState::Base:
        assert(stateA.getBase() && "can't be null");
-      if (stateB.isUnknown()) {
+      if (stateB.isUnknown())
          return stateA;
-      } else if (stateB.isBase()) {
+
+      if (stateB.isBase()) {
          if (stateA.getBase() == stateB.getBase()) {
            assert(stateA == stateB && "equality broken!");
            return stateA;
          }
          return PhiState(PhiState::Conflict);
-      } else {
-        assert(stateB.isConflict() && "only three states!");
-        return PhiState(PhiState::Conflict);
        }
+      assert(stateB.isConflict() && "only three states!");
+      return PhiState(PhiState::Conflict);
  
      case PhiState::Conflict:
        return stateA;
      }
-    assert(false && "only three states!");
+    llvm_unreachable("only three states!");
    }
  };
  }
@@ -660,7 +585,7 @@ private:
  /// which is the base pointer.  (This is reliable and can be used for
  /// relocation.)  On failure, returns nullptr.
  static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
-                              std::set<llvm::Value *> &newInsertedDefs) {
+                              DenseSet<llvm::Value *> &NewInsertedDefs) {
    Value *def = findBaseOrBDV(I, cache);
  
    if (isKnownBaseResult(def)) {
@@ -689,22 +614,28 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
    // analougous to pessimistic data flow and would likely lead to an
    // overall worse solution.
  
-  std::map<Value *, PhiState> states;
+  ConflictStateMapTy states;
    states[def] = PhiState();
    // Recursively fill in all phis & selects reachable from the initial one
    // for which we don't already know a definite base value for
-  // PERF: Yes, this is as horribly inefficient as it looks.
+  // TODO: This should be rewritten with a worklist
    bool done = false;
    while (!done) {
      done = true;
+    // Since we're adding elements to 'states' as we run, we can't keep
+    // iterators into the map.
+    SmallVector<Value *, 16> Keys;
+    Keys.reserve(states.size());
      for (auto Pair : states) {
-      Value *v = Pair.first;
+      Value *V = Pair.first;
+      Keys.push_back(V);
+    }
+    for (Value *v : Keys) {
        assert(!isKnownBaseResult(v) && "why did it get added?");
        if (PHINode *phi = dyn_cast<PHINode>(v)) {
-        unsigned NumPHIValues = phi->getNumIncomingValues();
-        assert(NumPHIValues > 0 && "zero input phis are illegal");
-        for (unsigned i = 0; i != NumPHIValues; ++i) {
-          Value *InVal = phi->getIncomingValue(i);
+        assert(phi->getNumIncomingValues() > 0 &&
+               "zero input phis are illegal");
+        for (Value *InVal : phi->incoming_values()) {
            Value *local = findBaseOrBDV(InVal, cache);
            if (!isKnownBaseResult(local) && states.find(local) == states.end()) {
              states[local] = PhiState();
@@ -740,26 +671,22 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
    // have reached conflict state.  The current version seems too conservative.
  
    bool progress = true;
-  size_t oldSize = 0;
    while (progress) {
-    oldSize = states.size();
+#ifndef NDEBUG
+    size_t oldSize = states.size();
+#endif
      progress = false;
+    // We're only changing values in this loop, thus safe to keep iterators
      for (auto Pair : states) {
        MeetPhiStates calculateMeet(states);
        Value *v = Pair.first;
        assert(!isKnownBaseResult(v) && "why did it get added?");
-      assert(isa<SelectInst>(v) || isa<PHINode>(v));
        if (SelectInst *select = dyn_cast<SelectInst>(v)) {
          calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache));
          calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache));
-      } else if (PHINode *phi = dyn_cast<PHINode>(v)) {
-        for (unsigned i = 0; i < phi->getNumIncomingValues(); i++) {
-          calculateMeet.meetWith(
-              findBaseOrBDV(phi->getIncomingValue(i), cache));
-        }
-      } else {
-        llvm_unreachable("no such state expected");
-      }
+      } else
+        for (Value *Val : cast<PHINode>(v)->incoming_values())
+          calculateMeet.meetWith(findBaseOrBDV(Val, cache));
  
        PhiState oldState = states[v];
        PhiState newState = calculateMeet.getResult();
@@ -784,47 +711,58 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
    }
  
    // Insert Phis for all conflicts
+  // We want to keep naming deterministic in the loop that follows, so
+  // sort the keys before iteration.  This is useful in allowing us to
+  // write stable tests. Note that there is no invalidation issue here.
+  SmallVector<Value *, 16> Keys;
+  Keys.reserve(states.size());
    for (auto Pair : states) {
-    Instruction *v = cast<Instruction>(Pair.first);
-    PhiState state = Pair.second;
+    Value *V = Pair.first;
+    Keys.push_back(V);
+  }
+  std::sort(Keys.begin(), Keys.end(), order_by_name);
+  // TODO: adjust naming patterns to avoid this order of iteration dependency
+  for (Value *V : Keys) {
+    Instruction *v = cast<Instruction>(V);
+    PhiState state = states[V];
      assert(!isKnownBaseResult(v) && "why did it get added?");
      assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
-    if (state.isConflict()) {
-      if (isa<PHINode>(v)) {
-        int num_preds =
-            std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
-        assert(num_preds > 0 && "how did we reach here");
-        PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
-        newInsertedDefs.insert(phi);
-        // Add metadata marking this as a base value
-        auto *const_1 = ConstantInt::get(
-            Type::getInt32Ty(
-                v->getParent()->getParent()->getParent()->getContext()),
-            1);
-        auto MDConst = ConstantAsMetadata::get(const_1);
-        MDNode *md = MDNode::get(
-            v->getParent()->getParent()->getParent()->getContext(), MDConst);
-        phi->setMetadata("is_base_value", md);
-        states[v] = PhiState(PhiState::Conflict, phi);
-      } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) {
-        // The undef will be replaced later
-        UndefValue *undef = UndefValue::get(sel->getType());
-        SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
-                                                 undef, "base_select", sel);
-        newInsertedDefs.insert(basesel);
-        // Add metadata marking this as a base value
-        auto *const_1 = ConstantInt::get(
-            Type::getInt32Ty(
-                v->getParent()->getParent()->getParent()->getContext()),
-            1);
-        auto MDConst = ConstantAsMetadata::get(const_1);
-        MDNode *md = MDNode::get(
-            v->getParent()->getParent()->getParent()->getContext(), MDConst);
-        basesel->setMetadata("is_base_value", md);
-        states[v] = PhiState(PhiState::Conflict, basesel);
-      } else {
-        assert(false);
-      }
+    if (!state.isConflict())
+      continue;
+
+    if (isa<PHINode>(v)) {
+      int num_preds =
+          std::distance(pred_begin(v->getParent()), pred_end(v->getParent()));
+      assert(num_preds > 0 && "how did we reach here");
+      PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v);
+      NewInsertedDefs.insert(phi);
+      // Add metadata marking this as a base value
+      auto *const_1 = ConstantInt::get(
+          Type::getInt32Ty(
+              v->getParent()->getParent()->getParent()->getContext()),
+          1);
+      auto MDConst = ConstantAsMetadata::get(const_1);
+      MDNode *md = MDNode::get(
+          v->getParent()->getParent()->getParent()->getContext(), MDConst);
+      phi->setMetadata("is_base_value", md);
+      states[v] = PhiState(PhiState::Conflict, phi);
+    } else {
+      SelectInst *sel = cast<SelectInst>(v);
+      // The undef will be replaced later
+      UndefValue *undef = UndefValue::get(sel->getType());
+      SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef,
+                                               undef, "base_select", sel);
+      NewInsertedDefs.insert(basesel);
+      // Add metadata marking this as a base value
+      auto *const_1 = ConstantInt::get(
+          Type::getInt32Ty(
+              v->getParent()->getParent()->getParent()->getContext()),
+          1);
+      auto MDConst = ConstantAsMetadata::get(const_1);
+      MDNode *md = MDNode::get(
+          v->getParent()->getParent()->getParent()->getContext(), MDConst);
+      basesel->setMetadata("is_base_value", md);
+      states[v] = PhiState(PhiState::Conflict, basesel);
      }
    }
  
@@ -835,97 +773,97 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
  
      assert(!isKnownBaseResult(v) && "why did it get added?");
      assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
-    if (state.isConflict()) {
-      if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
-        PHINode *phi = cast<PHINode>(v);
-        unsigned NumPHIValues = phi->getNumIncomingValues();
-        for (unsigned i = 0; i < NumPHIValues; i++) {
-          Value *InVal = phi->getIncomingValue(i);
-          BasicBlock *InBB = phi->getIncomingBlock(i);
-
-          // If we've already seen InBB, add the same incoming value
-          // we added for it earlier.  The IR verifier requires phi
-          // nodes with multiple entries from the same basic block
-          // to have the same incoming value for each of those
-          // entries.  If we don't do this check here and basephi
-          // has a different type than base, we'll end up adding two
-          // bitcasts (and hence two distinct values) as incoming
-          // values for the same basic block.
-
-          int blockIndex = basephi->getBasicBlockIndex(InBB);
-          if (blockIndex != -1) {
-            Value *oldBase = basephi->getIncomingValue(blockIndex);
-            basephi->addIncoming(oldBase, InBB);
-#ifndef NDEBUG
-            Value *base = findBaseOrBDV(InVal, cache);
-            if (!isKnownBaseResult(base)) {
-              // Either conflict or base.
-              assert(states.count(base));
-              base = states[base].getBase();
-              assert(base != nullptr && "unknown PhiState!");
-              assert(newInsertedDefs.count(base) &&
-                     "should have already added this in a prev. iteration!");
-            }
-
-            // In essense this assert states: the only way two
-            // values incoming from the same basic block may be
-            // different is by being different bitcasts of the same
-            // value.  A cleanup that remains TODO is changing
-            // findBaseOrBDV to return an llvm::Value of the correct
-            // type (and still remain pure).  This will remove the
-            // need to add bitcasts.
-            assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
-                   "sanity -- findBaseOrBDV should be pure!");
-#endif
-            continue;
-          }
+    if (!state.isConflict())
+      continue;
  
-          // Find either the defining value for the PHI or the normal base for
-          // a non-phi node
+    if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
+      PHINode *phi = cast<PHINode>(v);
+      unsigned NumPHIValues = phi->getNumIncomingValues();
+      for (unsigned i = 0; i < NumPHIValues; i++) {
+        Value *InVal = phi->getIncomingValue(i);
+        BasicBlock *InBB = phi->getIncomingBlock(i);
+
+        // If we've already seen InBB, add the same incoming value
+        // we added for it earlier.  The IR verifier requires phi
+        // nodes with multiple entries from the same basic block
+        // to have the same incoming value for each of those
+        // entries.  If we don't do this check here and basephi
+        // has a different type than base, we'll end up adding two
+        // bitcasts (and hence two distinct values) as incoming
+        // values for the same basic block.
+
+        int blockIndex = basephi->getBasicBlockIndex(InBB);
+        if (blockIndex != -1) {
+          Value *oldBase = basephi->getIncomingValue(blockIndex);
+          basephi->addIncoming(oldBase, InBB);
+#ifndef NDEBUG
            Value *base = findBaseOrBDV(InVal, cache);
            if (!isKnownBaseResult(base)) {
              // Either conflict or base.
              assert(states.count(base));
              base = states[base].getBase();
              assert(base != nullptr && "unknown PhiState!");
+            assert(NewInsertedDefs.count(base) &&
+                   "should have already added this in a prev. iteration!");
            }
-          assert(base && "can't be null");
-          // Must use original input BB since base may not be Instruction
-          // The cast is needed since base traversal may strip away bitcasts
-          if (base->getType() != basephi->getType()) {
-            base = new BitCastInst(base, basephi->getType(), "cast",
-                                   InBB->getTerminator());
-            newInsertedDefs.insert(base);
-          }
-          basephi->addIncoming(base, InBB);
+
+          // In essence this assert states: the only way two
+          // values incoming from the same basic block may be
+          // different is by being different bitcasts of the same
+          // value.  A cleanup that remains TODO is changing
+          // findBaseOrBDV to return an llvm::Value of the correct
+          // type (and still remain pure).  This will remove the
+          // need to add bitcasts.
+          assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
+                 "sanity -- findBaseOrBDV should be pure!");
+#endif
+          continue;
          }
-        assert(basephi->getNumIncomingValues() == NumPHIValues);
-      } else if (SelectInst *basesel = dyn_cast<SelectInst>(state.getBase())) {
-        SelectInst *sel = cast<SelectInst>(v);
-        // Operand 1 & 2 are true, false path respectively. TODO: refactor to
-        // something more safe and less hacky.
-        for (int i = 1; i <= 2; i++) {
-          Value *InVal = sel->getOperand(i);
-          // Find either the defining value for the PHI or the normal base for
-          // a non-phi node
-          Value *base = findBaseOrBDV(InVal, cache);
-          if (!isKnownBaseResult(base)) {
-            // Either conflict or base.
-            assert(states.count(base));
-            base = states[base].getBase();
-            assert(base != nullptr && "unknown PhiState!");
-          }
-          assert(base && "can't be null");
-          // Must use original input BB since base may not be Instruction
-          // The cast is needed since base traversal may strip away bitcasts
-          if (base->getType() != basesel->getType()) {
-            base = new BitCastInst(base, basesel->getType(), "cast", basesel);
-            newInsertedDefs.insert(base);
-          }
-          basesel->setOperand(i, base);
+
+        // Find either the defining value for the PHI or the normal base for
+        // a non-phi node
+        Value *base = findBaseOrBDV(InVal, cache);
+        if (!isKnownBaseResult(base)) {
+          // Either conflict or base.
+          assert(states.count(base));
+          base = states[base].getBase();
+          assert(base != nullptr && "unknown PhiState!");
          }
-      } else {
-        assert(false && "unexpected type");
+        assert(base && "can't be null");
+        // Must use original input BB since base may not be Instruction
+        // The cast is needed since base traversal may strip away bitcasts
+        if (base->getType() != basephi->getType()) {
+          base = new BitCastInst(base, basephi->getType(), "cast",
+                                 InBB->getTerminator());
+          NewInsertedDefs.insert(base);
+        }
+        basephi->addIncoming(base, InBB);
+      }
+      assert(basephi->getNumIncomingValues() == NumPHIValues);
+    } else {
+      SelectInst *basesel = cast<SelectInst>(state.getBase());
+      SelectInst *sel = cast<SelectInst>(v);
+      // Operand 1 & 2 are true, false path respectively. TODO: refactor to
+      // something more safe and less hacky.
+      for (int i = 1; i <= 2; i++) {
+        Value *InVal = sel->getOperand(i);
+        // Find either the defining value for the PHI or the normal base for
+        // a non-phi node
+        Value *base = findBaseOrBDV(InVal, cache);
+        if (!isKnownBaseResult(base)) {
+          // Either conflict or base.
+          assert(states.count(base));
+          base = states[base].getBase();
+          assert(base != nullptr && "unknown PhiState!");
+        }
+        assert(base && "can't be null");
+        // Must use original input BB since base may not be Instruction
+        // The cast is needed since base traversal may strip away bitcasts
+        if (base->getType() != basesel->getType()) {
+          base = new BitCastInst(base, basesel->getType(), "cast", basesel);
+          NewInsertedDefs.insert(base);
+        }
+        basesel->setOperand(i, base);
        }
      }
    }
@@ -975,29 +913,36 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache,
  // side effects: may insert PHI nodes into the existing CFG, will preserve
  // CFG, will not remove or mutate any existing nodes
  //
-// post condition: base_pairs contains one (derived, base) pair for every
+// post condition: PointerToBase contains one (derived, base) pair for every
  // pointer in live.  Note that derived can be equal to base if the original
  // pointer was a base pointer.
-static void findBasePointers(const std::set<llvm::Value *> &live,
-                             std::map<llvm::Value *, llvm::Value *> &base_pairs,
-                             DominatorTree *DT, DefiningValueMapTy &DVCache,
-                             std::set<llvm::Value *> &newInsertedDefs) {
-  for (Value *ptr : live) {
-    Value *base = findBasePointer(ptr, DVCache, newInsertedDefs);
+static void
+findBasePointers(const StatepointLiveSetTy &live,
+                 DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
+                 DominatorTree *DT, DefiningValueMapTy &DVCache,
+                 DenseSet<llvm::Value *> &NewInsertedDefs) {
+  // For the naming of values inserted to be deterministic - which makes for
+  // much cleaner and more stable tests - we need to assign an order to the
+  // live values.  DenseSets do not provide a deterministic order across runs.
+  SmallVector<Value *, 64> Temp;
+  Temp.insert(Temp.end(), live.begin(), live.end());
+  std::sort(Temp.begin(), Temp.end(), order_by_name);
+  for (Value *ptr : Temp) {
+    Value *base = findBasePointer(ptr, DVCache, NewInsertedDefs);
      assert(base && "failed to find base pointer");
-    base_pairs[ptr] = base;
+    PointerToBase[ptr] = base;
      assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
              DT->dominates(cast<Instruction>(base)->getParent(),
                            cast<Instruction>(ptr)->getParent())) &&
             "The base we found better dominate the derived pointer");
  
-    if (isNullConstant(base))
-      // If you see this trip and like to live really dangerously, the code
-      // should be correct, just with idioms the verifier can't handle.  You
-      // can try disabling the verifier at your own substaintial risk.
-      llvm_unreachable("the relocation code needs adjustment to handle the"
-                       "relocation of a null pointer constant without causing"
-                       "false positives in the safepoint ir verifier.");
+    // If you see this trip and like to live really dangerously, the code should
+    // be correct, just with idioms the verifier can't handle.  You can try
+    // disabling the verifier at your own substantial risk.
+    assert(!isa<ConstantPointerNull>(base) &&
+           "the relocation code needs adjustment to handle the relocation of "
+           "a null pointer constant without causing false positives in the "
+           "safepoint ir verifier.");
    }
  }
  
@@ -1006,73 +951,49 @@ static void findBasePointers(const std::set<llvm::Value *> &live,
  static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
                               const CallSite &CS,
                               PartiallyConstructedSafepointRecord &result) {
-  std::map<llvm::Value *, llvm::Value *> base_pairs;
-  std::set<llvm::Value *> newInsertedDefs;
-  findBasePointers(result.liveset, base_pairs, &DT, DVCache, newInsertedDefs);
+  DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
+  DenseSet<llvm::Value *> NewInsertedDefs;
+  findBasePointers(result.liveset, PointerToBase, &DT, DVCache,
+                   NewInsertedDefs);
  
    if (PrintBasePointers) {
+    // Note: Need to print these in a stable order since this is checked in
+    // some tests.
      errs() << "Base Pairs (w/o Relocation):\n";
-    for (auto Pair : base_pairs) {
-      errs() << " derived %" << Pair.first->getName() << " base %"
-             << Pair.second->getName() << "\n";
+    SmallVector<Value *, 64> Temp;
+    Temp.reserve(PointerToBase.size());
+    for (auto Pair : PointerToBase) {
+      Temp.push_back(Pair.first);
      }
-  }
-
-  result.base_pairs = base_pairs;
-  result.newInsertedDefs = newInsertedDefs;
-}
-
-/// Check for liveness of items in the insert defs and add them to the live
-/// and base pointer sets
-static void fixupLiveness(DominatorTree &DT, const CallSite &CS,
-                          const std::set<Value *> &allInsertedDefs,
-                          PartiallyConstructedSafepointRecord &result) {
-  Instruction *inst = CS.getInstruction();
-
-  std::set<llvm::Value *> liveset = result.liveset;
-  std::map<llvm::Value *, llvm::Value *> base_pairs = result.base_pairs;
-
-  auto is_live_gc_reference =
-      [&](Value &V) { return isLiveGCReferenceAt(V, inst, DT, nullptr); };
-
-  // For each new definition, check to see if a) the definition dominates the
-  // instruction we're interested in, and b) one of the uses of that definition
-  // is edge-reachable from the instruction we're interested in.  This is the
-  // same definition of liveness we used in the intial liveness analysis
-  for (Value *newDef : allInsertedDefs) {
-    if (liveset.count(newDef)) {
-      // already live, no action needed
-      continue;
-    }
-
-    // PERF: Use DT to check instruction domination might not be good for
-    // compilation time, and we could change to optimal solution if this
-    // turn to be a issue
-    if (!DT.dominates(cast<Instruction>(newDef), inst)) {
-      // can't possibly be live at inst
-      continue;
-    }
-
-    if (is_live_gc_reference(*newDef)) {
-      // Add the live new defs into liveset and base_pairs
-      liveset.insert(newDef);
-      base_pairs[newDef] = newDef;
+    std::sort(Temp.begin(), Temp.end(), order_by_name);
+    for (Value *Ptr : Temp) {
+      Value *Base = PointerToBase[Ptr];
+      errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
+             << "\n";
      }
    }
  
-  result.liveset = liveset;
-  result.base_pairs = base_pairs;
+  result.PointerToBase = PointerToBase;
+  result.NewInsertedDefs = NewInsertedDefs;
  }
  
-static void fixupLiveReferences(
-    Function &F, DominatorTree &DT, Pass *P,
-    const std::set<llvm::Value *> &allInsertedDefs,
-    std::vector<CallSite> &toUpdate,
-    std::vector<struct PartiallyConstructedSafepointRecord> &records) {
+/// Given an updated version of the dataflow liveness results, update the
+/// liveset and base pointer maps for the call site CS.
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+                                  const CallSite &CS,
+                                  PartiallyConstructedSafepointRecord &result);
+
+static void recomputeLiveInValues(
+    Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+    MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+  // TODO-PERF: reuse the original liveness, then simply run the dataflow
+  // again.  The old values are still live and will help it stabilize quickly.
+  GCPtrLivenessData RevisedLivenessData;
+  computeLiveInValues(DT, F, RevisedLivenessData);
    for (size_t i = 0; i < records.size(); i++) {
      struct PartiallyConstructedSafepointRecord &info = records[i];
-    CallSite &CS = toUpdate[i];
-    fixupLiveness(DT, CS, allInsertedDefs, info);
+    const CallSite &CS = toUpdate[i];
+    recomputeLiveInValues(RevisedLivenessData, CS, info);
    }
  }
  
@@ -1099,26 +1020,7 @@ static BasicBlock *normalizeBBForInvokeSafepoint(BasicBlock *BB,
    return ret;
  }
  
-static void
-VerifySafepointBounds(const std::pair<Instruction *, Instruction *> &bounds) {
-  assert(bounds.first->getParent() && bounds.second->getParent() &&
-         "both must belong to basic blocks");
-  if (bounds.first->getParent() == bounds.second->getParent()) {
-    // This is a call safepoint
-    // TODO: scan the range to find the statepoint
-    // TODO: check that the following instruction is not a gc_relocate or
-    // gc_result
-  } else {
-    // This is an invoke safepoint
-    InvokeInst *invoke = dyn_cast<InvokeInst>(bounds.first);
-    (void)invoke;
-    assert(invoke && "only continues over invokes!");
-    assert(invoke->getNormalDest() == bounds.second->getParent() &&
-           "safepoint should continue into normal exit block");
-  }
-}
-
-static int find_index(const SmallVectorImpl<Value *> &livevec, Value *val) {
+static int find_index(ArrayRef<Value *> livevec, Value *val) {
    auto itr = std::find(livevec.begin(), livevec.end(), val);
    assert(livevec.end() != itr);
    size_t index = std::distance(livevec.begin(), itr);
@@ -1168,14 +1070,13 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) {
  ///   statepointToken - statepoint instruction to which relocates should be
  ///   bound.
  ///   Builder - Llvm IR builder to be used to construct new calls.
-/// Returns array with newly created relocates.
-static std::vector<llvm::Instruction *>
-CreateGCRelocates(const SmallVectorImpl<llvm::Value *> &liveVariables,
-                  const int liveStart,
-                  const SmallVectorImpl<llvm::Value *> &basePtrs,
-                  Instruction *statepointToken, IRBuilder<> Builder) {
-
-  std::vector<llvm::Instruction *> newDefs;
+static void CreateGCRelocates(ArrayRef<llvm::Value *> liveVariables,
+                              const int liveStart,
+                              ArrayRef<llvm::Value *> basePtrs,
+                              Instruction *statepointToken,
+                              IRBuilder<> Builder) {
+  SmallVector<Instruction *, 64> NewDefs;
+  NewDefs.reserve(liveVariables.size());
  
    Module *M = statepointToken->getParent()->getParent()->getParent();
  
@@ -1184,7 +1085,7 @@ CreateGCRelocates(const SmallVectorImpl<llvm::Value *> &liveVariables,
      // combination.  This results is some blow up the function declarations in
      // the IR, but removes the need for argument bitcasts which shrinks the IR
      // greatly and makes it much more readable.
-    std::vector<Type *> types;                    // one per 'any' type
+    SmallVector<Type *, 1> types;                 // one per 'any' type
      types.push_back(liveVariables[i]->getType()); // result type
      Value *gc_relocate_decl = Intrinsic::getDeclaration(
          M, Intrinsic::experimental_gc_relocate, types);
@@ -1206,12 +1107,10 @@ CreateGCRelocates(const SmallVectorImpl<llvm::Value *> &liveVariables,
      // fake call.
      cast<CallInst>(reloc)->setCallingConv(CallingConv::Cold);
  
-    newDefs.push_back(cast<Instruction>(reloc));
+    NewDefs.push_back(cast<Instruction>(reloc));
    }
-  assert(newDefs.size() == liveVariables.size() &&
+  assert(NewDefs.size() == liveVariables.size() &&
           "missing or extra redefinition at safepoint");
-
-  return newDefs;
  }
  
  static void
@@ -1244,7 +1143,7 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
    IRBuilder<> Builder(insertBefore);
    // Copy all of the arguments from the original statepoint - this includes the
    // target, call args, and deopt args
-  std::vector<llvm::Value *> args;
+  SmallVector<llvm::Value *, 64> args;
    args.insert(args.end(), CS.arg_begin(), CS.arg_end());
    // TODO: Clear the 'needs rewrite' flag
  
@@ -1282,7 +1181,7 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
      Builder.SetInsertPoint(IP);
      Builder.SetCurrentDebugLocation(IP->getDebugLoc());
  
-  } else if (CS.isInvoke()) {
+  } else {
      InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction());
  
      // Insert the new invoke into the old block.  We'll remove the old one in a
@@ -1317,7 +1216,7 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
      Instruction *exceptional_token =
          cast<Instruction>(Builder.CreateExtractValue(
              unwindBlock->getLandingPadInst(), idx, "relocate_token"));
-    result.exceptional_relocates_token = exceptional_token;
+    result.UnwindToken = exceptional_token;
  
      // Just throw away return value. We will use the one we got for normal
      // block.
@@ -1333,54 +1232,30 @@ makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
  
      // gc relocates will be generated later as if it were regular call
      // statepoint
-  } else {
-    llvm_unreachable("unexpect type of CallSite");
    }
    assert(token);
  
    // Take the name of the original value call if it had one.
    token->takeName(CS.getInstruction());
  
-  // The GCResult is already inserted, we just need to find it
-  Instruction *gc_result = nullptr;
-  /* scope */ {
-    Instruction *toReplace = CS.getInstruction();
-    assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
-           "only valid use before rewrite is gc.result");
-    if (toReplace->hasOneUse()) {
-      Instruction *GCResult = cast<Instruction>(*toReplace->user_begin());
-      assert(isGCResult(GCResult));
-      gc_result = GCResult;
-    }
-  }
+// The GCResult (if any) is already inserted; verify it is the only use
+#ifndef NDEBUG
+  Instruction *toReplace = CS.getInstruction();
+  assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
+         "only valid use before rewrite is gc.result");
+  assert(!toReplace->hasOneUse() ||
+         isGCResult(cast<Instruction>(*toReplace->user_begin())));
+#endif
  
    // Update the gc.result of the original statepoint (if any) to use the newly
    // inserted statepoint.  This is safe to do here since the token can't be
    // considered a live reference.
    CS.getInstruction()->replaceAllUsesWith(token);
  
-  // Second, create a gc.relocate for every live variable
-  std::vector<llvm::Instruction *> newDefs =
-      CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
-
-  // Need to pass through the last part of the safepoint block so that we
-  // don't accidentally update uses in a following gc.relocate which is
-  // still conceptually part of the same safepoint.  Gah.
-  Instruction *last = nullptr;
-  if (!newDefs.empty()) {
-    last = newDefs.back();
-  } else if (gc_result) {
-    last = gc_result;
-  } else {
-    last = token;
-  }
-  assert(last && "can't be null");
-  const auto bounds = std::make_pair(token, last);
-
-  // Sanity check our results - this is slightly non-trivial due to invokes
-  VerifySafepointBounds(bounds);
+  result.StatepointToken = token;
  
-  result.safepoint = bounds;
+  // Second, create a gc.relocate for every live variable
+  CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
  }
  
  namespace {
@@ -1396,7 +1271,7 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec,
                             SmallVectorImpl<Value *> &livevec) {
    assert(basevec.size() == livevec.size());
  
-  std::vector<name_ordering> temp;
+  SmallVector<name_ordering, 64> temp;
    for (size_t i = 0; i < basevec.size(); i++) {
      name_ordering v;
      v.base = basevec[i];
@@ -1412,14 +1287,14 @@ static void stablize_order(SmallVectorImpl<Value *> &basevec,
  
  // Replace an existing gc.statepoint with a new one and a set of gc.relocates
  // which make the relocations happening at this safepoint explicit.
-// 
+//
  // WARNING: Does not do any fixup to adjust users of the original live
  // values.  That's the callers responsibility.
  static void
  makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P,
                         PartiallyConstructedSafepointRecord &result) {
-  std::set<llvm::Value *> liveset = result.liveset;
-  std::map<llvm::Value *, llvm::Value *> base_pairs = result.base_pairs;
+  auto liveset = result.liveset;
+  auto PointerToBase = result.PointerToBase;
  
    // Convert to vector for efficient cross referencing.
    SmallVector<Value *, 64> basevec, livevec;
@@ -1428,8 +1303,8 @@ makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P,
    for (Value *L : liveset) {
      livevec.push_back(L);
  
-    assert(base_pairs.find(L) != base_pairs.end());
-    Value *base = base_pairs[L];
+    assert(PointerToBase.find(L) != PointerToBase.end());
+    Value *base = PointerToBase[L];
      basevec.push_back(base);
    }
    assert(livevec.size() == basevec.size());
@@ -1484,17 +1359,16 @@ insertRelocationStores(iterator_range<Value::user_iterator> gcRelocs,
  
  /// do all the relocation update via allocas and mem2reg
  static void relocationViaAlloca(
-    Function &F, DominatorTree &DT, const std::vector<Value *> &live,
-    const std::vector<struct PartiallyConstructedSafepointRecord> &records) {
+    Function &F, DominatorTree &DT, ArrayRef<Value *> live,
+    ArrayRef<struct PartiallyConstructedSafepointRecord> records) {
  #ifndef NDEBUG
-  int initialAllocaNum = 0;
-
-  // record initial number of allocas
-  for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end;
-       itr++) {
-    if (isa<AllocaInst>(*itr))
-      initialAllocaNum++;
-  }
+  // record initial number of (static) allocas; we'll check we have the same
+  // number when we get done.
+  int InitialAllocaNum = 0;
+  for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
+       I++)
+    if (isa<AllocaInst>(*I))
+      InitialAllocaNum++;
  #endif
  
    // TODO-PERF: change data structures, reserve
@@ -1523,44 +1397,60 @@ static void relocationViaAlloca(
    // otherwise we lose the link between statepoint and old def
    for (size_t i = 0; i < records.size(); i++) {
      const struct PartiallyConstructedSafepointRecord &info = records[i];
-    Value *statepoint = info.safepoint.first;
+    Value *Statepoint = info.StatepointToken;
  
      // This will be used for consistency check
      DenseSet<Value *> visitedLiveValues;
  
      // Insert stores for normal statepoint gc relocates
-    insertRelocationStores(statepoint->users(), allocaMap, visitedLiveValues);
+    insertRelocationStores(Statepoint->users(), allocaMap, visitedLiveValues);
  
      // In case if it was invoke statepoint
      // we will insert stores for exceptional path gc relocates.
-    if (isa<InvokeInst>(statepoint)) {
-      insertRelocationStores(info.exceptional_relocates_token->users(),
-                             allocaMap, visitedLiveValues);
+    if (isa<InvokeInst>(Statepoint)) {
+      insertRelocationStores(info.UnwindToken->users(), allocaMap,
+                             visitedLiveValues);
      }
  
-#ifndef NDEBUG
-    // For consistency check store null's into allocas for values that are not
-    // relocated
-    // by this statepoint.
-    for (auto Pair : allocaMap) {
-      Value *def = Pair.first;
-      Value *alloca = Pair.second;
-
-      // This value was relocated
-      if (visitedLiveValues.count(def)) {
-        continue;
-      }
-      // Result should not be relocated
-      if (def == info.result) {
-        continue;
+    if (ClobberNonLive) {
+      // As a debugging aid, pretend that an unrelocated pointer becomes null at
+      // the gc.statepoint.  This will turn some subtle GC problems into
+      // slightly easier to debug SEGVs.  Note that on large IR files with
+      // lots of gc.statepoints this is extremely costly both memory and time
+      // wise.
+      SmallVector<AllocaInst *, 64> ToClobber;
+      for (auto Pair : allocaMap) {
+        Value *Def = Pair.first;
+        AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
+
+        // This value was relocated
+        if (visitedLiveValues.count(Def)) {
+          continue;
+        }
+        ToClobber.push_back(Alloca);
        }
  
-      Constant *CPN =
-          ConstantPointerNull::get(cast<PointerType>(def->getType()));
-      StoreInst *store = new StoreInst(CPN, alloca);
-      store->insertBefore(info.safepoint.second);
+      auto InsertClobbersAt = [&](Instruction *IP) {
+        for (auto *AI : ToClobber) {
+          auto AIType = cast<PointerType>(AI->getType());
+          auto PT = cast<PointerType>(AIType->getElementType());
+          Constant *CPN = ConstantPointerNull::get(PT);
+          StoreInst *store = new StoreInst(CPN, AI);
+          store->insertBefore(IP);
+        }
+      };
+
+      // Insert the clobbering stores.  These may get intermixed with the
+      // gc.results and gc.relocates, but that's fine.
+      if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
+        InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
+        InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
+      } else {
+        BasicBlock::iterator Next(cast<CallInst>(Statepoint));
+        Next++;
+        InsertClobbersAt(Next);
+      }
      }
-#endif
    }
    // update use with load allocas and add store for gc_relocated
    for (auto Pair : allocaMap) {
@@ -1608,11 +1498,21 @@ static void relocationViaAlloca(
      // store must be inserted after load, otherwise store will be in alloca's
      // use list and an extra load will be inserted before it
      StoreInst *store = new StoreInst(def, alloca);
-    if (isa<Instruction>(def)) {
-      store->insertAfter(cast<Instruction>(def));
+    if (Instruction *inst = dyn_cast<Instruction>(def)) {
+      if (InvokeInst *invoke = dyn_cast<InvokeInst>(inst)) {
+        // InvokeInst is a TerminatorInst so the store needs to be inserted
+        // into its normal destination block.
+        BasicBlock *normalDest = invoke->getNormalDest();
+        store->insertBefore(normalDest->getFirstNonPHI());
+      } else {
+        assert(!inst->isTerminator() &&
+               "The only TerminatorInst that can produce a value is "
+               "InvokeInst which is handled above.");
+        store->insertAfter(inst);
+      }
      } else {
        assert((isa<Argument>(def) || isa<GlobalVariable>(def) ||
-              (isa<Constant>(def) && cast<Constant>(def)->isNullValue())) &&
+              isa<ConstantPointerNull>(def)) &&
               "Must be argument or global");
        store->insertAfter(cast<Instruction>(alloca));
      }
@@ -1626,26 +1526,27 @@ static void relocationViaAlloca(
    }
  
  #ifndef NDEBUG
-  for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end;
-       itr++) {
-    if (isa<AllocaInst>(*itr))
-      initialAllocaNum--;
-  }
-  assert(initialAllocaNum == 0 && "We must not introduce any extra allocas");
+  for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
+       I++)
+    if (isa<AllocaInst>(*I))
+      InitialAllocaNum--;
+  assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
  #endif
  }
  
  /// Implement a unique function which doesn't require we sort the input
  /// vector.  Doing so has the effect of changing the output of a couple of
  /// tests in ways which make them less useful in testing fused safepoints.
-template <typename T> static void unique_unsorted(std::vector<T> &vec) {
-  DenseSet<T> seen;
-  std::vector<T> tmp;
-  vec.reserve(vec.size());
-  std::swap(tmp, vec);
-  for (auto V : tmp) {
-    if (seen.insert(V).second) {
-      vec.push_back(V);
+template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
+  DenseSet<T> Seen;
+  SmallVector<T, 128> TempVec;
+  TempVec.reserve(Vec.size());
+  for (auto Element : Vec)
+    TempVec.push_back(Element);
+  Vec.clear();
+  for (auto V : TempVec) {
+    if (Seen.insert(V).second) {
+      Vec.push_back(V);
      }
    }
  }
@@ -1660,7 +1561,7 @@ static Function *getUseHolder(Module &M) {
  /// Insert holders so that each Value is obviously live through the entire
  /// liftetime of the call.
  static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
-                                 std::vector<CallInst *> &holders) {
+                                 SmallVectorImpl<CallInst *> &holders) {
    Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
    Function *Func = getUseHolder(*M);
    if (CS.isCall()) {
@@ -1679,52 +1580,135 @@ static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
          Func, Values, "", invoke->getUnwindDest()->getFirstInsertionPt());
      holders.push_back(normal_holder);
      holders.push_back(unwind_holder);
-  } else {
-    assert(false && "Unsupported");
-  }
+  } else
+    llvm_unreachable("unsupported call type");
  }
  
  static void findLiveReferences(
-    Function &F, DominatorTree &DT, Pass *P, std::vector<CallSite> &toUpdate,
-    std::vector<struct PartiallyConstructedSafepointRecord> &records) {
+    Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
+    MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+  GCPtrLivenessData OriginalLivenessData;
+  computeLiveInValues(DT, F, OriginalLivenessData);
    for (size_t i = 0; i < records.size(); i++) {
      struct PartiallyConstructedSafepointRecord &info = records[i];
-    CallSite &CS = toUpdate[i];
-    analyzeParsePointLiveness(DT, CS, info);
+    const CallSite &CS = toUpdate[i];
+    analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
    }
  }
  
-static void addBasesAsLiveValues(std::set<Value *> &liveset,
-                                 std::map<Value *, Value *> &base_pairs) {
-  // Identify any base pointers which are used in this safepoint, but not
-  // themselves relocated.  We need to relocate them so that later inserted
-  // safepoints can get the properly relocated base register.
-  DenseSet<Value *> missing;
-  for (Value *L : liveset) {
-    assert(base_pairs.find(L) != base_pairs.end());
-    Value *base = base_pairs[L];
-    assert(base);
-    if (liveset.find(base) == liveset.end()) {
-      assert(base_pairs.find(base) == base_pairs.end());
-      // uniqued by set insert
-      missing.insert(base);
+/// Remove any vector of pointers from the liveset by scalarizing them over the
+/// statepoint instruction.  Adds the scalarized pieces to the liveset.  It
+/// would be preferable to include the vector in the statepoint itself, but
+/// the lowering code currently does not handle that.  Extending it would be
+/// slightly non-trivial since it requires a format change.  Given how rare
+/// such cases are (for the moment?), scalarizing is an acceptable compromise.
+static void splitVectorValues(Instruction *StatepointInst,
+                              StatepointLiveSetTy &LiveSet, DominatorTree &DT) {
+  SmallVector<Value *, 16> ToSplit;
+  for (Value *V : LiveSet)
+    if (isa<VectorType>(V->getType()))
+      ToSplit.push_back(V);
+
+  if (ToSplit.empty())
+    return;
+
+  Function &F = *(StatepointInst->getParent()->getParent());
+
+  DenseMap<Value *, AllocaInst *> AllocaMap;
+  // First is normal return, second is exceptional return (invoke only)
+  DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
+  for (Value *V : ToSplit) {
+    LiveSet.erase(V);
+
+    AllocaInst *Alloca =
+        new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
+    AllocaMap[V] = Alloca;
+
+    VectorType *VT = cast<VectorType>(V->getType());
+    IRBuilder<> Builder(StatepointInst);
+    SmallVector<Value *, 16> Elements;
+    for (unsigned i = 0; i < VT->getNumElements(); i++)
+      Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
+    LiveSet.insert(Elements.begin(), Elements.end());
+
+    auto InsertVectorReform = [&](Instruction *IP) {
+      Builder.SetInsertPoint(IP);
+      Builder.SetCurrentDebugLocation(IP->getDebugLoc());
+      Value *ResultVec = UndefValue::get(VT);
+      for (unsigned i = 0; i < VT->getNumElements(); i++)
+        ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
+                                                Builder.getInt32(i));
+      return ResultVec;
+    };
+
+    if (isa<CallInst>(StatepointInst)) {
+      BasicBlock::iterator Next(StatepointInst);
+      Next++;
+      Instruction *IP = &*(Next);
+      Replacements[V].first = InsertVectorReform(IP);
+      Replacements[V].second = nullptr;
+    } else {
+      InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
+      // We've already normalized - check that we don't have shared destination
+      // blocks
+      BasicBlock *NormalDest = Invoke->getNormalDest();
+      assert(!isa<PHINode>(NormalDest->begin()));
+      BasicBlock *UnwindDest = Invoke->getUnwindDest();
+      assert(!isa<PHINode>(UnwindDest->begin()));
+      // Insert insert element sequences in both successors
+      Instruction *IP = &*(NormalDest->getFirstInsertionPt());
+      Replacements[V].first = InsertVectorReform(IP);
+      IP = &*(UnwindDest->getFirstInsertionPt());
+      Replacements[V].second = InsertVectorReform(IP);
      }
    }
+  for (Value *V : ToSplit) {
+    AllocaInst *Alloca = AllocaMap[V];
+
+    // Capture all users before we start mutating use lists
+    SmallVector<Instruction *, 16> Users;
+    for (User *U : V->users())
+      Users.push_back(cast<Instruction>(U));
+
+    for (Instruction *I : Users) {
+      if (auto Phi = dyn_cast<PHINode>(I)) {
+        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
+          if (V == Phi->getIncomingValue(i)) {
+            LoadInst *Load = new LoadInst(
+                Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
+            Phi->setIncomingValue(i, Load);
+          }
+      } else {
+        LoadInst *Load = new LoadInst(Alloca, "", I);
+        I->replaceUsesOfWith(V, Load);
+      }
+    }
  
-  // Note that we want these at the end of the list, otherwise
-  // register placement gets screwed up once we lower to STATEPOINT
-  // instructions.  This is an utter hack, but there doesn't seem to be a
-  // better one.
-  for (Value *base : missing) {
-    assert(base);
-    liveset.insert(base);
-    base_pairs[base] = base;
-  }
-  assert(liveset.size() == base_pairs.size());
+    // Store the original value and the replacement value into the alloca
+    StoreInst *Store = new StoreInst(V, Alloca);
+    if (auto I = dyn_cast<Instruction>(V))
+      Store->insertAfter(I);
+    else
+      Store->insertAfter(Alloca);
+
+    // Normal return for invoke, or call return
+    Instruction *Replacement = cast<Instruction>(Replacements[V].first);
+    (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
+    // Unwind return for invoke only
+    Replacement = cast_or_null<Instruction>(Replacements[V].second);
+    if (Replacement)
+      (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
+  }
+
+  // apply mem2reg to promote alloca to SSA
+  SmallVector<AllocaInst *, 16> Allocas;
+  for (Value *V : ToSplit)
+    Allocas.push_back(AllocaMap[V]);
+  PromoteMemToReg(Allocas, DT);
  }
  
  static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
-                              std::vector<CallSite> &toUpdate) {
+                              SmallVectorImpl<CallSite> &toUpdate) {
  #ifndef NDEBUG
    // sanity check the input
    std::set<CallSite> uniqued;
@@ -1740,7 +1724,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
  
    // A list of dummy calls added to the IR to keep various values obviously
    // live in the IR.  We'll remove all of these when done.
-  std::vector<CallInst *> holders;
+  SmallVector<CallInst *, 64> holders;
  
    // Insert a dummy call with all of the arguments to the vm_state we'll need
    // for the actual safepoint insertion.  This ensures reference arguments in
@@ -1753,13 +1737,15 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
      SmallVector<Value *, 64> DeoptValues;
      for (Use &U : StatepointCS.vm_state_args()) {
        Value *Arg = cast<Value>(&U);
-      if (isGCPointerType(Arg->getType()))
+      assert(!isUnhandledGCPointerType(Arg->getType()) &&
+             "support for FCA unimplemented");
+      if (isHandledGCPointerType(Arg->getType()))
          DeoptValues.push_back(Arg);
      }
      insertUseHolderAfter(CS, DeoptValues, holders);
    }
  
-  std::vector<struct PartiallyConstructedSafepointRecord> records;
+  SmallVector<struct PartiallyConstructedSafepointRecord, 64> records;
    records.reserve(toUpdate.size());
    for (size_t i = 0; i < toUpdate.size(); i++) {
      struct PartiallyConstructedSafepointRecord info;
@@ -1771,6 +1757,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
    // site.
    findLiveReferences(F, DT, P, toUpdate, records);
  
+  // Do a limited scalarization of any live at safepoint vector values which
+  // contain pointers.  This enables this pass to run after vectorization at
+  // the cost of some possible performance loss.  TODO: it would be nice to
+  // natively support vectors all the way through the backend so we don't need
+  // to scalarize here.
+  for (size_t i = 0; i < records.size(); i++) {
+    struct PartiallyConstructedSafepointRecord &info = records[i];
+    Instruction *statepoint = toUpdate[i].getInstruction();
+    splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);
+  }
+
    // B) Find the base pointers for each live pointer
    /* scope for caching */ {
      // Cache the 'defining value' relation used in the computation and
@@ -1794,11 +1791,11 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
    //   gep a + 1
    //   safepoint 2
    //   br loop
-  std::set<llvm::Value *> allInsertedDefs;
+  DenseSet<llvm::Value *> allInsertedDefs;
    for (size_t i = 0; i < records.size(); i++) {
      struct PartiallyConstructedSafepointRecord &info = records[i];
-    allInsertedDefs.insert(info.newInsertedDefs.begin(),
-                           info.newInsertedDefs.end());
+    allInsertedDefs.insert(info.NewInsertedDefs.begin(),
+                           info.NewInsertedDefs.end());
    }
  
    // We insert some dummy calls after each safepoint to definitely hold live
@@ -1811,33 +1808,22 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
      CallSite &CS = toUpdate[i];
  
      SmallVector<Value *, 128> Bases;
-    for (auto Pair : info.base_pairs) {
+    for (auto Pair : info.PointerToBase) {
        Bases.push_back(Pair.second);
      }
      insertUseHolderAfter(CS, Bases, holders);
    }
  
-  // Add the bases explicitly to the live vector set.  This may result in a few
-  // extra relocations, but the base has to be available whenever a pointer
-  // derived from it is used.  Thus, we need it to be part of the statepoint's
-  // gc arguments list.  TODO: Introduce an explicit notion (in the following
-  // code) of the GC argument list as seperate from the live Values at a
-  // given statepoint.
-  for (size_t i = 0; i < records.size(); i++) {
-    struct PartiallyConstructedSafepointRecord &info = records[i];
-    addBasesAsLiveValues(info.liveset, info.base_pairs);
-  }
+  // By selecting base pointers, we've effectively inserted new uses. Thus, we
+  // need to rerun liveness.  We may *also* have inserted new defs, but that's
+  // not the key issue.
+  recomputeLiveInValues(F, DT, P, toUpdate, records);
  
-  // If we inserted any new values, we need to adjust our notion of what is
-  // live at a particular safepoint.
-  if (!allInsertedDefs.empty()) {
-    fixupLiveReferences(F, DT, P, allInsertedDefs, toUpdate, records);
-  }
    if (PrintBasePointers) {
      for (size_t i = 0; i < records.size(); i++) {
        struct PartiallyConstructedSafepointRecord &info = records[i];
        errs() << "Base Pairs: (w/Relocation)\n";
-      for (auto Pair : info.base_pairs) {
+      for (auto Pair : info.PointerToBase) {
          errs() << " derived %" << Pair.first->getName() << " base %"
                 << Pair.second->getName() << "\n";
        }
@@ -1870,7 +1856,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
    // nodes have single entry (because of normalizeBBForInvokeSafepoint).
    // Just remove them all here.
    for (size_t i = 0; i < records.size(); i++) {
-    Instruction *I = records[i].safepoint.first;
+    Instruction *I = records[i].StatepointToken;
  
      if (InvokeInst *invoke = dyn_cast<InvokeInst>(I)) {
        FoldSingleEntryPHINodes(invoke->getNormalDest());
@@ -1882,7 +1868,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
    }
  
    // Do all the fixups of the original live variables to their relocated selves
-  std::vector<Value *> live;
+  SmallVector<Value *, 128> live;
    for (size_t i = 0; i < records.size(); i++) {
      struct PartiallyConstructedSafepointRecord &info = records[i];
      // We can't simply save the live set from the original insertion.  One of
@@ -1890,7 +1876,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
      // That Value* no longer exists and we need to use the new gc_result.
      // Thankfully, the liveset is embedded in the statepoint (and updated), so
      // we just grab that.
-    Statepoint statepoint(info.safepoint.first);
+    Statepoint statepoint(info.StatepointToken);
      live.insert(live.end(), statepoint.gc_args_begin(),
                  statepoint.gc_args_end());
    }
@@ -1911,8 +1897,11 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
  /// point of this function is as an extension point for custom logic.
  static bool shouldRewriteStatepointsIn(Function &F) {
    // TODO: This should check the GCStrategy
-  const std::string StatepointExampleName("statepoint-example");
-  return StatepointExampleName == F.getGC();
+  if (F.hasGC()) {
+    const std::string StatepointExampleName("statepoint-example");
+    return StatepointExampleName == F.getGC();
+  } else
+    return false;
  }
  
  bool RewriteStatepointsForGC::runOnFunction(Function &F) {
@@ -1925,19 +1914,285 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) {
    if (!shouldRewriteStatepointsIn(F))
      return false;
  
-  // Gather all the statepoints which need rewritten.
-  std::vector<CallSite> ParsePointNeeded;
-  for (inst_iterator itr = inst_begin(F), end = inst_end(F); itr != end;
-       itr++) {
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+  // Gather all the statepoints which need to be rewritten.  Be careful to only
+  // consider those in reachable code since we need to ask dominance queries
+  // when rewriting.  We'll delete the unreachable ones in a moment.
+  SmallVector<CallSite, 64> ParsePointNeeded;
+  bool HasUnreachableStatepoint = false;
+  for (Instruction &I : inst_range(F)) {
      // TODO: only the ones with the flag set!
-    if (isStatepoint(*itr))
-      ParsePointNeeded.push_back(CallSite(&*itr));
+    if (isStatepoint(I)) {
+      if (DT.isReachableFromEntry(I.getParent()))
+        ParsePointNeeded.push_back(CallSite(&I));
+      else
+        HasUnreachableStatepoint = true;
+    }
    }
  
+  bool MadeChange = false;
+
+  // Delete any unreachable statepoints so that we don't have unrewritten
+  // statepoints surviving this pass.  This makes testing easier and the
+  // resulting IR less confusing to human readers.  Rather than be fancy, we
+  // just reuse a utility function which removes the unreachable blocks.
+  if (HasUnreachableStatepoint)
+    MadeChange |= removeUnreachableBlocks(F);
+
    // Return early if no work to do.
    if (ParsePointNeeded.empty())
-    return false;
+    return MadeChange;
+
+  // As a prepass, go ahead and aggressively destroy single entry phi nodes.
+  // These are created by LCSSA.  They have the effect of increasing the size
+  // of liveness sets for no good reason.  It may be harder to do this post
+  // insertion since relocations and base phis can confuse things.
+  for (BasicBlock &BB : F)
+    if (BB.getUniquePredecessor()) {
+      MadeChange = true;
+      FoldSingleEntryPHINodes(&BB);
+    }
  
-  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  return insertParsePoints(F, DT, this, ParsePointNeeded);
+  MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
+  return MadeChange;
+}
+
+// liveness computation via standard dataflow
+// -------------------------------------------------------------------
+
+// TODO: Consider using bitvectors for liveness, the set of potentially
+// interesting values should be small and easy to pre-compute.
+
+/// Is this value a constant consisting of entirely null values?
+static bool isConstantNull(Value *V) {
+  return isa<Constant>(V) && cast<Constant>(V)->isNullValue();
+}
+
+/// Compute the live-in set for the location rbegin starting from
+/// the live-out set of the basic block
+static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
+                                BasicBlock::reverse_iterator rend,
+                                DenseSet<Value *> &LiveTmp) {
+
+  for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
+    Instruction *I = &*ritr;
+
+    // KILL/Def - Remove this definition from LiveIn
+    LiveTmp.erase(I);
+
+    // Don't consider *uses* in PHI nodes, we handle their contribution to
+    // predecessor blocks when we seed the LiveOut sets
+    if (isa<PHINode>(I))
+      continue;
+
+    // USE - Add to the LiveIn set for this instruction
+    for (Value *V : I->operands()) {
+      assert(!isUnhandledGCPointerType(V->getType()) &&
+             "support for FCA unimplemented");
+      if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) &&
+          !isa<UndefValue>(V)) {
+        // The choice to exclude null and undef is arbitrary here.  Reconsider?
+        LiveTmp.insert(V);
+      }
+    }
+  }
+}
+
+static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {
+
+  for (BasicBlock *Succ : successors(BB)) {
+    const BasicBlock::iterator E(Succ->getFirstNonPHI());
+    for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
+      PHINode *Phi = cast<PHINode>(&*I);
+      Value *V = Phi->getIncomingValueForBlock(BB);
+      assert(!isUnhandledGCPointerType(V->getType()) &&
+             "support for FCA unimplemented");
+      if (isHandledGCPointerType(V->getType()) && !isConstantNull(V) &&
+          !isa<UndefValue>(V)) {
+        // The choice to exclude null and undef is arbitrary here.  Reconsider?
+        LiveTmp.insert(V);
+      }
+    }
+  }
+}
+
+static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
+  DenseSet<Value *> KillSet;
+  for (Instruction &I : *BB)
+    if (isHandledGCPointerType(I.getType()))
+      KillSet.insert(&I);
+  return KillSet;
+}
+
+#ifndef NDEBUG
+/// Check that the items in 'Live' dominate 'TI'.  This is used as a basic
+/// sanity check for the liveness computation.
+static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
+                          TerminatorInst *TI, bool TermOkay = false) {
+  for (Value *V : Live) {
+    if (auto *I = dyn_cast<Instruction>(V)) {
+      // The terminator can be a member of the LiveOut set.  LLVM's definition
+      // of instruction dominance states that V does not dominate itself.  As
+      // such, we need to special case this to allow it.
+      if (TermOkay && TI == I)
+        continue;
+      assert(DT.dominates(I, TI) &&
+             "basic SSA liveness expectation violated by liveness analysis");
+    }
+  }
+}
+
+/// Check that all the liveness sets used during the computation of liveness
+/// obey basic SSA properties.  This is useful for finding cases where we miss
+/// a def.
+static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
+                          BasicBlock &BB) {
+  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
+  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
+  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
+}
+#endif
+
+static void computeLiveInValues(DominatorTree &DT, Function &F,
+                                GCPtrLivenessData &Data) {
+
+  SmallSetVector<BasicBlock *, 200> Worklist;
+  auto AddPredsToWorklist = [&](BasicBlock *BB) {
+    // We use a SetVector so that we don't have duplicates in the worklist.
+    Worklist.insert(pred_begin(BB), pred_end(BB));
+  };
+  auto NextItem = [&]() {
+    BasicBlock *BB = Worklist.back();
+    Worklist.pop_back();
+    return BB;
+  };
+
+  // Seed the liveness for each individual block
+  for (BasicBlock &BB : F) {
+    Data.KillSet[&BB] = computeKillSet(&BB);
+    Data.LiveSet[&BB].clear();
+    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
+
+#ifndef NDEBUG
+    for (Value *Kill : Data.KillSet[&BB])
+      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
+#endif
+
+    Data.LiveOut[&BB] = DenseSet<Value *>();
+    computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
+    Data.LiveIn[&BB] = Data.LiveSet[&BB];
+    set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
+    set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
+    if (!Data.LiveIn[&BB].empty())
+      AddPredsToWorklist(&BB);
+  }
+
+  // Propagate that liveness until stable
+  while (!Worklist.empty()) {
+    BasicBlock *BB = NextItem();
+
+    // Compute our new liveout set, then exit early if it hasn't changed
+    // despite the contribution of our successor.
+    DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+    const auto OldLiveOutSize = LiveOut.size();
+    for (BasicBlock *Succ : successors(BB)) {
+      assert(Data.LiveIn.count(Succ));
+      set_union(LiveOut, Data.LiveIn[Succ]);
+    }
+    // assert: the old LiveOut (Data.LiveOut[BB]) is a subset of LiveOut
+    if (OldLiveOutSize == LiveOut.size()) {
+      // If the sets are the same size, then we didn't actually add anything
+      // when unioning our successors' LiveIn.  Thus, the LiveIn of this block
+      // hasn't changed.
+      continue;
+    }
+    Data.LiveOut[BB] = LiveOut;
+
+    // Apply the effects of this basic block
+    DenseSet<Value *> LiveTmp = LiveOut;
+    set_union(LiveTmp, Data.LiveSet[BB]);
+    set_subtract(LiveTmp, Data.KillSet[BB]);
+
+    assert(Data.LiveIn.count(BB));
+    const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
+    // assert: OldLiveIn is a subset of LiveTmp
+    if (OldLiveIn.size() != LiveTmp.size()) {
+      Data.LiveIn[BB] = LiveTmp;
+      AddPredsToWorklist(BB);
+    }
+  } // while( !worklist.empty() )
+
+#ifndef NDEBUG
+  // Sanity check our output against SSA properties.  This helps catch any
+  // missing kills during the above iteration.
+  for (BasicBlock &BB : F) {
+    checkBasicSSA(DT, Data, BB);
+  }
+#endif
+}
+
+static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
+                              StatepointLiveSetTy &Out) {
+
+  BasicBlock *BB = Inst->getParent();
+
+  // Note: The copy is intentional and required
+  assert(Data.LiveOut.count(BB));
+  DenseSet<Value *> LiveOut = Data.LiveOut[BB];
+
+  // We want to handle the statepoint itself oddly.  Its
+  // call result is not live (normal), nor are its arguments
+  // (unless they're used again later).  This adjustment is
+  // specifically what we need to relocate
+  BasicBlock::reverse_iterator rend(Inst);
+  computeLiveInValues(BB->rbegin(), rend, LiveOut);
+  LiveOut.erase(Inst);
+  Out.insert(LiveOut.begin(), LiveOut.end());
+}
+
+static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
+                                  const CallSite &CS,
+                                  PartiallyConstructedSafepointRecord &Info) {
+  Instruction *Inst = CS.getInstruction();
+  StatepointLiveSetTy Updated;
+  findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
+
+#ifndef NDEBUG
+  DenseSet<Value *> Bases;
+  for (auto KVPair : Info.PointerToBase) {
+    Bases.insert(KVPair.second);
+  }
+#endif
+  // We may have base pointers which are now live that weren't before.  We need
+  // to update the PointerToBase structure to reflect this.
+  for (auto V : Updated)
+    if (!Info.PointerToBase.count(V)) {
+      assert(Bases.count(V) && "can't find base for unexpected live value");
+      Info.PointerToBase[V] = V;
+      continue;
+    }
+
+#ifndef NDEBUG
+  for (auto V : Updated) {
+    assert(Info.PointerToBase.count(V) &&
+           "must be able to find base for live value");
+  }
+#endif
+
+  // Remove any stale base mappings - this can happen since our liveness is
+  // more precise than the one inherent in the base pointer analysis
+  DenseSet<Value *> ToErase;
+  for (auto KVPair : Info.PointerToBase)
+    if (!Updated.count(KVPair.first))
+      ToErase.insert(KVPair.first);
+  for (auto V : ToErase)
+    Info.PointerToBase.erase(V);
+
+#ifndef NDEBUG
+  for (auto KVPair : Info.PointerToBase)
+    assert(Updated.count(KVPair.first) && "record for non-live value");
+#endif
+
+  Info.liveset = Updated;
  }