lib/Transforms/Scalar/RewriteStatepointsForGC.cpp

   1 //===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Rewrite an existing set of gc.statepoints such that they make potential
  11 // relocations performed by the garbage collector explicit in the IR.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/Pass.h"
  16 #include "llvm/Analysis/CFG.h"
  17 #include "llvm/Analysis/InstructionSimplify.h"
  18 #include "llvm/Analysis/TargetTransformInfo.h"
  19 #include "llvm/ADT/SetOperations.h"
  20 #include "llvm/ADT/Statistic.h"
  21 #include "llvm/ADT/DenseSet.h"
  22 #include "llvm/ADT/SetVector.h"
  23 #include "llvm/ADT/StringRef.h"
  24 #include "llvm/ADT/MapVector.h"
  25 #include "llvm/IR/BasicBlock.h"
  26 #include "llvm/IR/CallSite.h"
  27 #include "llvm/IR/Dominators.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/IRBuilder.h"
  30 #include "llvm/IR/InstIterator.h"
  31 #include "llvm/IR/Instructions.h"
  32 #include "llvm/IR/Intrinsics.h"
  33 #include "llvm/IR/IntrinsicInst.h"
  34 #include "llvm/IR/Module.h"
  35 #include "llvm/IR/MDBuilder.h"
  36 #include "llvm/IR/Statepoint.h"
  37 #include "llvm/IR/Value.h"
  38 #include "llvm/IR/Verifier.h"
  39 #include "llvm/Support/Debug.h"
  40 #include "llvm/Support/CommandLine.h"
  41 #include "llvm/Transforms/Scalar.h"
  42 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  43 #include "llvm/Transforms/Utils/Cloning.h"
  44 #include "llvm/Transforms/Utils/Local.h"
  45 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  46
  47 #define DEBUG_TYPE "rewrite-statepoints-for-gc"
  48
  49 using namespace llvm;
  50
// Debugging aid: print the liveset found at each insert location.  The
// printing itself happens in analyzeParsePointLiveness.
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                  cl::init(false));
// Debugging aid: print only the number of values in the liveset, together
// with the name of the called value, for each safepoint.
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
                                      cl::init(false));
// Print out the base pointers for debugging
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
                                       cl::init(false));

// Cost threshold measuring when it is profitable to rematerialize a value
// instead of relocating it.  Defaults to 6.
static cl::opt<unsigned>
RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
                           cl::init(6));

// ClobberNonLive defaults to true only when the file is built with XDEBUG
// defined; otherwise it defaults to false.  Either default can be overridden
// from the command line through -rs4gc-clobber-non-live, which writes straight
// into this variable via cl::location.
// NOTE(review): the consumers of ClobberNonLive are outside this view;
// presumably it makes the pass clobber values that are not live across a
// safepoint so that stale uses are easier to spot -- confirm at the use site.
#ifdef XDEBUG
static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
#endif
static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                  cl::location(ClobberNonLive),
                                                  cl::Hidden);

// When set, deopt state is read from "deopt" operand bundles;
// GetDeoptBundleOperands asserts that this flag is enabled.
static cl::opt<bool> UseDeoptBundles("rs4gc-use-deopt-bundles", cl::Hidden,
                                     cl::init(false));
// When set (the default), a call site without a "deopt" operand bundle is
// tolerated by GetDeoptBundleOperands; when cleared, such a call site trips
// an assertion there.
static cl::opt<bool>
    AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                   cl::Hidden, cl::init(true));
  80
  81 namespace {
  82 struct RewriteStatepointsForGC : public ModulePass {
  83   static char ID; // Pass identification, replacement for typeid
  84
  85   RewriteStatepointsForGC() : ModulePass(ID) {
  86     initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry());
  87   }
  88   bool runOnFunction(Function &F);
  89   bool runOnModule(Module &M) override {
  90     bool Changed = false;
  91     for (Function &F : M)
  92       Changed |= runOnFunction(F);
  93
  94     if (Changed) {
  95       // stripNonValidAttributes asserts that shouldRewriteStatepointsIn
  96       // returns true for at least one function in the module.  Since at least
  97       // one function changed, we know that the precondition is satisfied.
  98       stripNonValidAttributes(M);
  99     }
 100
 101     return Changed;
 102   }
 103
 104   void getAnalysisUsage(AnalysisUsage &AU) const override {
 105     // We add and rewrite a bunch of instructions, but don't really do much
 106     // else.  We could in theory preserve a lot more analyses here.
 107     AU.addRequired<DominatorTreeWrapperPass>();
 108     AU.addRequired<TargetTransformInfoWrapperPass>();
 109   }
 110
 111   /// The IR fed into RewriteStatepointsForGC may have had attributes implying
 112   /// dereferenceability that are no longer valid/correct after
 113   /// RewriteStatepointsForGC has run.  This is because semantically, after
 114   /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
 115   /// heap.  stripNonValidAttributes (conservatively) restores correctness
 116   /// by erasing all attributes in the module that externally imply
 117   /// dereferenceability.
 118   /// Similar reasoning also applies to the noalias attributes. gc.statepoint
 119   /// can touch the entire heap including noalias objects.
 120   void stripNonValidAttributes(Module &M);
 121
 122   // Helpers for stripNonValidAttributes
 123   void stripNonValidAttributesFromBody(Function &F);
 124   void stripNonValidAttributesFromPrototype(Function &F);
 125 };
 126 } // namespace
 127
 128 char RewriteStatepointsForGC::ID = 0;
 129
 130 ModulePass *llvm::createRewriteStatepointsForGCPass() {
 131   return new RewriteStatepointsForGC();
 132 }
 133
 134 INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
 135                       "Make relocations explicit at statepoints", false, false)
 136 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 137 INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
 138                     "Make relocations explicit at statepoints", false, false)
 139
 140 namespace {
/// Per-basic-block sets used by the liveness dataflow computation.  Each map
/// is keyed by basic block and holds the set of values relevant to that block.
struct GCPtrLivenessData {
  /// Values defined in this block.
  DenseMap<BasicBlock *, DenseSet<Value *>> KillSet;
  /// Values used in this block (and thus live); does not include values
  /// killed within this block.
  DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet;

  /// Values live into this basic block (i.e. used by any
  /// instruction in this basic block or ones reachable from here)
  DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn;

  /// Values live out of this basic block (i.e. live into
  /// any successor block)
  DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut;
};
 156
 157 // The type of the internal cache used inside the findBasePointers family
 158 // of functions.  From the callers perspective, this is an opaque type and
 159 // should not be inspected.
 160 //
 161 // In the actual implementation this caches two relations:
 162 // - The base relation itself (i.e. this pointer is based on that one)
 163 // - The base defining value relation (i.e. before base_phi insertion)
 164 // Generally, after the execution of a full findBasePointer call, only the
 165 // base relation will remain.  Internally, we add a mixture of the two
 166 // types, then update all the second type to the first type
 167 typedef DenseMap<Value *, Value *> DefiningValueMapTy;
 168 typedef DenseSet<Value *> StatepointLiveSetTy;
 169 typedef DenseMap<AssertingVH<Instruction>, AssertingVH<Value>>
 170   RematerializedValueMapTy;
 171
/// Bookkeeping gathered for a single safepoint while it is being rewritten.
/// Note that the two token pointers have no initializer here; they are only
/// meaningful once code outside this struct has assigned them.
struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint
  StatepointLiveSetTy LiveSet;

  /// Mapping from live pointers to a base-defining-value
  DenseMap<Value *, Value *> PointerToBase;

  /// The *new* gc.statepoint instruction itself.  This produces the token
  /// that normal path gc.relocates and the gc.result are tied to.
  Instruction *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached.
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Records live values that are rematerialized instead of relocated.
  /// They are not included in the 'LiveSet' field.
  /// Maps each rematerialized copy to its original value.
  RematerializedValueMapTy RematerializedValues;
};
 192 }
 193
 194 static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
 195   assert(UseDeoptBundles && "Should not be called otherwise!");
 196
 197   Optional<OperandBundleUse> DeoptBundle = CS.getOperandBundle("deopt");
 198
 199   if (!DeoptBundle.hasValue()) {
 200     assert(AllowStatepointWithNoDeoptInfo &&
 201            "Found non-leaf call without deopt info!");
 202     return None;
 203   }
 204
 205   return DeoptBundle.getValue().Inputs;
 206 }
 207
 208 /// Compute the live-in set for every basic block in the function
 209 static void computeLiveInValues(DominatorTree &DT, Function &F,
 210                                 GCPtrLivenessData &Data);
 211
 212 /// Given results from the dataflow liveness computation, find the set of live
 213 /// Values at a particular instruction.
 214 static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
 215                               StatepointLiveSetTy &out);
 216
 217 // TODO: Once we can get to the GCStrategy, this becomes
 218 // Optional<bool> isGCManagedPointer(const Value *V) const override {
 219
 220 static bool isGCPointerType(Type *T) {
 221   if (auto *PT = dyn_cast<PointerType>(T))
 222     // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
 223     // GC managed heap.  We know that a pointer into this heap needs to be
 224     // updated and that no other pointer does.
 225     return (1 == PT->getAddressSpace());
 226   return false;
 227 }
 228
 229 // Return true if this type is one which a) is a gc pointer or contains a GC
 230 // pointer and b) is of a type this code expects to encounter as a live value.
 231 // (The insertion code will assert that a type which matches (a) and not (b)
 232 // is not encountered.)
 233 static bool isHandledGCPointerType(Type *T) {
 234   // We fully support gc pointers
 235   if (isGCPointerType(T))
 236     return true;
 237   // We partially support vectors of gc pointers. The code will assert if it
 238   // can't handle something.
 239   if (auto VT = dyn_cast<VectorType>(T))
 240     if (isGCPointerType(VT->getElementType()))
 241       return true;
 242   return false;
 243 }
 244
 245 #ifndef NDEBUG
 246 /// Returns true if this type contains a gc pointer whether we know how to
 247 /// handle that type or not.
 248 static bool containsGCPtrType(Type *Ty) {
 249   if (isGCPointerType(Ty))
 250     return true;
 251   if (VectorType *VT = dyn_cast<VectorType>(Ty))
 252     return isGCPointerType(VT->getScalarType());
 253   if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
 254     return containsGCPtrType(AT->getElementType());
 255   if (StructType *ST = dyn_cast<StructType>(Ty))
 256     return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
 257                        containsGCPtrType);
 258   return false;
 259 }
 260
 261 // Returns true if this is a type which a) is a gc pointer or contains a GC
 262 // pointer and b) is of a type which the code doesn't expect (i.e. first class
 263 // aggregates).  Used to trip assertions.
 264 static bool isUnhandledGCPointerType(Type *Ty) {
 265   return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
 266 }
 267 #endif
 268
 269 static bool order_by_name(Value *a, Value *b) {
 270   if (a->hasName() && b->hasName()) {
 271     return -1 == a->getName().compare(b->getName());
 272   } else if (a->hasName() && !b->hasName()) {
 273     return true;
 274   } else if (!a->hasName() && b->hasName()) {
 275     return false;
 276   } else {
 277     // Better than nothing, but not stable
 278     return a < b;
 279   }
 280 }
 281
 282 // Return the name of the value suffixed with the provided value, or if the
 283 // value didn't have a name, the default value specified.
 284 static std::string suffixed_name_or(Value *V, StringRef Suffix,
 285                                     StringRef DefaultName) {
 286   return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
 287 }
 288
 289 // Conservatively identifies any definitions which might be live at the
 290 // given instruction. The  analysis is performed immediately before the
 291 // given instruction. Values defined by that instruction are not considered
 292 // live.  Values used by that instruction are considered live.
 293 static void analyzeParsePointLiveness(
 294     DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
 295     const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
 296   Instruction *inst = CS.getInstruction();
 297
 298   StatepointLiveSetTy LiveSet;
 299   findLiveSetAtInst(inst, OriginalLivenessData, LiveSet);
 300
 301   if (PrintLiveSet) {
 302     // Note: This output is used by several of the test cases
 303     // The order of elements in a set is not stable, put them in a vec and sort
 304     // by name
 305     SmallVector<Value *, 64> Temp;
 306     Temp.insert(Temp.end(), LiveSet.begin(), LiveSet.end());
 307     std::sort(Temp.begin(), Temp.end(), order_by_name);
 308     errs() << "Live Variables:\n";
 309     for (Value *V : Temp)
 310       dbgs() << " " << V->getName() << " " << *V << "\n";
 311   }
 312   if (PrintLiveSetSize) {
 313     errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
 314     errs() << "Number live values: " << LiveSet.size() << "\n";
 315   }
 316   result.LiveSet = LiveSet;
 317 }
 318
 319 static bool isKnownBaseResult(Value *V);
 320 namespace {
 321 /// A single base defining value - An immediate base defining value for an
 322 /// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
 323 /// For instructions which have multiple pointer [vector] inputs or that
 324 /// transition between vector and scalar types, there is no immediate base
 325 /// defining value.  The 'base defining value' for 'Def' is the transitive
 326 /// closure of this relation stopping at the first instruction which has no
 327 /// immediate base defining value.  The b.d.v. might itself be a base pointer,
 328 /// but it can also be an arbitrary derived pointer.
 329 struct BaseDefiningValueResult {
 330   /// Contains the value which is the base defining value.
 331   Value * const BDV;
 332   /// True if the base defining value is also known to be an actual base
 333   /// pointer.
 334   const bool IsKnownBase;
 335   BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
 336     : BDV(BDV), IsKnownBase(IsKnownBase) {
 337 #ifndef NDEBUG
 338     // Check consistency between new and old means of checking whether a BDV is
 339     // a base.
 340     bool MustBeBase = isKnownBaseResult(BDV);
 341     assert(!MustBeBase || MustBeBase == IsKnownBase);
 342 #endif
 343   }
 344 };
 345 }
 346
 347 static BaseDefiningValueResult findBaseDefiningValue(Value *I);
 348
 349 /// Return a base defining value for the 'Index' element of the given vector
 350 /// instruction 'I'.  If Index is null, returns a BDV for the entire vector
 351 /// 'I'.  As an optimization, this method will try to determine when the
 352 /// element is known to already be a base pointer.  If this can be established,
 353 /// the second value in the returned pair will be true.  Note that either a
 354 /// vector or a pointer typed value can be returned.  For the former, the
 355 /// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
 356 /// If the later, the return pointer is a BDV (or possibly a base) for the
 357 /// particular element in 'I'.
 358 static BaseDefiningValueResult
 359 findBaseDefiningValueOfVector(Value *I) {
 360   assert(I->getType()->isVectorTy() &&
 361          cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
 362          "Illegal to ask for the base pointer of a non-pointer type");
 363
 364   // Each case parallels findBaseDefiningValue below, see that code for
 365   // detailed motivation.
 366
 367   if (isa<Argument>(I))
 368     // An incoming argument to the function is a base pointer
 369     return BaseDefiningValueResult(I, true);
 370
 371   // We shouldn't see the address of a global as a vector value?
 372   assert(!isa<GlobalVariable>(I) &&
 373          "unexpected global variable found in base of vector");
 374
 375   // inlining could possibly introduce phi node that contains
 376   // undef if callee has multiple returns
 377   if (isa<UndefValue>(I))
 378     // utterly meaningless, but useful for dealing with partially optimized
 379     // code.
 380     return BaseDefiningValueResult(I, true);
 381
 382   // Due to inheritance, this must be _after_ the global variable and undef
 383   // checks
 384   if (Constant *Con = dyn_cast<Constant>(I)) {
 385     assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
 386            "order of checks wrong!");
 387     assert(Con->isNullValue() && "null is the only case which makes sense");
 388     return BaseDefiningValueResult(Con, true);
 389   }
 390
 391   if (isa<LoadInst>(I))
 392     return BaseDefiningValueResult(I, true);
 393
 394   if (isa<InsertElementInst>(I))
 395     // We don't know whether this vector contains entirely base pointers or
 396     // not.  To be conservatively correct, we treat it as a BDV and will
 397     // duplicate code as needed to construct a parallel vector of bases.
 398     return BaseDefiningValueResult(I, false);
 399
 400   if (isa<ShuffleVectorInst>(I))
 401     // We don't know whether this vector contains entirely base pointers or
 402     // not.  To be conservatively correct, we treat it as a BDV and will
 403     // duplicate code as needed to construct a parallel vector of bases.
 404     // TODO: There a number of local optimizations which could be applied here
 405     // for particular sufflevector patterns.
 406     return BaseDefiningValueResult(I, false);
 407
 408   // A PHI or Select is a base defining value.  The outer findBasePointer
 409   // algorithm is responsible for constructing a base value for this BDV.
 410   assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
 411          "unknown vector instruction - no base found for vector element");
 412   return BaseDefiningValueResult(I, false);
 413 }
 414
 415 /// Helper function for findBasePointer - Will return a value which either a)
 416 /// defines the base pointer for the input, b) blocks the simple search
 417 /// (i.e. a PHI or Select of two derived pointers), or c) involves a change
 418 /// from pointer to vector type or back.
 419 static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
 420   if (I->getType()->isVectorTy())
 421     return findBaseDefiningValueOfVector(I);
 422
 423   assert(I->getType()->isPointerTy() &&
 424          "Illegal to ask for the base pointer of a non-pointer type");
 425
 426   if (isa<Argument>(I))
 427     // An incoming argument to the function is a base pointer
 428     // We should have never reached here if this argument isn't an gc value
 429     return BaseDefiningValueResult(I, true);
 430
 431   if (isa<GlobalVariable>(I))
 432     // base case
 433     return BaseDefiningValueResult(I, true);
 434
 435   // inlining could possibly introduce phi node that contains
 436   // undef if callee has multiple returns
 437   if (isa<UndefValue>(I))
 438     // utterly meaningless, but useful for dealing with
 439     // partially optimized code.
 440     return BaseDefiningValueResult(I, true);
 441
 442   // Due to inheritance, this must be _after_ the global variable and undef
 443   // checks
 444   if (isa<Constant>(I)) {
 445     assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
 446            "order of checks wrong!");
 447     // Note: Even for frontends which don't have constant references, we can
 448     // see constants appearing after optimizations.  A simple example is
 449     // specialization of an address computation on null feeding into a merge
 450     // point where the actual use of the now-constant input is protected by
 451     // another null check.  (e.g. test4 in constants.ll)
 452     return BaseDefiningValueResult(I, true);
 453   }
 454
 455   if (CastInst *CI = dyn_cast<CastInst>(I)) {
 456     Value *Def = CI->stripPointerCasts();
 457     // If we find a cast instruction here, it means we've found a cast which is
 458     // not simply a pointer cast (i.e. an inttoptr).  We don't know how to
 459     // handle int->ptr conversion.
 460     assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
 461     return findBaseDefiningValue(Def);
 462   }
 463
 464   if (isa<LoadInst>(I))
 465     // The value loaded is an gc base itself
 466     return BaseDefiningValueResult(I, true);
 467
 468
 469   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
 470     // The base of this GEP is the base
 471     return findBaseDefiningValue(GEP->getPointerOperand());
 472
 473   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
 474     switch (II->getIntrinsicID()) {
 475     case Intrinsic::experimental_gc_result_ptr:
 476     default:
 477       // fall through to general call handling
 478       break;
 479     case Intrinsic::experimental_gc_statepoint:
 480     case Intrinsic::experimental_gc_result_float:
 481     case Intrinsic::experimental_gc_result_int:
 482       llvm_unreachable("these don't produce pointers");
 483     case Intrinsic::experimental_gc_relocate: {
 484       // Rerunning safepoint insertion after safepoints are already
 485       // inserted is not supported.  It could probably be made to work,
 486       // but why are you doing this?  There's no good reason.
 487       llvm_unreachable("repeat safepoint insertion is not supported");
 488     }
 489     case Intrinsic::gcroot:
 490       // Currently, this mechanism hasn't been extended to work with gcroot.
 491       // There's no reason it couldn't be, but I haven't thought about the
 492       // implications much.
 493       llvm_unreachable(
 494           "interaction with the gcroot mechanism is not supported");
 495     }
 496   }
 497   // We assume that functions in the source language only return base
 498   // pointers.  This should probably be generalized via attributes to support
 499   // both source language and internal functions.
 500   if (isa<CallInst>(I) || isa<InvokeInst>(I))
 501     return BaseDefiningValueResult(I, true);
 502
 503   // I have absolutely no idea how to implement this part yet.  It's not
 504   // necessarily hard, I just haven't really looked at it yet.
 505   assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
 506
 507   if (isa<AtomicCmpXchgInst>(I))
 508     // A CAS is effectively a atomic store and load combined under a
 509     // predicate.  From the perspective of base pointers, we just treat it
 510     // like a load.
 511     return BaseDefiningValueResult(I, true);
 512
 513   assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
 514                                    "binary ops which don't apply to pointers");
 515
 516   // The aggregate ops.  Aggregates can either be in the heap or on the
 517   // stack, but in either case, this is simply a field load.  As a result,
 518   // this is a defining definition of the base just like a load is.
 519   if (isa<ExtractValueInst>(I))
 520     return BaseDefiningValueResult(I, true);
 521
 522   // We should never see an insert vector since that would require we be
 523   // tracing back a struct value not a pointer value.
 524   assert(!isa<InsertValueInst>(I) &&
 525          "Base pointer for a struct is meaningless");
 526
 527   // An extractelement produces a base result exactly when it's input does.
 528   // We may need to insert a parallel instruction to extract the appropriate
 529   // element out of the base vector corresponding to the input. Given this,
 530   // it's analogous to the phi and select case even though it's not a merge.
 531   if (isa<ExtractElementInst>(I))
 532     // Note: There a lot of obvious peephole cases here.  This are deliberately
 533     // handled after the main base pointer inference algorithm to make writing
 534     // test cases to exercise that code easier.
 535     return BaseDefiningValueResult(I, false);
 536
 537   // The last two cases here don't return a base pointer.  Instead, they
 538   // return a value which dynamically selects from among several base
 539   // derived pointers (each with it's own base potentially).  It's the job of
 540   // the caller to resolve these.
 541   assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
 542          "missing instruction case in findBaseDefiningValing");
 543   return BaseDefiningValueResult(I, false);
 544 }
 545
 546 /// Returns the base defining value for this value.
 547 static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
 548   Value *&Cached = Cache[I];
 549   if (!Cached) {
 550     Cached = findBaseDefiningValue(I).BDV;
 551     DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
 552                  << Cached->getName() << "\n");
 553   }
 554   assert(Cache[I] != nullptr);
 555   return Cached;
 556 }
 557
 558 /// Return a base pointer for this value if known.  Otherwise, return it's
 559 /// base defining value.
 560 static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
 561   Value *Def = findBaseDefiningValueCached(I, Cache);
 562   auto Found = Cache.find(Def);
 563   if (Found != Cache.end()) {
 564     // Either a base-of relation, or a self reference.  Caller must check.
 565     return Found->second;
 566   }
 567   // Only a BDV available
 568   return Def;
 569 }
 570
 571 /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
 572 /// is it known to be a base pointer?  Or do we need to continue searching.
 573 static bool isKnownBaseResult(Value *V) {
 574   if (!isa<PHINode>(V) && !isa<SelectInst>(V) &&
 575       !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
 576       !isa<ShuffleVectorInst>(V)) {
 577     // no recursion possible
 578     return true;
 579   }
 580   if (isa<Instruction>(V) &&
 581       cast<Instruction>(V)->getMetadata("is_base_value")) {
 582     // This is a previously inserted base phi or select.  We know
 583     // that this is a base value.
 584     return true;
 585   }
 586
 587   // We need to keep searching
 588   return false;
 589 }
 590
 591 namespace {
 592 /// Models the state of a single base defining value in the findBasePointer
 593 /// algorithm for determining where a new instruction is needed to propagate
 594 /// the base of this BDV.
 595 class BDVState {
 596 public:
 597   enum Status { Unknown, Base, Conflict };
 598
 599   BDVState(Status s, Value *b = nullptr) : status(s), base(b) {
 600     assert(status != Base || b);
 601   }
 602   explicit BDVState(Value *b) : status(Base), base(b) {}
 603   BDVState() : status(Unknown), base(nullptr) {}
 604
 605   Status getStatus() const { return status; }
 606   Value *getBase() const { return base; }
 607
 608   bool isBase() const { return getStatus() == Base; }
 609   bool isUnknown() const { return getStatus() == Unknown; }
 610   bool isConflict() const { return getStatus() == Conflict; }
 611
 612   bool operator==(const BDVState &other) const {
 613     return base == other.base && status == other.status;
 614   }
 615
 616   bool operator!=(const BDVState &other) const { return !(*this == other); }
 617
 618   LLVM_DUMP_METHOD
 619   void dump() const { print(dbgs()); dbgs() << '\n'; }
 620
 621   void print(raw_ostream &OS) const {
 622     switch (status) {
 623     case Unknown:
 624       OS << "U";
 625       break;
 626     case Base:
 627       OS << "B";
 628       break;
 629     case Conflict:
 630       OS << "C";
 631       break;
 632     };
 633     OS << " (" << base << " - "
 634        << (base ? base->getName() : "nullptr") << "): ";
 635   }
 636
 637 private:
 638   Status status;
 639   AssertingVH<Value> base; // non null only if status == base
 640 };
 641 }
 642
 643 #ifndef NDEBUG
/// Stream a BDVState by delegating to BDVState::print; returns the stream so
/// that insertions can be chained.  Only compiled when NDEBUG is not defined.
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  State.print(OS);
  return OS;
}
 648 #endif
 649
 650 namespace {
 651 // Values of type BDVState form a lattice, and this is a helper
 652 // class that implementes the meet operation.  The meat of the meet
 653 // operation is implemented in MeetBDVStates::pureMeet
 654 class MeetBDVStates {
 655 public:
 656   /// Initializes the currentResult to the TOP state so that if can be met with
 657   /// any other state to produce that state.
 658   MeetBDVStates() {}
 659
 660   // Destructively meet the current result with the given BDVState
 661   void meetWith(BDVState otherState) {
 662     currentResult = meet(otherState, currentResult);
 663   }
 664
 665   BDVState getResult() const { return currentResult; }
 666
 667 private:
 668   BDVState currentResult;
 669
 670   /// Perform a meet operation on two elements of the BDVState lattice.
 671   static BDVState meet(BDVState LHS, BDVState RHS) {
 672     assert((pureMeet(LHS, RHS) == pureMeet(RHS, LHS)) &&
 673            "math is wrong: meet does not commute!");
 674     BDVState Result = pureMeet(LHS, RHS);
 675     DEBUG(dbgs() << "meet of " << LHS << " with " << RHS
 676                  << " produced " << Result << "\n");
 677     return Result;
 678   }
 679
 680   static BDVState pureMeet(const BDVState &stateA, const BDVState &stateB) {
 681     switch (stateA.getStatus()) {
 682     case BDVState::Unknown:
 683       return stateB;
 684
 685     case BDVState::Base:
 686       assert(stateA.getBase() && "can't be null");
 687       if (stateB.isUnknown())
 688         return stateA;
 689
 690       if (stateB.isBase()) {
 691         if (stateA.getBase() == stateB.getBase()) {
 692           assert(stateA == stateB && "equality broken!");
 693           return stateA;
 694         }
 695         return BDVState(BDVState::Conflict);
 696       }
 697       assert(stateB.isConflict() && "only three states!");
 698       return BDVState(BDVState::Conflict);
 699
 700     case BDVState::Conflict:
 701       return stateA;
 702     }
 703     llvm_unreachable("only three states!");
 704   }
 705 };
 706 }
 707
 708
 709 /// For a given value or instruction, figure out what base pointer it is
 710 /// derived from.  For gc objects, this is simply itself.  Returns a value
 711 /// which is the base pointer.  (This is reliable and can be used for
 712 /// relocation.)
     ///
     /// 'cache' is consulted through findBaseOrBDV and, before returning, is
     /// updated with the base found for every base defining value (BDV) that
     /// was visited.  When the BDV of 'I' is not already a known base, new phi,
     /// select, extractelement, insertelement and bitcast instructions may be
     /// inserted into the IR to materialize the base.
     ///
     /// NOTE(review): no path in this body visibly returns nullptr; problems
     /// surface through the asserts and llvm_unreachable below.
 713 static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
 714   Value *def = findBaseOrBDV(I, cache);
 715
       // Fast path: the base defining value is already known to be a base.
 716   if (isKnownBaseResult(def)) {
 717     return def;
 718   }
 719
 720   // Here's the rough algorithm:
 721   // - For every SSA value, construct a mapping to either an actual base
 722   //   pointer or a PHI which obscures the base pointer.
 723   // - Construct a mapping from PHI to unknown TOP state.  Use an
 724   //   optimistic algorithm to propagate base pointer information.  Lattice
 725   //   looks like:
 726   //   UNKNOWN
 727   //   b1 b2 b3 b4
 728   //   CONFLICT
 729   //   When algorithm terminates, all PHIs will either have a single concrete
 730   //   base or be in a conflict state.
 731   // - For every conflict, insert a dummy PHI node without arguments.  Add
 732   //   these to the base[Instruction] = BasePtr mapping.  For every
 733   //   non-conflict, add the actual base.
 734   // - For every conflict, add arguments for the base[a] of each input
 735   //   argument.
 736   //
 737   // Note: A simpler form of this would be to add the conflict form of all
 738   // PHIs without running the optimistic algorithm.  This would be
 739   // analogous to pessimistic data flow and would likely lead to an
 740   // overall worse solution.
 741
 742 #ifndef NDEBUG
       // Debug-only predicate used by the assert in visitIncomingValue below.
 743   auto isExpectedBDVType = [](Value *BDV) {
 744     return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
 745            isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV);
 746   };
 747 #endif
 748
 749   // Once populated, will contain a mapping from each potentially non-base BDV
 750   // to a lattice value (described above) which corresponds to that BDV.
 751   // We use the order of insertion (DFS over the def/use graph) to provide a
 752   // stable deterministic ordering for visiting DenseMaps (which are unordered)
 753   // below.  This is important for deterministic compilation.
 754   MapVector<Value *, BDVState> States;
 755
 756   // Recursively fill in all base defining values reachable from the initial
 757   // one for which we don't already know a definite base value.
 758   /* scope */ {
 759     SmallVector<Value*, 16> Worklist;
 760     Worklist.push_back(def);
 761     States.insert(std::make_pair(def, BDVState()));
 762     while (!Worklist.empty()) {
 763       Value *Current = Worklist.pop_back_val();
 764       assert(!isKnownBaseResult(Current) && "why did it get added?");
 765
           // Registers the BDV of an operand in States (in the default state) and
           // queues it the first time it is seen.
 766       auto visitIncomingValue = [&](Value *InVal) {
 767         Value *Base = findBaseOrBDV(InVal, cache);
 768         if (isKnownBaseResult(Base))
 769           // Known bases won't need new instructions introduced and can be
 770           // ignored safely
 771           return;
 772         assert(isExpectedBDVType(Base) && "the only non-base values "
 773                "we see should be base defining values");
 774         if (States.insert(std::make_pair(Base, BDVState())).second)
 775           Worklist.push_back(Base);
 776       };
 777       if (PHINode *Phi = dyn_cast<PHINode>(Current)) {
 778         for (Value *InVal : Phi->incoming_values())
 779           visitIncomingValue(InVal);
 780       } else if (SelectInst *Sel = dyn_cast<SelectInst>(Current)) {
 781         visitIncomingValue(Sel->getTrueValue());
 782         visitIncomingValue(Sel->getFalseValue());
 783       } else if (auto *EE = dyn_cast<ExtractElementInst>(Current)) {
 784         visitIncomingValue(EE->getVectorOperand());
 785       } else if (auto *IE = dyn_cast<InsertElementInst>(Current)) {
 786         visitIncomingValue(IE->getOperand(0)); // vector operand
 787         visitIncomingValue(IE->getOperand(1)); // scalar operand
 788       } else {
 789         // There is one known class of instructions we know we don't handle.
 790         assert(isa<ShuffleVectorInst>(Current));
 791         llvm_unreachable("unimplemented instruction case");
 792       }
 793     }
 794   }
 795
 796 #ifndef NDEBUG
 797   DEBUG(dbgs() << "States after initialization:\n");
 798   for (auto Pair : States) {
 799     DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
 800   }
 801 #endif
 802
 803   // Return a phi state for a base defining value.  We'll generate a new
 804   // base state for known bases and expect to find a cached state otherwise.
 805   auto getStateForBDV = [&](Value *baseValue) {
 806     if (isKnownBaseResult(baseValue))
 807       return BDVState(baseValue);
 808     auto I = States.find(baseValue);
 809     assert(I != States.end() && "lookup failed!");
 810     return I->second;
 811   };
 812
       // Iterate the meet over all recorded BDVs until no state changes.
 813   bool progress = true;
 814   while (progress) {
 815 #ifndef NDEBUG
 816     const size_t oldSize = States.size();
 817 #endif
 818     progress = false;
 819     // We're only changing values in this loop, thus safe to keep iterators.
 820     // Since this is computing a fixed point, the order of visit does not
 821     // affect the result.  TODO: We could use a worklist here and make this run
 822     // much faster.
 823     for (auto Pair : States) {
 824       Value *BDV = Pair.first;
 825       assert(!isKnownBaseResult(BDV) && "why did it get added?");
 826
 827       // Given an input value for the current instruction, return a BDVState
 828       // instance which represents the BDV of that value.
 829       auto getStateForInput = [&](Value *V) mutable {
 830         Value *BDV = findBaseOrBDV(V, cache);
 831         return getStateForBDV(BDV);
 832       };
 833
 834       MeetBDVStates calculateMeet;
 835       if (SelectInst *select = dyn_cast<SelectInst>(BDV)) {
 836         calculateMeet.meetWith(getStateForInput(select->getTrueValue()));
 837         calculateMeet.meetWith(getStateForInput(select->getFalseValue()));
 838       } else if (PHINode *Phi = dyn_cast<PHINode>(BDV)) {
 839         for (Value *Val : Phi->incoming_values())
 840           calculateMeet.meetWith(getStateForInput(Val));
 841       } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
 842         // The 'meet' for an extractelement is slightly trivial, but it's still
 843         // useful in that it drives us to conflict if our input is.
 844         calculateMeet.meetWith(getStateForInput(EE->getVectorOperand()));
 845       } else {
 846         // Given there's an inherent type mismatch between the operands, will
 847         // *always* produce Conflict.
 848         auto *IE = cast<InsertElementInst>(BDV);
 849         calculateMeet.meetWith(getStateForInput(IE->getOperand(0)));
 850         calculateMeet.meetWith(getStateForInput(IE->getOperand(1)));
 851       }
 852
           // Pair is a by-value copy, so the state is read and written through
           // States itself.
 853       BDVState oldState = States[BDV];
 854       BDVState newState = calculateMeet.getResult();
 855       if (oldState != newState) {
 856         progress = true;
 857         States[BDV] = newState;
 858       }
 859     }
 860
 861     assert(oldSize == States.size() &&
 862            "fixed point shouldn't be adding any new nodes to state");
 863   }
 864
 865 #ifndef NDEBUG
 866   DEBUG(dbgs() << "States after meet iteration:\n");
 867   for (auto Pair : States) {
 868     DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
 869   }
 870 #endif
 871
 872   // Insert Phis for all conflicts
 873   // TODO: adjust naming patterns to avoid this order of iteration dependency
 874   for (auto Pair : States) {
 875     Instruction *I = cast<Instruction>(Pair.first);
 876     BDVState State = Pair.second;
 877     assert(!isKnownBaseResult(I) && "why did it get added?");
 878     assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
 879
 880     // extractelement instructions are a bit special in that we may need to
 881     // insert an extract even when we know an exact base for the instruction.
 882     // The problem is that we need to convert from a vector base to a scalar
 883     // base for the particular index we're interested in.
 884     if (State.isBase() && isa<ExtractElementInst>(I) &&
 885         isa<VectorType>(State.getBase()->getType())) {
 886       auto *EE = cast<ExtractElementInst>(I);
 887       // TODO: In many cases, the new instruction is just EE itself.  We should
 888       // exploit this, but can't do it here since it would break the invariant
 889       // about the BDV not being known to be a base.
 890       auto *BaseInst = ExtractElementInst::Create(State.getBase(),
 891                                                   EE->getIndexOperand(),
 892                                                   "base_ee", EE);
 893       BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
           // The local 'State' copy still holds the original Base state, so the
           // conflict handling below is skipped for this instruction.
 894       States[I] = BDVState(BDVState::Base, BaseInst);
 895     }
 896
 897     // Since we're joining a vector and scalar base, they can never be the
 898     // same.  As a result, we should always see insert element having reached
 899     // the conflict state.
 900     if (isa<InsertElementInst>(I)) {
 901       assert(State.isConflict());
 902     }
 903
 904     if (!State.isConflict())
 905       continue;
 906
 907     /// Create and insert a new instruction which will represent the base of
 908     /// the given instruction 'I'.  The new instruction is inserted directly
         /// before 'I'; its operands are placeholders that get filled in by the
         /// fix-up loop further below.
 909     auto MakeBaseInstPlaceholder = [](Instruction *I) -> Instruction* {
 910       if (isa<PHINode>(I)) {
 911         BasicBlock *BB = I->getParent();
 912         int NumPreds = std::distance(pred_begin(BB), pred_end(BB));
 913         assert(NumPreds > 0 && "how did we reach here");
 914         std::string Name = suffixed_name_or(I, ".base", "base_phi");
 915         return PHINode::Create(I->getType(), NumPreds, Name, I);
 916       } else if (SelectInst *Sel = dyn_cast<SelectInst>(I)) {
 917         // The undef will be replaced later
 918         UndefValue *Undef = UndefValue::get(Sel->getType());
 919         std::string Name = suffixed_name_or(I, ".base", "base_select");
 920         return SelectInst::Create(Sel->getCondition(), Undef,
 921                                   Undef, Name, Sel);
 922       } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
 923         UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType());
 924         std::string Name = suffixed_name_or(I, ".base", "base_ee");
 925         return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name,
 926                                           EE);
 927       } else {
 928         auto *IE = cast<InsertElementInst>(I);
 929         UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType());
 930         UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType());
 931         std::string Name = suffixed_name_or(I, ".base", "base_ie");
 932         return InsertElementInst::Create(VecUndef, ScalarUndef,
 933                                          IE->getOperand(2), Name, IE);
 934       }
 935
 936     };
 937     Instruction *BaseInst = MakeBaseInstPlaceholder(I);
 938     // Add metadata marking this as a base value
 939     BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
 940     States[I] = BDVState(BDVState::Conflict, BaseInst);
 941   }
 942
 943   // Returns an instruction which produces the base pointer for a given
 944   // instruction.  The instruction is assumed to be an input to one of the BDVs
 945   // seen in the inference algorithm above.  As such, we must either already
 946   // know its base defining value is a base, or have inserted a new
 947   // instruction to propagate the base of its BDV and have entered that newly
 948   // introduced instruction into the state table.  In either case, we are
 949   // assured to be able to determine an instruction which produces its base
 950   // pointer.
       // When InsertPt is null no bitcast is created, so the returned base may
       // differ in type from Input.
 951   auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
 952     Value *BDV = findBaseOrBDV(Input, cache);
 953     Value *Base = nullptr;
 954     if (isKnownBaseResult(BDV)) {
 955       Base = BDV;
 956     } else {
 957       // Either conflict or base.
 958       assert(States.count(BDV));
 959       Base = States[BDV].getBase();
 960     }
 961     assert(Base && "can't be null");
 962     // The cast is needed since base traversal may strip away bitcasts
 963     if (Base->getType() != Input->getType() &&
 964         InsertPt) {
 965       Base = new BitCastInst(Base, Input->getType(), "cast",
 966                              InsertPt);
 967     }
 968     return Base;
 969   };
 970
 971   // Fixup all the inputs of the new PHIs.  Visit order needs to be
 972   // deterministic and predictable because we're naming newly created
 973   // instructions.
 974   for (auto Pair : States) {
 975     Instruction *BDV = cast<Instruction>(Pair.first);
 976     BDVState State = Pair.second;
 977
 978     assert(!isKnownBaseResult(BDV) && "why did it get added?");
 979     assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
 980     if (!State.isConflict())
 981       continue;
 982
 983     if (PHINode *basephi = dyn_cast<PHINode>(State.getBase())) {
 984       PHINode *phi = cast<PHINode>(BDV);
 985       unsigned NumPHIValues = phi->getNumIncomingValues();
 986       for (unsigned i = 0; i < NumPHIValues; i++) {
 987         Value *InVal = phi->getIncomingValue(i);
 988         BasicBlock *InBB = phi->getIncomingBlock(i);
 989
 990         // If we've already seen InBB, add the same incoming value
 991         // we added for it earlier.  The IR verifier requires phi
 992         // nodes with multiple entries from the same basic block
 993         // to have the same incoming value for each of those
 994         // entries.  If we don't do this check here and basephi
 995         // has a different type than base, we'll end up adding two
 996         // bitcasts (and hence two distinct values) as incoming
 997         // values for the same basic block.
 998
 999         int blockIndex = basephi->getBasicBlockIndex(InBB);
1000         if (blockIndex != -1) {
1001           Value *oldBase = basephi->getIncomingValue(blockIndex);
1002           basephi->addIncoming(oldBase, InBB);
1003
1004 #ifndef NDEBUG
1005           Value *Base = getBaseForInput(InVal, nullptr);
1006           // In essence this assert states: the only way two
1007           // values incoming from the same basic block may be
1008           // different is by being different bitcasts of the same
1009           // value.  A cleanup that remains TODO is changing
1010           // findBaseOrBDV to return an llvm::Value of the correct
1011           // type (and still remain pure).  This will remove the
1012           // need to add bitcasts.
1013           assert(Base->stripPointerCasts() == oldBase->stripPointerCasts() &&
1014                  "sanity -- findBaseOrBDV should be pure!");
1015 #endif
1016           continue;
1017         }
1018
1019         // Find the instruction which produces the base for each input.  We may
1020         // need to insert a bitcast in the incoming block.
1021         // TODO: Need to split critical edges if insertion is needed
1022         Value *Base = getBaseForInput(InVal, InBB->getTerminator());
1023         basephi->addIncoming(Base, InBB);
1024       }
1025       assert(basephi->getNumIncomingValues() == NumPHIValues);
1026     } else if (SelectInst *BaseSel = dyn_cast<SelectInst>(State.getBase())) {
1027       SelectInst *Sel = cast<SelectInst>(BDV);
1028       // Operand 1 & 2 are true, false path respectively. TODO: refactor to
1029       // something more safe and less hacky.
1030       for (int i = 1; i <= 2; i++) {
1031         Value *InVal = Sel->getOperand(i);
1032         // Find the instruction which produces the base for each input.  We may
1033         // need to insert a bitcast.
1034         Value *Base = getBaseForInput(InVal, BaseSel);
1035         BaseSel->setOperand(i, Base);
1036       }
1037     } else if (auto *BaseEE = dyn_cast<ExtractElementInst>(State.getBase())) {
1038       Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
1039       // Find the instruction which produces the base for each input.  We may
1040       // need to insert a bitcast.
1041       Value *Base = getBaseForInput(InVal, BaseEE);
1042       BaseEE->setOperand(0, Base);
1043     } else {
1044       auto *BaseIE = cast<InsertElementInst>(State.getBase());
1045       auto *BdvIE = cast<InsertElementInst>(BDV);
1046       auto UpdateOperand = [&](int OperandIdx) {
1047         Value *InVal = BdvIE->getOperand(OperandIdx);
1048         Value *Base = getBaseForInput(InVal, BaseIE);
1049         BaseIE->setOperand(OperandIdx, Base);
1050       };
1051       UpdateOperand(0); // vector operand
1052       UpdateOperand(1); // scalar operand
1053     }
1054
1055   }
1056
1057   // Now that we're done with the algorithm, see if we can optimize the
1058   // results slightly by reducing the number of new instructions needed.
1059   // Arguably, this should be integrated into the algorithm above, but
1060   // doing as a post process step is easier to reason about for the moment.
1061   DenseMap<Value *, Value *> ReverseMap;
1062   SmallPtrSet<Instruction *, 16> NewInsts;
1063   SmallSetVector<AssertingVH<Instruction>, 16> Worklist;
1064   // Note: We need to visit the states in a deterministic order.  We use the
1065   // Keys we sorted above for this purpose.  Note that we are papering over a
1066   // bigger problem with the algorithm above - its visit order is not
1067   // deterministic.  A larger change is needed to fix this.
       // NOTE(review): no sorted key list is visible in this function; the loop
       // below simply iterates States, a MapVector, in insertion order.
1068   for (auto Pair : States) {
1069     auto *BDV = Pair.first;
1070     auto State = Pair.second;
1071     Value *Base = State.getBase();
1072     assert(BDV && Base);
1073     assert(!isKnownBaseResult(BDV) && "why did it get added?");
1074     assert(isKnownBaseResult(Base) &&
1075            "must be something we 'know' is a base pointer");
1076     if (!State.isConflict())
1077       continue;
1078
1079     ReverseMap[Base] = BDV;
1080     if (auto *BaseI = dyn_cast<Instruction>(Base)) {
1081       NewInsts.insert(BaseI);
1082       Worklist.insert(BaseI);
1083     }
1084   }
       // Replaces the newly inserted base instruction BaseI (recorded as the base
       // of BDV) with Replacement, updates the bookkeeping, and erases BaseI.
1085   auto ReplaceBaseInstWith = [&](Value *BDV, Instruction *BaseI,
1086                                  Value *Replacement) {
1087     // Add users which are new instructions (excluding self references)
1088     for (User *U : BaseI->users())
1089       if (auto *UI = dyn_cast<Instruction>(U))
1090         if (NewInsts.count(UI) && UI != BaseI)
1091           Worklist.insert(UI);
1092     // Then do the actual replacement
1093     NewInsts.erase(BaseI);
1094     ReverseMap.erase(BaseI);
1095     BaseI->replaceAllUsesWith(Replacement);
1096     assert(States.count(BDV));
1097     assert(States[BDV].isConflict() && States[BDV].getBase() == BaseI);
1098     States[BDV] = BDVState(BDVState::Conflict, Replacement);
1099     BaseI->eraseFromParent();
1100   };
1101   const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout();
       // Try to fold each newly inserted base instruction: first against the BDV
       // it was created for, then through SimplifyInstruction.
1102   while (!Worklist.empty()) {
1103     Instruction *BaseI = Worklist.pop_back_val();
1104     assert(NewInsts.count(BaseI));
1105     Value *Bdv = ReverseMap[BaseI];
1106     if (auto *BdvI = dyn_cast<Instruction>(Bdv))
1107       if (BaseI->isIdenticalTo(BdvI)) {
1108         DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n");
1109         ReplaceBaseInstWith(Bdv, BaseI, Bdv);
1110         continue;
1111       }
1112     if (Value *V = SimplifyInstruction(BaseI, DL)) {
1113       DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n");
1114       ReplaceBaseInstWith(Bdv, BaseI, V);
1115       continue;
1116     }
1117   }
1118
1119   // Cache all of our results so we can cheaply reuse them
1120   // NOTE: This is actually two caches: one of the base defining value
1121   // relation and one of the base pointer relation!  FIXME
1122   for (auto Pair : States) {
1123     auto *BDV = Pair.first;
1124     Value *base = Pair.second.getBase();
1125     assert(BDV && base);
1126
         // fromstr is only consumed by the debug output below.
1127     std::string fromstr = cache.count(BDV) ? cache[BDV]->getName() : "none";
1128     DEBUG(dbgs() << "Updating base value cache"
1129           << " for: " << BDV->getName()
1130           << " from: " << fromstr
1131           << " to: " << base->getName() << "\n");
1132
1133     if (cache.count(BDV)) {
1134       // Once we transition from the BDV relation being stored in the cache to
1135       // the base relation being stored, it must be stable
1136       assert((!isKnownBaseResult(cache[BDV]) || cache[BDV] == base) &&
1137              "base relation should be stable");
1138     }
1139     cache[BDV] = base;
1140   }
1141   assert(cache.find(def) != cache.end());
1142   return cache[def];
1143 }
1144
1145 // For a set of live pointers (base and/or derived), identify the base
1146 // pointer of the object which they are derived from.  This routine will
1147 // mutate the IR graph as needed to make the 'base' pointer live at the
1148 // definition site of 'derived'.  This ensures that any use of 'derived' can
1149 // also use 'base'.  This may involve the insertion of a number of
1150 // additional PHI nodes.
1151 //
1152 // preconditions: live is a set of pointer type Values
1153 //
1154 // side effects: may insert PHI nodes into the existing CFG, will preserve
1155 // CFG, will not remove or mutate any existing nodes
1156 //
1157 // post condition: PointerToBase contains one (derived, base) pair for every
1158 // pointer in live.  Note that derived can be equal to base if the original
1159 // pointer was a base pointer.
1160 static void
1161 findBasePointers(const StatepointLiveSetTy &live,
1162                  DenseMap<Value *, Value *> &PointerToBase,
1163                  DominatorTree *DT, DefiningValueMapTy &DVCache) {
1164   // For the naming of values inserted to be deterministic - which makes for
1165   // much cleaner and more stable tests - we need to assign an order to the
1166   // live values.  DenseSets do not provide a deterministic order across runs.
1167   SmallVector<Value *, 64> Temp;
1168   Temp.insert(Temp.end(), live.begin(), live.end());
1169   std::sort(Temp.begin(), Temp.end(), order_by_name);
1170   for (Value *ptr : Temp) {
1171     Value *base = findBasePointer(ptr, DVCache);
1172     assert(base && "failed to find base pointer");
1173     PointerToBase[ptr] = base;
1174     assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
1175             DT->dominates(cast<Instruction>(base)->getParent(),
1176                           cast<Instruction>(ptr)->getParent())) &&
1177            "The base we found better dominate the derived pointer");
1178
1179     // If you see this trip and like to live really dangerously, the code should
1180     // be correct, just with idioms the verifier can't handle.  You can try
1181     // disabling the verifier at your own substantial risk.
1182     assert(!isa<ConstantPointerNull>(base) &&
1183            "the relocation code needs adjustment to handle the relocation of "
1184            "a null pointer constant without causing false positives in the "
1185            "safepoint ir verifier.");
1186   }
1187 }
1188
1189 /// Find the required based pointers (and adjust the live set) for the given
1190 /// parse point.
1191 static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
1192                              const CallSite &CS,
1193                              PartiallyConstructedSafepointRecord &result) {
1194   DenseMap<Value *, Value *> PointerToBase;
1195   findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);
1196
1197   if (PrintBasePointers) {
1198     // Note: Need to print these in a stable order since this is checked in
1199     // some tests.
1200     errs() << "Base Pairs (w/o Relocation):\n";
1201     SmallVector<Value *, 64> Temp;
1202     Temp.reserve(PointerToBase.size());
1203     for (auto Pair : PointerToBase) {
1204       Temp.push_back(Pair.first);
1205     }
1206     std::sort(Temp.begin(), Temp.end(), order_by_name);
1207     for (Value *Ptr : Temp) {
1208       Value *Base = PointerToBase[Ptr];
1209       errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
1210              << "\n";
1211     }
1212   }
1213
1214   result.PointerToBase = PointerToBase;
1215 }
1216
1217 /// Given an updated version of the dataflow liveness results, update the
1218 /// liveset and base pointer maps for the call site CS.
1219 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
1220                                   const CallSite &CS,
1221                                   PartiallyConstructedSafepointRecord &result);
1222
1223 static void recomputeLiveInValues(
1224     Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
1225     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
1226   // TODO-PERF: reuse the original liveness, then simply run the dataflow
1227   // again.  The old values are still live and will help it stabilize quickly.
1228   GCPtrLivenessData RevisedLivenessData;
1229   computeLiveInValues(DT, F, RevisedLivenessData);
1230   for (size_t i = 0; i < records.size(); i++) {
1231     struct PartiallyConstructedSafepointRecord &info = records[i];
1232     const CallSite &CS = toUpdate[i];
1233     recomputeLiveInValues(RevisedLivenessData, CS, info);
1234   }
1235 }
1236
1237 // When inserting gc.relocate and gc.result calls, there must be no uses of
1238 // the original value / return value between the gc.statepoint and the
1239 // gc.relocate / gc.result call.  One case which can arise is a phi node
1240 // starting one of the successor blocks.  The gc.relocates also have to be
1241 // inserted only on the path which goes through the statepoint, which may
1242 // require splitting an edge.
1243 static BasicBlock *
1244 normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
1245                             DominatorTree &DT) {
1246   BasicBlock *Target = BB->getUniquePredecessor()
1247                            ? BB
1248                            : SplitBlockPredecessors(BB, InvokeParent, "", &DT);
1249
1250   // 'Target' now has a unique predecessor, so all of its phi nodes can be
1251   // safely removed.
1252   FoldSingleEntryPHINodes(Target);
1253   assert(!isa<PHINode>(Target->begin()) &&
1254          "All PHI nodes should have been removed!");
1255
1256   // A gc.relocate or gc.result can now be safely inserted as the first
1257   // instruction of 'Target' if needed.
1258   return Target;
1259 }
1260
1261 // Create new attribute set containing only attributes which can be transferred
1262 // from original call to the safepoint.
1263 static AttributeSet legalizeCallAttributes(AttributeSet AS) {
1264   AttributeSet Ret;
1265
1266   for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
1267     unsigned Index = AS.getSlotIndex(Slot);
1268
1269     if (Index == AttributeSet::ReturnIndex ||
1270         Index == AttributeSet::FunctionIndex) {
1271
1272       for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) {
1273
1274         // Do not allow certain attributes - just skip them
1275         // Safepoint can not be read only or read none.
1276         if (Attr.hasAttribute(Attribute::ReadNone) ||
1277             Attr.hasAttribute(Attribute::ReadOnly))
1278           continue;
1279
1280         // These attributes control the generation of the gc.statepoint call /
1281         // invoke itself; and once the gc.statepoint is in place, they're of no
1282         // use.
1283         if (Attr.hasAttribute("statepoint-num-patch-bytes") ||
1284             Attr.hasAttribute("statepoint-id"))
1285           continue;
1286
1287         Ret = Ret.addAttributes(
1288             AS.getContext(), Index,
1289             AttributeSet::get(AS.getContext(), Index, AttrBuilder(Attr)));
1290       }
1291     }
1292
1293     // Just skip parameter attributes for now
1294   }
1295
1296   return Ret;
1297 }
1298
1299 /// Helper function to place all gc relocates necessary for the given
1300 /// statepoint.
1301 /// Inputs:
1302 ///   liveVariables - list of variables to be relocated.
1303 ///   liveStart - index of the first live variable.
1304 ///   basePtrs - base pointers.
1305 ///   statepointToken - statepoint instruction to which relocates should be
1306 ///   bound.
1307 ///   Builder - Llvm IR builder to be used to construct new calls.
1308 static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
1309                               const int LiveStart,
1310                               ArrayRef<Value *> BasePtrs,
1311                               Instruction *StatepointToken,
1312                               IRBuilder<> Builder) {
1313   if (LiveVariables.empty())
1314     return;
1315
1316   auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
1317     auto ValIt = std::find(LiveVec.begin(), LiveVec.end(), Val);
1318     assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
1319     size_t Index = std::distance(LiveVec.begin(), ValIt);
1320     assert(Index < LiveVec.size() && "Bug in std::find?");
1321     return Index;
1322   };
1323
1324   // All gc_relocate are set to i8 addrspace(1)* type. We originally generated
1325   // unique declarations for each pointer type, but this proved problematic
1326   // because the intrinsic mangling code is incomplete and fragile.  Since
1327   // we're moving towards a single unified pointer type anyways, we can just
1328   // cast everything to an i8* of the right address space.  A bitcast is added
1329   // later to convert gc_relocate to the actual value's type.
1330   Module *M = StatepointToken->getModule();
1331   auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace();
1332   Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)};
1333   Value *GCRelocateDecl =
1334     Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
1335
1336   for (unsigned i = 0; i < LiveVariables.size(); i++) {
1337     // Generate the gc.relocate call and save the result
1338     Value *BaseIdx =
1339       Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
1340     Value *LiveIdx = Builder.getInt32(LiveStart + i);
1341
1342     // only specify a debug name if we can give a useful one
1343     CallInst *Reloc = Builder.CreateCall(
1344         GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
1345         suffixed_name_or(LiveVariables[i], ".relocated", ""));
1346     // Trick CodeGen into thinking there are lots of free registers at this
1347     // fake call.
1348     Reloc->setCallingConv(CallingConv::Cold);
1349   }
1350 }
1351
1352 namespace {
1353
1354 /// This struct is used to defer RAUWs and `eraseFromParent` s.  Using this
1355 /// avoids having to worry about keeping around dangling pointers to Values.
1356 class DeferredReplacement {
1357   AssertingVH<Instruction> Old;
1358   AssertingVH<Instruction> New;
1359
1360 public:
1361   explicit DeferredReplacement(Instruction *Old, Instruction *New) :
1362     Old(Old), New(New) {
1363     assert(Old != New && "Not allowed!");
1364   }
1365
1366   /// Does the task represented by this instance.
1367   void doReplacement() {
1368     Instruction *OldI = Old;
1369     Instruction *NewI = New;
1370
1371     assert(OldI != NewI && "Disallowed at construction?!");
1372
1373     Old = nullptr;
1374     New = nullptr;
1375
1376     if (NewI)
1377       OldI->replaceAllUsesWith(NewI);
1378     OldI->eraseFromParent();
1379   }
1380 };
1381 }
1382
1383 static void
1384 makeStatepointExplicitImpl(const CallSite CS, /* to replace */
1385                            const SmallVectorImpl<Value *> &BasePtrs,
1386                            const SmallVectorImpl<Value *> &LiveVariables,
1387                            PartiallyConstructedSafepointRecord &Result,
1388                            std::vector<DeferredReplacement> &Replacements) {
1389   assert(BasePtrs.size() == LiveVariables.size());
1390   assert((UseDeoptBundles || isStatepoint(CS)) &&
1391          "This method expects to be rewriting a statepoint");
1392
1393   // Then go ahead and use the builder do actually do the inserts.  We insert
1394   // immediately before the previous instruction under the assumption that all
1395   // arguments will be available here.  We can't insert afterwards since we may
1396   // be replacing a terminator.
1397   Instruction *InsertBefore = CS.getInstruction();
1398   IRBuilder<> Builder(InsertBefore);
1399
1400   ArrayRef<Value *> GCArgs(LiveVariables);
1401   uint64_t StatepointID = 0xABCDEF00;
1402   uint32_t NumPatchBytes = 0;
1403   uint32_t Flags = uint32_t(StatepointFlags::None);
1404
1405   ArrayRef<Use> CallArgs;
1406   ArrayRef<Use> DeoptArgs;
1407   ArrayRef<Use> TransitionArgs;
1408
1409   Value *CallTarget = nullptr;
1410
1411   if (UseDeoptBundles) {
1412     CallArgs = {CS.arg_begin(), CS.arg_end()};
1413     DeoptArgs = GetDeoptBundleOperands(CS);
1414     // TODO: we don't fill in TransitionArgs or Flags in this branch, but we
1415     // could have an operand bundle for that too.
1416     AttributeSet OriginalAttrs = CS.getAttributes();
1417
1418     Attribute AttrID = OriginalAttrs.getAttribute(AttributeSet::FunctionIndex,
1419                                                   "statepoint-id");
1420     if (AttrID.isStringAttribute())
1421       AttrID.getValueAsString().getAsInteger(10, StatepointID);
1422
1423     Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
1424         AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");
1425     if (AttrNumPatchBytes.isStringAttribute())
1426       AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);
1427
1428     CallTarget = CS.getCalledValue();
1429   } else {
1430     // This branch will be gone soon, and we will soon only support the
1431     // UseDeoptBundles == true configuration.
1432     Statepoint OldSP(CS);
1433     StatepointID = OldSP.getID();
1434     NumPatchBytes = OldSP.getNumPatchBytes();
1435     Flags = OldSP.getFlags();
1436
1437     CallArgs = {OldSP.arg_begin(), OldSP.arg_end()};
1438     DeoptArgs = {OldSP.vm_state_begin(), OldSP.vm_state_end()};
1439     TransitionArgs = {OldSP.gc_transition_args_begin(),
1440                       OldSP.gc_transition_args_end()};
1441     CallTarget = OldSP.getCalledValue();
1442   }
1443
1444   // Create the statepoint given all the arguments
1445   Instruction *Token = nullptr;
1446   AttributeSet ReturnAttrs;
1447   if (CS.isCall()) {
1448     CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
1449     CallInst *Call = Builder.CreateGCStatepointCall(
1450         StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
1451         TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
1452
1453     Call->setTailCall(ToReplace->isTailCall());
1454     Call->setCallingConv(ToReplace->getCallingConv());
1455
1456     // Currently we will fail on parameter attributes and on certain
1457     // function attributes.
1458     AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
1459     // In case if we can handle this set of attributes - set up function attrs
1460     // directly on statepoint and return attrs later for gc_result intrinsic.
1461     Call->setAttributes(NewAttrs.getFnAttributes());
1462     ReturnAttrs = NewAttrs.getRetAttributes();
1463
1464     Token = Call;
1465
1466     // Put the following gc_result and gc_relocate calls immediately after the
1467     // the old call (which we're about to delete)
1468     assert(ToReplace->getNextNode() && "Not a terminator, must have next!");
1469     Builder.SetInsertPoint(ToReplace->getNextNode());
1470     Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
1471   } else {
1472     InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
1473
1474     // Insert the new invoke into the old block.  We'll remove the old one in a
1475     // moment at which point this will become the new terminator for the
1476     // original block.
1477     InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
1478         StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(),
1479         ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
1480         GCArgs, "statepoint_token");
1481
1482     Invoke->setCallingConv(ToReplace->getCallingConv());
1483
1484     // Currently we will fail on parameter attributes and on certain
1485     // function attributes.
1486     AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
1487     // In case if we can handle this set of attributes - set up function attrs
1488     // directly on statepoint and return attrs later for gc_result intrinsic.
1489     Invoke->setAttributes(NewAttrs.getFnAttributes());
1490     ReturnAttrs = NewAttrs.getRetAttributes();
1491
1492     Token = Invoke;
1493
1494     // Generate gc relocates in exceptional path
1495     BasicBlock *UnwindBlock = ToReplace->getUnwindDest();
1496     assert(!isa<PHINode>(UnwindBlock->begin()) &&
1497            UnwindBlock->getUniquePredecessor() &&
1498            "can't safely insert in this block!");
1499
1500     Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
1501     Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1502
1503     // Extract second element from landingpad return value. We will attach
1504     // exceptional gc relocates to it.
1505     Instruction *ExceptionalToken =
1506         cast<Instruction>(Builder.CreateExtractValue(
1507             UnwindBlock->getLandingPadInst(), 1, "relocate_token"));
1508     Result.UnwindToken = ExceptionalToken;
1509
1510     const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
1511     CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, ExceptionalToken,
1512                       Builder);
1513
1514     // Generate gc relocates and returns for normal block
1515     BasicBlock *NormalDest = ToReplace->getNormalDest();
1516     assert(!isa<PHINode>(NormalDest->begin()) &&
1517            NormalDest->getUniquePredecessor() &&
1518            "can't safely insert in this block!");
1519
1520     Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt());
1521
1522     // gc relocates will be generated later as if it were regular call
1523     // statepoint
1524   }
1525   assert(Token && "Should be set in one of the above branches!");
1526
1527   if (UseDeoptBundles) {
1528     Token->setName("statepoint_token");
1529     if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
1530       StringRef Name =
1531           CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
1532       CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
1533       GCResult->setAttributes(CS.getAttributes().getRetAttributes());
1534
1535       // We cannot RAUW or delete CS.getInstruction() because it could be in the
1536       // live set of some other safepoint, in which case that safepoint's
1537       // PartiallyConstructedSafepointRecord will hold a raw pointer to this
1538       // llvm::Instruction.  Instead, we defer the replacement and deletion to
1539       // after the live sets have been made explicit in the IR, and we no longer
1540       // have raw pointers to worry about.
1541       Replacements.emplace_back(CS.getInstruction(), GCResult);
1542     } else {
1543       Replacements.emplace_back(CS.getInstruction(), nullptr);
1544     }
1545   } else {
1546     assert(!CS.getInstruction()->hasNUsesOrMore(2) &&
1547            "only valid use before rewrite is gc.result");
1548     assert(!CS.getInstruction()->hasOneUse() ||
1549            isGCResult(cast<Instruction>(*CS.getInstruction()->user_begin())));
1550
1551     // Take the name of the original statepoint token if there was one.
1552     Token->takeName(CS.getInstruction());
1553
1554     // Update the gc.result of the original statepoint (if any) to use the newly
1555     // inserted statepoint.  This is safe to do here since the token can't be
1556     // considered a live reference.
1557     CS.getInstruction()->replaceAllUsesWith(Token);
1558     CS.getInstruction()->eraseFromParent();
1559   }
1560
1561   Result.StatepointToken = Token;
1562
1563   // Second, create a gc.relocate for every live variable
1564   const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
1565   CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder);
1566 }
1567
1568 namespace {
1569 struct NameOrdering {
1570   Value *Base;
1571   Value *Derived;
1572
1573   bool operator()(NameOrdering const &a, NameOrdering const &b) {
1574     return -1 == a.Derived->getName().compare(b.Derived->getName());
1575   }
1576 };
1577 }
1578
1579 static void StabilizeOrder(SmallVectorImpl<Value *> &BaseVec,
1580                            SmallVectorImpl<Value *> &LiveVec) {
1581   assert(BaseVec.size() == LiveVec.size());
1582
1583   SmallVector<NameOrdering, 64> Temp;
1584   for (size_t i = 0; i < BaseVec.size(); i++) {
1585     NameOrdering v;
1586     v.Base = BaseVec[i];
1587     v.Derived = LiveVec[i];
1588     Temp.push_back(v);
1589   }
1590
1591   std::sort(Temp.begin(), Temp.end(), NameOrdering());
1592   for (size_t i = 0; i < BaseVec.size(); i++) {
1593     BaseVec[i] = Temp[i].Base;
1594     LiveVec[i] = Temp[i].Derived;
1595   }
1596 }
1597
1598 // Replace an existing gc.statepoint with a new one and a set of gc.relocates
1599 // which make the relocations happening at this safepoint explicit.
1600 //
1601 // WARNING: Does not do any fixup to adjust users of the original live
1602 // values.  That's the callers responsibility.
1603 static void
1604 makeStatepointExplicit(DominatorTree &DT, const CallSite &CS,
1605                        PartiallyConstructedSafepointRecord &Result,
1606                        std::vector<DeferredReplacement> &Replacements) {
1607   const auto &LiveSet = Result.LiveSet;
1608   const auto &PointerToBase = Result.PointerToBase;
1609
1610   // Convert to vector for efficient cross referencing.
1611   SmallVector<Value *, 64> BaseVec, LiveVec;
1612   LiveVec.reserve(LiveSet.size());
1613   BaseVec.reserve(LiveSet.size());
1614   for (Value *L : LiveSet) {
1615     LiveVec.push_back(L);
1616     assert(PointerToBase.count(L));
1617     Value *Base = PointerToBase.find(L)->second;
1618     BaseVec.push_back(Base);
1619   }
1620   assert(LiveVec.size() == BaseVec.size());
1621
1622   // To make the output IR slightly more stable (for use in diffs), ensure a
1623   // fixed order of the values in the safepoint (by sorting the value name).
1624   // The order is otherwise meaningless.
1625   StabilizeOrder(BaseVec, LiveVec);
1626
1627   // Do the actual rewriting and delete the old statepoint
1628   makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
1629 }
1630
1631 // Helper function for the relocationViaAlloca.
1632 //
1633 // It receives iterator to the statepoint gc relocates and emits a store to the
1634 // assigned location (via allocaMap) for the each one of them.  It adds the
1635 // visited values into the visitedLiveValues set, which we will later use them
1636 // for sanity checking.
1637 static void
1638 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
1639                        DenseMap<Value *, Value *> &AllocaMap,
1640                        DenseSet<Value *> &VisitedLiveValues) {
1641
1642   for (User *U : GCRelocs) {
1643     if (!isa<IntrinsicInst>(U))
1644       continue;
1645
1646     IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U);
1647
1648     // We only care about relocates
1649     if (RelocatedValue->getIntrinsicID() !=
1650         Intrinsic::experimental_gc_relocate) {
1651       continue;
1652     }
1653
1654     GCRelocateOperands RelocateOperands(RelocatedValue);
1655     Value *OriginalValue =
1656         const_cast<Value *>(RelocateOperands.getDerivedPtr());
1657     assert(AllocaMap.count(OriginalValue));
1658     Value *Alloca = AllocaMap[OriginalValue];
1659
1660     // Emit store into the related alloca
1661     // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to
1662     // the correct type according to alloca.
1663     assert(RelocatedValue->getNextNode() &&
1664            "Should always have one since it's not a terminator");
1665     IRBuilder<> Builder(RelocatedValue->getNextNode());
1666     Value *CastedRelocatedValue =
1667       Builder.CreateBitCast(RelocatedValue,
1668                             cast<AllocaInst>(Alloca)->getAllocatedType(),
1669                             suffixed_name_or(RelocatedValue, ".casted", ""));
1670
1671     StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
1672     Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
1673
1674 #ifndef NDEBUG
1675     VisitedLiveValues.insert(OriginalValue);
1676 #endif
1677   }
1678 }
1679
1680 // Helper function for the "relocationViaAlloca". Similar to the
1681 // "insertRelocationStores" but works for rematerialized values.
1682 static void
1683 insertRematerializationStores(
1684   RematerializedValueMapTy RematerializedValues,
1685   DenseMap<Value *, Value *> &AllocaMap,
1686   DenseSet<Value *> &VisitedLiveValues) {
1687
1688   for (auto RematerializedValuePair: RematerializedValues) {
1689     Instruction *RematerializedValue = RematerializedValuePair.first;
1690     Value *OriginalValue = RematerializedValuePair.second;
1691
1692     assert(AllocaMap.count(OriginalValue) &&
1693            "Can not find alloca for rematerialized value");
1694     Value *Alloca = AllocaMap[OriginalValue];
1695
1696     StoreInst *Store = new StoreInst(RematerializedValue, Alloca);
1697     Store->insertAfter(RematerializedValue);
1698
1699 #ifndef NDEBUG
1700     VisitedLiveValues.insert(OriginalValue);
1701 #endif
1702   }
1703 }
1704
1705 /// Do all the relocation update via allocas and mem2reg
1706 static void relocationViaAlloca(
1707     Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
1708     ArrayRef<PartiallyConstructedSafepointRecord> Records) {
1709 #ifndef NDEBUG
1710   // record initial number of (static) allocas; we'll check we have the same
1711   // number when we get done.
1712   int InitialAllocaNum = 0;
1713   for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
1714        I++)
1715     if (isa<AllocaInst>(*I))
1716       InitialAllocaNum++;
1717 #endif
1718
1719   // TODO-PERF: change data structures, reserve
1720   DenseMap<Value *, Value *> AllocaMap;
1721   SmallVector<AllocaInst *, 200> PromotableAllocas;
1722   // Used later to chack that we have enough allocas to store all values
1723   std::size_t NumRematerializedValues = 0;
1724   PromotableAllocas.reserve(Live.size());
1725
1726   // Emit alloca for "LiveValue" and record it in "allocaMap" and
1727   // "PromotableAllocas"
1728   auto emitAllocaFor = [&](Value *LiveValue) {
1729     AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "",
1730                                         F.getEntryBlock().getFirstNonPHI());
1731     AllocaMap[LiveValue] = Alloca;
1732     PromotableAllocas.push_back(Alloca);
1733   };
1734
1735   // Emit alloca for each live gc pointer
1736   for (Value *V : Live)
1737     emitAllocaFor(V);
1738
1739   // Emit allocas for rematerialized values
1740   for (const auto &Info : Records)
1741     for (auto RematerializedValuePair : Info.RematerializedValues) {
1742       Value *OriginalValue = RematerializedValuePair.second;
1743       if (AllocaMap.count(OriginalValue) != 0)
1744         continue;
1745
1746       emitAllocaFor(OriginalValue);
1747       ++NumRematerializedValues;
1748     }
1749
1750   // The next two loops are part of the same conceptual operation.  We need to
1751   // insert a store to the alloca after the original def and at each
1752   // redefinition.  We need to insert a load before each use.  These are split
1753   // into distinct loops for performance reasons.
1754
1755   // Update gc pointer after each statepoint: either store a relocated value or
1756   // null (if no relocated value was found for this gc pointer and it is not a
1757   // gc_result).  This must happen before we update the statepoint with load of
1758   // alloca otherwise we lose the link between statepoint and old def.
1759   for (const auto &Info : Records) {
1760     Value *Statepoint = Info.StatepointToken;
1761
1762     // This will be used for consistency check
1763     DenseSet<Value *> VisitedLiveValues;
1764
1765     // Insert stores for normal statepoint gc relocates
1766     insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);
1767
1768     // In case if it was invoke statepoint
1769     // we will insert stores for exceptional path gc relocates.
1770     if (isa<InvokeInst>(Statepoint)) {
1771       insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
1772                              VisitedLiveValues);
1773     }
1774
1775     // Do similar thing with rematerialized values
1776     insertRematerializationStores(Info.RematerializedValues, AllocaMap,
1777                                   VisitedLiveValues);
1778
1779     if (ClobberNonLive) {
1780       // As a debugging aid, pretend that an unrelocated pointer becomes null at
1781       // the gc.statepoint.  This will turn some subtle GC problems into
1782       // slightly easier to debug SEGVs.  Note that on large IR files with
1783       // lots of gc.statepoints this is extremely costly both memory and time
1784       // wise.
1785       SmallVector<AllocaInst *, 64> ToClobber;
1786       for (auto Pair : AllocaMap) {
1787         Value *Def = Pair.first;
1788         AllocaInst *Alloca = cast<AllocaInst>(Pair.second);
1789
1790         // This value was relocated
1791         if (VisitedLiveValues.count(Def)) {
1792           continue;
1793         }
1794         ToClobber.push_back(Alloca);
1795       }
1796
1797       auto InsertClobbersAt = [&](Instruction *IP) {
1798         for (auto *AI : ToClobber) {
1799           auto AIType = cast<PointerType>(AI->getType());
1800           auto PT = cast<PointerType>(AIType->getElementType());
1801           Constant *CPN = ConstantPointerNull::get(PT);
1802           StoreInst *Store = new StoreInst(CPN, AI);
1803           Store->insertBefore(IP);
1804         }
1805       };
1806
1807       // Insert the clobbering stores.  These may get intermixed with the
1808       // gc.results and gc.relocates, but that's fine.
1809       if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
1810         InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
1811         InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
1812       } else {
1813         InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
1814       }
1815     }
1816   }
1817
1818   // Update use with load allocas and add store for gc_relocated.
1819   for (auto Pair : AllocaMap) {
1820     Value *Def = Pair.first;
1821     Value *Alloca = Pair.second;
1822
1823     // We pre-record the uses of allocas so that we dont have to worry about
1824     // later update that changes the user information..
1825
1826     SmallVector<Instruction *, 20> Uses;
1827     // PERF: trade a linear scan for repeated reallocation
1828     Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
1829     for (User *U : Def->users()) {
1830       if (!isa<ConstantExpr>(U)) {
1831         // If the def has a ConstantExpr use, then the def is either a
1832         // ConstantExpr use itself or null.  In either case
1833         // (recursively in the first, directly in the second), the oop
1834         // it is ultimately dependent on is null and this particular
1835         // use does not need to be fixed up.
1836         Uses.push_back(cast<Instruction>(U));
1837       }
1838     }
1839
1840     std::sort(Uses.begin(), Uses.end());
1841     auto Last = std::unique(Uses.begin(), Uses.end());
1842     Uses.erase(Last, Uses.end());
1843
1844     for (Instruction *Use : Uses) {
1845       if (isa<PHINode>(Use)) {
1846         PHINode *Phi = cast<PHINode>(Use);
1847         for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
1848           if (Def == Phi->getIncomingValue(i)) {
1849             LoadInst *Load = new LoadInst(
1850                 Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
1851             Phi->setIncomingValue(i, Load);
1852           }
1853         }
1854       } else {
1855         LoadInst *Load = new LoadInst(Alloca, "", Use);
1856         Use->replaceUsesOfWith(Def, Load);
1857       }
1858     }
1859
1860     // Emit store for the initial gc value.  Store must be inserted after load,
1861     // otherwise store will be in alloca's use list and an extra load will be
1862     // inserted before it.
1863     StoreInst *Store = new StoreInst(Def, Alloca);
1864     if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
1865       if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
1866         // InvokeInst is a TerminatorInst so the store need to be inserted
1867         // into its normal destination block.
1868         BasicBlock *NormalDest = Invoke->getNormalDest();
1869         Store->insertBefore(NormalDest->getFirstNonPHI());
1870       } else {
1871         assert(!Inst->isTerminator() &&
1872                "The only TerminatorInst that can produce a value is "
1873                "InvokeInst which is handled above.");
1874         Store->insertAfter(Inst);
1875       }
1876     } else {
1877       assert(isa<Argument>(Def));
1878       Store->insertAfter(cast<Instruction>(Alloca));
1879     }
1880   }
1881
1882   assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
1883          "we must have the same allocas with lives");
1884   if (!PromotableAllocas.empty()) {
1885     // Apply mem2reg to promote alloca to SSA
1886     PromoteMemToReg(PromotableAllocas, DT);
1887   }
1888
1889 #ifndef NDEBUG
1890   for (auto &I : F.getEntryBlock())
1891     if (isa<AllocaInst>(I))
1892       InitialAllocaNum--;
1893   assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
1894 #endif
1895 }
1896
1897 /// Implement a unique function which doesn't require we sort the input
1898 /// vector.  Doing so has the effect of changing the output of a couple of
1899 /// tests in ways which make them less useful in testing fused safepoints.
1900 template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
1901   SmallSet<T, 8> Seen;
1902   Vec.erase(std::remove_if(Vec.begin(), Vec.end(), [&](const T &V) {
1903               return !Seen.insert(V).second;
1904             }), Vec.end());
1905 }
1906
1907 /// Insert holders so that each Value is obviously live through the entire
1908 /// lifetime of the call.
1909 static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
1910                                  SmallVectorImpl<CallInst *> &Holders) {
1911   if (Values.empty())
1912     // No values to hold live, might as well not insert the empty holder
1913     return;
1914
1915   Module *M = CS.getInstruction()->getModule();
1916   // Use a dummy vararg function to actually hold the values live
1917   Function *Func = cast<Function>(M->getOrInsertFunction(
1918       "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
1919   if (CS.isCall()) {
1920     // For call safepoints insert dummy calls right after safepoint
1921     Holders.push_back(CallInst::Create(Func, Values, "",
1922                                        &*++CS.getInstruction()->getIterator()));
1923     return;
1924   }
1925   // For invoke safepooints insert dummy calls both in normal and
1926   // exceptional destination blocks
1927   auto *II = cast<InvokeInst>(CS.getInstruction());
1928   Holders.push_back(CallInst::Create(
1929       Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
1930   Holders.push_back(CallInst::Create(
1931       Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
1932 }
1933
1934 static void findLiveReferences(
1935     Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
1936     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
1937   GCPtrLivenessData OriginalLivenessData;
1938   computeLiveInValues(DT, F, OriginalLivenessData);
1939   for (size_t i = 0; i < records.size(); i++) {
1940     struct PartiallyConstructedSafepointRecord &info = records[i];
1941     const CallSite &CS = toUpdate[i];
1942     analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
1943   }
1944 }
1945
1946 /// Remove any vector of pointers from the live set by scalarizing them over the
1947 /// statepoint instruction.  Adds the scalarized pieces to the live set.  It
1948 /// would be preferable to include the vector in the statepoint itself, but
1949 /// the lowering code currently does not handle that.  Extending it would be
1950 /// slightly non-trivial since it requires a format change.  Given how rare
1951 /// such cases are (for the moment?) scalarizing is an acceptable compromise.
1952 static void splitVectorValues(Instruction *StatepointInst,
1953                               StatepointLiveSetTy &LiveSet,
1954                               DenseMap<Value *, Value *>& PointerToBase,
1955                               DominatorTree &DT) {
1956   SmallVector<Value *, 16> ToSplit;
1957   for (Value *V : LiveSet)
1958     if (isa<VectorType>(V->getType()))
1959       ToSplit.push_back(V);
1960
1961   if (ToSplit.empty())
1962     return;
1963
1964   DenseMap<Value *, SmallVector<Value *, 16>> ElementMapping;
1965
1966   Function &F = *(StatepointInst->getParent()->getParent());
1967
1968   DenseMap<Value *, AllocaInst *> AllocaMap;
1969   // First is normal return, second is exceptional return (invoke only)
1970   DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
1971   for (Value *V : ToSplit) {
1972     AllocaInst *Alloca =
1973         new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
1974     AllocaMap[V] = Alloca;
1975
1976     VectorType *VT = cast<VectorType>(V->getType());
1977     IRBuilder<> Builder(StatepointInst);
1978     SmallVector<Value *, 16> Elements;
1979     for (unsigned i = 0; i < VT->getNumElements(); i++)
1980       Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
1981     ElementMapping[V] = Elements;
1982
1983     auto InsertVectorReform = [&](Instruction *IP) {
1984       Builder.SetInsertPoint(IP);
1985       Builder.SetCurrentDebugLocation(IP->getDebugLoc());
1986       Value *ResultVec = UndefValue::get(VT);
1987       for (unsigned i = 0; i < VT->getNumElements(); i++)
1988         ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
1989                                                 Builder.getInt32(i));
1990       return ResultVec;
1991     };
1992
1993     if (isa<CallInst>(StatepointInst)) {
1994       BasicBlock::iterator Next(StatepointInst);
1995       Next++;
1996       Instruction *IP = &*(Next);
1997       Replacements[V].first = InsertVectorReform(IP);
1998       Replacements[V].second = nullptr;
1999     } else {
2000       InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
2001       // We've already normalized - check that we don't have shared destination
2002       // blocks
2003       BasicBlock *NormalDest = Invoke->getNormalDest();
2004       assert(!isa<PHINode>(NormalDest->begin()));
2005       BasicBlock *UnwindDest = Invoke->getUnwindDest();
2006       assert(!isa<PHINode>(UnwindDest->begin()));
2007       // Insert insert element sequences in both successors
2008       Instruction *IP = &*(NormalDest->getFirstInsertionPt());
2009       Replacements[V].first = InsertVectorReform(IP);
2010       IP = &*(UnwindDest->getFirstInsertionPt());
2011       Replacements[V].second = InsertVectorReform(IP);
2012     }
2013   }
2014
2015   for (Value *V : ToSplit) {
2016     AllocaInst *Alloca = AllocaMap[V];
2017
2018     // Capture all users before we start mutating use lists
2019     SmallVector<Instruction *, 16> Users;
2020     for (User *U : V->users())
2021       Users.push_back(cast<Instruction>(U));
2022
2023     for (Instruction *I : Users) {
2024       if (auto Phi = dyn_cast<PHINode>(I)) {
2025         for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
2026           if (V == Phi->getIncomingValue(i)) {
2027             LoadInst *Load = new LoadInst(
2028                 Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
2029             Phi->setIncomingValue(i, Load);
2030           }
2031       } else {
2032         LoadInst *Load = new LoadInst(Alloca, "", I);
2033         I->replaceUsesOfWith(V, Load);
2034       }
2035     }
2036
2037     // Store the original value and the replacement value into the alloca
2038     StoreInst *Store = new StoreInst(V, Alloca);
2039     if (auto I = dyn_cast<Instruction>(V))
2040       Store->insertAfter(I);
2041     else
2042       Store->insertAfter(Alloca);
2043
2044     // Normal return for invoke, or call return
2045     Instruction *Replacement = cast<Instruction>(Replacements[V].first);
2046     (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
2047     // Unwind return for invoke only
2048     Replacement = cast_or_null<Instruction>(Replacements[V].second);
2049     if (Replacement)
2050       (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
2051   }
2052
2053   // apply mem2reg to promote alloca to SSA
2054   SmallVector<AllocaInst *, 16> Allocas;
2055   for (Value *V : ToSplit)
2056     Allocas.push_back(AllocaMap[V]);
2057   PromoteMemToReg(Allocas, DT);
2058
2059   // Update our tracking of live pointers and base mappings to account for the
2060   // changes we just made.
2061   for (Value *V : ToSplit) {
2062     auto &Elements = ElementMapping[V];
2063
2064     LiveSet.erase(V);
2065     LiveSet.insert(Elements.begin(), Elements.end());
2066     // We need to update the base mapping as well.
2067     assert(PointerToBase.count(V));
2068     Value *OldBase = PointerToBase[V];
2069     auto &BaseElements = ElementMapping[OldBase];
2070     PointerToBase.erase(V);
2071     assert(Elements.size() == BaseElements.size());
2072     for (unsigned i = 0; i < Elements.size(); i++) {
2073       Value *Elem = Elements[i];
2074       PointerToBase[Elem] = BaseElements[i];
2075     }
2076   }
2077 }
2078
2079 // Helper function for the "rematerializeLiveValues". It walks use chain
2080 // starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
2081 // values are visited (currently it is GEP's and casts). Returns true if it
2082 // successfully reached "BaseValue" and false otherwise.
2083 // Fills "ChainToBase" array with all visited values. "BaseValue" is not
2084 // recorded.
2085 static bool findRematerializableChainToBasePointer(
2086   SmallVectorImpl<Instruction*> &ChainToBase,
2087   Value *CurrentValue, Value *BaseValue) {
2088
2089   // We have found a base value
2090   if (CurrentValue == BaseValue) {
2091     return true;
2092   }
2093
2094   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
2095     ChainToBase.push_back(GEP);
2096     return findRematerializableChainToBasePointer(ChainToBase,
2097                                                   GEP->getPointerOperand(),
2098                                                   BaseValue);
2099   }
2100
2101   if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
2102     Value *Def = CI->stripPointerCasts();
2103
2104     // This two checks are basically similar. First one is here for the
2105     // consistency with findBasePointers logic.
2106     assert(!isa<CastInst>(Def) && "not a pointer cast found");
2107     if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
2108       return false;
2109
2110     ChainToBase.push_back(CI);
2111     return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
2112   }
2113
2114   // Not supported instruction in the chain
2115   return false;
2116 }
2117
2118 // Helper function for the "rematerializeLiveValues". Compute cost of the use
2119 // chain we are going to rematerialize.
2120 static unsigned
2121 chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
2122                        TargetTransformInfo &TTI) {
2123   unsigned Cost = 0;
2124
2125   for (Instruction *Instr : Chain) {
2126     if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
2127       assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
2128              "non noop cast is found during rematerialization");
2129
2130       Type *SrcTy = CI->getOperand(0)->getType();
2131       Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy);
2132
2133     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
2134       // Cost of the address calculation
2135       Type *ValTy = GEP->getPointerOperandType()->getPointerElementType();
2136       Cost += TTI.getAddressComputationCost(ValTy);
2137
2138       // And cost of the GEP itself
2139       // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
2140       //       allowed for the external usage)
2141       if (!GEP->hasAllConstantIndices())
2142         Cost += 2;
2143
2144     } else {
2145       llvm_unreachable("unsupported instruciton type during rematerialization");
2146     }
2147   }
2148
2149   return Cost;
2150 }
2151
2152 // From the statepoint live set pick values that are cheaper to recompute then
2153 // to relocate. Remove this values from the live set, rematerialize them after
2154 // statepoint and record them in "Info" structure. Note that similar to
2155 // relocated values we don't do any user adjustments here.
2156 static void rematerializeLiveValues(CallSite CS,
2157                                     PartiallyConstructedSafepointRecord &Info,
2158                                     TargetTransformInfo &TTI) {
2159   const unsigned int ChainLengthThreshold = 10;
2160
2161   // Record values we are going to delete from this statepoint live set.
2162   // We can not di this in following loop due to iterator invalidation.
2163   SmallVector<Value *, 32> LiveValuesToBeDeleted;
2164
2165   for (Value *LiveValue: Info.LiveSet) {
2166     // For each live pointer find it's defining chain
2167     SmallVector<Instruction *, 3> ChainToBase;
2168     assert(Info.PointerToBase.count(LiveValue));
2169     bool FoundChain =
2170       findRematerializableChainToBasePointer(ChainToBase,
2171                                              LiveValue,
2172                                              Info.PointerToBase[LiveValue]);
2173     // Nothing to do, or chain is too long
2174     if (!FoundChain ||
2175         ChainToBase.size() == 0 ||
2176         ChainToBase.size() > ChainLengthThreshold)
2177       continue;
2178
2179     // Compute cost of this chain
2180     unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
2181     // TODO: We can also account for cases when we will be able to remove some
2182     //       of the rematerialized values by later optimization passes. I.e if
2183     //       we rematerialized several intersecting chains. Or if original values
2184     //       don't have any uses besides this statepoint.
2185
2186     // For invokes we need to rematerialize each chain twice - for normal and
2187     // for unwind basic blocks. Model this by multiplying cost by two.
2188     if (CS.isInvoke()) {
2189       Cost *= 2;
2190     }
2191     // If it's too expensive - skip it
2192     if (Cost >= RematerializationThreshold)
2193       continue;
2194
2195     // Remove value from the live set
2196     LiveValuesToBeDeleted.push_back(LiveValue);
2197
2198     // Clone instructions and record them inside "Info" structure
2199
2200     // Walk backwards to visit top-most instructions first
2201     std::reverse(ChainToBase.begin(), ChainToBase.end());
2202
2203     // Utility function which clones all instructions from "ChainToBase"
2204     // and inserts them before "InsertBefore". Returns rematerialized value
2205     // which should be used after statepoint.
2206     auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) {
2207       Instruction *LastClonedValue = nullptr;
2208       Instruction *LastValue = nullptr;
2209       for (Instruction *Instr: ChainToBase) {
2210         // Only GEP's and casts are suported as we need to be careful to not
2211         // introduce any new uses of pointers not in the liveset.
2212         // Note that it's fine to introduce new uses of pointers which were
2213         // otherwise not used after this statepoint.
2214         assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));
2215
2216         Instruction *ClonedValue = Instr->clone();
2217         ClonedValue->insertBefore(InsertBefore);
2218         ClonedValue->setName(Instr->getName() + ".remat");
2219
2220         // If it is not first instruction in the chain then it uses previously
2221         // cloned value. We should update it to use cloned value.
2222         if (LastClonedValue) {
2223           assert(LastValue);
2224           ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
2225 #ifndef NDEBUG
2226           // Assert that cloned instruction does not use any instructions from
2227           // this chain other than LastClonedValue
2228           for (auto OpValue : ClonedValue->operand_values()) {
2229             assert(std::find(ChainToBase.begin(), ChainToBase.end(), OpValue) ==
2230                        ChainToBase.end() &&
2231                    "incorrect use in rematerialization chain");
2232           }
2233 #endif
2234         }
2235
2236         LastClonedValue = ClonedValue;
2237         LastValue = Instr;
2238       }
2239       assert(LastClonedValue);
2240       return LastClonedValue;
2241     };
2242
2243     // Different cases for calls and invokes. For invokes we need to clone
2244     // instructions both on normal and unwind path.
2245     if (CS.isCall()) {
2246       Instruction *InsertBefore = CS.getInstruction()->getNextNode();
2247       assert(InsertBefore);
2248       Instruction *RematerializedValue = rematerializeChain(InsertBefore);
2249       Info.RematerializedValues[RematerializedValue] = LiveValue;
2250     } else {
2251       InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
2252
2253       Instruction *NormalInsertBefore =
2254           &*Invoke->getNormalDest()->getFirstInsertionPt();
2255       Instruction *UnwindInsertBefore =
2256           &*Invoke->getUnwindDest()->getFirstInsertionPt();
2257
2258       Instruction *NormalRematerializedValue =
2259           rematerializeChain(NormalInsertBefore);
2260       Instruction *UnwindRematerializedValue =
2261           rematerializeChain(UnwindInsertBefore);
2262
2263       Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
2264       Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
2265     }
2266   }
2267
2268   // Remove rematerializaed values from the live set
2269   for (auto LiveValue: LiveValuesToBeDeleted) {
2270     Info.LiveSet.erase(LiveValue);
2271   }
2272 }
2273
2274 static bool insertParsePoints(Function &F, DominatorTree &DT,
2275                               TargetTransformInfo &TTI,
2276                               SmallVectorImpl<CallSite> &ToUpdate) {
2277 #ifndef NDEBUG
2278   // sanity check the input
2279   std::set<CallSite> Uniqued;
2280   Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
2281   assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
2282
2283   for (CallSite CS : ToUpdate) {
2284     assert(CS.getInstruction()->getParent()->getParent() == &F);
2285     assert((UseDeoptBundles || isStatepoint(CS)) &&
2286            "expected to already be a deopt statepoint");
2287   }
2288 #endif
2289
2290   // When inserting gc.relocates for invokes, we need to be able to insert at
2291   // the top of the successor blocks.  See the comment on
2292   // normalForInvokeSafepoint on exactly what is needed.  Note that this step
2293   // may restructure the CFG.
2294   for (CallSite CS : ToUpdate) {
2295     if (!CS.isInvoke())
2296       continue;
2297     auto *II = cast<InvokeInst>(CS.getInstruction());
2298     normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
2299     normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
2300   }
2301
2302   // A list of dummy calls added to the IR to keep various values obviously
2303   // live in the IR.  We'll remove all of these when done.
2304   SmallVector<CallInst *, 64> Holders;
2305
2306   // Insert a dummy call with all of the arguments to the vm_state we'll need
2307   // for the actual safepoint insertion.  This ensures reference arguments in
2308   // the deopt argument list are considered live through the safepoint (and
2309   // thus makes sure they get relocated.)
2310   for (CallSite CS : ToUpdate) {
2311     SmallVector<Value *, 64> DeoptValues;
2312
2313     iterator_range<const Use *> DeoptStateRange =
2314         UseDeoptBundles
2315             ? iterator_range<const Use *>(GetDeoptBundleOperands(CS))
2316             : iterator_range<const Use *>(Statepoint(CS).vm_state_args());
2317
2318     for (Value *Arg : DeoptStateRange) {
2319       assert(!isUnhandledGCPointerType(Arg->getType()) &&
2320              "support for FCA unimplemented");
2321       if (isHandledGCPointerType(Arg->getType()))
2322         DeoptValues.push_back(Arg);
2323     }
2324
2325     insertUseHolderAfter(CS, DeoptValues, Holders);
2326   }
2327
2328   SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
2329
2330   // A) Identify all gc pointers which are statically live at the given call
2331   // site.
2332   findLiveReferences(F, DT, ToUpdate, Records);
2333
2334   // B) Find the base pointers for each live pointer
2335   /* scope for caching */ {
2336     // Cache the 'defining value' relation used in the computation and
2337     // insertion of base phis and selects.  This ensures that we don't insert
2338     // large numbers of duplicate base_phis.
2339     DefiningValueMapTy DVCache;
2340
2341     for (size_t i = 0; i < Records.size(); i++) {
2342       PartiallyConstructedSafepointRecord &info = Records[i];
2343       findBasePointers(DT, DVCache, ToUpdate[i], info);
2344     }
2345   } // end of cache scope
2346
2347   // The base phi insertion logic (for any safepoint) may have inserted new
2348   // instructions which are now live at some safepoint.  The simplest such
2349   // example is:
2350   // loop:
2351   //   phi a  <-- will be a new base_phi here
2352   //   safepoint 1 <-- that needs to be live here
2353   //   gep a + 1
2354   //   safepoint 2
2355   //   br loop
2356   // We insert some dummy calls after each safepoint to definitely hold live
2357   // the base pointers which were identified for that safepoint.  We'll then
2358   // ask liveness for _every_ base inserted to see what is now live.  Then we
2359   // remove the dummy calls.
2360   Holders.reserve(Holders.size() + Records.size());
2361   for (size_t i = 0; i < Records.size(); i++) {
2362     PartiallyConstructedSafepointRecord &Info = Records[i];
2363
2364     SmallVector<Value *, 128> Bases;
2365     for (auto Pair : Info.PointerToBase)
2366       Bases.push_back(Pair.second);
2367
2368     insertUseHolderAfter(ToUpdate[i], Bases, Holders);
2369   }
2370
2371   // By selecting base pointers, we've effectively inserted new uses. Thus, we
2372   // need to rerun liveness.  We may *also* have inserted new defs, but that's
2373   // not the key issue.
2374   recomputeLiveInValues(F, DT, ToUpdate, Records);
2375
2376   if (PrintBasePointers) {
2377     for (auto &Info : Records) {
2378       errs() << "Base Pairs: (w/Relocation)\n";
2379       for (auto Pair : Info.PointerToBase)
2380         errs() << " derived %" << Pair.first->getName() << " base %"
2381                << Pair.second->getName() << "\n";
2382     }
2383   }
2384
2385   for (CallInst *CI : Holders)
2386     CI->eraseFromParent();
2387
2388   Holders.clear();
2389
2390   // Do a limited scalarization of any live at safepoint vector values which
2391   // contain pointers.  This enables this pass to run after vectorization at
2392   // the cost of some possible performance loss.  TODO: it would be nice to
2393   // natively support vectors all the way through the backend so we don't need
2394   // to scalarize here.
2395   for (size_t i = 0; i < Records.size(); i++) {
2396     PartiallyConstructedSafepointRecord &Info = Records[i];
2397     Instruction *Statepoint = ToUpdate[i].getInstruction();
2398     splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
2399                       Info.PointerToBase, DT);
2400   }
2401
2402   // In order to reduce live set of statepoint we might choose to rematerialize
2403   // some values instead of relocating them. This is purely an optimization and
2404   // does not influence correctness.
2405   for (size_t i = 0; i < Records.size(); i++)
2406     rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
2407
2408   // We need this to safely RAUW and delete call or invoke return values that
2409   // may themselves be live over a statepoint.  For details, please see usage in
2410   // makeStatepointExplicitImpl.
2411   std::vector<DeferredReplacement> Replacements;
2412
2413   // Now run through and replace the existing statepoints with new ones with
2414   // the live variables listed.  We do not yet update uses of the values being
2415   // relocated. We have references to live variables that need to
2416   // survive to the last iteration of this loop.  (By construction, the
2417   // previous statepoint can not be a live variable, thus we can and remove
2418   // the old statepoint calls as we go.)
2419   for (size_t i = 0; i < Records.size(); i++)
2420     makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
2421
2422   ToUpdate.clear(); // prevent accident use of invalid CallSites
2423
2424   for (auto &PR : Replacements)
2425     PR.doReplacement();
2426
2427   Replacements.clear();
2428
2429   for (auto &Info : Records) {
2430     // These live sets may contain state Value pointers, since we replaced calls
2431     // with operand bundles with calls wrapped in gc.statepoint, and some of
2432     // those calls may have been def'ing live gc pointers.  Clear these out to
2433     // avoid accidentally using them.
2434     //
2435     // TODO: We should create a separate data structure that does not contain
2436     // these live sets, and migrate to using that data structure from this point
2437     // onward.
2438     Info.LiveSet.clear();
2439     Info.PointerToBase.clear();
2440   }
2441
2442   // Do all the fixups of the original live variables to their relocated selves
2443   SmallVector<Value *, 128> Live;
2444   for (size_t i = 0; i < Records.size(); i++) {
2445     PartiallyConstructedSafepointRecord &Info = Records[i];
2446
2447     // We can't simply save the live set from the original insertion.  One of
2448     // the live values might be the result of a call which needs a safepoint.
2449     // That Value* no longer exists and we need to use the new gc_result.
2450     // Thankfully, the live set is embedded in the statepoint (and updated), so
2451     // we just grab that.
2452     Statepoint Statepoint(Info.StatepointToken);
2453     Live.insert(Live.end(), Statepoint.gc_args_begin(),
2454                 Statepoint.gc_args_end());
2455 #ifndef NDEBUG
2456     // Do some basic sanity checks on our liveness results before performing
2457     // relocation.  Relocation can and will turn mistakes in liveness results
2458     // into non-sensical code which is must harder to debug.
2459     // TODO: It would be nice to test consistency as well
2460     assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
2461            "statepoint must be reachable or liveness is meaningless");
2462     for (Value *V : Statepoint.gc_args()) {
2463       if (!isa<Instruction>(V))
2464         // Non-instruction values trivial dominate all possible uses
2465         continue;
2466       auto *LiveInst = cast<Instruction>(V);
2467       assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
2468              "unreachable values should never be live");
2469       assert(DT.dominates(LiveInst, Info.StatepointToken) &&
2470              "basic SSA liveness expectation violated by liveness analysis");
2471     }
2472 #endif
2473   }
2474   unique_unsorted(Live);
2475
2476 #ifndef NDEBUG
2477   // sanity check
2478   for (auto *Ptr : Live)
2479     assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type");
2480 #endif
2481
2482   relocationViaAlloca(F, DT, Live, Records);
2483   return !Records.empty();
2484 }
2485
2486 // Handles both return values and arguments for Functions and CallSites.
2487 template <typename AttrHolder>
2488 static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
2489                                       unsigned Index) {
2490   AttrBuilder R;
2491   if (AH.getDereferenceableBytes(Index))
2492     R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
2493                                   AH.getDereferenceableBytes(Index)));
2494   if (AH.getDereferenceableOrNullBytes(Index))
2495     R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
2496                                   AH.getDereferenceableOrNullBytes(Index)));
2497   if (AH.doesNotAlias(Index))
2498     R.addAttribute(Attribute::NoAlias);
2499
2500   if (!R.empty())
2501     AH.setAttributes(AH.getAttributes().removeAttributes(
2502         Ctx, Index, AttributeSet::get(Ctx, Index, R)));
2503 }
2504
2505 void
2506 RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
2507   LLVMContext &Ctx = F.getContext();
2508
2509   for (Argument &A : F.args())
2510     if (isa<PointerType>(A.getType()))
2511       RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1);
2512
2513   if (isa<PointerType>(F.getReturnType()))
2514     RemoveNonValidAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
2515 }
2516
2517 void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
2518   if (F.empty())
2519     return;
2520
2521   LLVMContext &Ctx = F.getContext();
2522   MDBuilder Builder(Ctx);
2523
2524   for (Instruction &I : instructions(F)) {
2525     if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
2526       assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
2527       bool IsImmutableTBAA =
2528           MD->getNumOperands() == 4 &&
2529           mdconst::extract<ConstantInt>(MD->getOperand(3))->getValue() == 1;
2530
2531       if (!IsImmutableTBAA)
2532         continue; // no work to do, MD_tbaa is already marked mutable
2533
2534       MDNode *Base = cast<MDNode>(MD->getOperand(0));
2535       MDNode *Access = cast<MDNode>(MD->getOperand(1));
2536       uint64_t Offset =
2537           mdconst::extract<ConstantInt>(MD->getOperand(2))->getZExtValue();
2538
2539       MDNode *MutableTBAA =
2540           Builder.createTBAAStructTagNode(Base, Access, Offset);
2541       I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
2542     }
2543
2544     if (CallSite CS = CallSite(&I)) {
2545       for (int i = 0, e = CS.arg_size(); i != e; i++)
2546         if (isa<PointerType>(CS.getArgument(i)->getType()))
2547           RemoveNonValidAttrAtIndex(Ctx, CS, i + 1);
2548       if (isa<PointerType>(CS.getType()))
2549         RemoveNonValidAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
2550     }
2551   }
2552 }
2553
2554 /// Returns true if this function should be rewritten by this pass.  The main
2555 /// point of this function is as an extension point for custom logic.
2556 static bool shouldRewriteStatepointsIn(Function &F) {
2557   // TODO: This should check the GCStrategy
2558   if (F.hasGC()) {
2559     const char *FunctionGCName = F.getGC();
2560     const StringRef StatepointExampleName("statepoint-example");
2561     const StringRef CoreCLRName("coreclr");
2562     return (StatepointExampleName == FunctionGCName) ||
2563            (CoreCLRName == FunctionGCName);
2564   } else
2565     return false;
2566 }
2567
2568 void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) {
2569 #ifndef NDEBUG
2570   assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) &&
2571          "precondition!");
2572 #endif
2573
2574   for (Function &F : M)
2575     stripNonValidAttributesFromPrototype(F);
2576
2577   for (Function &F : M)
2578     stripNonValidAttributesFromBody(F);
2579 }
2580
2581 bool RewriteStatepointsForGC::runOnFunction(Function &F) {
2582   // Nothing to do for declarations.
2583   if (F.isDeclaration() || F.empty())
2584     return false;
2585
2586   // Policy choice says not to rewrite - the most common reason is that we're
2587   // compiling code without a GCStrategy.
2588   if (!shouldRewriteStatepointsIn(F))
2589     return false;
2590
2591   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
2592   TargetTransformInfo &TTI =
2593       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2594
2595   auto NeedsRewrite = [](Instruction &I) {
2596     if (UseDeoptBundles) {
2597       if (ImmutableCallSite CS = ImmutableCallSite(&I))
2598         return !callsGCLeafFunction(CS);
2599       return false;
2600     }
2601
2602     return isStatepoint(I);
2603   };
2604
2605   // Gather all the statepoints which need rewritten.  Be careful to only
2606   // consider those in reachable code since we need to ask dominance queries
2607   // when rewriting.  We'll delete the unreachable ones in a moment.
2608   SmallVector<CallSite, 64> ParsePointNeeded;
2609   bool HasUnreachableStatepoint = false;
2610   for (Instruction &I : instructions(F)) {
2611     // TODO: only the ones with the flag set!
2612     if (NeedsRewrite(I)) {
2613       if (DT.isReachableFromEntry(I.getParent()))
2614         ParsePointNeeded.push_back(CallSite(&I));
2615       else
2616         HasUnreachableStatepoint = true;
2617     }
2618   }
2619
2620   bool MadeChange = false;
2621
2622   // Delete any unreachable statepoints so that we don't have unrewritten
2623   // statepoints surviving this pass.  This makes testing easier and the
2624   // resulting IR less confusing to human readers.  Rather than be fancy, we
2625   // just reuse a utility function which removes the unreachable blocks.
2626   if (HasUnreachableStatepoint)
2627     MadeChange |= removeUnreachableBlocks(F);
2628
2629   // Return early if no work to do.
2630   if (ParsePointNeeded.empty())
2631     return MadeChange;
2632
2633   // As a prepass, go ahead and aggressively destroy single entry phi nodes.
2634   // These are created by LCSSA.  They have the effect of increasing the size
2635   // of liveness sets for no good reason.  It may be harder to do this post
2636   // insertion since relocations and base phis can confuse things.
2637   for (BasicBlock &BB : F)
2638     if (BB.getUniquePredecessor()) {
2639       MadeChange = true;
2640       FoldSingleEntryPHINodes(&BB);
2641     }
2642
2643   // Before we start introducing relocations, we want to tweak the IR a bit to
2644   // avoid unfortunate code generation effects.  The main example is that we
2645   // want to try to make sure the comparison feeding a branch is after any
2646   // safepoints.  Otherwise, we end up with a comparison of pre-relocation
2647   // values feeding a branch after relocation.  This is semantically correct,
2648   // but results in extra register pressure since both the pre-relocation and
2649   // post-relocation copies must be available in registers.  For code without
2650   // relocations this is handled elsewhere, but teaching the scheduler to
2651   // reverse the transform we're about to do would be slightly complex.
2652   // Note: This may extend the live range of the inputs to the icmp and thus
2653   // increase the liveset of any statepoint we move over.  This is profitable
2654   // as long as all statepoints are in rare blocks.  If we had in-register
2655   // lowering for live values this would be a much safer transform.
2656   auto getConditionInst = [](TerminatorInst *TI) -> Instruction* {
2657     if (auto *BI = dyn_cast<BranchInst>(TI))
2658       if (BI->isConditional())
2659         return dyn_cast<Instruction>(BI->getCondition());
2660     // TODO: Extend this to handle switches
2661     return nullptr;
2662   };
2663   for (BasicBlock &BB : F) {
2664     TerminatorInst *TI = BB.getTerminator();
2665     if (auto *Cond = getConditionInst(TI))
2666       // TODO: Handle more than just ICmps here.  We should be able to move
2667       // most instructions without side effects or memory access.
2668       if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
2669         MadeChange = true;
2670         Cond->moveBefore(TI);
2671       }
2672   }
2673
2674   MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
2675   return MadeChange;
2676 }
2677
2678 // liveness computation via standard dataflow
2679 // -------------------------------------------------------------------
2680
2681 // TODO: Consider using bitvectors for liveness, the set of potentially
2682 // interesting values should be small and easy to pre-compute.
2683
2684 /// Compute the live-in set for the location rbegin starting from
2685 /// the live-out set of the basic block
2686 static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
2687                                 BasicBlock::reverse_iterator rend,
2688                                 DenseSet<Value *> &LiveTmp) {
2689
2690   for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
2691     Instruction *I = &*ritr;
2692
2693     // KILL/Def - Remove this definition from LiveIn
2694     LiveTmp.erase(I);
2695
2696     // Don't consider *uses* in PHI nodes, we handle their contribution to
2697     // predecessor blocks when we seed the LiveOut sets
2698     if (isa<PHINode>(I))
2699       continue;
2700
2701     // USE - Add to the LiveIn set for this instruction
2702     for (Value *V : I->operands()) {
2703       assert(!isUnhandledGCPointerType(V->getType()) &&
2704              "support for FCA unimplemented");
2705       if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
2706         // The choice to exclude all things constant here is slightly subtle.
2707         // There are two independent reasons:
2708         // - We assume that things which are constant (from LLVM's definition)
2709         // do not move at runtime.  For example, the address of a global
2710         // variable is fixed, even though it's contents may not be.
2711         // - Second, we can't disallow arbitrary inttoptr constants even
2712         // if the language frontend does.  Optimization passes are free to
2713         // locally exploit facts without respect to global reachability.  This
2714         // can create sections of code which are dynamically unreachable and
2715         // contain just about anything.  (see constants.ll in tests)
2716         LiveTmp.insert(V);
2717       }
2718     }
2719   }
2720 }
2721
2722 static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {
2723
2724   for (BasicBlock *Succ : successors(BB)) {
2725     const BasicBlock::iterator E(Succ->getFirstNonPHI());
2726     for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
2727       PHINode *Phi = cast<PHINode>(&*I);
2728       Value *V = Phi->getIncomingValueForBlock(BB);
2729       assert(!isUnhandledGCPointerType(V->getType()) &&
2730              "support for FCA unimplemented");
2731       if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
2732         LiveTmp.insert(V);
2733       }
2734     }
2735   }
2736 }
2737
2738 static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
2739   DenseSet<Value *> KillSet;
2740   for (Instruction &I : *BB)
2741     if (isHandledGCPointerType(I.getType()))
2742       KillSet.insert(&I);
2743   return KillSet;
2744 }
2745
2746 #ifndef NDEBUG
2747 /// Check that the items in 'Live' dominate 'TI'.  This is used as a basic
2748 /// sanity check for the liveness computation.
2749 static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
2750                           TerminatorInst *TI, bool TermOkay = false) {
2751   for (Value *V : Live) {
2752     if (auto *I = dyn_cast<Instruction>(V)) {
2753       // The terminator can be a member of the LiveOut set.  LLVM's definition
2754       // of instruction dominance states that V does not dominate itself.  As
2755       // such, we need to special case this to allow it.
2756       if (TermOkay && TI == I)
2757         continue;
2758       assert(DT.dominates(I, TI) &&
2759              "basic SSA liveness expectation violated by liveness analysis");
2760     }
2761   }
2762 }
2763
2764 /// Check that all the liveness sets used during the computation of liveness
2765 /// obey basic SSA properties.  This is useful for finding cases where we miss
2766 /// a def.
2767 static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
2768                           BasicBlock &BB) {
2769   checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
2770   checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
2771   checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
2772 }
2773 #endif
2774
2775 static void computeLiveInValues(DominatorTree &DT, Function &F,
2776                                 GCPtrLivenessData &Data) {
2777
2778   SmallSetVector<BasicBlock *, 200> Worklist;
2779   auto AddPredsToWorklist = [&](BasicBlock *BB) {
2780     // We use a SetVector so that we don't have duplicates in the worklist.
2781     Worklist.insert(pred_begin(BB), pred_end(BB));
2782   };
2783   auto NextItem = [&]() {
2784     BasicBlock *BB = Worklist.back();
2785     Worklist.pop_back();
2786     return BB;
2787   };
2788
2789   // Seed the liveness for each individual block
2790   for (BasicBlock &BB : F) {
2791     Data.KillSet[&BB] = computeKillSet(&BB);
2792     Data.LiveSet[&BB].clear();
2793     computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
2794
2795 #ifndef NDEBUG
2796     for (Value *Kill : Data.KillSet[&BB])
2797       assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
2798 #endif
2799
2800     Data.LiveOut[&BB] = DenseSet<Value *>();
2801     computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
2802     Data.LiveIn[&BB] = Data.LiveSet[&BB];
2803     set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
2804     set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
2805     if (!Data.LiveIn[&BB].empty())
2806       AddPredsToWorklist(&BB);
2807   }
2808
2809   // Propagate that liveness until stable
2810   while (!Worklist.empty()) {
2811     BasicBlock *BB = NextItem();
2812
2813     // Compute our new liveout set, then exit early if it hasn't changed
2814     // despite the contribution of our successor.
2815     DenseSet<Value *> LiveOut = Data.LiveOut[BB];
2816     const auto OldLiveOutSize = LiveOut.size();
2817     for (BasicBlock *Succ : successors(BB)) {
2818       assert(Data.LiveIn.count(Succ));
2819       set_union(LiveOut, Data.LiveIn[Succ]);
2820     }
2821     // assert OutLiveOut is a subset of LiveOut
2822     if (OldLiveOutSize == LiveOut.size()) {
2823       // If the sets are the same size, then we didn't actually add anything
2824       // when unioning our successors LiveIn  Thus, the LiveIn of this block
2825       // hasn't changed.
2826       continue;
2827     }
2828     Data.LiveOut[BB] = LiveOut;
2829
2830     // Apply the effects of this basic block
2831     DenseSet<Value *> LiveTmp = LiveOut;
2832     set_union(LiveTmp, Data.LiveSet[BB]);
2833     set_subtract(LiveTmp, Data.KillSet[BB]);
2834
2835     assert(Data.LiveIn.count(BB));
2836     const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
2837     // assert: OldLiveIn is a subset of LiveTmp
2838     if (OldLiveIn.size() != LiveTmp.size()) {
2839       Data.LiveIn[BB] = LiveTmp;
2840       AddPredsToWorklist(BB);
2841     }
2842   } // while( !worklist.empty() )
2843
2844 #ifndef NDEBUG
2845   // Sanity check our output against SSA properties.  This helps catch any
2846   // missing kills during the above iteration.
2847   for (BasicBlock &BB : F) {
2848     checkBasicSSA(DT, Data, BB);
2849   }
2850 #endif
2851 }
2852
2853 static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
2854                               StatepointLiveSetTy &Out) {
2855
2856   BasicBlock *BB = Inst->getParent();
2857
2858   // Note: The copy is intentional and required
2859   assert(Data.LiveOut.count(BB));
2860   DenseSet<Value *> LiveOut = Data.LiveOut[BB];
2861
2862   // We want to handle the statepoint itself oddly.  It's
2863   // call result is not live (normal), nor are it's arguments
2864   // (unless they're used again later).  This adjustment is
2865   // specifically what we need to relocate
2866   BasicBlock::reverse_iterator rend(Inst->getIterator());
2867   computeLiveInValues(BB->rbegin(), rend, LiveOut);
2868   LiveOut.erase(Inst);
2869   Out.insert(LiveOut.begin(), LiveOut.end());
2870 }
2871
2872 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
2873                                   const CallSite &CS,
2874                                   PartiallyConstructedSafepointRecord &Info) {
2875   Instruction *Inst = CS.getInstruction();
2876   StatepointLiveSetTy Updated;
2877   findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
2878
2879 #ifndef NDEBUG
2880   DenseSet<Value *> Bases;
2881   for (auto KVPair : Info.PointerToBase) {
2882     Bases.insert(KVPair.second);
2883   }
2884 #endif
2885   // We may have base pointers which are now live that weren't before.  We need
2886   // to update the PointerToBase structure to reflect this.
2887   for (auto V : Updated)
2888     if (!Info.PointerToBase.count(V)) {
2889       assert(Bases.count(V) && "can't find base for unexpected live value");
2890       Info.PointerToBase[V] = V;
2891       continue;
2892     }
2893
2894 #ifndef NDEBUG
2895   for (auto V : Updated) {
2896     assert(Info.PointerToBase.count(V) &&
2897            "must be able to find base for live value");
2898   }
2899 #endif
2900
2901   // Remove any stale base mappings - this can happen since our liveness is
2902   // more precise then the one inherent in the base pointer analysis
2903   DenseSet<Value *> ToErase;
2904   for (auto KVPair : Info.PointerToBase)
2905     if (!Updated.count(KVPair.first))
2906       ToErase.insert(KVPair.first);
2907   for (auto V : ToErase)
2908     Info.PointerToBase.erase(V);
2909
2910 #ifndef NDEBUG
2911   for (auto KVPair : Info.PointerToBase)
2912     assert(Updated.count(KVPair.first) && "record for non-live value");
2913 #endif
2914
2915   Info.LiveSet = Updated;
2916 }