lib/Transforms/Scalar/RewriteStatepointsForGC.cpp

   1 //===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Rewrite an existing set of gc.statepoints such that they make potential
  11 // relocations performed by the garbage collector explicit in the IR.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/Pass.h"
  16 #include "llvm/Analysis/CFG.h"
  17 #include "llvm/Analysis/InstructionSimplify.h"
  18 #include "llvm/Analysis/TargetTransformInfo.h"
  19 #include "llvm/ADT/SetOperations.h"
  20 #include "llvm/ADT/Statistic.h"
  21 #include "llvm/ADT/DenseSet.h"
  22 #include "llvm/ADT/SetVector.h"
  23 #include "llvm/ADT/StringRef.h"
  24 #include "llvm/ADT/MapVector.h"
  25 #include "llvm/IR/BasicBlock.h"
  26 #include "llvm/IR/CallSite.h"
  27 #include "llvm/IR/Dominators.h"
  28 #include "llvm/IR/Function.h"
  29 #include "llvm/IR/IRBuilder.h"
  30 #include "llvm/IR/InstIterator.h"
  31 #include "llvm/IR/Instructions.h"
  32 #include "llvm/IR/Intrinsics.h"
  33 #include "llvm/IR/IntrinsicInst.h"
  34 #include "llvm/IR/Module.h"
  35 #include "llvm/IR/MDBuilder.h"
  36 #include "llvm/IR/Statepoint.h"
  37 #include "llvm/IR/Value.h"
  38 #include "llvm/IR/Verifier.h"
  39 #include "llvm/Support/Debug.h"
  40 #include "llvm/Support/CommandLine.h"
  41 #include "llvm/Transforms/Scalar.h"
  42 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  43 #include "llvm/Transforms/Utils/Cloning.h"
  44 #include "llvm/Transforms/Utils/Local.h"
  45 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
  46
  47 #define DEBUG_TYPE "rewrite-statepoints-for-gc"
  48
  49 using namespace llvm;
  50
// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                  cl::init(false));
// Print the callee name and the number of live values at each safepoint
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
                                      cl::init(false));
// Print out the base pointers for debugging
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
                                       cl::init(false));

// Cost threshold measuring when it is profitable to rematerialize value instead
// of relocating it
static cl::opt<unsigned>
RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
                           cl::init(6));

// ClobberNonLive defaults to true only when XDEBUG is defined.  The hidden
// -rs4gc-clobber-non-live option stores directly into this variable (via
// cl::location), so it can override the build-time default either way.
#ifdef XDEBUG
static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
#endif
static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                  cl::location(ClobberNonLive),
                                                  cl::Hidden);

// When set, deopt state is read from the "deopt" operand bundle of the call
// (see GetDeoptBundleOperands).
static cl::opt<bool> UseDeoptBundles("rs4gc-use-deopt-bundles", cl::Hidden,
                                     cl::init(false));
// When set (the default), a call site without a "deopt" operand bundle is
// tolerated; otherwise GetDeoptBundleOperands asserts on such a call.
static cl::opt<bool>
    AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                   cl::Hidden, cl::init(true));

/// Should we split vectors of pointers into their individual elements?  This
/// is known to be buggy, but the alternate implementation isn't yet ready.
/// This is purely to provide a debugging and diagnostic hook until the vector
/// split is replaced with vector relocations.
static cl::opt<bool> UseVectorSplit("rs4gc-split-vector-values", cl::Hidden,
                                    cl::init(true));
  87
  88 namespace {
  89 struct RewriteStatepointsForGC : public ModulePass {
  90   static char ID; // Pass identification, replacement for typeid
  91
  92   RewriteStatepointsForGC() : ModulePass(ID) {
  93     initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry());
  94   }
  95   bool runOnFunction(Function &F);
  96   bool runOnModule(Module &M) override {
  97     bool Changed = false;
  98     for (Function &F : M)
  99       Changed |= runOnFunction(F);
 100
 101     if (Changed) {
 102       // stripNonValidAttributes asserts that shouldRewriteStatepointsIn
 103       // returns true for at least one function in the module.  Since at least
 104       // one function changed, we know that the precondition is satisfied.
 105       stripNonValidAttributes(M);
 106     }
 107
 108     return Changed;
 109   }
 110
 111   void getAnalysisUsage(AnalysisUsage &AU) const override {
 112     // We add and rewrite a bunch of instructions, but don't really do much
 113     // else.  We could in theory preserve a lot more analyses here.
 114     AU.addRequired<DominatorTreeWrapperPass>();
 115     AU.addRequired<TargetTransformInfoWrapperPass>();
 116   }
 117
 118   /// The IR fed into RewriteStatepointsForGC may have had attributes implying
 119   /// dereferenceability that are no longer valid/correct after
 120   /// RewriteStatepointsForGC has run.  This is because semantically, after
 121   /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
 122   /// heap.  stripNonValidAttributes (conservatively) restores correctness
 123   /// by erasing all attributes in the module that externally imply
 124   /// dereferenceability.
 125   /// Similar reasoning also applies to the noalias attributes. gc.statepoint
 126   /// can touch the entire heap including noalias objects.
 127   void stripNonValidAttributes(Module &M);
 128
 129   // Helpers for stripNonValidAttributes
 130   void stripNonValidAttributesFromBody(Function &F);
 131   void stripNonValidAttributesFromPrototype(Function &F);
 132 };
 133 } // namespace
 134
 135 char RewriteStatepointsForGC::ID = 0;
 136
 137 ModulePass *llvm::createRewriteStatepointsForGCPass() {
 138   return new RewriteStatepointsForGC();
 139 }
 140
 141 INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
 142                       "Make relocations explicit at statepoints", false, false)
 143 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 144 INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
 145                     "Make relocations explicit at statepoints", false, false)
 146
 147 namespace {
/// Per-basic-block sets used by the liveness dataflow computation.
struct GCPtrLivenessData {
  /// Values defined in this block.
  DenseMap<BasicBlock *, DenseSet<Value *>> KillSet;
  /// Values used in this block (and thus live); does not include values
  /// killed within this block.
  DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet;

  /// Values live into this basic block (i.e. used by any
  /// instruction in this basic block or ones reachable from here)
  DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn;

  /// Values live out of this basic block (i.e. live into
  /// any successor block)
  DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut;
};
 163
 164 // The type of the internal cache used inside the findBasePointers family
 165 // of functions.  From the callers perspective, this is an opaque type and
 166 // should not be inspected.
 167 //
 168 // In the actual implementation this caches two relations:
 169 // - The base relation itself (i.e. this pointer is based on that one)
 170 // - The base defining value relation (i.e. before base_phi insertion)
 171 // Generally, after the execution of a full findBasePointer call, only the
 172 // base relation will remain.  Internally, we add a mixture of the two
 173 // types, then update all the second type to the first type
 174 typedef DenseMap<Value *, Value *> DefiningValueMapTy;
 175 typedef DenseSet<Value *> StatepointLiveSetTy;
 176 typedef DenseMap<AssertingVH<Instruction>, AssertingVH<Value>>
 177   RematerializedValueMapTy;
 178
/// Per-safepoint bookkeeping that is filled in incrementally.
struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint
  StatepointLiveSetTy LiveSet;

  /// Mapping from live pointers to a base-defining-value
  DenseMap<Value *, Value *> PointerToBase;

  /// The *new* gc.statepoint instruction itself.  This produces the token
  /// that normal path gc.relocates and the gc.result are tied to.
  Instruction *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached.
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Records live values that are rematerialized instead of relocated.
  /// They are not included in the 'LiveSet' field.
  /// Maps each rematerialized copy to its original value.
  RematerializedValueMapTy RematerializedValues;
};
 199 }
 200
 201 static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
 202   assert(UseDeoptBundles && "Should not be called otherwise!");
 203
 204   Optional<OperandBundleUse> DeoptBundle = CS.getOperandBundle("deopt");
 205
 206   if (!DeoptBundle.hasValue()) {
 207     assert(AllowStatepointWithNoDeoptInfo &&
 208            "Found non-leaf call without deopt info!");
 209     return None;
 210   }
 211
 212   return DeoptBundle.getValue().Inputs;
 213 }
 214
 215 /// Compute the live-in set for every basic block in the function
 216 static void computeLiveInValues(DominatorTree &DT, Function &F,
 217                                 GCPtrLivenessData &Data);
 218
 219 /// Given results from the dataflow liveness computation, find the set of live
 220 /// Values at a particular instruction.
 221 static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
 222                               StatepointLiveSetTy &out);
 223
 224 // TODO: Once we can get to the GCStrategy, this becomes
 225 // Optional<bool> isGCManagedPointer(const Type *Ty) const override {
 226
 227 static bool isGCPointerType(Type *T) {
 228   if (auto *PT = dyn_cast<PointerType>(T))
 229     // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
 230     // GC managed heap.  We know that a pointer into this heap needs to be
 231     // updated and that no other pointer does.
 232     return (1 == PT->getAddressSpace());
 233   return false;
 234 }
 235
 236 // Return true if this type is one which a) is a gc pointer or contains a GC
 237 // pointer and b) is of a type this code expects to encounter as a live value.
 238 // (The insertion code will assert that a type which matches (a) and not (b)
 239 // is not encountered.)
 240 static bool isHandledGCPointerType(Type *T) {
 241   // We fully support gc pointers
 242   if (isGCPointerType(T))
 243     return true;
 244   // We partially support vectors of gc pointers. The code will assert if it
 245   // can't handle something.
 246   if (auto VT = dyn_cast<VectorType>(T))
 247     if (isGCPointerType(VT->getElementType()))
 248       return true;
 249   return false;
 250 }
 251
 252 #ifndef NDEBUG
 253 /// Returns true if this type contains a gc pointer whether we know how to
 254 /// handle that type or not.
 255 static bool containsGCPtrType(Type *Ty) {
 256   if (isGCPointerType(Ty))
 257     return true;
 258   if (VectorType *VT = dyn_cast<VectorType>(Ty))
 259     return isGCPointerType(VT->getScalarType());
 260   if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
 261     return containsGCPtrType(AT->getElementType());
 262   if (StructType *ST = dyn_cast<StructType>(Ty))
 263     return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
 264                        containsGCPtrType);
 265   return false;
 266 }
 267
 268 // Returns true if this is a type which a) is a gc pointer or contains a GC
 269 // pointer and b) is of a type which the code doesn't expect (i.e. first class
 270 // aggregates).  Used to trip assertions.
 271 static bool isUnhandledGCPointerType(Type *Ty) {
 272   return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
 273 }
 274 #endif
 275
 276 static bool order_by_name(Value *a, Value *b) {
 277   if (a->hasName() && b->hasName()) {
 278     return -1 == a->getName().compare(b->getName());
 279   } else if (a->hasName() && !b->hasName()) {
 280     return true;
 281   } else if (!a->hasName() && b->hasName()) {
 282     return false;
 283   } else {
 284     // Better than nothing, but not stable
 285     return a < b;
 286   }
 287 }
 288
 289 // Return the name of the value suffixed with the provided value, or if the
 290 // value didn't have a name, the default value specified.
 291 static std::string suffixed_name_or(Value *V, StringRef Suffix,
 292                                     StringRef DefaultName) {
 293   return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
 294 }
 295
 296 // Conservatively identifies any definitions which might be live at the
 297 // given instruction. The  analysis is performed immediately before the
 298 // given instruction. Values defined by that instruction are not considered
 299 // live.  Values used by that instruction are considered live.
 300 static void analyzeParsePointLiveness(
 301     DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
 302     const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
 303   Instruction *inst = CS.getInstruction();
 304
 305   StatepointLiveSetTy LiveSet;
 306   findLiveSetAtInst(inst, OriginalLivenessData, LiveSet);
 307
 308   if (PrintLiveSet) {
 309     // Note: This output is used by several of the test cases
 310     // The order of elements in a set is not stable, put them in a vec and sort
 311     // by name
 312     SmallVector<Value *, 64> Temp;
 313     Temp.insert(Temp.end(), LiveSet.begin(), LiveSet.end());
 314     std::sort(Temp.begin(), Temp.end(), order_by_name);
 315     errs() << "Live Variables:\n";
 316     for (Value *V : Temp)
 317       dbgs() << " " << V->getName() << " " << *V << "\n";
 318   }
 319   if (PrintLiveSetSize) {
 320     errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
 321     errs() << "Number live values: " << LiveSet.size() << "\n";
 322   }
 323   result.LiveSet = LiveSet;
 324 }
 325
 326 static bool isKnownBaseResult(Value *V);
 327 namespace {
 328 /// A single base defining value - An immediate base defining value for an
 329 /// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
 330 /// For instructions which have multiple pointer [vector] inputs or that
 331 /// transition between vector and scalar types, there is no immediate base
 332 /// defining value.  The 'base defining value' for 'Def' is the transitive
 333 /// closure of this relation stopping at the first instruction which has no
 334 /// immediate base defining value.  The b.d.v. might itself be a base pointer,
 335 /// but it can also be an arbitrary derived pointer.
 336 struct BaseDefiningValueResult {
 337   /// Contains the value which is the base defining value.
 338   Value * const BDV;
 339   /// True if the base defining value is also known to be an actual base
 340   /// pointer.
 341   const bool IsKnownBase;
 342   BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
 343     : BDV(BDV), IsKnownBase(IsKnownBase) {
 344 #ifndef NDEBUG
 345     // Check consistency between new and old means of checking whether a BDV is
 346     // a base.
 347     bool MustBeBase = isKnownBaseResult(BDV);
 348     assert(!MustBeBase || MustBeBase == IsKnownBase);
 349 #endif
 350   }
 351 };
 352 }
 353
 354 static BaseDefiningValueResult findBaseDefiningValue(Value *I);
 355
 356 /// Return a base defining value for the 'Index' element of the given vector
 357 /// instruction 'I'.  If Index is null, returns a BDV for the entire vector
 358 /// 'I'.  As an optimization, this method will try to determine when the
 359 /// element is known to already be a base pointer.  If this can be established,
 360 /// the second value in the returned pair will be true.  Note that either a
 361 /// vector or a pointer typed value can be returned.  For the former, the
 362 /// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
 363 /// If the later, the return pointer is a BDV (or possibly a base) for the
 364 /// particular element in 'I'.
 365 static BaseDefiningValueResult
 366 findBaseDefiningValueOfVector(Value *I) {
 367   assert(I->getType()->isVectorTy() &&
 368          cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
 369          "Illegal to ask for the base pointer of a non-pointer type");
 370
 371   // Each case parallels findBaseDefiningValue below, see that code for
 372   // detailed motivation.
 373
 374   if (isa<Argument>(I))
 375     // An incoming argument to the function is a base pointer
 376     return BaseDefiningValueResult(I, true);
 377
 378   // We shouldn't see the address of a global as a vector value?
 379   assert(!isa<GlobalVariable>(I) &&
 380          "unexpected global variable found in base of vector");
 381
 382   // inlining could possibly introduce phi node that contains
 383   // undef if callee has multiple returns
 384   if (isa<UndefValue>(I))
 385     // utterly meaningless, but useful for dealing with partially optimized
 386     // code.
 387     return BaseDefiningValueResult(I, true);
 388
 389   // Due to inheritance, this must be _after_ the global variable and undef
 390   // checks
 391   if (Constant *Con = dyn_cast<Constant>(I)) {
 392     assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
 393            "order of checks wrong!");
 394     assert(Con->isNullValue() && "null is the only case which makes sense");
 395     return BaseDefiningValueResult(Con, true);
 396   }
 397
 398   if (isa<LoadInst>(I))
 399     return BaseDefiningValueResult(I, true);
 400
 401   if (isa<InsertElementInst>(I))
 402     // We don't know whether this vector contains entirely base pointers or
 403     // not.  To be conservatively correct, we treat it as a BDV and will
 404     // duplicate code as needed to construct a parallel vector of bases.
 405     return BaseDefiningValueResult(I, false);
 406
 407   if (isa<ShuffleVectorInst>(I))
 408     // We don't know whether this vector contains entirely base pointers or
 409     // not.  To be conservatively correct, we treat it as a BDV and will
 410     // duplicate code as needed to construct a parallel vector of bases.
 411     // TODO: There a number of local optimizations which could be applied here
 412     // for particular sufflevector patterns.
 413     return BaseDefiningValueResult(I, false);
 414
 415   // A PHI or Select is a base defining value.  The outer findBasePointer
 416   // algorithm is responsible for constructing a base value for this BDV.
 417   assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
 418          "unknown vector instruction - no base found for vector element");
 419   return BaseDefiningValueResult(I, false);
 420 }
 421
/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
///
/// The IsKnownBase field of the result is true for cases where the returned
/// value is itself treated as a base, and false where the caller still has
/// to resolve it (extractelement, select, phi).  The order of the checks
/// below matters; do not reorder them.
static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
  // Vector typed values are handled by the dedicated helper.
  if (I->getType()->isVectorTy())
    return findBaseDefiningValueOfVector(I);

  assert(I->getType()->isPointerTy() &&
         "Illegal to ask for the base pointer of a non-pointer type");

  if (isa<Argument>(I))
    // An incoming argument to the function is a base pointer
    // We should have never reached here if this argument isn't a gc value
    return BaseDefiningValueResult(I, true);

  if (isa<Constant>(I))
    // We assume that objects with a constant base (e.g. a global) can't move
    // and don't need to be reported to the collector because they are always
    // live.  All constants have constant bases.  Besides global references, all
    // kinds of constants (e.g. undef, constant expressions, null pointers) can
    // be introduced by the inliner or the optimizer, especially on dynamically
    // dead paths.  See e.g. test4 in constants.ll.
    return BaseDefiningValueResult(I, true);

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Def = CI->stripPointerCasts();
    // If stripping pointer casts changes the address space there is an
    // addrspacecast in between.
    assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
               cast<PointerType>(CI->getType())->getAddressSpace() &&
           "unsupported addrspacecast");
    // If we find a cast instruction here, it means we've found a cast which is
    // not simply a pointer cast (i.e. an inttoptr).  We don't know how to
    // handle int->ptr conversion.
    assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
    return findBaseDefiningValue(Def);
  }

  if (isa<LoadInst>(I))
    // The value loaded is a gc base itself
    return BaseDefiningValueResult(I, true);


  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
    // The base of this GEP is the base
    return findBaseDefiningValue(GEP->getPointerOperand());

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      // fall through to general call handling
      break;
    case Intrinsic::experimental_gc_statepoint:
      llvm_unreachable("statepoints don't produce pointers");
    case Intrinsic::experimental_gc_relocate: {
      // Rerunning safepoint insertion after safepoints are already
      // inserted is not supported.  It could probably be made to work,
      // but why are you doing this?  There's no good reason.
      llvm_unreachable("repeat safepoint insertion is not supported");
    }
    case Intrinsic::gcroot:
      // Currently, this mechanism hasn't been extended to work with gcroot.
      // There's no reason it couldn't be, but I haven't thought about the
      // implications much.
      llvm_unreachable(
          "interaction with the gcroot mechanism is not supported");
    }
  }
  // We assume that functions in the source language only return base
  // pointers.  This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I))
    return BaseDefiningValueResult(I, true);

  // I have absolutely no idea how to implement this part yet.  It's not
  // necessarily hard, I just haven't really looked at it yet.
  assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");

  if (isa<AtomicCmpXchgInst>(I))
    // A CAS is effectively an atomic store and load combined under a
    // predicate.  From the perspective of base pointers, we just treat it
    // like a load.
    return BaseDefiningValueResult(I, true);

  assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
                                   "binary ops which don't apply to pointers");

  // The aggregate ops.  Aggregates can either be in the heap or on the
  // stack, but in either case, this is simply a field load.  As a result,
  // this is a defining definition of the base just like a load is.
  if (isa<ExtractValueInst>(I))
    return BaseDefiningValueResult(I, true);

  // We should never see an insertvalue since that would require that we be
  // tracing back a struct value, not a pointer value.
  assert(!isa<InsertValueInst>(I) &&
         "Base pointer for a struct is meaningless");

  // An extractelement produces a base result exactly when its input does.
  // We may need to insert a parallel instruction to extract the appropriate
  // element out of the base vector corresponding to the input. Given this,
  // it's analogous to the phi and select case even though it's not a merge.
  if (isa<ExtractElementInst>(I))
    // Note: There are a lot of obvious peephole cases here.  These are
    // deliberately handled after the main base pointer inference algorithm to
    // make writing test cases to exercise that code easier.
    return BaseDefiningValueResult(I, false);

  // The last two cases here don't return a base pointer.  Instead, they
  // return a value which dynamically selects from among several base
  // derived pointers (each with its own base potentially).  It's the job of
  // the caller to resolve these.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "missing instruction case in findBaseDefiningValing");
  return BaseDefiningValueResult(I, false);
}
 539
 540 /// Returns the base defining value for this value.
 541 static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
 542   Value *&Cached = Cache[I];
 543   if (!Cached) {
 544     Cached = findBaseDefiningValue(I).BDV;
 545     DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
 546                  << Cached->getName() << "\n");
 547   }
 548   assert(Cache[I] != nullptr);
 549   return Cached;
 550 }
 551
 552 /// Return a base pointer for this value if known.  Otherwise, return it's
 553 /// base defining value.
 554 static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
 555   Value *Def = findBaseDefiningValueCached(I, Cache);
 556   auto Found = Cache.find(Def);
 557   if (Found != Cache.end()) {
 558     // Either a base-of relation, or a self reference.  Caller must check.
 559     return Found->second;
 560   }
 561   // Only a BDV available
 562   return Def;
 563 }
 564
 565 /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
 566 /// is it known to be a base pointer?  Or do we need to continue searching.
 567 static bool isKnownBaseResult(Value *V) {
 568   if (!isa<PHINode>(V) && !isa<SelectInst>(V) &&
 569       !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
 570       !isa<ShuffleVectorInst>(V)) {
 571     // no recursion possible
 572     return true;
 573   }
 574   if (isa<Instruction>(V) &&
 575       cast<Instruction>(V)->getMetadata("is_base_value")) {
 576     // This is a previously inserted base phi or select.  We know
 577     // that this is a base value.
 578     return true;
 579   }
 580
 581   // We need to keep searching
 582   return false;
 583 }
 584
 585 namespace {
 586 /// Models the state of a single base defining value in the findBasePointer
 587 /// algorithm for determining where a new instruction is needed to propagate
 588 /// the base of this BDV.
 589 class BDVState {
 590 public:
 591   enum Status { Unknown, Base, Conflict };
 592
 593   BDVState(Status s, Value *b = nullptr) : status(s), base(b) {
 594     assert(status != Base || b);
 595   }
 596   explicit BDVState(Value *b) : status(Base), base(b) {}
 597   BDVState() : status(Unknown), base(nullptr) {}
 598
 599   Status getStatus() const { return status; }
 600   Value *getBase() const { return base; }
 601
 602   bool isBase() const { return getStatus() == Base; }
 603   bool isUnknown() const { return getStatus() == Unknown; }
 604   bool isConflict() const { return getStatus() == Conflict; }
 605
 606   bool operator==(const BDVState &other) const {
 607     return base == other.base && status == other.status;
 608   }
 609
 610   bool operator!=(const BDVState &other) const { return !(*this == other); }
 611
 612   LLVM_DUMP_METHOD
 613   void dump() const { print(dbgs()); dbgs() << '\n'; }
 614
 615   void print(raw_ostream &OS) const {
 616     switch (status) {
 617     case Unknown:
 618       OS << "U";
 619       break;
 620     case Base:
 621       OS << "B";
 622       break;
 623     case Conflict:
 624       OS << "C";
 625       break;
 626     };
 627     OS << " (" << base << " - "
 628        << (base ? base->getName() : "nullptr") << "): ";
 629   }
 630
 631 private:
 632   Status status;
 633   AssertingVH<Value> base; // non null only if status == base
 634 };
 635 }
 636
 637 #ifndef NDEBUG
/// Stream insertion for BDVState; forwards to BDVState::print and returns
/// the stream so that insertions can be chained.
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  State.print(OS);
  return OS;
}
 642 #endif
 643
 644 namespace {
 645 // Values of type BDVState form a lattice, and this is a helper
 646 // class that implementes the meet operation.  The meat of the meet
 647 // operation is implemented in MeetBDVStates::pureMeet
 648 class MeetBDVStates {
 649 public:
 650   /// Initializes the currentResult to the TOP state so that if can be met with
 651   /// any other state to produce that state.
 652   MeetBDVStates() {}
 653
 654   // Destructively meet the current result with the given BDVState
 655   void meetWith(BDVState otherState) {
 656     currentResult = meet(otherState, currentResult);
 657   }
 658
 659   BDVState getResult() const { return currentResult; }
 660
 661 private:
 662   BDVState currentResult;
 663
 664   /// Perform a meet operation on two elements of the BDVState lattice.
 665   static BDVState meet(BDVState LHS, BDVState RHS) {
 666     assert((pureMeet(LHS, RHS) == pureMeet(RHS, LHS)) &&
 667            "math is wrong: meet does not commute!");
 668     BDVState Result = pureMeet(LHS, RHS);
 669     DEBUG(dbgs() << "meet of " << LHS << " with " << RHS
 670                  << " produced " << Result << "\n");
 671     return Result;
 672   }
 673
 674   static BDVState pureMeet(const BDVState &stateA, const BDVState &stateB) {
 675     switch (stateA.getStatus()) {
 676     case BDVState::Unknown:
 677       return stateB;
 678
 679     case BDVState::Base:
 680       assert(stateA.getBase() && "can't be null");
 681       if (stateB.isUnknown())
 682         return stateA;
 683
 684       if (stateB.isBase()) {
 685         if (stateA.getBase() == stateB.getBase()) {
 686           assert(stateA == stateB && "equality broken!");
 687           return stateA;
 688         }
 689         return BDVState(BDVState::Conflict);
 690       }
 691       assert(stateB.isConflict() && "only three states!");
 692       return BDVState(BDVState::Conflict);
 693
 694     case BDVState::Conflict:
 695       return stateA;
 696     }
 697     llvm_unreachable("only three states!");
 698   }
 699 };
 700 }
 701
 702
/// For a given value or instruction, figure out what base ptr it's derived
/// from.  For gc objects, this is simply itself.  On success, returns a value
/// which is the base pointer.  (This is reliable and can be used for
/// relocation.)  On failure, returns nullptr.
///
/// \param I      the value whose base pointer is wanted.
/// \param cache  memoization table shared across calls; it is passed to
///               findBaseOrBDV and, at the end of this function, updated with
///               the final base for every BDV visited here.
///
/// When the base is not already known, this may insert new instructions
/// (phis, selects, extractelements, insertelements and bitcasts) into the
/// function, each new base tagged with "is_base_value" metadata.
///
/// NOTE(review): no path in this function explicitly returns nullptr; the
/// two returns yield either the result of findBaseOrBDV or the cached entry
/// for it.  Confirm whether the "on failure" clause above still applies.
static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
  Value *def = findBaseOrBDV(I, cache);

  // Fast path: the base defining value is itself a known base, so no
  // dataflow and no new instructions are needed.
  if (isKnownBaseResult(def)) {
    return def;
  }

  // Here's the rough algorithm:
  // - For every SSA value, construct a mapping to either an actual base
  //   pointer or a PHI which obscures the base pointer.
  // - Construct a mapping from PHI to unknown TOP state.  Use an
  //   optimistic algorithm to propagate base pointer information.  Lattice
  //   looks like:
  //   UNKNOWN
  //   b1 b2 b3 b4
  //   CONFLICT
  //   When algorithm terminates, all PHIs will either have a single concrete
  //   base or be in a conflict state.
  // - For every conflict, insert a dummy PHI node without arguments.  Add
  //   these to the base[Instruction] = BasePtr mapping.  For every
  //   non-conflict, add the actual base.
  // - For every conflict, add arguments for the base[a] of each input
  //   argument.
  //
  // Note: A simpler form of this would be to add the conflict form of all
  // PHIs without running the optimistic algorithm.  This would be
  // analogous to pessimistic data flow and would likely lead to an
  // overall worse solution.

#ifndef NDEBUG
  // Debug-only predicate: the instruction kinds this function knows how to
  // treat as base defining values.
  auto isExpectedBDVType = [](Value *BDV) {
    return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
           isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV);
  };
#endif

  // Once populated, will contain a mapping from each potentially non-base BDV
  // to a lattice value (described above) which corresponds to that BDV.
  // We use the order of insertion (DFS over the def/use graph) to provide a
  // stable deterministic ordering for visiting DenseMaps (which are unordered)
  // below.  This is important for deterministic compilation.
  MapVector<Value *, BDVState> States;

  // Phase 1: discovery.
  // Recursively fill in all base defining values reachable from the initial
  // one for which we don't already know a definite base value.
  /* scope */ {
    SmallVector<Value*, 16> Worklist;
    Worklist.push_back(def);
    States.insert(std::make_pair(def, BDVState()));
    while (!Worklist.empty()) {
      Value *Current = Worklist.pop_back_val();
      assert(!isKnownBaseResult(Current) && "why did it get added?");

      // Map an operand of Current to its BDV and, if that BDV is not yet a
      // known base and has not been seen before, queue it for a visit.
      auto visitIncomingValue = [&](Value *InVal) {
        Value *Base = findBaseOrBDV(InVal, cache);
        if (isKnownBaseResult(Base))
          // Known bases won't need new instructions introduced and can be
          // ignored safely
          return;
        assert(isExpectedBDVType(Base) && "the only non-base values "
               "we see should be base defining values");
        // insert() reports whether the key was new; only new entries are
        // pushed, which keeps cycles through phis from looping forever.
        if (States.insert(std::make_pair(Base, BDVState())).second)
          Worklist.push_back(Base);
      };
      if (PHINode *Phi = dyn_cast<PHINode>(Current)) {
        for (Value *InVal : Phi->incoming_values())
          visitIncomingValue(InVal);
      } else if (SelectInst *Sel = dyn_cast<SelectInst>(Current)) {
        visitIncomingValue(Sel->getTrueValue());
        visitIncomingValue(Sel->getFalseValue());
      } else if (auto *EE = dyn_cast<ExtractElementInst>(Current)) {
        visitIncomingValue(EE->getVectorOperand());
      } else if (auto *IE = dyn_cast<InsertElementInst>(Current)) {
        visitIncomingValue(IE->getOperand(0)); // vector operand
        visitIncomingValue(IE->getOperand(1)); // scalar operand
      } else {
        // There is one known class of instructions we know we don't handle.
        assert(isa<ShuffleVectorInst>(Current));
        llvm_unreachable("unimplemented instruction case");
      }
    }
  }

#ifndef NDEBUG
  DEBUG(dbgs() << "States after initialization:\n");
  for (auto Pair : States) {
    DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // Return a phi state for a base defining value.  We'll generate a new
  // base state for known bases and expect to find a cached state otherwise.
  auto getStateForBDV = [&](Value *baseValue) {
    if (isKnownBaseResult(baseValue))
      return BDVState(baseValue);
    auto I = States.find(baseValue);
    assert(I != States.end() && "lookup failed!");
    return I->second;
  };

  // Phase 2: optimistic fixed-point iteration.  Every BDV's state is
  // recomputed as the meet of its inputs' states until a full sweep changes
  // nothing.
  bool progress = true;
  while (progress) {
#ifndef NDEBUG
    const size_t oldSize = States.size();
#endif
    progress = false;
    // We're only changing values in this loop, thus safe to keep iterators.
    // Since this is computing a fixed point, the order of visit does not
    // affect the result.  TODO: We could use a worklist here and make this run
    // much faster.
    for (auto Pair : States) {
      Value *BDV = Pair.first;
      assert(!isKnownBaseResult(BDV) && "why did it get added?");

      // Given an input value for the current instruction, return a BDVState
      // instance which represents the BDV of that value.
      auto getStateForInput = [&](Value *V) mutable {
        Value *BDV = findBaseOrBDV(V, cache);
        return getStateForBDV(BDV);
      };

      MeetBDVStates calculateMeet;
      if (SelectInst *select = dyn_cast<SelectInst>(BDV)) {
        calculateMeet.meetWith(getStateForInput(select->getTrueValue()));
        calculateMeet.meetWith(getStateForInput(select->getFalseValue()));
      } else if (PHINode *Phi = dyn_cast<PHINode>(BDV)) {
        for (Value *Val : Phi->incoming_values())
          calculateMeet.meetWith(getStateForInput(Val));
      } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
        // The 'meet' for an extractelement is slightly trivial, but it's still
        // useful in that it drives us to conflict if our input is.
        calculateMeet.meetWith(getStateForInput(EE->getVectorOperand()));
      } else {
        // Given there's an inherent type mismatch between the operands, will
        // *always* produce Conflict.
        auto *IE = cast<InsertElementInst>(BDV);
        calculateMeet.meetWith(getStateForInput(IE->getOperand(0)));
        calculateMeet.meetWith(getStateForInput(IE->getOperand(1)));
      }

      BDVState oldState = States[BDV];
      BDVState newState = calculateMeet.getResult();
      if (oldState != newState) {
        progress = true;
        States[BDV] = newState;
      }
    }

    assert(oldSize == States.size() &&
           "fixed point shouldn't be adding any new nodes to state");
  }

#ifndef NDEBUG
  DEBUG(dbgs() << "States after meet iteration:\n");
  for (auto Pair : States) {
    DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // Phase 3: materialize placeholders.
  // Insert Phis for all conflicts
  // TODO: adjust naming patterns to avoid this order of iteration dependency
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    // Note: State is a copy taken before States[I] is overwritten below, so
    // the status checks in this iteration see the result of the fixed point.
    BDVState State = Pair.second;
    assert(!isKnownBaseResult(I) && "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");

    // extractelement instructions are a bit special in that we may need to
    // insert an extract even when we know an exact base for the instruction.
    // The problem is that we need to convert from a vector base to a scalar
    // base for the particular index we're interested in.
    if (State.isBase() && isa<ExtractElementInst>(I) &&
        isa<VectorType>(State.getBase()->getType())) {
      auto *EE = cast<ExtractElementInst>(I);
      // TODO: In many cases, the new instruction is just EE itself.  We should
      // exploit this, but can't do it here since it would break the invariant
      // about the BDV not being known to be a base.
      auto *BaseInst = ExtractElementInst::Create(State.getBase(),
                                                  EE->getIndexOperand(),
                                                  "base_ee", EE);
      BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
      States[I] = BDVState(BDVState::Base, BaseInst);
    }

    // Since we're joining a vector and scalar base, they can never be the
    // same.  As a result, we should always see insert element having reached
    // the conflict state.
    if (isa<InsertElementInst>(I)) {
      assert(State.isConflict());
    }

    // Only conflicts need a placeholder; a Base state already names its base.
    if (!State.isConflict())
      continue;

    /// Create and insert a new instruction which will represent the base of
    /// the given instruction 'I'.  The placeholder has the same shape as 'I'
    /// and is inserted immediately before it; its base operands are left
    /// empty (phi) or undef (others) and are filled in by the fixup loop below.
    auto MakeBaseInstPlaceholder = [](Instruction *I) -> Instruction* {
      if (isa<PHINode>(I)) {
        BasicBlock *BB = I->getParent();
        int NumPreds = std::distance(pred_begin(BB), pred_end(BB));
        assert(NumPreds > 0 && "how did we reach here");
        std::string Name = suffixed_name_or(I, ".base", "base_phi");
        return PHINode::Create(I->getType(), NumPreds, Name, I);
      } else if (SelectInst *Sel = dyn_cast<SelectInst>(I)) {
        // The undef will be replaced later
        UndefValue *Undef = UndefValue::get(Sel->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_select");
        return SelectInst::Create(Sel->getCondition(), Undef,
                                  Undef, Name, Sel);
      } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
        UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_ee");
        return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name,
                                          EE);
      } else {
        auto *IE = cast<InsertElementInst>(I);
        UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType());
        UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_ie");
        return InsertElementInst::Create(VecUndef, ScalarUndef,
                                         IE->getOperand(2), Name, IE);
      }

    };
    Instruction *BaseInst = MakeBaseInstPlaceholder(I);
    // Add metadata marking this as a base value
    BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
    States[I] = BDVState(BDVState::Conflict, BaseInst);
  }

  // Returns an instruction which produces the base pointer for a given
  // instruction.  The instruction is assumed to be an input to one of the BDVs
  // seen in the inference algorithm above.  As such, we must either already
  // know its base defining value is a base, or have inserted a new
  // instruction to propagate the base of its BDV and have entered that newly
  // introduced instruction into the state table.  In either case, we are
  // assured to be able to determine an instruction which produces its base
  // pointer.
  //
  // If the base's type differs from Input's and InsertPt is non-null, a
  // bitcast to Input's type is inserted before InsertPt and returned instead.
  // With a null InsertPt no cast is created, so the returned value may have a
  // different type than Input.
  auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
    Value *BDV = findBaseOrBDV(Input, cache);
    Value *Base = nullptr;
    if (isKnownBaseResult(BDV)) {
      Base = BDV;
    } else {
      // Either conflict or base.
      assert(States.count(BDV));
      Base = States[BDV].getBase();
    }
    assert(Base && "can't be null");
    // The cast is needed since base traversal may strip away bitcasts
    if (Base->getType() != Input->getType() &&
        InsertPt) {
      Base = new BitCastInst(Base, Input->getType(), "cast",
                             InsertPt);
    }
    return Base;
  };

  // Phase 4: wire up the placeholders.
  // Fixup all the inputs of the new PHIs.  Visit order needs to be
  // deterministic and predictable because we're naming newly created
  // instructions.
  for (auto Pair : States) {
    Instruction *BDV = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;

    assert(!isKnownBaseResult(BDV) && "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
    if (!State.isConflict())
      continue;

    if (PHINode *basephi = dyn_cast<PHINode>(State.getBase())) {
      PHINode *phi = cast<PHINode>(BDV);
      unsigned NumPHIValues = phi->getNumIncomingValues();
      for (unsigned i = 0; i < NumPHIValues; i++) {
        Value *InVal = phi->getIncomingValue(i);
        BasicBlock *InBB = phi->getIncomingBlock(i);

        // If we've already seen InBB, add the same incoming value
        // we added for it earlier.  The IR verifier requires phi
        // nodes with multiple entries from the same basic block
        // to have the same incoming value for each of those
        // entries.  If we don't do this check here and basephi
        // has a different type than base, we'll end up adding two
        // bitcasts (and hence two distinct values) as incoming
        // values for the same basic block.

        int blockIndex = basephi->getBasicBlockIndex(InBB);
        if (blockIndex != -1) {
          Value *oldBase = basephi->getIncomingValue(blockIndex);
          basephi->addIncoming(oldBase, InBB);

#ifndef NDEBUG
          // Passing a null insertion point here means no bitcast is created
          // just for the sake of this check.
          Value *Base = getBaseForInput(InVal, nullptr);
          // In essence this assert states: the only way two
          // values incoming from the same basic block may be
          // different is by being different bitcasts of the same
          // value.  A cleanup that remains TODO is changing
          // findBaseOrBDV to return an llvm::Value of the correct
          // type (and still remain pure).  This will remove the
          // need to add bitcasts.
          assert(Base->stripPointerCasts() == oldBase->stripPointerCasts() &&
                 "sanity -- findBaseOrBDV should be pure!");
#endif
          continue;
        }

        // Find the instruction which produces the base for each input.  We may
        // need to insert a bitcast in the incoming block.
        // TODO: Need to split critical edges if insertion is needed
        Value *Base = getBaseForInput(InVal, InBB->getTerminator());
        basephi->addIncoming(Base, InBB);
      }
      assert(basephi->getNumIncomingValues() == NumPHIValues);
    } else if (SelectInst *BaseSel = dyn_cast<SelectInst>(State.getBase())) {
      SelectInst *Sel = cast<SelectInst>(BDV);
      // Operand 1 & 2 are true, false path respectively. TODO: refactor to
      // something more safe and less hacky.
      for (int i = 1; i <= 2; i++) {
        Value *InVal = Sel->getOperand(i);
        // Find the instruction which produces the base for each input.  We may
        // need to insert a bitcast.
        Value *Base = getBaseForInput(InVal, BaseSel);
        BaseSel->setOperand(i, Base);
      }
    } else if (auto *BaseEE = dyn_cast<ExtractElementInst>(State.getBase())) {
      Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
      // Find the instruction which produces the base for each input.  We may
      // need to insert a bitcast.
      Value *Base = getBaseForInput(InVal, BaseEE);
      BaseEE->setOperand(0, Base);
    } else {
      auto *BaseIE = cast<InsertElementInst>(State.getBase());
      auto *BdvIE = cast<InsertElementInst>(BDV);
      // Replace one undef operand of the placeholder with the base of the
      // matching operand of the original insertelement.
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvIE->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseIE);
        BaseIE->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      UpdateOperand(1); // scalar operand
    }

  }

  // Phase 5: cleanup.
  // Now that we're done with the algorithm, see if we can optimize the
  // results slightly by reducing the number of new instructions needed.
  // Arguably, this should be integrated into the algorithm above, but
  // doing as a post process step is easier to reason about for the moment.
  //
  // ReverseMap: placeholder base -> the BDV it was created for.
  // NewInsts:   the placeholder instructions that are still alive.
  // Worklist:   placeholders waiting to be examined for simplification.
  DenseMap<Value *, Value *> ReverseMap;
  SmallPtrSet<Instruction *, 16> NewInsts;
  SmallSetVector<AssertingVH<Instruction>, 16> Worklist;
  // Note: We need to visit the states in a deterministic order; iterating
  // the States MapVector gives us its insertion order.  Note that we are
  // papering over a bigger problem with the algorithm above - its visit
  // order is not deterministic.  A larger change is needed to fix this.
  // NOTE(review): this comment used to refer to "the Keys we sorted above";
  // no such sort is visible in this function, so that wording and possibly
  // the non-determinism remark look stale - confirm.
  for (auto Pair : States) {
    auto *BDV = Pair.first;
    auto State = Pair.second;
    Value *Base = State.getBase();
    assert(BDV && Base);
    assert(!isKnownBaseResult(BDV) && "why did it get added?");
    assert(isKnownBaseResult(Base) &&
           "must be something we 'know' is a base pointer");
    if (!State.isConflict())
      continue;

    ReverseMap[Base] = BDV;
    if (auto *BaseI = dyn_cast<Instruction>(Base)) {
      NewInsts.insert(BaseI);
      Worklist.insert(BaseI);
    }
  }
  // Replace the placeholder BaseI (created for BDV) with Replacement
  // everywhere, update the state table to match, and delete BaseI.
  auto ReplaceBaseInstWith = [&](Value *BDV, Instruction *BaseI,
                                 Value *Replacement) {
    // Add users which are new instructions (excluding self references).
    // This has to happen before the RAUW below, while BaseI still has its
    // users attached.
    for (User *U : BaseI->users())
      if (auto *UI = dyn_cast<Instruction>(U))
        if (NewInsts.count(UI) && UI != BaseI)
          Worklist.insert(UI);
    // Then do the actual replacement
    NewInsts.erase(BaseI);
    ReverseMap.erase(BaseI);
    BaseI->replaceAllUsesWith(Replacement);
    assert(States.count(BDV));
    assert(States[BDV].isConflict() && States[BDV].getBase() == BaseI);
    States[BDV] = BDVState(BDVState::Conflict, Replacement);
    BaseI->eraseFromParent();
  };
  const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout();
  while (!Worklist.empty()) {
    Instruction *BaseI = Worklist.pop_back_val();
    assert(NewInsts.count(BaseI));
    Value *Bdv = ReverseMap[BaseI];
    // A placeholder that ended up identical to the instruction it shadows
    // is redundant: that instruction can serve as its own base.
    if (auto *BdvI = dyn_cast<Instruction>(Bdv))
      if (BaseI->isIdenticalTo(BdvI)) {
        DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n");
        ReplaceBaseInstWith(Bdv, BaseI, Bdv);
        continue;
      }
    // Otherwise see whether the placeholder folds to an existing value.
    if (Value *V = SimplifyInstruction(BaseI, DL)) {
      DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n");
      ReplaceBaseInstWith(Bdv, BaseI, V);
      continue;
    }
  }

  // Phase 6: publish.
  // Cache all of our results so we can cheaply reuse them
  // NOTE: This is actually two caches: one of the base defining value
  // relation and one of the base pointer relation!  FIXME
  for (auto Pair : States) {
    auto *BDV = Pair.first;
    Value *base = Pair.second.getBase();
    assert(BDV && base);

    // Only used by the debug message below.
    std::string fromstr = cache.count(BDV) ? cache[BDV]->getName() : "none";
    DEBUG(dbgs() << "Updating base value cache"
          << " for: " << BDV->getName()
          << " from: " << fromstr
          << " to: " << base->getName() << "\n");

    if (cache.count(BDV)) {
      // Once we transition from the BDV relation being stored in the cache to
      // the base relation being stored, it must be stable
      assert((!isKnownBaseResult(cache[BDV]) || cache[BDV] == base) &&
             "base relation should be stable");
    }
    cache[BDV] = base;
  }
  // def was the first key inserted into States, so the loop above has
  // written its final base into the cache.
  assert(cache.count(def));
  return cache[def];
}
1138
1139 // For a set of live pointers (base and/or derived), identify the base
1140 // pointer of the object which they are derived from.  This routine will
1141 // mutate the IR graph as needed to make the 'base' pointer live at the
1142 // definition site of 'derived'.  This ensures that any use of 'derived' can
1143 // also use 'base'.  This may involve the insertion of a number of
1144 // additional PHI nodes.
1145 //
1146 // preconditions: live is a set of pointer type Values
1147 //
1148 // side effects: may insert PHI nodes into the existing CFG, will preserve
1149 // CFG, will not remove or mutate any existing nodes
1150 //
1151 // post condition: PointerToBase contains one (derived, base) pair for every
1152 // pointer in live.  Note that derived can be equal to base if the original
1153 // pointer was a base pointer.
1154 static void
1155 findBasePointers(const StatepointLiveSetTy &live,
1156                  DenseMap<Value *, Value *> &PointerToBase,
1157                  DominatorTree *DT, DefiningValueMapTy &DVCache) {
1158   // For the naming of values inserted to be deterministic - which makes for
1159   // much cleaner and more stable tests - we need to assign an order to the
1160   // live values.  DenseSets do not provide a deterministic order across runs.
1161   SmallVector<Value *, 64> Temp;
1162   Temp.insert(Temp.end(), live.begin(), live.end());
1163   std::sort(Temp.begin(), Temp.end(), order_by_name);
1164   for (Value *ptr : Temp) {
1165     Value *base = findBasePointer(ptr, DVCache);
1166     assert(base && "failed to find base pointer");
1167     PointerToBase[ptr] = base;
1168     assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
1169             DT->dominates(cast<Instruction>(base)->getParent(),
1170                           cast<Instruction>(ptr)->getParent())) &&
1171            "The base we found better dominate the derived pointer");
1172
1173     // If you see this trip and like to live really dangerously, the code should
1174     // be correct, just with idioms the verifier can't handle.  You can try
1175     // disabling the verifier at your own substantial risk.
1176     assert(!isa<ConstantPointerNull>(base) &&
1177            "the relocation code needs adjustment to handle the relocation of "
1178            "a null pointer constant without causing false positives in the "
1179            "safepoint ir verifier.");
1180   }
1181 }
1182
1183 /// Find the required based pointers (and adjust the live set) for the given
1184 /// parse point.
1185 static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
1186                              const CallSite &CS,
1187                              PartiallyConstructedSafepointRecord &result) {
1188   DenseMap<Value *, Value *> PointerToBase;
1189   findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);
1190
1191   if (PrintBasePointers) {
1192     // Note: Need to print these in a stable order since this is checked in
1193     // some tests.
1194     errs() << "Base Pairs (w/o Relocation):\n";
1195     SmallVector<Value *, 64> Temp;
1196     Temp.reserve(PointerToBase.size());
1197     for (auto Pair : PointerToBase) {
1198       Temp.push_back(Pair.first);
1199     }
1200     std::sort(Temp.begin(), Temp.end(), order_by_name);
1201     for (Value *Ptr : Temp) {
1202       Value *Base = PointerToBase[Ptr];
1203       errs() << " derived ";
1204       Ptr->printAsOperand(errs(), false);
1205       errs() << " base ";
1206       Base->printAsOperand(errs(), false);
1207       errs() << "\n";;
1208     }
1209   }
1210
1211   result.PointerToBase = PointerToBase;
1212 }
1213
/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
///
/// This is only a forward declaration; it lets the batch overload that
/// follows call the per-call-site version before its definition appears.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  const CallSite &CS,
                                  PartiallyConstructedSafepointRecord &result);
1219
1220 static void recomputeLiveInValues(
1221     Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
1222     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
1223   // TODO-PERF: reuse the original liveness, then simply run the dataflow
1224   // again.  The old values are still live and will help it stabilize quickly.
1225   GCPtrLivenessData RevisedLivenessData;
1226   computeLiveInValues(DT, F, RevisedLivenessData);
1227   for (size_t i = 0; i < records.size(); i++) {
1228     struct PartiallyConstructedSafepointRecord &info = records[i];
1229     const CallSite &CS = toUpdate[i];
1230     recomputeLiveInValues(RevisedLivenessData, CS, info);
1231   }
1232 }
1233
1234 // When inserting gc.relocate and gc.result calls, we need to ensure there are
1235 // no uses of the original value / return value between the gc.statepoint and
1236 // the gc.relocate / gc.result call.  One case which can arise is a phi node
1237 // starting one of the successor blocks.  We also need to be able to insert the
1238 // gc.relocates only on the path which goes through the statepoint.  We might
1239 // need to split an edge to make this possible.
1240 static BasicBlock *
1241 normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
1242                             DominatorTree &DT) {
1243   BasicBlock *Ret = BB;
1244   if (!BB->getUniquePredecessor())
1245     Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);
1246
1247   // Now that 'Ret' has unique predecessor we can safely remove all phi nodes
1248   // from it
1249   FoldSingleEntryPHINodes(Ret);
1250   assert(!isa<PHINode>(Ret->begin()) &&
1251          "All PHI nodes should have been removed!");
1252
1253   // At this point, we can safely insert a gc.relocate or gc.result as the first
1254   // instruction in Ret if needed.
1255   return Ret;
1256 }
1257
1258 // Create new attribute set containing only attributes which can be transferred
1259 // from original call to the safepoint.
1260 static AttributeSet legalizeCallAttributes(AttributeSet AS) {
1261   AttributeSet Ret;
1262
1263   for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
1264     unsigned Index = AS.getSlotIndex(Slot);
1265
1266     if (Index == AttributeSet::ReturnIndex ||
1267         Index == AttributeSet::FunctionIndex) {
1268
1269       for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) {
1270
1271         // Do not allow certain attributes - just skip them
1272         // Safepoint can not be read only or read none.
1273         if (Attr.hasAttribute(Attribute::ReadNone) ||
1274             Attr.hasAttribute(Attribute::ReadOnly))
1275           continue;
1276
1277         // These attributes control the generation of the gc.statepoint call /
1278         // invoke itself; and once the gc.statepoint is in place, they're of no
1279         // use.
1280         if (Attr.hasAttribute("statepoint-num-patch-bytes") ||
1281             Attr.hasAttribute("statepoint-id"))
1282           continue;
1283
1284         Ret = Ret.addAttributes(
1285             AS.getContext(), Index,
1286             AttributeSet::get(AS.getContext(), Index, AttrBuilder(Attr)));
1287       }
1288     }
1289
1290     // Just skip parameter attributes for now
1291   }
1292
1293   return Ret;
1294 }
1295
1296 /// Helper function to place all gc relocates necessary for the given
1297 /// statepoint.
1298 /// Inputs:
1299 ///   liveVariables - list of variables to be relocated.
1300 ///   liveStart - index of the first live variable.
1301 ///   basePtrs - base pointers.
1302 ///   statepointToken - statepoint instruction to which relocates should be
1303 ///   bound.
1304 ///   Builder - Llvm IR builder to be used to construct new calls.
1305 static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
1306                               const int LiveStart,
1307                               ArrayRef<Value *> BasePtrs,
1308                               Instruction *StatepointToken,
1309                               IRBuilder<> Builder) {
1310   if (LiveVariables.empty())
1311     return;
1312
1313   auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
1314     auto ValIt = std::find(LiveVec.begin(), LiveVec.end(), Val);
1315     assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
1316     size_t Index = std::distance(LiveVec.begin(), ValIt);
1317     assert(Index < LiveVec.size() && "Bug in std::find?");
1318     return Index;
1319   };
1320   Module *M = StatepointToken->getModule();
1321
1322   // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
1323   // element type is i8 addrspace(1)*). We originally generated unique
1324   // declarations for each pointer type, but this proved problematic because
1325   // the intrinsic mangling code is incomplete and fragile.  Since we're moving
1326   // towards a single unified pointer type anyways, we can just cast everything
1327   // to an i8* of the right address space.  A bitcast is added later to convert
1328   // gc_relocate to the actual value's type.
1329   auto getGCRelocateDecl = [&] (Type *Ty) {
1330     assert(isHandledGCPointerType(Ty));
1331     auto AS = Ty->getScalarType()->getPointerAddressSpace();
1332     Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
1333     if (auto *VT = dyn_cast<VectorType>(Ty))
1334       NewTy = VectorType::get(NewTy, VT->getNumElements());
1335     return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate,
1336                                      {NewTy});
1337   };
1338
1339   // Lazily populated map from input types to the canonicalized form mentioned
1340   // in the comment above.  This should probably be cached somewhere more
1341   // broadly.
1342   DenseMap<Type*, Value*> TypeToDeclMap;
1343
1344   for (unsigned i = 0; i < LiveVariables.size(); i++) {
1345     // Generate the gc.relocate call and save the result
1346     Value *BaseIdx =
1347       Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
1348     Value *LiveIdx = Builder.getInt32(LiveStart + i);
1349
1350     Type *Ty = LiveVariables[i]->getType();
1351     if (!TypeToDeclMap.count(Ty))
1352       TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
1353     Value *GCRelocateDecl = TypeToDeclMap[Ty];
1354
1355     // only specify a debug name if we can give a useful one
1356     CallInst *Reloc = Builder.CreateCall(
1357         GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
1358         suffixed_name_or(LiveVariables[i], ".relocated", ""));
1359     // Trick CodeGen into thinking there are lots of free registers at this
1360     // fake call.
1361     Reloc->setCallingConv(CallingConv::Cold);
1362   }
1363 }
1364
1365 namespace {
1366
1367 /// This struct is used to defer RAUWs and `eraseFromParent` s.  Using this
1368 /// avoids having to worry about keeping around dangling pointers to Values.
1369 class DeferredReplacement {
1370   AssertingVH<Instruction> Old;
1371   AssertingVH<Instruction> New;
1372
1373 public:
1374   explicit DeferredReplacement(Instruction *Old, Instruction *New) :
1375     Old(Old), New(New) {
1376     assert(Old != New && "Not allowed!");
1377   }
1378
1379   /// Does the task represented by this instance.
1380   void doReplacement() {
1381     Instruction *OldI = Old;
1382     Instruction *NewI = New;
1383
1384     assert(OldI != NewI && "Disallowed at construction?!");
1385
1386     Old = nullptr;
1387     New = nullptr;
1388
1389     if (NewI)
1390       OldI->replaceAllUsesWith(NewI);
1391     OldI->eraseFromParent();
1392   }
1393 };
1394 }
1395
/// Replace the call or invoke \p CS with a newly built gc.statepoint and emit
/// the gc.relocate calls (and, when deopt bundles are in use, the gc.result)
/// that go with it.
///
/// \param CS            The call site being replaced.
/// \param BasePtrs      Base pointer for each entry of \p LiveVariables; both
///                      vectors must have the same length.
/// \param LiveVariables Values passed as the gc arguments of the new
///                      statepoint and relocated after it.
/// \param Result        Receives the new statepoint in StatepointToken and,
///                      for invokes, the landing pad in UnwindToken.
/// \param Replacements  When UseDeoptBundles is set, the RAUW/erase of the
///                      original instruction is appended here instead of being
///                      performed immediately.
static void
makeStatepointExplicitImpl(const CallSite CS, /* to replace */
                           const SmallVectorImpl<Value *> &BasePtrs,
                           const SmallVectorImpl<Value *> &LiveVariables,
                           PartiallyConstructedSafepointRecord &Result,
                           std::vector<DeferredReplacement> &Replacements) {
  assert(BasePtrs.size() == LiveVariables.size());
  assert((UseDeoptBundles || isStatepoint(CS)) &&
         "This method expects to be rewriting a statepoint");

  // Then go ahead and use the builder to actually do the inserts.  We insert
  // immediately before the previous instruction under the assumption that all
  // arguments will be available here.  We can't insert afterwards since we may
  // be replacing a terminator.
  Instruction *InsertBefore = CS.getInstruction();
  IRBuilder<> Builder(InsertBefore);

  // Defaults for the statepoint operands; they are overridden below from
  // either the call site's string attributes or the old statepoint.
  ArrayRef<Value *> GCArgs(LiveVariables);
  uint64_t StatepointID = 0xABCDEF00;
  uint32_t NumPatchBytes = 0;
  uint32_t Flags = uint32_t(StatepointFlags::None);

  ArrayRef<Use> CallArgs;
  ArrayRef<Use> DeoptArgs;
  ArrayRef<Use> TransitionArgs;

  Value *CallTarget = nullptr;

  if (UseDeoptBundles) {
    // CS is an ordinary call/invoke: its own arguments become the call
    // arguments and the deopt state comes from its operand bundle.
    CallArgs = {CS.arg_begin(), CS.arg_end()};
    DeoptArgs = GetDeoptBundleOperands(CS);
    // TODO: we don't fill in TransitionArgs or Flags in this branch, but we
    // could have an operand bundle for that too.
    AttributeSet OriginalAttrs = CS.getAttributes();

    // The ID and patch-byte count may be supplied as string function
    // attributes.  The result of getAsInteger is not checked here.
    Attribute AttrID = OriginalAttrs.getAttribute(AttributeSet::FunctionIndex,
                                                  "statepoint-id");
    if (AttrID.isStringAttribute())
      AttrID.getValueAsString().getAsInteger(10, StatepointID);

    Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute(
        AttributeSet::FunctionIndex, "statepoint-num-patch-bytes");
    if (AttrNumPatchBytes.isStringAttribute())
      AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes);

    CallTarget = CS.getCalledValue();
  } else {
    // This branch will be gone soon, and we will soon only support the
    // UseDeoptBundles == true configuration.
    // Here CS is already a gc.statepoint, so every operand is read back from
    // it.
    Statepoint OldSP(CS);
    StatepointID = OldSP.getID();
    NumPatchBytes = OldSP.getNumPatchBytes();
    Flags = OldSP.getFlags();

    CallArgs = {OldSP.arg_begin(), OldSP.arg_end()};
    DeoptArgs = {OldSP.vm_state_begin(), OldSP.vm_state_end()};
    TransitionArgs = {OldSP.gc_transition_args_begin(),
                      OldSP.gc_transition_args_end()};
    CallTarget = OldSP.getCalledValue();
  }

  // Create the statepoint given all the arguments
  Instruction *Token = nullptr;
  // NOTE(review): ReturnAttrs is assigned in both branches below but is never
  // read afterwards in this function.
  AttributeSet ReturnAttrs;
  if (CS.isCall()) {
    CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
    CallInst *Call = Builder.CreateGCStatepointCall(
        StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
        TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");

    Call->setTailCall(ToReplace->isTailCall());
    Call->setCallingConv(ToReplace->getCallingConv());

    // Currently we will fail on parameter attributes and on certain
    // function attributes.
    AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
    // In case if we can handle this set of attributes - set up function attrs
    // directly on statepoint and return attrs later for gc_result intrinsic.
    Call->setAttributes(NewAttrs.getFnAttributes());
    ReturnAttrs = NewAttrs.getRetAttributes();

    Token = Call;

    // Put the following gc_result and gc_relocate calls immediately after the
    // old call (which we're about to delete)
    assert(ToReplace->getNextNode() && "Not a terminator, must have next!");
    Builder.SetInsertPoint(ToReplace->getNextNode());
    Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
  } else {
    InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());

    // Insert the new invoke into the old block.  We'll remove the old one in a
    // moment at which point this will become the new terminator for the
    // original block.
    InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
        StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(),
        ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
        GCArgs, "statepoint_token");

    Invoke->setCallingConv(ToReplace->getCallingConv());

    // Currently we will fail on parameter attributes and on certain
    // function attributes.
    AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
    // In case if we can handle this set of attributes - set up function attrs
    // directly on statepoint and return attrs later for gc_result intrinsic.
    Invoke->setAttributes(NewAttrs.getFnAttributes());
    ReturnAttrs = NewAttrs.getRetAttributes();

    Token = Invoke;

    // Generate gc relocates in exceptional path
    BasicBlock *UnwindBlock = ToReplace->getUnwindDest();
    assert(!isa<PHINode>(UnwindBlock->begin()) &&
           UnwindBlock->getUniquePredecessor() &&
           "can't safely insert in this block!");

    Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
    Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());

    // Attach exceptional gc relocates to the landingpad.
    Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
    Result.UnwindToken = ExceptionalToken;

    const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
    CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, ExceptionalToken,
                      Builder);

    // Generate gc relocates and returns for normal block
    BasicBlock *NormalDest = ToReplace->getNormalDest();
    assert(!isa<PHINode>(NormalDest->begin()) &&
           NormalDest->getUniquePredecessor() &&
           "can't safely insert in this block!");

    Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt());

    // gc relocates will be generated later as if it were regular call
    // statepoint
  }
  assert(Token && "Should be set in one of the above branches!");

  if (UseDeoptBundles) {
    Token->setName("statepoint_token");
    // A gc.result is only needed when the original call produced a value that
    // somebody actually uses.
    if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
      StringRef Name =
          CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
      CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
      GCResult->setAttributes(CS.getAttributes().getRetAttributes());

      // We cannot RAUW or delete CS.getInstruction() because it could be in the
      // live set of some other safepoint, in which case that safepoint's
      // PartiallyConstructedSafepointRecord will hold a raw pointer to this
      // llvm::Instruction.  Instead, we defer the replacement and deletion to
      // after the live sets have been made explicit in the IR, and we no longer
      // have raw pointers to worry about.
      Replacements.emplace_back(CS.getInstruction(), GCResult);
    } else {
      // Nothing to replace the result with; a null replacement makes the
      // deferred action erase the instruction without rewriting any uses.
      Replacements.emplace_back(CS.getInstruction(), nullptr);
    }
  } else {
    assert(!CS.getInstruction()->hasNUsesOrMore(2) &&
           "only valid use before rewrite is gc.result");
    assert(!CS.getInstruction()->hasOneUse() ||
           isGCResult(cast<Instruction>(*CS.getInstruction()->user_begin())));

    // Take the name of the original statepoint token if there was one.
    Token->takeName(CS.getInstruction());

    // Update the gc.result of the original statepoint (if any) to use the newly
    // inserted statepoint.  This is safe to do here since the token can't be
    // considered a live reference.
    CS.getInstruction()->replaceAllUsesWith(Token);
    CS.getInstruction()->eraseFromParent();
  }

  Result.StatepointToken = Token;

  // Second, create a gc.relocate for every live variable
  const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
  CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder);
}
1577
1578 namespace {
1579 struct NameOrdering {
1580   Value *Base;
1581   Value *Derived;
1582
1583   bool operator()(NameOrdering const &a, NameOrdering const &b) {
1584     return -1 == a.Derived->getName().compare(b.Derived->getName());
1585   }
1586 };
1587 }
1588
1589 static void StabilizeOrder(SmallVectorImpl<Value *> &BaseVec,
1590                            SmallVectorImpl<Value *> &LiveVec) {
1591   assert(BaseVec.size() == LiveVec.size());
1592
1593   SmallVector<NameOrdering, 64> Temp;
1594   for (size_t i = 0; i < BaseVec.size(); i++) {
1595     NameOrdering v;
1596     v.Base = BaseVec[i];
1597     v.Derived = LiveVec[i];
1598     Temp.push_back(v);
1599   }
1600
1601   std::sort(Temp.begin(), Temp.end(), NameOrdering());
1602   for (size_t i = 0; i < BaseVec.size(); i++) {
1603     BaseVec[i] = Temp[i].Base;
1604     LiveVec[i] = Temp[i].Derived;
1605   }
1606 }
1607
1608 // Replace an existing gc.statepoint with a new one and a set of gc.relocates
1609 // which make the relocations happening at this safepoint explicit.
1610 //
1611 // WARNING: Does not do any fixup to adjust users of the original live
1612 // values.  That's the callers responsibility.
1613 static void
1614 makeStatepointExplicit(DominatorTree &DT, const CallSite &CS,
1615                        PartiallyConstructedSafepointRecord &Result,
1616                        std::vector<DeferredReplacement> &Replacements) {
1617   const auto &LiveSet = Result.LiveSet;
1618   const auto &PointerToBase = Result.PointerToBase;
1619
1620   // Convert to vector for efficient cross referencing.
1621   SmallVector<Value *, 64> BaseVec, LiveVec;
1622   LiveVec.reserve(LiveSet.size());
1623   BaseVec.reserve(LiveSet.size());
1624   for (Value *L : LiveSet) {
1625     LiveVec.push_back(L);
1626     assert(PointerToBase.count(L));
1627     Value *Base = PointerToBase.find(L)->second;
1628     BaseVec.push_back(Base);
1629   }
1630   assert(LiveVec.size() == BaseVec.size());
1631
1632   // To make the output IR slightly more stable (for use in diffs), ensure a
1633   // fixed order of the values in the safepoint (by sorting the value name).
1634   // The order is otherwise meaningless.
1635   StabilizeOrder(BaseVec, LiveVec);
1636
1637   // Do the actual rewriting and delete the old statepoint
1638   makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
1639 }
1640
1641 // Helper function for the relocationViaAlloca.
1642 //
1643 // It receives iterator to the statepoint gc relocates and emits a store to the
1644 // assigned location (via allocaMap) for the each one of them.  It adds the
1645 // visited values into the visitedLiveValues set, which we will later use them
1646 // for sanity checking.
1647 static void
1648 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
1649                        DenseMap<Value *, Value *> &AllocaMap,
1650                        DenseSet<Value *> &VisitedLiveValues) {
1651
1652   for (User *U : GCRelocs) {
1653     GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
1654     if (!Relocate)
1655       continue;
1656
1657     Value *OriginalValue = const_cast<Value *>(Relocate->getDerivedPtr());
1658     assert(AllocaMap.count(OriginalValue));
1659     Value *Alloca = AllocaMap[OriginalValue];
1660
1661     // Emit store into the related alloca
1662     // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to
1663     // the correct type according to alloca.
1664     assert(Relocate->getNextNode() &&
1665            "Should always have one since it's not a terminator");
1666     IRBuilder<> Builder(Relocate->getNextNode());
1667     Value *CastedRelocatedValue =
1668       Builder.CreateBitCast(Relocate,
1669                             cast<AllocaInst>(Alloca)->getAllocatedType(),
1670                             suffixed_name_or(Relocate, ".casted", ""));
1671
1672     StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
1673     Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
1674
1675 #ifndef NDEBUG
1676     VisitedLiveValues.insert(OriginalValue);
1677 #endif
1678   }
1679 }
1680
1681 // Helper function for the "relocationViaAlloca". Similar to the
1682 // "insertRelocationStores" but works for rematerialized values.
1683 static void
1684 insertRematerializationStores(
1685   RematerializedValueMapTy RematerializedValues,
1686   DenseMap<Value *, Value *> &AllocaMap,
1687   DenseSet<Value *> &VisitedLiveValues) {
1688
1689   for (auto RematerializedValuePair: RematerializedValues) {
1690     Instruction *RematerializedValue = RematerializedValuePair.first;
1691     Value *OriginalValue = RematerializedValuePair.second;
1692
1693     assert(AllocaMap.count(OriginalValue) &&
1694            "Can not find alloca for rematerialized value");
1695     Value *Alloca = AllocaMap[OriginalValue];
1696
1697     StoreInst *Store = new StoreInst(RematerializedValue, Alloca);
1698     Store->insertAfter(RematerializedValue);
1699
1700 #ifndef NDEBUG
1701     VisitedLiveValues.insert(OriginalValue);
1702 #endif
1703   }
1704 }
1705
/// Do all the relocation update via allocas and mem2reg
///
/// Every value in \p Live (plus every original value that was rematerialized
/// in one of \p Records) gets an alloca in the entry block of \p F.  Stores
/// are inserted after the original definition, after each gc.relocate and
/// after each rematerialized copy; each use of the original value is rewritten
/// to a load.  Finally the allocas are promoted back to SSA form with
/// PromoteMemToReg, using \p DT.
static void relocationViaAlloca(
    Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
    ArrayRef<PartiallyConstructedSafepointRecord> Records) {
#ifndef NDEBUG
  // record initial number of (static) allocas; we'll check we have the same
  // number when we get done.
  int InitialAllocaNum = 0;
  for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
       I++)
    if (isa<AllocaInst>(*I))
      InitialAllocaNum++;
#endif

  // TODO-PERF: change data structures, reserve
  // Maps each original value to the alloca that stands in for it.
  DenseMap<Value *, Value *> AllocaMap;
  SmallVector<AllocaInst *, 200> PromotableAllocas;
  // Used later to check that we have enough allocas to store all values
  std::size_t NumRematerializedValues = 0;
  PromotableAllocas.reserve(Live.size());

  // Emit alloca for "LiveValue" and record it in "AllocaMap" and
  // "PromotableAllocas"
  auto emitAllocaFor = [&](Value *LiveValue) {
    AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "",
                                        F.getEntryBlock().getFirstNonPHI());
    AllocaMap[LiveValue] = Alloca;
    PromotableAllocas.push_back(Alloca);
  };

  // Emit alloca for each live gc pointer
  for (Value *V : Live)
    emitAllocaFor(V);

  // Emit allocas for rematerialized values
  for (const auto &Info : Records)
    for (auto RematerializedValuePair : Info.RematerializedValues) {
      Value *OriginalValue = RematerializedValuePair.second;
      // The original value may already have a slot, either because it is in
      // "Live" or because an earlier record rematerialized it as well.
      if (AllocaMap.count(OriginalValue) != 0)
        continue;

      emitAllocaFor(OriginalValue);
      ++NumRematerializedValues;
    }

  // The next two loops are part of the same conceptual operation.  We need to
  // insert a store to the alloca after the original def and at each
  // redefinition.  We need to insert a load before each use.  These are split
  // into distinct loops for performance reasons.

  // Update gc pointer after each statepoint: either store a relocated value or
  // null (if no relocated value was found for this gc pointer and it is not a
  // gc_result).  This must happen before we update the statepoint with load of
  // alloca otherwise we lose the link between statepoint and old def.
  for (const auto &Info : Records) {
    Value *Statepoint = Info.StatepointToken;

    // This will be used for consistency check
    DenseSet<Value *> VisitedLiveValues;

    // Insert stores for normal statepoint gc relocates
    insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);

    // In case if it was invoke statepoint
    // we will insert stores for exceptional path gc relocates.
    if (isa<InvokeInst>(Statepoint)) {
      insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
                             VisitedLiveValues);
    }

    // Do similar thing with rematerialized values
    insertRematerializationStores(Info.RematerializedValues, AllocaMap,
                                  VisitedLiveValues);

    if (ClobberNonLive) {
      // As a debugging aid, pretend that an unrelocated pointer becomes null at
      // the gc.statepoint.  This will turn some subtle GC problems into
      // slightly easier to debug SEGVs.  Note that on large IR files with
      // lots of gc.statepoints this is extremely costly both memory and time
      // wise.
      // Collect every slot whose value was not marked as visited for this
      // statepoint.
      SmallVector<AllocaInst *, 64> ToClobber;
      for (auto Pair : AllocaMap) {
        Value *Def = Pair.first;
        AllocaInst *Alloca = cast<AllocaInst>(Pair.second);

        // This value was relocated
        if (VisitedLiveValues.count(Def)) {
          continue;
        }
        ToClobber.push_back(Alloca);
      }

      // Stores a null pointer of the slot's element type into every slot in
      // "ToClobber", immediately before "IP".
      auto InsertClobbersAt = [&](Instruction *IP) {
        for (auto *AI : ToClobber) {
          auto AIType = cast<PointerType>(AI->getType());
          auto PT = cast<PointerType>(AIType->getElementType());
          Constant *CPN = ConstantPointerNull::get(PT);
          StoreInst *Store = new StoreInst(CPN, AI);
          Store->insertBefore(IP);
        }
      };

      // Insert the clobbering stores.  These may get intermixed with the
      // gc.results and gc.relocates, but that's fine.
      if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
        InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
        InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
      } else {
        InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
      }
    }
  }

  // Update use with load allocas and add store for gc_relocated.
  for (auto Pair : AllocaMap) {
    Value *Def = Pair.first;
    Value *Alloca = Pair.second;

    // We pre-record the users of the def so that we don't have to worry about
    // later updates that change the use list while we iterate.

    SmallVector<Instruction *, 20> Uses;
    // PERF: trade a linear scan for repeated reallocation
    Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
    for (User *U : Def->users()) {
      if (!isa<ConstantExpr>(U)) {
        // If the def has a ConstantExpr use, then the def is either a
        // ConstantExpr use itself or null.  In either case
        // (recursively in the first, directly in the second), the oop
        // it is ultimately dependent on is null and this particular
        // use does not need to be fixed up.
        Uses.push_back(cast<Instruction>(U));
      }
    }

    // A user that references the def through several operands shows up once
    // per operand; sort and unique so that each user is processed only once.
    std::sort(Uses.begin(), Uses.end());
    auto Last = std::unique(Uses.begin(), Uses.end());
    Uses.erase(Last, Uses.end());

    for (Instruction *Use : Uses) {
      if (isa<PHINode>(Use)) {
        // For a phi the load is placed before the terminator of the matching
        // incoming block rather than before the phi itself.
        PHINode *Phi = cast<PHINode>(Use);
        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
          if (Def == Phi->getIncomingValue(i)) {
            LoadInst *Load = new LoadInst(
                Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
            Phi->setIncomingValue(i, Load);
          }
        }
      } else {
        // Ordinary user: load right before it and swap the operand(s).
        LoadInst *Load = new LoadInst(Alloca, "", Use);
        Use->replaceUsesOfWith(Def, Load);
      }
    }

    // Emit store for the initial gc value.  Store must be inserted after load,
    // otherwise store will be in alloca's use list and an extra load will be
    // inserted before it.
    StoreInst *Store = new StoreInst(Def, Alloca);
    if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
      if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
        // InvokeInst is a TerminatorInst so the store need to be inserted
        // into its normal destination block.
        BasicBlock *NormalDest = Invoke->getNormalDest();
        Store->insertBefore(NormalDest->getFirstNonPHI());
      } else {
        assert(!Inst->isTerminator() &&
               "The only TerminatorInst that can produce a value is "
               "InvokeInst which is handled above.");
        Store->insertAfter(Inst);
      }
    } else {
      // Non-instruction defs are expected to be function arguments; their
      // initial store goes right after the alloca itself.
      assert(isa<Argument>(Def));
      Store->insertAfter(cast<Instruction>(Alloca));
    }
  }

  assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
         "we must have the same allocas with lives");
  if (!PromotableAllocas.empty()) {
    // Apply mem2reg to promote alloca to SSA
    PromoteMemToReg(PromotableAllocas, DT);
  }

#ifndef NDEBUG
  // Count the entry-block allocas back down; if the total does not return to
  // zero, some alloca introduced above was not promoted away.
  for (auto &I : F.getEntryBlock())
    if (isa<AllocaInst>(I))
      InitialAllocaNum--;
  assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
}
1897
1898 /// Implement a unique function which doesn't require we sort the input
1899 /// vector.  Doing so has the effect of changing the output of a couple of
1900 /// tests in ways which make them less useful in testing fused safepoints.
1901 template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
1902   SmallSet<T, 8> Seen;
1903   Vec.erase(std::remove_if(Vec.begin(), Vec.end(), [&](const T &V) {
1904               return !Seen.insert(V).second;
1905             }), Vec.end());
1906 }
1907
1908 /// Insert holders so that each Value is obviously live through the entire
1909 /// lifetime of the call.
1910 static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
1911                                  SmallVectorImpl<CallInst *> &Holders) {
1912   if (Values.empty())
1913     // No values to hold live, might as well not insert the empty holder
1914     return;
1915
1916   Module *M = CS.getInstruction()->getModule();
1917   // Use a dummy vararg function to actually hold the values live
1918   Function *Func = cast<Function>(M->getOrInsertFunction(
1919       "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
1920   if (CS.isCall()) {
1921     // For call safepoints insert dummy calls right after safepoint
1922     Holders.push_back(CallInst::Create(Func, Values, "",
1923                                        &*++CS.getInstruction()->getIterator()));
1924     return;
1925   }
1926   // For invoke safepooints insert dummy calls both in normal and
1927   // exceptional destination blocks
1928   auto *II = cast<InvokeInst>(CS.getInstruction());
1929   Holders.push_back(CallInst::Create(
1930       Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
1931   Holders.push_back(CallInst::Create(
1932       Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
1933 }
1934
1935 static void findLiveReferences(
1936     Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
1937     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
1938   GCPtrLivenessData OriginalLivenessData;
1939   computeLiveInValues(DT, F, OriginalLivenessData);
1940   for (size_t i = 0; i < records.size(); i++) {
1941     struct PartiallyConstructedSafepointRecord &info = records[i];
1942     const CallSite &CS = toUpdate[i];
1943     analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
1944   }
1945 }
1946
/// Remove any vector of pointers from the live set by scalarizing them over the
/// statepoint instruction.  Adds the scalarized pieces to the live set.  It
/// would be preferable to include the vector in the statepoint itself, but
/// the lowering code currently does not handle that.  Extending it would be
/// slightly non-trivial since it requires a format change.  Given how rare
/// such cases are (for the moment?) scalarizing is an acceptable compromise.
///
/// \param StatepointInst The call or invoke the vectors are live across.
/// \param LiveSet        Live set to update: vector-typed values are removed
///                       and their extracted elements are inserted.
/// \param PointerToBase  Base mapping to update: the entry of each split
///                       vector is replaced by per-element entries.
/// \param DT             Dominator tree handed to PromoteMemToReg.
static void splitVectorValues(Instruction *StatepointInst,
                              StatepointLiveSetTy &LiveSet,
                              DenseMap<Value *, Value *>& PointerToBase,
                              DominatorTree &DT) {
  // Gather the vector-typed live values; return early if there are none.
  SmallVector<Value *, 16> ToSplit;
  for (Value *V : LiveSet)
    if (isa<VectorType>(V->getType()))
      ToSplit.push_back(V);

  if (ToSplit.empty())
    return;

  // For each split vector, the scalar elements extracted before the
  // statepoint.
  DenseMap<Value *, SmallVector<Value *, 16>> ElementMapping;

  Function &F = *(StatepointInst->getParent()->getParent());

  DenseMap<Value *, AllocaInst *> AllocaMap;
  // First is normal return, second is exceptional return (invoke only)
  DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
  for (Value *V : ToSplit) {
    AllocaInst *Alloca =
        new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
    AllocaMap[V] = Alloca;

    // Extract every element immediately before the statepoint.
    VectorType *VT = cast<VectorType>(V->getType());
    IRBuilder<> Builder(StatepointInst);
    SmallVector<Value *, 16> Elements;
    for (unsigned i = 0; i < VT->getNumElements(); i++)
      Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
    ElementMapping[V] = Elements;

    // Rebuilds the vector at "IP" from the extracted elements, starting from
    // undef, and returns the final insertelement result.
    auto InsertVectorReform = [&](Instruction *IP) {
      Builder.SetInsertPoint(IP);
      Builder.SetCurrentDebugLocation(IP->getDebugLoc());
      Value *ResultVec = UndefValue::get(VT);
      for (unsigned i = 0; i < VT->getNumElements(); i++)
        ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
                                                Builder.getInt32(i));
      return ResultVec;
    };

    if (isa<CallInst>(StatepointInst)) {
      // Call: rebuild the vector right after the statepoint.
      BasicBlock::iterator Next(StatepointInst);
      Next++;
      Instruction *IP = &*(Next);
      Replacements[V].first = InsertVectorReform(IP);
      Replacements[V].second = nullptr;
    } else {
      InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
      // We've already normalized - check that we don't have shared destination
      // blocks
      BasicBlock *NormalDest = Invoke->getNormalDest();
      assert(!isa<PHINode>(NormalDest->begin()));
      BasicBlock *UnwindDest = Invoke->getUnwindDest();
      assert(!isa<PHINode>(UnwindDest->begin()));
      // Insert insert element sequences in both successors
      Instruction *IP = &*(NormalDest->getFirstInsertionPt());
      Replacements[V].first = InsertVectorReform(IP);
      IP = &*(UnwindDest->getFirstInsertionPt());
      Replacements[V].second = InsertVectorReform(IP);
    }
  }

  // Route every use of each split vector through its alloca: loads before the
  // users, stores after the original def and after each rebuilt vector.
  for (Value *V : ToSplit) {
    AllocaInst *Alloca = AllocaMap[V];

    // Capture all users before we start mutating use lists
    SmallVector<Instruction *, 16> Users;
    for (User *U : V->users())
      Users.push_back(cast<Instruction>(U));

    for (Instruction *I : Users) {
      if (auto Phi = dyn_cast<PHINode>(I)) {
        // For a phi the load goes before the terminator of the matching
        // incoming block.
        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
          if (V == Phi->getIncomingValue(i)) {
            LoadInst *Load = new LoadInst(
                Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
            Phi->setIncomingValue(i, Load);
          }
      } else {
        LoadInst *Load = new LoadInst(Alloca, "", I);
        I->replaceUsesOfWith(V, Load);
      }
    }

    // Store the original value and the replacement value into the alloca
    StoreInst *Store = new StoreInst(V, Alloca);
    if (auto I = dyn_cast<Instruction>(V))
      Store->insertAfter(I);
    else
      Store->insertAfter(Alloca);

    // Normal return for invoke, or call return
    Instruction *Replacement = cast<Instruction>(Replacements[V].first);
    (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
    // Unwind return for invoke only
    Replacement = cast_or_null<Instruction>(Replacements[V].second);
    if (Replacement)
      (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
  }

  // apply mem2reg to promote alloca to SSA
  SmallVector<AllocaInst *, 16> Allocas;
  for (Value *V : ToSplit)
    Allocas.push_back(AllocaMap[V]);
  PromoteMemToReg(Allocas, DT);

  // Update our tracking of live pointers and base mappings to account for the
  // changes we just made.
  for (Value *V : ToSplit) {
    auto &Elements = ElementMapping[V];

    LiveSet.erase(V);
    LiveSet.insert(Elements.begin(), Elements.end());
    // We need to update the base mapping as well.
    assert(PointerToBase.count(V));
    Value *OldBase = PointerToBase[V];
    // NOTE(review): this lookup only finds elements if the base vector was
    // itself split above; otherwise operator[] creates an empty entry and the
    // size assert below fires.  Confirm that bases of live vectors are always
    // in the live set.
    auto &BaseElements = ElementMapping[OldBase];
    PointerToBase.erase(V);
    assert(Elements.size() == BaseElements.size());
    for (unsigned i = 0; i < Elements.size(); i++) {
      Value *Elem = Elements[i];
      PointerToBase[Elem] = BaseElements[i];
    }
  }
}
2079
2080 // Helper function for the "rematerializeLiveValues". It walks use chain
2081 // starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
2082 // values are visited (currently it is GEP's and casts). Returns true if it
2083 // successfully reached "BaseValue" and false otherwise.
2084 // Fills "ChainToBase" array with all visited values. "BaseValue" is not
2085 // recorded.
2086 static bool findRematerializableChainToBasePointer(
2087   SmallVectorImpl<Instruction*> &ChainToBase,
2088   Value *CurrentValue, Value *BaseValue) {
2089
2090   // We have found a base value
2091   if (CurrentValue == BaseValue) {
2092     return true;
2093   }
2094
2095   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
2096     ChainToBase.push_back(GEP);
2097     return findRematerializableChainToBasePointer(ChainToBase,
2098                                                   GEP->getPointerOperand(),
2099                                                   BaseValue);
2100   }
2101
2102   if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
2103     if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
2104       return false;
2105
2106     ChainToBase.push_back(CI);
2107     return findRematerializableChainToBasePointer(ChainToBase,
2108                                                   CI->getOperand(0), BaseValue);
2109   }
2110
2111   // Not supported instruction in the chain
2112   return false;
2113 }
2114
2115 // Helper function for the "rematerializeLiveValues". Compute cost of the use
2116 // chain we are going to rematerialize.
2117 static unsigned
2118 chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
2119                        TargetTransformInfo &TTI) {
2120   unsigned Cost = 0;
2121
2122   for (Instruction *Instr : Chain) {
2123     if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
2124       assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
2125              "non noop cast is found during rematerialization");
2126
2127       Type *SrcTy = CI->getOperand(0)->getType();
2128       Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy);
2129
2130     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
2131       // Cost of the address calculation
2132       Type *ValTy = GEP->getPointerOperandType()->getPointerElementType();
2133       Cost += TTI.getAddressComputationCost(ValTy);
2134
2135       // And cost of the GEP itself
2136       // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
2137       //       allowed for the external usage)
2138       if (!GEP->hasAllConstantIndices())
2139         Cost += 2;
2140
2141     } else {
2142       llvm_unreachable("unsupported instruciton type during rematerialization");
2143     }
2144   }
2145
2146   return Cost;
2147 }
2148
2149 // From the statepoint live set pick values that are cheaper to recompute then
2150 // to relocate. Remove this values from the live set, rematerialize them after
2151 // statepoint and record them in "Info" structure. Note that similar to
2152 // relocated values we don't do any user adjustments here.
2153 static void rematerializeLiveValues(CallSite CS,
2154                                     PartiallyConstructedSafepointRecord &Info,
2155                                     TargetTransformInfo &TTI) {
2156   const unsigned int ChainLengthThreshold = 10;
2157
2158   // Record values we are going to delete from this statepoint live set.
2159   // We can not di this in following loop due to iterator invalidation.
2160   SmallVector<Value *, 32> LiveValuesToBeDeleted;
2161
2162   for (Value *LiveValue: Info.LiveSet) {
2163     // For each live pointer find it's defining chain
2164     SmallVector<Instruction *, 3> ChainToBase;
2165     assert(Info.PointerToBase.count(LiveValue));
2166     bool FoundChain =
2167       findRematerializableChainToBasePointer(ChainToBase,
2168                                              LiveValue,
2169                                              Info.PointerToBase[LiveValue]);
2170     // Nothing to do, or chain is too long
2171     if (!FoundChain ||
2172         ChainToBase.size() == 0 ||
2173         ChainToBase.size() > ChainLengthThreshold)
2174       continue;
2175
2176     // Compute cost of this chain
2177     unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
2178     // TODO: We can also account for cases when we will be able to remove some
2179     //       of the rematerialized values by later optimization passes. I.e if
2180     //       we rematerialized several intersecting chains. Or if original values
2181     //       don't have any uses besides this statepoint.
2182
2183     // For invokes we need to rematerialize each chain twice - for normal and
2184     // for unwind basic blocks. Model this by multiplying cost by two.
2185     if (CS.isInvoke()) {
2186       Cost *= 2;
2187     }
2188     // If it's too expensive - skip it
2189     if (Cost >= RematerializationThreshold)
2190       continue;
2191
2192     // Remove value from the live set
2193     LiveValuesToBeDeleted.push_back(LiveValue);
2194
2195     // Clone instructions and record them inside "Info" structure
2196
2197     // Walk backwards to visit top-most instructions first
2198     std::reverse(ChainToBase.begin(), ChainToBase.end());
2199
2200     // Utility function which clones all instructions from "ChainToBase"
2201     // and inserts them before "InsertBefore". Returns rematerialized value
2202     // which should be used after statepoint.
2203     auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) {
2204       Instruction *LastClonedValue = nullptr;
2205       Instruction *LastValue = nullptr;
2206       for (Instruction *Instr: ChainToBase) {
2207         // Only GEP's and casts are suported as we need to be careful to not
2208         // introduce any new uses of pointers not in the liveset.
2209         // Note that it's fine to introduce new uses of pointers which were
2210         // otherwise not used after this statepoint.
2211         assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));
2212
2213         Instruction *ClonedValue = Instr->clone();
2214         ClonedValue->insertBefore(InsertBefore);
2215         ClonedValue->setName(Instr->getName() + ".remat");
2216
2217         // If it is not first instruction in the chain then it uses previously
2218         // cloned value. We should update it to use cloned value.
2219         if (LastClonedValue) {
2220           assert(LastValue);
2221           ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
2222 #ifndef NDEBUG
2223           // Assert that cloned instruction does not use any instructions from
2224           // this chain other than LastClonedValue
2225           for (auto OpValue : ClonedValue->operand_values()) {
2226             assert(std::find(ChainToBase.begin(), ChainToBase.end(), OpValue) ==
2227                        ChainToBase.end() &&
2228                    "incorrect use in rematerialization chain");
2229           }
2230 #endif
2231         }
2232
2233         LastClonedValue = ClonedValue;
2234         LastValue = Instr;
2235       }
2236       assert(LastClonedValue);
2237       return LastClonedValue;
2238     };
2239
2240     // Different cases for calls and invokes. For invokes we need to clone
2241     // instructions both on normal and unwind path.
2242     if (CS.isCall()) {
2243       Instruction *InsertBefore = CS.getInstruction()->getNextNode();
2244       assert(InsertBefore);
2245       Instruction *RematerializedValue = rematerializeChain(InsertBefore);
2246       Info.RematerializedValues[RematerializedValue] = LiveValue;
2247     } else {
2248       InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
2249
2250       Instruction *NormalInsertBefore =
2251           &*Invoke->getNormalDest()->getFirstInsertionPt();
2252       Instruction *UnwindInsertBefore =
2253           &*Invoke->getUnwindDest()->getFirstInsertionPt();
2254
2255       Instruction *NormalRematerializedValue =
2256           rematerializeChain(NormalInsertBefore);
2257       Instruction *UnwindRematerializedValue =
2258           rematerializeChain(UnwindInsertBefore);
2259
2260       Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
2261       Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
2262     }
2263   }
2264
2265   // Remove rematerializaed values from the live set
2266   for (auto LiveValue: LiveValuesToBeDeleted) {
2267     Info.LiveSet.erase(LiveValue);
2268   }
2269 }
2270
2271 static bool insertParsePoints(Function &F, DominatorTree &DT,
2272                               TargetTransformInfo &TTI,
2273                               SmallVectorImpl<CallSite> &ToUpdate) {
2274 #ifndef NDEBUG
2275   // sanity check the input
2276   std::set<CallSite> Uniqued;
2277   Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
2278   assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
2279
2280   for (CallSite CS : ToUpdate) {
2281     assert(CS.getInstruction()->getParent()->getParent() == &F);
2282     assert((UseDeoptBundles || isStatepoint(CS)) &&
2283            "expected to already be a deopt statepoint");
2284   }
2285 #endif
2286
2287   // When inserting gc.relocates for invokes, we need to be able to insert at
2288   // the top of the successor blocks.  See the comment on
2289   // normalForInvokeSafepoint on exactly what is needed.  Note that this step
2290   // may restructure the CFG.
2291   for (CallSite CS : ToUpdate) {
2292     if (!CS.isInvoke())
2293       continue;
2294     auto *II = cast<InvokeInst>(CS.getInstruction());
2295     normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
2296     normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
2297   }
2298
2299   // A list of dummy calls added to the IR to keep various values obviously
2300   // live in the IR.  We'll remove all of these when done.
2301   SmallVector<CallInst *, 64> Holders;
2302
2303   // Insert a dummy call with all of the arguments to the vm_state we'll need
2304   // for the actual safepoint insertion.  This ensures reference arguments in
2305   // the deopt argument list are considered live through the safepoint (and
2306   // thus makes sure they get relocated.)
2307   for (CallSite CS : ToUpdate) {
2308     SmallVector<Value *, 64> DeoptValues;
2309
2310     iterator_range<const Use *> DeoptStateRange =
2311         UseDeoptBundles
2312             ? iterator_range<const Use *>(GetDeoptBundleOperands(CS))
2313             : iterator_range<const Use *>(Statepoint(CS).vm_state_args());
2314
2315     for (Value *Arg : DeoptStateRange) {
2316       assert(!isUnhandledGCPointerType(Arg->getType()) &&
2317              "support for FCA unimplemented");
2318       if (isHandledGCPointerType(Arg->getType()))
2319         DeoptValues.push_back(Arg);
2320     }
2321
2322     insertUseHolderAfter(CS, DeoptValues, Holders);
2323   }
2324
2325   SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());
2326
2327   // A) Identify all gc pointers which are statically live at the given call
2328   // site.
2329   findLiveReferences(F, DT, ToUpdate, Records);
2330
2331   // B) Find the base pointers for each live pointer
2332   /* scope for caching */ {
2333     // Cache the 'defining value' relation used in the computation and
2334     // insertion of base phis and selects.  This ensures that we don't insert
2335     // large numbers of duplicate base_phis.
2336     DefiningValueMapTy DVCache;
2337
2338     for (size_t i = 0; i < Records.size(); i++) {
2339       PartiallyConstructedSafepointRecord &info = Records[i];
2340       findBasePointers(DT, DVCache, ToUpdate[i], info);
2341     }
2342   } // end of cache scope
2343
2344   // The base phi insertion logic (for any safepoint) may have inserted new
2345   // instructions which are now live at some safepoint.  The simplest such
2346   // example is:
2347   // loop:
2348   //   phi a  <-- will be a new base_phi here
2349   //   safepoint 1 <-- that needs to be live here
2350   //   gep a + 1
2351   //   safepoint 2
2352   //   br loop
2353   // We insert some dummy calls after each safepoint to definitely hold live
2354   // the base pointers which were identified for that safepoint.  We'll then
2355   // ask liveness for _every_ base inserted to see what is now live.  Then we
2356   // remove the dummy calls.
2357   Holders.reserve(Holders.size() + Records.size());
2358   for (size_t i = 0; i < Records.size(); i++) {
2359     PartiallyConstructedSafepointRecord &Info = Records[i];
2360
2361     SmallVector<Value *, 128> Bases;
2362     for (auto Pair : Info.PointerToBase)
2363       Bases.push_back(Pair.second);
2364
2365     insertUseHolderAfter(ToUpdate[i], Bases, Holders);
2366   }
2367
2368   // By selecting base pointers, we've effectively inserted new uses. Thus, we
2369   // need to rerun liveness.  We may *also* have inserted new defs, but that's
2370   // not the key issue.
2371   recomputeLiveInValues(F, DT, ToUpdate, Records);
2372
2373   if (PrintBasePointers) {
2374     for (auto &Info : Records) {
2375       errs() << "Base Pairs: (w/Relocation)\n";
2376       for (auto Pair : Info.PointerToBase) {
2377         errs() << " derived ";
2378         Pair.first->printAsOperand(errs(), false);
2379         errs() << " base ";
2380         Pair.second->printAsOperand(errs(), false);
2381         errs() << "\n";
2382       }
2383     }
2384   }
2385
2386   // It is possible that non-constant live variables have a constant base.  For
2387   // example, a GEP with a variable offset from a global.  In this case we can
2388   // remove it from the liveset.  We already don't add constants to the liveset
2389   // because we assume they won't move at runtime and the GC doesn't need to be
2390   // informed about them.  The same reasoning applies if the base is constant.
2391   // Note that the relocation placement code relies on this filtering for
2392   // correctness as it expects the base to be in the liveset, which isn't true
2393   // if the base is constant.
2394   for (auto &Info : Records)
2395     for (auto &BasePair : Info.PointerToBase)
2396       if (isa<Constant>(BasePair.second))
2397         Info.LiveSet.erase(BasePair.first);
2398
2399   for (CallInst *CI : Holders)
2400     CI->eraseFromParent();
2401
2402   Holders.clear();
2403
2404   // Do a limited scalarization of any live at safepoint vector values which
2405   // contain pointers.  This enables this pass to run after vectorization at
2406   // the cost of some possible performance loss.  Note: This is known to not
2407   // handle updating of the side tables correctly which can lead to relocation
2408   // bugs when the same vector is live at multiple statepoints.  We're in the
2409   // process of implementing the alternate lowering - relocating the
2410   // vector-of-pointers as first class item and updating the backend to
2411   // understand that - but that's not yet complete.
2412   if (UseVectorSplit)
2413     for (size_t i = 0; i < Records.size(); i++) {
2414       PartiallyConstructedSafepointRecord &Info = Records[i];
2415       Instruction *Statepoint = ToUpdate[i].getInstruction();
2416       splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
2417                         Info.PointerToBase, DT);
2418     }
2419
2420   // In order to reduce live set of statepoint we might choose to rematerialize
2421   // some values instead of relocating them. This is purely an optimization and
2422   // does not influence correctness.
2423   for (size_t i = 0; i < Records.size(); i++)
2424     rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
2425
2426   // We need this to safely RAUW and delete call or invoke return values that
2427   // may themselves be live over a statepoint.  For details, please see usage in
2428   // makeStatepointExplicitImpl.
2429   std::vector<DeferredReplacement> Replacements;
2430
2431   // Now run through and replace the existing statepoints with new ones with
2432   // the live variables listed.  We do not yet update uses of the values being
2433   // relocated. We have references to live variables that need to
2434   // survive to the last iteration of this loop.  (By construction, the
2435   // previous statepoint can not be a live variable, thus we can and remove
2436   // the old statepoint calls as we go.)
2437   for (size_t i = 0; i < Records.size(); i++)
2438     makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
2439
2440   ToUpdate.clear(); // prevent accident use of invalid CallSites
2441
2442   for (auto &PR : Replacements)
2443     PR.doReplacement();
2444
2445   Replacements.clear();
2446
2447   for (auto &Info : Records) {
2448     // These live sets may contain state Value pointers, since we replaced calls
2449     // with operand bundles with calls wrapped in gc.statepoint, and some of
2450     // those calls may have been def'ing live gc pointers.  Clear these out to
2451     // avoid accidentally using them.
2452     //
2453     // TODO: We should create a separate data structure that does not contain
2454     // these live sets, and migrate to using that data structure from this point
2455     // onward.
2456     Info.LiveSet.clear();
2457     Info.PointerToBase.clear();
2458   }
2459
2460   // Do all the fixups of the original live variables to their relocated selves
2461   SmallVector<Value *, 128> Live;
2462   for (size_t i = 0; i < Records.size(); i++) {
2463     PartiallyConstructedSafepointRecord &Info = Records[i];
2464
2465     // We can't simply save the live set from the original insertion.  One of
2466     // the live values might be the result of a call which needs a safepoint.
2467     // That Value* no longer exists and we need to use the new gc_result.
2468     // Thankfully, the live set is embedded in the statepoint (and updated), so
2469     // we just grab that.
2470     Statepoint Statepoint(Info.StatepointToken);
2471     Live.insert(Live.end(), Statepoint.gc_args_begin(),
2472                 Statepoint.gc_args_end());
2473 #ifndef NDEBUG
2474     // Do some basic sanity checks on our liveness results before performing
2475     // relocation.  Relocation can and will turn mistakes in liveness results
2476     // into non-sensical code which is must harder to debug.
2477     // TODO: It would be nice to test consistency as well
2478     assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
2479            "statepoint must be reachable or liveness is meaningless");
2480     for (Value *V : Statepoint.gc_args()) {
2481       if (!isa<Instruction>(V))
2482         // Non-instruction values trivial dominate all possible uses
2483         continue;
2484       auto *LiveInst = cast<Instruction>(V);
2485       assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
2486              "unreachable values should never be live");
2487       assert(DT.dominates(LiveInst, Info.StatepointToken) &&
2488              "basic SSA liveness expectation violated by liveness analysis");
2489     }
2490 #endif
2491   }
2492   unique_unsorted(Live);
2493
2494 #ifndef NDEBUG
2495   // sanity check
2496   for (auto *Ptr : Live)
2497     assert(isHandledGCPointerType(Ptr->getType()) &&
2498            "must be a gc pointer type");
2499 #endif
2500
2501   relocationViaAlloca(F, DT, Live, Records);
2502   return !Records.empty();
2503 }
2504
2505 // Handles both return values and arguments for Functions and CallSites.
2506 template <typename AttrHolder>
2507 static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
2508                                       unsigned Index) {
2509   AttrBuilder R;
2510   if (AH.getDereferenceableBytes(Index))
2511     R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
2512                                   AH.getDereferenceableBytes(Index)));
2513   if (AH.getDereferenceableOrNullBytes(Index))
2514     R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
2515                                   AH.getDereferenceableOrNullBytes(Index)));
2516   if (AH.doesNotAlias(Index))
2517     R.addAttribute(Attribute::NoAlias);
2518
2519   if (!R.empty())
2520     AH.setAttributes(AH.getAttributes().removeAttributes(
2521         Ctx, Index, AttributeSet::get(Ctx, Index, R)));
2522 }
2523
2524 void
2525 RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
2526   LLVMContext &Ctx = F.getContext();
2527
2528   for (Argument &A : F.args())
2529     if (isa<PointerType>(A.getType()))
2530       RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1);
2531
2532   if (isa<PointerType>(F.getReturnType()))
2533     RemoveNonValidAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
2534 }
2535
2536 void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
2537   if (F.empty())
2538     return;
2539
2540   LLVMContext &Ctx = F.getContext();
2541   MDBuilder Builder(Ctx);
2542
2543   for (Instruction &I : instructions(F)) {
2544     if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
2545       assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
2546       bool IsImmutableTBAA =
2547           MD->getNumOperands() == 4 &&
2548           mdconst::extract<ConstantInt>(MD->getOperand(3))->getValue() == 1;
2549
2550       if (!IsImmutableTBAA)
2551         continue; // no work to do, MD_tbaa is already marked mutable
2552
2553       MDNode *Base = cast<MDNode>(MD->getOperand(0));
2554       MDNode *Access = cast<MDNode>(MD->getOperand(1));
2555       uint64_t Offset =
2556           mdconst::extract<ConstantInt>(MD->getOperand(2))->getZExtValue();
2557
2558       MDNode *MutableTBAA =
2559           Builder.createTBAAStructTagNode(Base, Access, Offset);
2560       I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
2561     }
2562
2563     if (CallSite CS = CallSite(&I)) {
2564       for (int i = 0, e = CS.arg_size(); i != e; i++)
2565         if (isa<PointerType>(CS.getArgument(i)->getType()))
2566           RemoveNonValidAttrAtIndex(Ctx, CS, i + 1);
2567       if (isa<PointerType>(CS.getType()))
2568         RemoveNonValidAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
2569     }
2570   }
2571 }
2572
2573 /// Returns true if this function should be rewritten by this pass.  The main
2574 /// point of this function is as an extension point for custom logic.
2575 static bool shouldRewriteStatepointsIn(Function &F) {
2576   // TODO: This should check the GCStrategy
2577   if (F.hasGC()) {
2578     const auto &FunctionGCName = F.getGC();
2579     const StringRef StatepointExampleName("statepoint-example");
2580     const StringRef CoreCLRName("coreclr");
2581     return (StatepointExampleName == FunctionGCName) ||
2582            (CoreCLRName == FunctionGCName);
2583   } else
2584     return false;
2585 }
2586
2587 void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) {
2588 #ifndef NDEBUG
2589   assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) &&
2590          "precondition!");
2591 #endif
2592
2593   for (Function &F : M)
2594     stripNonValidAttributesFromPrototype(F);
2595
2596   for (Function &F : M)
2597     stripNonValidAttributesFromBody(F);
2598 }
2599
2600 bool RewriteStatepointsForGC::runOnFunction(Function &F) {
2601   // Nothing to do for declarations.
2602   if (F.isDeclaration() || F.empty())
2603     return false;
2604
2605   // Policy choice says not to rewrite - the most common reason is that we're
2606   // compiling code without a GCStrategy.
2607   if (!shouldRewriteStatepointsIn(F))
2608     return false;
2609
2610   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
2611   TargetTransformInfo &TTI =
2612       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2613
2614   auto NeedsRewrite = [](Instruction &I) {
2615     if (UseDeoptBundles) {
2616       if (ImmutableCallSite CS = ImmutableCallSite(&I))
2617         return !callsGCLeafFunction(CS);
2618       return false;
2619     }
2620
2621     return isStatepoint(I);
2622   };
2623
2624   // Gather all the statepoints which need rewritten.  Be careful to only
2625   // consider those in reachable code since we need to ask dominance queries
2626   // when rewriting.  We'll delete the unreachable ones in a moment.
2627   SmallVector<CallSite, 64> ParsePointNeeded;
2628   bool HasUnreachableStatepoint = false;
2629   for (Instruction &I : instructions(F)) {
2630     // TODO: only the ones with the flag set!
2631     if (NeedsRewrite(I)) {
2632       if (DT.isReachableFromEntry(I.getParent()))
2633         ParsePointNeeded.push_back(CallSite(&I));
2634       else
2635         HasUnreachableStatepoint = true;
2636     }
2637   }
2638
2639   bool MadeChange = false;
2640
2641   // Delete any unreachable statepoints so that we don't have unrewritten
2642   // statepoints surviving this pass.  This makes testing easier and the
2643   // resulting IR less confusing to human readers.  Rather than be fancy, we
2644   // just reuse a utility function which removes the unreachable blocks.
2645   if (HasUnreachableStatepoint)
2646     MadeChange |= removeUnreachableBlocks(F);
2647
2648   // Return early if no work to do.
2649   if (ParsePointNeeded.empty())
2650     return MadeChange;
2651
2652   // As a prepass, go ahead and aggressively destroy single entry phi nodes.
2653   // These are created by LCSSA.  They have the effect of increasing the size
2654   // of liveness sets for no good reason.  It may be harder to do this post
2655   // insertion since relocations and base phis can confuse things.
2656   for (BasicBlock &BB : F)
2657     if (BB.getUniquePredecessor()) {
2658       MadeChange = true;
2659       FoldSingleEntryPHINodes(&BB);
2660     }
2661
2662   // Before we start introducing relocations, we want to tweak the IR a bit to
2663   // avoid unfortunate code generation effects.  The main example is that we
2664   // want to try to make sure the comparison feeding a branch is after any
2665   // safepoints.  Otherwise, we end up with a comparison of pre-relocation
2666   // values feeding a branch after relocation.  This is semantically correct,
2667   // but results in extra register pressure since both the pre-relocation and
2668   // post-relocation copies must be available in registers.  For code without
2669   // relocations this is handled elsewhere, but teaching the scheduler to
2670   // reverse the transform we're about to do would be slightly complex.
2671   // Note: This may extend the live range of the inputs to the icmp and thus
2672   // increase the liveset of any statepoint we move over.  This is profitable
2673   // as long as all statepoints are in rare blocks.  If we had in-register
2674   // lowering for live values this would be a much safer transform.
2675   auto getConditionInst = [](TerminatorInst *TI) -> Instruction* {
2676     if (auto *BI = dyn_cast<BranchInst>(TI))
2677       if (BI->isConditional())
2678         return dyn_cast<Instruction>(BI->getCondition());
2679     // TODO: Extend this to handle switches
2680     return nullptr;
2681   };
2682   for (BasicBlock &BB : F) {
2683     TerminatorInst *TI = BB.getTerminator();
2684     if (auto *Cond = getConditionInst(TI))
2685       // TODO: Handle more than just ICmps here.  We should be able to move
2686       // most instructions without side effects or memory access.
2687       if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
2688         MadeChange = true;
2689         Cond->moveBefore(TI);
2690       }
2691   }
2692
2693   MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
2694   return MadeChange;
2695 }
2696
2697 // liveness computation via standard dataflow
2698 // -------------------------------------------------------------------
2699
2700 // TODO: Consider using bitvectors for liveness, the set of potentially
2701 // interesting values should be small and easy to pre-compute.
2702
2703 /// Compute the live-in set for the location rbegin starting from
2704 /// the live-out set of the basic block
2705 static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
2706                                 BasicBlock::reverse_iterator rend,
2707                                 DenseSet<Value *> &LiveTmp) {
2708
2709   for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
2710     Instruction *I = &*ritr;
2711
2712     // KILL/Def - Remove this definition from LiveIn
2713     LiveTmp.erase(I);
2714
2715     // Don't consider *uses* in PHI nodes, we handle their contribution to
2716     // predecessor blocks when we seed the LiveOut sets
2717     if (isa<PHINode>(I))
2718       continue;
2719
2720     // USE - Add to the LiveIn set for this instruction
2721     for (Value *V : I->operands()) {
2722       assert(!isUnhandledGCPointerType(V->getType()) &&
2723              "support for FCA unimplemented");
2724       if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
2725         // The choice to exclude all things constant here is slightly subtle.
2726         // There are two independent reasons:
2727         // - We assume that things which are constant (from LLVM's definition)
2728         // do not move at runtime.  For example, the address of a global
2729         // variable is fixed, even though it's contents may not be.
2730         // - Second, we can't disallow arbitrary inttoptr constants even
2731         // if the language frontend does.  Optimization passes are free to
2732         // locally exploit facts without respect to global reachability.  This
2733         // can create sections of code which are dynamically unreachable and
2734         // contain just about anything.  (see constants.ll in tests)
2735         LiveTmp.insert(V);
2736       }
2737     }
2738   }
2739 }
2740
2741 static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {
2742
2743   for (BasicBlock *Succ : successors(BB)) {
2744     const BasicBlock::iterator E(Succ->getFirstNonPHI());
2745     for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
2746       PHINode *Phi = cast<PHINode>(&*I);
2747       Value *V = Phi->getIncomingValueForBlock(BB);
2748       assert(!isUnhandledGCPointerType(V->getType()) &&
2749              "support for FCA unimplemented");
2750       if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
2751         LiveTmp.insert(V);
2752       }
2753     }
2754   }
2755 }
2756
2757 static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
2758   DenseSet<Value *> KillSet;
2759   for (Instruction &I : *BB)
2760     if (isHandledGCPointerType(I.getType()))
2761       KillSet.insert(&I);
2762   return KillSet;
2763 }
2764
2765 #ifndef NDEBUG
2766 /// Check that the items in 'Live' dominate 'TI'.  This is used as a basic
2767 /// sanity check for the liveness computation.
2768 static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
2769                           TerminatorInst *TI, bool TermOkay = false) {
2770   for (Value *V : Live) {
2771     if (auto *I = dyn_cast<Instruction>(V)) {
2772       // The terminator can be a member of the LiveOut set.  LLVM's definition
2773       // of instruction dominance states that V does not dominate itself.  As
2774       // such, we need to special case this to allow it.
2775       if (TermOkay && TI == I)
2776         continue;
2777       assert(DT.dominates(I, TI) &&
2778              "basic SSA liveness expectation violated by liveness analysis");
2779     }
2780   }
2781 }
2782
2783 /// Check that all the liveness sets used during the computation of liveness
2784 /// obey basic SSA properties.  This is useful for finding cases where we miss
2785 /// a def.
2786 static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
2787                           BasicBlock &BB) {
2788   checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
2789   checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
2790   checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
2791 }
2792 #endif
2793
2794 static void computeLiveInValues(DominatorTree &DT, Function &F,
2795                                 GCPtrLivenessData &Data) {
2796
2797   SmallSetVector<BasicBlock *, 200> Worklist;
2798   auto AddPredsToWorklist = [&](BasicBlock *BB) {
2799     // We use a SetVector so that we don't have duplicates in the worklist.
2800     Worklist.insert(pred_begin(BB), pred_end(BB));
2801   };
2802   auto NextItem = [&]() {
2803     BasicBlock *BB = Worklist.back();
2804     Worklist.pop_back();
2805     return BB;
2806   };
2807
2808   // Seed the liveness for each individual block
2809   for (BasicBlock &BB : F) {
2810     Data.KillSet[&BB] = computeKillSet(&BB);
2811     Data.LiveSet[&BB].clear();
2812     computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
2813
2814 #ifndef NDEBUG
2815     for (Value *Kill : Data.KillSet[&BB])
2816       assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
2817 #endif
2818
2819     Data.LiveOut[&BB] = DenseSet<Value *>();
2820     computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
2821     Data.LiveIn[&BB] = Data.LiveSet[&BB];
2822     set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
2823     set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
2824     if (!Data.LiveIn[&BB].empty())
2825       AddPredsToWorklist(&BB);
2826   }
2827
2828   // Propagate that liveness until stable
2829   while (!Worklist.empty()) {
2830     BasicBlock *BB = NextItem();
2831
2832     // Compute our new liveout set, then exit early if it hasn't changed
2833     // despite the contribution of our successor.
2834     DenseSet<Value *> LiveOut = Data.LiveOut[BB];
2835     const auto OldLiveOutSize = LiveOut.size();
2836     for (BasicBlock *Succ : successors(BB)) {
2837       assert(Data.LiveIn.count(Succ));
2838       set_union(LiveOut, Data.LiveIn[Succ]);
2839     }
2840     // assert OutLiveOut is a subset of LiveOut
2841     if (OldLiveOutSize == LiveOut.size()) {
2842       // If the sets are the same size, then we didn't actually add anything
2843       // when unioning our successors LiveIn  Thus, the LiveIn of this block
2844       // hasn't changed.
2845       continue;
2846     }
2847     Data.LiveOut[BB] = LiveOut;
2848
2849     // Apply the effects of this basic block
2850     DenseSet<Value *> LiveTmp = LiveOut;
2851     set_union(LiveTmp, Data.LiveSet[BB]);
2852     set_subtract(LiveTmp, Data.KillSet[BB]);
2853
2854     assert(Data.LiveIn.count(BB));
2855     const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
2856     // assert: OldLiveIn is a subset of LiveTmp
2857     if (OldLiveIn.size() != LiveTmp.size()) {
2858       Data.LiveIn[BB] = LiveTmp;
2859       AddPredsToWorklist(BB);
2860     }
2861   } // while( !worklist.empty() )
2862
2863 #ifndef NDEBUG
2864   // Sanity check our output against SSA properties.  This helps catch any
2865   // missing kills during the above iteration.
2866   for (BasicBlock &BB : F) {
2867     checkBasicSSA(DT, Data, BB);
2868   }
2869 #endif
2870 }
2871
2872 static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
2873                               StatepointLiveSetTy &Out) {
2874
2875   BasicBlock *BB = Inst->getParent();
2876
2877   // Note: The copy is intentional and required
2878   assert(Data.LiveOut.count(BB));
2879   DenseSet<Value *> LiveOut = Data.LiveOut[BB];
2880
2881   // We want to handle the statepoint itself oddly.  It's
2882   // call result is not live (normal), nor are it's arguments
2883   // (unless they're used again later).  This adjustment is
2884   // specifically what we need to relocate
2885   BasicBlock::reverse_iterator rend(Inst->getIterator());
2886   computeLiveInValues(BB->rbegin(), rend, LiveOut);
2887   LiveOut.erase(Inst);
2888   Out.insert(LiveOut.begin(), LiveOut.end());
2889 }
2890
2891 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
2892                                   const CallSite &CS,
2893                                   PartiallyConstructedSafepointRecord &Info) {
2894   Instruction *Inst = CS.getInstruction();
2895   StatepointLiveSetTy Updated;
2896   findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
2897
2898 #ifndef NDEBUG
2899   DenseSet<Value *> Bases;
2900   for (auto KVPair : Info.PointerToBase) {
2901     Bases.insert(KVPair.second);
2902   }
2903 #endif
2904   // We may have base pointers which are now live that weren't before.  We need
2905   // to update the PointerToBase structure to reflect this.
2906   for (auto V : Updated)
2907     if (!Info.PointerToBase.count(V)) {
2908       assert(Bases.count(V) && "can't find base for unexpected live value");
2909       Info.PointerToBase[V] = V;
2910       continue;
2911     }
2912
2913 #ifndef NDEBUG
2914   for (auto V : Updated) {
2915     assert(Info.PointerToBase.count(V) &&
2916            "must be able to find base for live value");
2917   }
2918 #endif
2919
2920   // Remove any stale base mappings - this can happen since our liveness is
2921   // more precise then the one inherent in the base pointer analysis
2922   DenseSet<Value *> ToErase;
2923   for (auto KVPair : Info.PointerToBase)
2924     if (!Updated.count(KVPair.first))
2925       ToErase.insert(KVPair.first);
2926   for (auto V : ToErase)
2927     Info.PointerToBase.erase(V);
2928
2929 #ifndef NDEBUG
2930   for (auto KVPair : Info.PointerToBase)
2931     assert(Updated.count(KVPair.first) && "record for non-live value");
2932 #endif
2933
2934   Info.LiveSet = Updated;
2935 }