Enhance GVN to do more precise alias queries for non-local memory
[oota-llvm.git] / lib / Analysis / MemoryDependenceAnalysis.cpp
index 743420b5b80ae7d77c21ab5145f0adff02f00ace..b4c6d09e4c2165cfd6c9145bdf37dc1af0082d23 100644 (file)
@@ -19,6 +19,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -46,11 +47,15 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
 char MemoryDependenceAnalysis::ID = 0;
   
 // Register this pass...
-static RegisterPass<MemoryDependenceAnalysis> X("memdep",
-                                     "Memory Dependence Analysis", false, true);
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
+                "Memory Dependence Analysis", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
+                      "Memory Dependence Analysis", false, true)
 
 MemoryDependenceAnalysis::MemoryDependenceAnalysis()
-: FunctionPass(&ID), PredCache(0) {
+: FunctionPass(ID), PredCache(0) {
+  initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
 }
 MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
 }
@@ -108,45 +113,34 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
     Instruction *Inst = --ScanIt;
     
     // If this inst is a memory op, get the pointer it accessed
-    Value *Pointer = 0;
-    uint64_t PointerSize = 0;
+    AliasAnalysis::Location Loc;
     if (StoreInst *S = dyn_cast<StoreInst>(Inst)) {
-      Pointer = S->getPointerOperand();
-      PointerSize = AA->getTypeStoreSize(S->getOperand(0)->getType());
+      Loc = AliasAnalysis::Location(S->getPointerOperand(),
+                                    AA->getTypeStoreSize(S->getValueOperand()
+                                                           ->getType()),
+                                    S->getMetadata(LLVMContext::MD_tbaa));
     } else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
-      Pointer = V->getOperand(0);
-      PointerSize = AA->getTypeStoreSize(V->getType());
-    } else if (isFreeCall(Inst)) {
-      Pointer = Inst->getOperand(1);
+      Loc = AliasAnalysis::Location(V->getPointerOperand(),
+                                    AA->getTypeStoreSize(V->getType()),
+                                    V->getMetadata(LLVMContext::MD_tbaa));
+    } else if (const CallInst *CI = isFreeCall(Inst)) {
       // calls to free() erase the entire structure
-      PointerSize = ~0ULL;
-    } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
+      Loc = AliasAnalysis::Location(CI->getArgOperand(0));
+    } else if (CallSite InstCS = cast<Value>(Inst)) {
       // Debug intrinsics don't cause dependences.
       if (isa<DbgInfoIntrinsic>(Inst)) continue;
-      CallSite InstCS = CallSite::get(Inst);
       // If these two calls do not interfere, look past it.
       switch (AA->getModRefInfo(CS, InstCS)) {
       case AliasAnalysis::NoModRef:
-        // If the two calls don't interact (e.g. InstCS is readnone) keep
-        // scanning.
+        // If the two calls are the same, return InstCS as a Def, so that
+        // CS can be found redundant and eliminated.
+        if (isReadOnlyCall && InstCS.onlyReadsMemory() &&
+            CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+          return MemDepResult::getDef(Inst);
+
+        // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+        // keep scanning.
         continue;
-      case AliasAnalysis::Ref:
-        // If the two calls read the same memory locations and CS is a readonly
-        // function, then we have two cases: 1) the calls may not interfere with
-        // each other at all.  2) the calls may produce the same value.  In case
-        // #1 we want to ignore the values, in case #2, we want to return Inst
-        // as a Def dependence.  This allows us to CSE in cases like:
-        //   X = strlen(P);
-        //    memchr(...);
-        //   Y = strlen(P);  // Y = X
-        if (isReadOnlyCall) {
-          if (CS.getCalledFunction() != 0 &&
-              CS.getCalledFunction() == InstCS.getCalledFunction())
-            return MemDepResult::getDef(Inst);
-          // Ignore unrelated read/read call dependences.
-          continue;
-        }
-        // FALL THROUGH
       default:
         return MemDepResult::getClobber(Inst);
       }
@@ -155,7 +149,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
       continue;
     }
     
-    if (AA->getModRefInfo(CS, Pointer, PointerSize) != AliasAnalysis::NoModRef)
+    if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
       return MemDepResult::getClobber(Inst);
   }
   
@@ -167,10 +161,11 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
 }
 
 /// getPointerDependencyFrom - Return the instruction on which a memory
-/// location depends.  If isLoad is true, this routine ignore may-aliases with
-/// read-only operations.
+/// location depends.  If isLoad is true, this routine ignores may-aliases with
+/// read-only operations.  If isLoad is false, this routine ignores may-aliases
+/// with reads from read-only locations.
 MemDepResult MemoryDependenceAnalysis::
-getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, 
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, 
                          BasicBlock::iterator ScanIt, BasicBlock *BB) {
 
   Value *InvariantTag = 0;
@@ -187,8 +182,8 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     }
     
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
-      // Debug intrinsics don't cause dependences.
-      if (isa<DbgInfoIntrinsic>(Inst)) continue;
+      // Debug intrinsics don't (and can't) cause dependences.
+      if (isa<DbgInfoIntrinsic>(II)) continue;
       
       // If we pass an invariant-end marker, then we've just entered an
       // invariant region and can start ignoring dependencies.
@@ -196,28 +191,33 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
         // FIXME: This only considers queries directly on the invariant-tagged
         // pointer, not on query pointers that are indexed off of them.  It'd
         // be nice to handle that at some point.
-        AliasAnalysis::AliasResult R = 
-          AA->alias(II->getOperand(3), ~0U, MemPtr, ~0U);
-        if (R == AliasAnalysis::MustAlias) {
-          InvariantTag = II->getOperand(1);
-          continue;
-        }
-      
+        AliasAnalysis::AliasResult R =
+          AA->alias(AliasAnalysis::Location(II->getArgOperand(2)), MemLoc);
+        if (R == AliasAnalysis::MustAlias)
+          InvariantTag = II->getArgOperand(0);
+
+        continue;
+      }
+
       // If we reach a lifetime begin or end marker, then the query ends here
       // because the value is undefined.
-      } else if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
         // FIXME: This only considers queries directly on the invariant-tagged
         // pointer, not on query pointers that are indexed off of them.  It'd
         // be nice to handle that at some point.
         AliasAnalysis::AliasResult R =
-          AA->alias(II->getOperand(2), ~0U, MemPtr, ~0U);
+          AA->alias(AliasAnalysis::Location(II->getArgOperand(1)), MemLoc);
         if (R == AliasAnalysis::MustAlias)
           return MemDepResult::getDef(II);
+        continue;
       }
     }
 
     // If we're querying on a load and we're in an invariant region, we're done
     // at this point. Nothing a load depends on can live in an invariant region.
+    //
+    // FIXME: this will prevent us from returning load/load must-aliases, so GVN
+    // won't remove redundant loads.
     if (isLoad && InvariantTag) continue;
 
     // Values depend on loads if the pointers are must aliased.  This means that
@@ -225,16 +225,22 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
       Value *Pointer = LI->getPointerOperand();
       uint64_t PointerSize = AA->getTypeStoreSize(LI->getType());
+      MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
+      AliasAnalysis::Location LoadLoc(Pointer, PointerSize, TBAATag);
       
       // If we found a pointer, check if it could be the same as our pointer.
-      AliasAnalysis::AliasResult R =
-        AA->alias(Pointer, PointerSize, MemPtr, MemSize);
+      AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
       if (R == AliasAnalysis::NoAlias)
         continue;
       
       // May-alias loads don't depend on each other without a dependence.
       if (isLoad && R == AliasAnalysis::MayAlias)
         continue;
+
+      // Stores don't alias loads from read-only memory.
+      if (!isLoad && AA->pointsToConstantMemory(LoadLoc))
+        continue;
+
       // Stores depend on may and must aliased loads, loads depend on must-alias
       // loads.
       return MemDepResult::getDef(Inst);
@@ -248,17 +254,19 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
       // If alias analysis can tell that this store is guaranteed to not modify
       // the query pointer, ignore it.  Use getModRefInfo to handle cases where
       // the query pointer points to constant memory etc.
-      if (AA->getModRefInfo(SI, MemPtr, MemSize) == AliasAnalysis::NoModRef)
+      if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
         continue;
 
       // Ok, this store might clobber the query pointer.  Check to see if it is
       // a must alias: in this case, we want to return this as a def.
       Value *Pointer = SI->getPointerOperand();
       uint64_t PointerSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
+      MDNode *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa);
       
       // If we found a pointer, check if it could be the same as our pointer.
       AliasAnalysis::AliasResult R =
-        AA->alias(Pointer, PointerSize, MemPtr, MemSize);
+        AA->alias(AliasAnalysis::Location(Pointer, PointerSize, TBAATag),
+                  MemLoc);
       
       if (R == AliasAnalysis::NoAlias)
         continue;
@@ -275,8 +283,9 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     // a subsequent bitcast of the malloc call result.  There can be stores to
     // the malloced memory between the malloc call and its bitcast uses, and we
     // need to continue scanning until the malloc call.
-    if (isa<AllocaInst>(Inst) || extractMallocCall(Inst)) {
-      Value *AccessPtr = MemPtr->getUnderlyingObject();
+    if (isa<AllocaInst>(Inst) ||
+        (isa<CallInst>(Inst) && extractMallocCall(Inst))) {
+      const Value *AccessPtr = MemLoc.Ptr->getUnderlyingObject();
       
       if (AccessPtr == Inst ||
           AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
@@ -285,7 +294,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     }
 
     // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
-    switch (AA->getModRefInfo(Inst, MemPtr, MemSize)) {
+    switch (AA->getModRefInfo(Inst, MemLoc)) {
     case AliasAnalysis::NoModRef:
       // If the call has no effect on the queried pointer, just ignore it.
       continue;
@@ -335,8 +344,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
   
   BasicBlock *QueryParent = QueryInst->getParent();
   
-  Value *MemPtr = 0;
-  uint64_t MemSize = 0;
+  AliasAnalysis::Location MemLoc;
   
   // Do the scan.
   if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
@@ -351,41 +359,46 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
     // previous instruction as a clobber.
     if (SI->isVolatile())
       LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
-    else {
-      MemPtr = SI->getPointerOperand();
-      MemSize = AA->getTypeStoreSize(SI->getOperand(0)->getType());
-    }
+    else
+      MemLoc = AliasAnalysis::Location(SI->getPointerOperand(),
+                                       AA->getTypeStoreSize(SI->getOperand(0)
+                                                              ->getType()),
+                                       SI->getMetadata(LLVMContext::MD_tbaa));
   } else if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
     // If this is a volatile load, don't mess around with it.  Just return the
     // previous instruction as a clobber.
     if (LI->isVolatile())
       LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
-    else {
-      MemPtr = LI->getPointerOperand();
-      MemSize = AA->getTypeStoreSize(LI->getType());
-    }
-  } else if (isFreeCall(QueryInst)) {
-    MemPtr = QueryInst->getOperand(1);
+    else
+      MemLoc = AliasAnalysis::Location(LI->getPointerOperand(),
+                                       AA->getTypeStoreSize(LI->getType()),
+                                       LI->getMetadata(LLVMContext::MD_tbaa));
+  } else if (const CallInst *CI = isFreeCall(QueryInst)) {
     // calls to free() erase the entire structure, not just a field.
-    MemSize = ~0UL;
+    MemLoc = AliasAnalysis::Location(CI->getArgOperand(0));
   } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
     int IntrinsicID = 0;  // Intrinsic IDs start at 1.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+    IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst);
+    if (II)
       IntrinsicID = II->getIntrinsicID();
 
     switch (IntrinsicID) {
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
     case Intrinsic::invariant_start:
-      MemPtr = QueryInst->getOperand(2);
-      MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue();
+      MemLoc = AliasAnalysis::Location(II->getArgOperand(1),
+                                       cast<ConstantInt>(II->getArgOperand(0))
+                                         ->getZExtValue(),
+                                       II->getMetadata(LLVMContext::MD_tbaa));
       break;
     case Intrinsic::invariant_end:
-      MemPtr = QueryInst->getOperand(3);
-      MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue();
+      MemLoc = AliasAnalysis::Location(II->getArgOperand(2),
+                                       cast<ConstantInt>(II->getArgOperand(1))
+                                         ->getZExtValue(),
+                                       II->getMetadata(LLVMContext::MD_tbaa));
       break;
     default:
-      CallSite QueryCS = CallSite::get(QueryInst);
+      CallSite QueryCS(QueryInst);
       bool isReadOnly = AA->onlyReadsMemory(QueryCS);
       LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
                                              QueryParent);
@@ -397,12 +410,12 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
   }
   
   // If we need to do a pointer scan, make it happen.
-  if (MemPtr) {
+  if (MemLoc.Ptr) {
     bool isLoad = !QueryInst->mayWriteToMemory();
     if (IntrinsicInst *II = dyn_cast<MemoryUseIntrinsic>(QueryInst)) {
       isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
     }
-    LocalCache = getPointerDependencyFrom(MemPtr, MemSize, isLoad, ScanPos,
+    LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
                                           QueryParent);
   }
   
@@ -455,7 +468,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
     // Okay, we have a cache entry.  If we know it is not dirty, just return it
     // with no computation.
     if (!CacheP.second) {
-      NumCacheNonLocal++;
+      ++NumCacheNonLocal;
       return Cache;
     }
     
@@ -477,7 +490,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
     BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
     for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
       DirtyBlocks.push_back(*PI);
-    NumUncacheNonLocal++;
+    ++NumUncacheNonLocal;
   }
   
   // isReadonlyCall - If this is a read-only call, we can be more aggressive.
@@ -546,9 +559,9 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
     // If we had a dirty entry for the block, update it.  Otherwise, just add
     // a new entry.
     if (ExistingResult)
-      ExistingResult->setResult(Dep, 0);
+      ExistingResult->setResult(Dep);
     else
-      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep, 0));
+      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
     
     // If the block has a dependency (i.e. it isn't completely transparent to
     // the value), remember the association!
@@ -577,31 +590,27 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
 /// own block.
 ///
 void MemoryDependenceAnalysis::
-getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
-                             SmallVectorImpl<NonLocalDepEntry> &Result) {
-  assert(isa<PointerType>(Pointer->getType()) &&
+getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
+                             BasicBlock *FromBB,
+                             SmallVectorImpl<NonLocalDepResult> &Result) {
+  assert(Loc.Ptr->getType()->isPointerTy() &&
          "Can't get pointer deps of a non-pointer!");
   Result.clear();
   
-  // We know that the pointer value is live into FromBB find the def/clobbers
-  // from presecessors.
-  const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
-  uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
-  
-  PHITransAddr Address(Pointer, TD);
+  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
   
   // This is the set of blocks we've inspected, and the pointer we consider in
   // each block.  Because of critical edges, we currently bail out if querying
   // a block with multiple different pointers.  This can happen during PHI
   // translation.
   DenseMap<BasicBlock*, Value*> Visited;
-  if (!getNonLocalPointerDepFromBB(Address, PointeeSize, isLoad, FromBB,
+  if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
                                    Result, Visited, true))
     return;
   Result.clear();
-  Result.push_back(NonLocalDepEntry(FromBB,
-                                    MemDepResult::getClobber(FromBB->begin()),
-                                    Pointer));
+  Result.push_back(NonLocalDepResult(FromBB,
+                                     MemDepResult::getClobber(FromBB->begin()),
+                                     const_cast<Value *>(Loc.Ptr)));
 }
 
 /// GetNonLocalInfoForBlock - Compute the memdep value for BB with
@@ -609,7 +618,7 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
 /// lookup (which may use dirty cache info if available).  If we do a lookup,
 /// add the result to the cache.
 MemDepResult MemoryDependenceAnalysis::
-GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
+GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
                         bool isLoad, BasicBlock *BB,
                         NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
   
@@ -643,22 +652,21 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
     ScanPos = ExistingResult->getResult().getInst();
     
     // Eliminating the dirty entry from 'Cache', so update the reverse info.
-    ValueIsLoadPair CacheKey(Pointer, isLoad);
+    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
     RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
   } else {
     ++NumUncacheNonLocalPtr;
   }
   
   // Scan the block for the dependency.
-  MemDepResult Dep = getPointerDependencyFrom(Pointer, PointeeSize, isLoad, 
-                                              ScanPos, BB);
+  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
   
   // If we had a dirty entry for the block, update it.  Otherwise, just add
   // a new entry.
   if (ExistingResult)
-    ExistingResult->setResult(Dep, Pointer);
+    ExistingResult->setResult(Dep);
   else
-    Cache->push_back(NonLocalDepEntry(BB, Dep, Pointer));
+    Cache->push_back(NonLocalDepEntry(BB, Dep));
   
   // If the block has a dependency (i.e. it isn't completely transparent to
   // the value), remember the reverse association because we just added it
@@ -670,7 +678,7 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
   // update MemDep when we remove instructions.
   Instruction *Inst = Dep.getInst();
   assert(Inst && "Didn't depend on anything?");
-  ValueIsLoadPair CacheKey(Pointer, isLoad);
+  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
   ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
   return Dep;
 }
@@ -724,22 +732,56 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
 /// not compute dependence information for some reason.  This should be treated
 /// as a clobber dependence on the first instruction in the predecessor block.
 bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+                            const AliasAnalysis::Location &Loc,
                             bool isLoad, BasicBlock *StartBB,
-                            SmallVectorImpl<NonLocalDepEntry> &Result,
+                            SmallVectorImpl<NonLocalDepResult> &Result,
                             DenseMap<BasicBlock*, Value*> &Visited,
                             bool SkipFirstBlock) {
   
   // Look up the cached info for Pointer.
   ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
-  
-  std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
-    &NonLocalPointerDeps[CacheKey];
-  NonLocalDepInfo *Cache = &CacheInfo->second;
+
+  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
+  // CacheKey, this value will be inserted as the associated value. Otherwise,
+  // it'll be ignored, and we'll have to check to see if the cached size and
+  // tbaa tag are consistent with the current query.
+  NonLocalPointerInfo InitialNLPI;
+  InitialNLPI.Size = Loc.Size;
+  InitialNLPI.TBAATag = Loc.TBAATag;
+
+  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
+  // already have one.
+  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = 
+    NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
+  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
+
+  if (!Pair.second) {
+    // If this query's Size is inconsistent with the cached one, take the
+    // maximum size and restart the query.
+    if (CacheInfo->Size != Loc.Size) {
+      CacheInfo->Size = std::max(CacheInfo->Size, Loc.Size);
+      return getNonLocalPointerDepFromBB(Pointer,
+                                         Loc.getWithNewSize(CacheInfo->Size),
+                                         isLoad, StartBB, Result, Visited,
+                                         SkipFirstBlock);
+    }
+
+    // If this query's TBAATag is inconsistent with the cached one, discard the
+    // tag and restart the query.
+    if (CacheInfo->TBAATag != Loc.TBAATag) {
+      CacheInfo->TBAATag = 0;
+      return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+                                         isLoad, StartBB, Result, Visited,
+                                         SkipFirstBlock);
+    }
+  }
+
+  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
 
   // If we have valid cached information for exactly the block we are
   // investigating, just return it with no recomputation.
-  if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
     // We have a fully cached result for this query then we can just return the
     // cached results and populate the visited set.  However, we have to verify
     // that we don't already have conflicting results for these blocks.  Check
@@ -759,11 +801,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
       }
     }
     
+    Value *Addr = Pointer.getAddr();
     for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
          I != E; ++I) {
-      Visited.insert(std::make_pair(I->getBB(), Pointer.getAddr()));
+      Visited.insert(std::make_pair(I->getBB(), Addr));
       if (!I->getResult().isNonLocal())
-        Result.push_back(*I);
+        Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
     }
     ++NumCacheCompleteNonLocalPtr;
     return false;
@@ -774,9 +817,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
   // than its valid cache info.  If empty, the result will be valid cache info,
   // otherwise it isn't.
   if (Cache->empty())
-    CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
-  else
-    CacheInfo->first = BBSkipFirstBlockPair();
+    CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+  else {
+    CacheInfo->Pair = BBSkipFirstBlockPair();
+    CacheInfo->Size = 0;
+    CacheInfo->TBAATag = 0;
+  }
   
   SmallVector<BasicBlock*, 32> Worklist;
   Worklist.push_back(StartBB);
@@ -801,13 +847,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
       // Get the dependency info for Pointer in BB.  If we have cached
       // information, we will use it, otherwise we compute it.
       DEBUG(AssertSorted(*Cache, NumSortedEntries));
-      MemDepResult Dep = GetNonLocalInfoForBlock(Pointer.getAddr(), PointeeSize,
-                                                 isLoad, BB, Cache,
+      MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache,
                                                  NumSortedEntries);
       
       // If we got a Def or Clobber, add this to the list of results.
       if (!Dep.isNonLocal()) {
-        Result.push_back(NonLocalDepEntry(BB, Dep, Pointer.getAddr()));
+        Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
         continue;
       }
     }
@@ -859,7 +904,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
       // Get the PHI translated pointer in this predecessor.  This can fail if
       // not translatable, in which case the getAddr() returns null.
       PHITransAddr PredPointer(Pointer);
-      PredPointer.PHITranslateValue(BB, Pred);
+      PredPointer.PHITranslateValue(BB, Pred, 0);
 
       Value *PredPtrVal = PredPointer.getAddr();
       
@@ -889,9 +934,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
       // a computation of the pointer in this predecessor.
       if (PredPtrVal == 0) {
         // Add the entry to the Result list.
-        NonLocalDepEntry Entry(Pred,
-                               MemDepResult::getClobber(Pred->getTerminator()),
-                               PredPtrVal);
+        NonLocalDepResult Entry(Pred,
+                                MemDepResult::getClobber(Pred->getTerminator()),
+                                PredPtrVal);
         Result.push_back(Entry);
 
         // Since we had a phi translation failure, the cache for CacheKey won't
@@ -899,7 +944,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
         // queries.  Mark this in NonLocalPointerDeps by setting the
         // BBSkipFirstBlockPair pointer to null.  This requires reuse of the
         // cached value to do more work but not miss the phi trans failure.
-        NonLocalPointerDeps[CacheKey].first = BBSkipFirstBlockPair();
+        NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
+        NLPI.Pair = BBSkipFirstBlockPair();
+        NLPI.Size = 0;
+        NLPI.TBAATag = 0;
         continue;
       }
 
@@ -910,21 +958,25 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
       
       // If we have a problem phi translating, fall through to the code below
       // to handle the failure condition.
-      if (getNonLocalPointerDepFromBB(PredPointer, PointeeSize, isLoad, Pred,
+      if (getNonLocalPointerDepFromBB(PredPointer,
+                                      Loc.getWithNewPtr(PredPointer.getAddr()),
+                                      isLoad, Pred,
                                       Result, Visited))
         goto PredTranslationFailure;
     }
     
     // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
     CacheInfo = &NonLocalPointerDeps[CacheKey];
-    Cache = &CacheInfo->second;
+    Cache = &CacheInfo->NonLocalDeps;
     NumSortedEntries = Cache->size();
     
     // Since we did phi translation, the "Cache" set won't contain all of the
     // results for the query.  This is ok (we can still use it to accelerate
     // specific block queries) but we can't do the fastpath "return all
     // results from the set"  Clear out the indicator for this.
-    CacheInfo->first = BBSkipFirstBlockPair();
+    CacheInfo->Pair = BBSkipFirstBlockPair();
+    CacheInfo->Size = 0;
+    CacheInfo->TBAATag = 0;
     SkipFirstBlock = false;
     continue;
 
@@ -933,7 +985,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
     if (Cache == 0) {
       // Refresh the CacheInfo/Cache pointer if it got invalidated.
       CacheInfo = &NonLocalPointerDeps[CacheKey];
-      Cache = &CacheInfo->second;
+      Cache = &CacheInfo->NonLocalDeps;
       NumSortedEntries = Cache->size();
     }
     
@@ -941,7 +993,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
     // results for the query.  This is ok (we can still use it to accelerate
     // specific block queries) but we can't do the fastpath "return all
     // results from the set".  Clear out the indicator for this.
-    CacheInfo->first = BBSkipFirstBlockPair();
+    CacheInfo->Pair = BBSkipFirstBlockPair();
+    CacheInfo->Size = 0;
+    CacheInfo->TBAATag = 0;
     
     // If *nothing* works, mark the pointer as being clobbered by the first
     // instruction in this block.
@@ -959,9 +1013,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
       
       assert(I->getResult().isNonLocal() &&
              "Should only be here with transparent block");
-      I->setResult(MemDepResult::getClobber(BB->begin()), Pointer.getAddr());
+      I->setResult(MemDepResult::getClobber(BB->begin()));
       ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
-      Result.push_back(*I);
+      Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
+                                         Pointer.getAddr()));
       break;
     }
   }
@@ -982,7 +1037,7 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
   
   // Remove all of the entries in the BB->val map.  This involves removing
   // instructions from the reverse map.
-  NonLocalDepInfo &PInfo = It->second.second;
+  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
   
   for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
     Instruction *Target = PInfo[i].getResult().getInst();
@@ -1006,13 +1061,20 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
 /// in more places that cached info does not necessarily keep.
 void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
   // If Ptr isn't really a pointer, just ignore it.
-  if (!isa<PointerType>(Ptr->getType())) return;
+  if (!Ptr->getType()->isPointerTy()) return;
   // Flush store info for the pointer.
   RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
   // Flush load info for the pointer.
   RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
 }
 
+/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
+/// This needs to be done when the CFG changes, e.g., due to splitting
+/// critical edges.
+void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
+  PredCache->clear();
+}
+
 /// removeInstruction - Remove an instruction from the dependence analysis,
 /// updating the dependence of instructions that previously depended on it.
 /// This method attempts to keep the cache coherent using the reverse map.
@@ -1047,7 +1109,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
   
   // Remove it from both the load info and the store info.  The instruction
   // can't be in either of these maps if it is non-pointer.
-  if (isa<PointerType>(RemInst->getType())) {
+  if (RemInst->getType()->isPointerTy()) {
     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
     RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
   }
@@ -1115,7 +1177,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
         if (DI->getResult().getInst() != RemInst) continue;
         
         // Convert to a dirty entry for the subsequent instruction.
-        DI->setResult(NewDirtyVal, DI->getAddress());
+        DI->setResult(NewDirtyVal);
         
         if (Instruction *NextI = NewDirtyVal.getInst())
           ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
@@ -1146,10 +1208,12 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
       assert(P.getPointer() != RemInst &&
              "Already removed NonLocalPointerDeps info for RemInst");
       
-      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second;
+      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
       
       // The cache is not valid for any specific block anymore.
-      NonLocalPointerDeps[P].first = BBSkipFirstBlockPair();
+      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
+      NonLocalPointerDeps[P].Size = 0;
+      NonLocalPointerDeps[P].TBAATag = 0;
       
       // Update any entries for RemInst to use the instruction after it.
       for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
@@ -1157,7 +1221,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
         if (DI->getResult().getInst() != RemInst) continue;
         
         // Convert to a dirty entry for the subsequent instruction.
-        DI->setResult(NewDirtyVal, DI->getAddress());
+        DI->setResult(NewDirtyVal);
         
         if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
           ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
@@ -1195,7 +1259,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
   for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(),
        E = NonLocalPointerDeps.end(); I != E; ++I) {
     assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
-    const NonLocalDepInfo &Val = I->second.second;
+    const NonLocalDepInfo &Val = I->second.NonLocalDeps;
     for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
          II != E; ++II)
       assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");