Enhance GVN to do more precise alias queries for non-local memory
authorDan Gohman <gohman@apple.com>
Wed, 10 Nov 2010 20:37:15 +0000 (20:37 +0000)
committerDan Gohman <gohman@apple.com>
Wed, 10 Nov 2010 20:37:15 +0000 (20:37 +0000)
references. For example, this allows gvn to eliminate the load in
this example:

  void foo(int n, int* p, int *q) {
    p[0] = 0;
    p[1] = 1;
    if (n) {
      *q = p[0];
    }
  }

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118714 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Analysis/AliasAnalysis.h
include/llvm/Analysis/MemoryDependenceAnalysis.h
lib/Analysis/MemDepPrinter.cpp
lib/Analysis/MemoryDependenceAnalysis.cpp
lib/Transforms/Scalar/GVN.cpp
test/Transforms/GVN/non-local-offset.ll [new file with mode: 0644]

index 17abae4eb9082f8426a33c84abf94b017936f779..302204e41b2de1f2d694a0be1c5348a455fdb00b 100644 (file)
@@ -107,6 +107,12 @@ public:
       return Copy;
     }
 
+    Location getWithNewSize(uint64_t NewSize) const {
+      Location Copy(*this);
+      Copy.Size = NewSize;
+      return Copy;
+    }
+
     Location getWithoutTBAATag() const {
       Location Copy(*this);
       Copy.TBAATag = 0;
index b5c68f6833f80e309f3f1dc69bb327408c83a74d..c1a1536037b8e24da3df323082200c0fa0aa4f58 100644 (file)
@@ -227,11 +227,14 @@ namespace llvm {
       BBSkipFirstBlockPair Pair;
       /// NonLocalDeps - The results of the query for each relevant block.
       NonLocalDepInfo NonLocalDeps;
+      /// Size - The maximum size of the dereferences of the
+      /// pointer. May be UnknownSize if the sizes are unknown.
+      uint64_t Size;
       /// TBAATag - The TBAA tag associated with dereferences of the
       /// pointer. May be null if there are no tags or conflicting tags.
-      MDNode *TBAATag;
+      const MDNode *TBAATag;
 
-      NonLocalPointerInfo() : TBAATag(0) {}
+      NonLocalPointerInfo() : Size(0), TBAATag(0) {}
     };
 
     /// CachedNonLocalPointerInfo - This map stores the cached results of doing
@@ -315,14 +318,6 @@ namespace llvm {
                                       bool isLoad, BasicBlock *BB,
                                     SmallVectorImpl<NonLocalDepResult> &Result);
 
-    /// getNonLocalPointerDependence - A convenience wrapper.
-    void getNonLocalPointerDependency(Value *Pointer, bool isLoad,
-                                      BasicBlock *BB,
-                                    SmallVectorImpl<NonLocalDepResult> &Result){
-      return getNonLocalPointerDependency(AliasAnalysis::Location(Pointer),
-                                          isLoad, BB, Result);
-    }
-    
     /// removeInstruction - Remove an instruction from the dependence analysis,
     /// updating the dependence of instructions that previously depended on it.
     void removeInstruction(Instruction *InstToRemove);
index 597daff8db2b825a87b9680d784ab1d207ff2143..90a7c576f07e84d032b7cf68460b6c6eedf2e407 100644 (file)
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CallSite.h"
@@ -40,7 +41,8 @@ namespace {
     void print(raw_ostream &OS, const Module * = 0) const;
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<MemoryDependenceAnalysis>();
+      AU.addRequiredTransitive<AliasAnalysis>();
+      AU.addRequiredTransitive<MemoryDependenceAnalysis>();
       AU.setPreservesAll();
     }
 
@@ -64,6 +66,7 @@ FunctionPass *llvm::createMemDepPrinter() {
 
 bool MemDepPrinter::runOnFunction(Function &F) {
   this->F = &F;
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
 
   // All this code uses non-const interfaces because MemDep is not
@@ -99,15 +102,23 @@ bool MemDepPrinter::runOnFunction(Function &F) {
       SmallVector<NonLocalDepResult, 4> NLDI;
       if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
         // FIXME: Volatile is not handled properly here.
-        MDA.getNonLocalPointerDependency(LI->getPointerOperand(), !LI->isVolatile(),
+        AliasAnalysis::Location Loc(LI->getPointerOperand(),
+                                    AA.getTypeStoreSize(LI->getType()),
+                                    LI->getMetadata(LLVMContext::MD_tbaa));
+        MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(),
                                          LI->getParent(), NLDI);
       } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
         // FIXME: Volatile is not handled properly here.
-        MDA.getNonLocalPointerDependency(SI->getPointerOperand(), false,
-                                         SI->getParent(), NLDI);
+        AliasAnalysis::Location Loc(SI->getPointerOperand(),
+                                    AA.getTypeStoreSize(SI->getValueOperand()
+                                                          ->getType()),
+                                    SI->getMetadata(LLVMContext::MD_tbaa));
+        MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
       } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
-        MDA.getNonLocalPointerDependency(VI->getPointerOperand(), false,
-                                         VI->getParent(), NLDI);
+        AliasAnalysis::Location Loc(SI->getPointerOperand(),
+                                    AliasAnalysis::UnknownSize,
+                                    SI->getMetadata(LLVMContext::MD_tbaa));
+        MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI);
       } else {
         llvm_unreachable("Unknown memory instruction!");
       }
index c72cd1e83a84d86041ce332706af0ea5e607a9b2..b4c6d09e4c2165cfd6c9145bdf37dc1af0082d23 100644 (file)
@@ -741,16 +741,40 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
   
   // Look up the cached info for Pointer.
   ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
-  NonLocalPointerInfo *CacheInfo = &NonLocalPointerDeps[CacheKey];
 
-  // If this query's TBAATag is inconsistent with the cached one, discard the
-  // tag and restart the query.
-  if (CacheInfo->TBAATag != Loc.TBAATag) {
-    CacheInfo->TBAATag = 0;
-    NonLocalPointerDeps.erase(CacheKey);
-    return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
-                                       isLoad, StartBB, Result, Visited,
-                                       SkipFirstBlock);
+  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
+  // CacheKey, this value will be inserted as the associated value. Otherwise,
+  // it'll be ignored, and we'll have to check to see if the cached size and
+  // tbaa tag are consistent with the current query.
+  NonLocalPointerInfo InitialNLPI;
+  InitialNLPI.Size = Loc.Size;
+  InitialNLPI.TBAATag = Loc.TBAATag;
+
+  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
+  // already have one.
+  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = 
+    NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
+  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
+
+  if (!Pair.second) {
+    // If this query's Size is inconsistent with the cached one, take the
+    // maximum size and restart the query.
+    if (CacheInfo->Size != Loc.Size) {
+      CacheInfo->Size = std::max(CacheInfo->Size, Loc.Size);
+      return getNonLocalPointerDepFromBB(Pointer,
+                                         Loc.getWithNewSize(CacheInfo->Size),
+                                         isLoad, StartBB, Result, Visited,
+                                         SkipFirstBlock);
+    }
+
+    // If this query's TBAATag is inconsistent with the cached one, discard the
+    // tag and restart the query.
+    if (CacheInfo->TBAATag != Loc.TBAATag) {
+      CacheInfo->TBAATag = 0;
+      return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+                                         isLoad, StartBB, Result, Visited,
+                                         SkipFirstBlock);
+    }
   }
 
   NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
@@ -796,6 +820,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
   else {
     CacheInfo->Pair = BBSkipFirstBlockPair();
+    CacheInfo->Size = 0;
     CacheInfo->TBAATag = 0;
   }
   
@@ -921,6 +946,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
         // cached value to do more work but not miss the phi trans failure.
         NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
         NLPI.Pair = BBSkipFirstBlockPair();
+        NLPI.Size = 0;
         NLPI.TBAATag = 0;
         continue;
       }
@@ -949,6 +975,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     // specific block queries) but we can't do the fastpath "return all
     // results from the set"  Clear out the indicator for this.
     CacheInfo->Pair = BBSkipFirstBlockPair();
+    CacheInfo->Size = 0;
     CacheInfo->TBAATag = 0;
     SkipFirstBlock = false;
     continue;
@@ -967,6 +994,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     // specific block queries) but we can't do the fastpath "return all
     // results from the set".  Clear out the indicator for this.
     CacheInfo->Pair = BBSkipFirstBlockPair();
+    CacheInfo->Size = 0;
     CacheInfo->TBAATag = 0;
     
     // If *nothing* works, mark the pointer as being clobbered by the first
@@ -1184,6 +1212,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
       
       // The cache is not valid for any specific block anymore.
       NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
+      NonLocalPointerDeps[P].Size = 0;
       NonLocalPointerDeps[P].TBAATag = 0;
       
       // Update any entries for RemInst to use the instruction after it.
index a2fcca59841d7d615fc9b355419bf74cf6e0991e..e506d0efd7f91e0273833cf049830538093a9731 100644 (file)
@@ -1351,8 +1351,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
                               SmallVectorImpl<Instruction*> &toErase) {
   // Find the non-local dependencies of the load.
   SmallVector<NonLocalDepResult, 64> Deps;
-  MD->getNonLocalPointerDependency(LI->getPointerOperand(), true,
-                                   LI->getParent(),
+  AliasAnalysis::Location Loc(LI->getPointerOperand(),
+                         VN.getAliasAnalysis()->getTypeStoreSize(LI->getType()),
+                              LI->getMetadata(LLVMContext::MD_tbaa));
+  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(),
                                    Deps);
   //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
   //             << Deps.size() << *LI << '\n');
diff --git a/test/Transforms/GVN/non-local-offset.ll b/test/Transforms/GVN/non-local-offset.ll
new file mode 100644 (file)
index 0000000..8eaa999
--- /dev/null
@@ -0,0 +1,59 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+; GVN should ignore the store to p[1] to see that the load from p[0] is
+; fully redundant.
+
+; CHECK: @yes
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %q
+; CHECK-NEXT: ret void
+
+define void @yes(i1 %c, i32* %p, i32* %q) nounwind {
+entry:
+  store i32 0, i32* %p
+  %p1 = getelementptr inbounds i32* %p, i64 1
+  store i32 1, i32* %p1
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load i32* %p
+  store i32 %t, i32* %q
+  ret void
+
+if.else:
+  ret void
+}
+
+; GVN should ignore the store to p[1] to see that the first load from p[0] is
+; fully redundant. However, the second load is larger, so it's not a simple
+; redundancy.
+
+; CHECK: @watch_out_for_size_change
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %q
+; CHECK-NEXT: ret void
+; CHECK: if.else:
+; CHECK: load i64* %pc
+; CHECK: store i64
+
+define void @watch_out_for_size_change(i1 %c, i32* %p, i32* %q) nounwind {
+entry:
+  store i32 0, i32* %p
+  %p1 = getelementptr inbounds i32* %p, i64 1
+  store i32 1, i32* %p1
+  br i1 %c, label %if.else, label %if.then
+
+if.then:
+  %t = load i32* %p
+  store i32 %t, i32* %q
+  ret void
+
+if.else:
+  %pc = bitcast i32* %p to i64*
+  %qc = bitcast i32* %q to i64*
+  %t64 = load i64* %pc
+  store i64 %t64, i64* %qc
+  ret void
+}