Be more precise when eliminating pointers bue to memcpy's. This allows more
[oota-llvm.git] / lib / Transforms / Scalar / DeadStoreElimination.cpp
index 39a0aba2c95dd1d98e23658ef0fffc8248f0d8c9..f9d1205ada558e2a6feef90bcdf9ce703a409069 100644 (file)
@@ -2,8 +2,8 @@
 //
 //                     The LLVM Compiler Infrastructure
 //
-// This file was developed by Owen Anderson and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
@@ -20,6 +20,7 @@
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -51,9 +52,9 @@ namespace {
                                             Instruction* dependency,
                                         SetVector<Instruction*>& possiblyDead);
     bool handleEndBlock(BasicBlock& BB, SetVector<Instruction*>& possiblyDead);
-    bool RemoveUndeadPointers(Value* pointer,
+    bool RemoveUndeadPointers(Value* pointer, uint64_t killPointerSize,
                               BasicBlock::iterator& BBI,
-                              SmallPtrSet<AllocaInst*, 64>& deadPointers, 
+                              SmallPtrSet<Value*, 64>& deadPointers, 
                               SetVector<Instruction*>& possiblyDead);
     void DeleteDeadInstructionChains(Instruction *I,
                                      SetVector<Instruction*> &DeadInsts);
@@ -63,14 +64,18 @@ namespace {
     /// from allocas, it is safe to ignore GEP indices, since
     /// either the store will be in the alloca, and thus dead,
     /// or beyond the end of the alloca, and thus undefined.
-    void TranslatePointerBitCasts(Value*& v) {
+    void TranslatePointerBitCasts(Value*& v, bool zeroGepsOnly = false) {
       assert(isa<PointerType>(v->getType()) &&
              "Translating a non-pointer type?");
       while (true) {
         if (BitCastInst* C = dyn_cast<BitCastInst>(v))
           v = C->getOperand(0);
         else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(v))
-          v = G->getOperand(0);
+          if (!zeroGepsOnly || G->hasAllZeroIndices()) {
+            v = G->getOperand(0);
+          } else {
+            break;
+          }
         else
           break;
       }
@@ -95,7 +100,8 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
 
 bool DSE::runOnBasicBlock(BasicBlock &BB) {
   MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
-  
+  TargetData &TD = getAnalysis<TargetData>();  
+
   // Record the last-seen store to this pointer
   DenseMap<Value*, StoreInst*> lastStore;
   // Record instructions possibly made dead by deleting a store
@@ -111,11 +117,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
       continue;
       
     Value* pointer = 0;
-    if (StoreInst* S = dyn_cast<StoreInst>(BBI))
-      pointer = S->getPointerOperand();
-    else
+    if (StoreInst* S = dyn_cast<StoreInst>(BBI)) {
+      if (!S->isVolatile())
+        pointer = S->getPointerOperand();
+      else
+        continue;
+    } else
       pointer = cast<FreeInst>(BBI)->getPointerOperand();
       
+    TranslatePointerBitCasts(pointer, true);
     StoreInst*& last = lastStore[pointer];
     bool deletedStore = false;
       
@@ -127,7 +137,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
       while (dep != MemoryDependenceAnalysis::None &&
              dep != MemoryDependenceAnalysis::NonLocal &&
              isa<StoreInst>(dep)) {
-        if (dep != last) {
+        if (dep != last ||
+             TD.getTypeStoreSize(last->getOperand(0)->getType()) >
+             TD.getTypeStoreSize(BBI->getOperand(0)->getType())) {
           dep = MD.getDependency(BBI, dep);
           continue;
         }
@@ -140,7 +152,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
           possiblyDead.insert(D);
         if (Instruction* D = dyn_cast<Instruction>(last->getOperand(1)))
           possiblyDead.insert(D);
-          
+        
         last->eraseFromParent();
         NumFastStores++;
         deletedStore = true;
@@ -194,15 +206,17 @@ bool DSE::handleFreeWithNonTrivialDependency(FreeInst* F, Instruction* dep,
   StoreInst* dependency = dyn_cast<StoreInst>(dep);
   if (!dependency)
     return false;
+  else if (dependency->isVolatile())
+    return false;
   
   Value* depPointer = dependency->getPointerOperand();
   const Type* depType = dependency->getOperand(0)->getType();
-  unsigned depPointerSize = TD.getTypeSize(depType);
-  
+  unsigned depPointerSize = TD.getTypeStoreSize(depType);
+
   // Check for aliasing
-  AliasAnalysis::AliasResult A = AA.alias(F->getPointerOperand(), ~0UL,
+  AliasAnalysis::AliasResult A = AA.alias(F->getPointerOperand(), ~0U,
                                           depPointer, depPointerSize);
-    
+
   if (A == AliasAnalysis::MustAlias) {
     // Remove it!
     MD.removeInstruction(dependency);
@@ -236,119 +250,254 @@ bool DSE::handleEndBlock(BasicBlock& BB,
   bool MadeChange = false;
   
   // Pointers alloca'd in this function are dead in the end block
-  SmallPtrSet<AllocaInst*, 64> deadPointers;
+  SmallPtrSet<Value*, 64> deadPointers;
   
   // Find all of the alloca'd pointers in the entry block
   BasicBlock *Entry = BB.getParent()->begin();
   for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
     if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
       deadPointers.insert(AI);
+  for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
+       AE = BB.getParent()->arg_end(); AI != AE; ++AI)
+    if (AI->hasByValAttr())
+      deadPointers.insert(AI);
   
   // Scan the basic block backwards
   for (BasicBlock::iterator BBI = BB.end(); BBI != BB.begin(); ){
     --BBI;
     
-    if (deadPointers.empty())
-      break;
-    
     // If we find a store whose pointer is dead...
     if (StoreInst* S = dyn_cast<StoreInst>(BBI)) {
-      Value* pointerOperand = S->getPointerOperand();
-      // See through pointer-to-pointer bitcasts
-      TranslatePointerBitCasts(pointerOperand);
+      if (!S->isVolatile()) {
+        Value* pointerOperand = S->getPointerOperand();
+        // See through pointer-to-pointer bitcasts
+        TranslatePointerBitCasts(pointerOperand);
       
-      if (deadPointers.count(pointerOperand)){
-        // Remove it!
-        MD.removeInstruction(S);
+        // Alloca'd pointers or byval arguments (which are functionally like
+        // alloca's) are valid candidates for removal.
+        if (deadPointers.count(pointerOperand)) {
+          // Remove it!
+          MD.removeInstruction(S);
         
-        // DCE instructions only used to calculate that store
-        if (Instruction* D = dyn_cast<Instruction>(S->getOperand(0)))
+          // DCE instructions only used to calculate that store
+          if (Instruction* D = dyn_cast<Instruction>(S->getOperand(0)))
+            possiblyDead.insert(D);
+          if (Instruction* D = dyn_cast<Instruction>(S->getOperand(1)))
+            possiblyDead.insert(D);
+        
+          BBI++;
+          S->eraseFromParent();
+          NumFastStores++;
+          MadeChange = true;
+        }
+      }
+      
+      continue;
+    
+    // We can also remove memcpy's to local variables at the end of a function
+    } else if (MemCpyInst* M = dyn_cast<MemCpyInst>(BBI)) {
+      Value* dest = M->getDest();
+      TranslatePointerBitCasts(dest);
+      
+      if (deadPointers.count(dest)) {
+        MD.removeInstruction(M);
+        
+        // DCE instructions only used to calculate that memcpy
+        if (Instruction* D = dyn_cast<Instruction>(M->getRawSource()))
+          possiblyDead.insert(D);
+        if (Instruction* D = dyn_cast<Instruction>(M->getLength()))
           possiblyDead.insert(D);
-        if (Instruction* D = dyn_cast<Instruction>(S->getOperand(1)))
+        if (Instruction* D = dyn_cast<Instruction>(M->getRawDest()))
           possiblyDead.insert(D);
         
         BBI++;
-        S->eraseFromParent();
-        NumFastStores++;
+        M->eraseFromParent();
+        NumFastOther++;
         MadeChange = true;
+        
+        continue;
       }
       
-      continue;
+      // Because a memcpy is also a load, we can't skip it if we didn't remove it
     }
     
     Value* killPointer = 0;
+    uint64_t killPointerSize = ~0UL;
     
     // If we encounter a use of the pointer, it is no longer considered dead
     if (LoadInst* L = dyn_cast<LoadInst>(BBI)) {
+      // However, if this load is unused, we can go ahead and remove it, and
+      // not have to worry about it making our pointer undead!
+      if (L->use_empty()) {
+        MD.removeInstruction(L);
+        
+        // DCE instructions only used to calculate that load
+        if (Instruction* D = dyn_cast<Instruction>(L->getPointerOperand()))
+          possiblyDead.insert(D);
+        
+        BBI++;
+        L->eraseFromParent();
+        NumFastOther++;
+        MadeChange = true;
+        possiblyDead.remove(L);
+        
+        continue;
+      }
+      
       killPointer = L->getPointerOperand();
     } else if (VAArgInst* V = dyn_cast<VAArgInst>(BBI)) {
       killPointer = V->getOperand(0);
+    } else if (isa<MemCpyInst>(BBI) &&
+               isa<ConstantInt>(cast<MemCpyInst>(BBI)->getLength())) {
+      killPointer = cast<MemCpyInst>(BBI)->getSource();
+      killPointerSize = cast<ConstantInt>(
+                            cast<MemCpyInst>(BBI)->getLength())->getZExtValue();
     } else if (AllocaInst* A = dyn_cast<AllocaInst>(BBI)) {
       deadPointers.erase(A);
+      
+      // Dead alloca's can be DCE'd when we reach them
+      if (A->use_empty()) {
+        MD.removeInstruction(A);
+        
+        // DCE instructions only used to calculate that load
+        if (Instruction* D = dyn_cast<Instruction>(A->getArraySize()))
+          possiblyDead.insert(D);
+        
+        BBI++;
+        A->eraseFromParent();
+        NumFastOther++;
+        MadeChange = true;
+        possiblyDead.remove(A);
+      }
+      
       continue;
     } else if (CallSite::get(BBI).getInstruction() != 0) {
+      // If this call does not access memory, it can't
+      // be undeadifying any of our pointers.
+      CallSite CS = CallSite::get(BBI);
+      if (AA.doesNotAccessMemory(CS))
+        continue;
+      
+      unsigned modRef = 0;
+      unsigned other = 0;
+      
       // Remove any pointers made undead by the call from the dead set
-      std::vector<Instruction*> dead;
-      for (SmallPtrSet<AllocaInst*, 64>::iterator I = deadPointers.begin(),
+      std::vector<Value*> dead;
+      for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
            E = deadPointers.end(); I != E; ++I) {
+        // HACK: if we detect that our AA is imprecise, it's not
+        // worth it to scan the rest of the deadPointers set.  Just
+        // assume that the AA will return ModRef for everything, and
+        // go ahead and bail.
+        if (modRef >= 16 && other == 0) {
+          deadPointers.clear();
+          return MadeChange;
+        }
+
         // Get size information for the alloca
-        unsigned pointerSize = ~0UL;
-        if (ConstantInt* C = dyn_cast<ConstantInt>((*I)->getArraySize()))
-          pointerSize = C->getZExtValue() * \
-                        TD.getTypeSize((*I)->getAllocatedType());     
-        
+        unsigned pointerSize = ~0U;
+        if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
+          if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
+            pointerSize = C->getZExtValue() * \
+                          TD.getABITypeSize(A->getAllocatedType());
+        } else {
+          const PointerType* PT = cast<PointerType>(
+                                                 cast<Argument>(*I)->getType());
+          pointerSize = TD.getABITypeSize(PT->getElementType());
+        }
+
         // See if the call site touches it
-        AliasAnalysis::ModRefResult A = AA.getModRefInfo(CallSite::get(BBI),
-                                                         *I, pointerSize);
+        AliasAnalysis::ModRefResult A = AA.getModRefInfo(CS, *I, pointerSize);
+        
+        if (A == AliasAnalysis::ModRef)
+          modRef++;
+        else
+          other++;
+        
         if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
           dead.push_back(*I);
       }
 
-      for (std::vector<Instruction*>::iterator I = dead.begin(), E = dead.end();
+      for (std::vector<Value*>::iterator I = dead.begin(), E = dead.end();
            I != E; ++I)
         deadPointers.erase(*I);
       
       continue;
+    } else {
+      // For any non-memory-affecting non-terminators, DCE them as we reach them
+      Instruction *CI = BBI;
+      if (!CI->isTerminator() && CI->use_empty() && !isa<FreeInst>(CI)) {
+        
+        // DCE instructions only used to calculate that load
+        for (Instruction::op_iterator OI = CI->op_begin(), OE = CI->op_end();
+             OI != OE; ++OI)
+          if (Instruction* D = dyn_cast<Instruction>(OI))
+            possiblyDead.insert(D);
+        
+        BBI++;
+        CI->eraseFromParent();
+        NumFastOther++;
+        MadeChange = true;
+        possiblyDead.remove(CI);
+        
+        continue;
+      }
     }
     
     if (!killPointer)
       continue;
     
+    TranslatePointerBitCasts(killPointer);
+    
     // Deal with undead pointers
-    MadeChange |= RemoveUndeadPointers(killPointer, BBI,
+    MadeChange |= RemoveUndeadPointers(killPointer, killPointerSize, BBI,
                                        deadPointers, possiblyDead);
   }
   
   return MadeChange;
 }
 
-/// RemoveUndeadPointers - takes an instruction and a setvector of
-/// dead instructions.  If I is dead, it is erased, and its operands are
-/// checked for deadness.  If they are dead, they are added to the dead
-/// setvector.
-bool DSE::RemoveUndeadPointers(Value* killPointer,
+/// RemoveUndeadPointers - check for uses of a pointer that make it
+/// undead when scanning for dead stores to alloca's.
+bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize,
                                 BasicBlock::iterator& BBI,
-                                SmallPtrSet<AllocaInst*, 64>& deadPointers, 
+                                SmallPtrSet<Value*, 64>& deadPointers, 
                                 SetVector<Instruction*>& possiblyDead) {
   TargetData &TD = getAnalysis<TargetData>();
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
   MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>();
                                   
+  // If the kill pointer can be easily reduced to an alloca,
+  // don't bother doing extraneous AA queries
+  if (deadPointers.count(killPointer)) {
+    deadPointers.erase(killPointer);
+    return false;
+  } else if (isa<GlobalValue>(killPointer)) {
+    // A global can't be in the dead pointer set
+    return false;
+  }
+  
   bool MadeChange = false;
   
-  std::vector<Instruction*> undead;
+  std::vector<Value*> undead;
     
-  for (SmallPtrSet<AllocaInst*, 64>::iterator I = deadPointers.begin(),
+  for (SmallPtrSet<Value*, 64>::iterator I = deadPointers.begin(),
       E = deadPointers.end(); I != E; ++I) {
     // Get size information for the alloca
-    unsigned pointerSize = ~0UL;
-    if (ConstantInt* C = dyn_cast<ConstantInt>((*I)->getArraySize()))
-      pointerSize = C->getZExtValue() * \
-                    TD.getTypeSize((*I)->getAllocatedType());     
-      
+    unsigned pointerSize = ~0U;
+    if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) {
+      if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize()))
+        pointerSize = C->getZExtValue() * \
+                      TD.getABITypeSize(A->getAllocatedType());
+    } else {
+      const PointerType* PT = cast<PointerType>(
+                                                cast<Argument>(*I)->getType());
+      pointerSize = TD.getABITypeSize(PT->getElementType());
+    }
+
     // See if this pointer could alias it
     AliasAnalysis::AliasResult A = AA.alias(*I, pointerSize,
-                                            killPointer, ~0UL);
+                                            killPointer, killPointerSize);
 
     // If it must-alias and a store, we can delete it
     if (isa<StoreInst>(BBI) && A == AliasAnalysis::MustAlias) {
@@ -375,9 +524,9 @@ bool DSE::RemoveUndeadPointers(Value* killPointer,
         undead.push_back(*I);
   }
 
-  for (std::vector<Instruction*>::iterator I = undead.begin(), E = undead.end();
+  for (std::vector<Value*>::iterator I = undead.begin(), E = undead.end();
        I != E; ++I)
-    deadPointers.erase(*I);
+      deadPointers.erase(*I);
   
   return MadeChange;
 }