If a global constant is dead then global's debug info should not prevent the optimize...

[oota-llvm.git] / lib / Transforms / IPO / GlobalOpt.cpp
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp

index e5427766bfa959f806a9196e27ff5f60497e3efb..28cd902472dcf3f70ffdbee3d5eea038cc01fabe 100644 (file)
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -28,13 +28,14 @@
  #include "llvm/Support/Compiler.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/GetElementPtrTypeIterator.h"
  #include "llvm/Support/Compiler.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/ADT/StringExtras.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/Statistic.h"
  #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
  #include <algorithm>
  #include <algorithm>
-#include <map>
-#include <set>
  using namespace llvm;
  
  STATISTIC(NumMarked    , "Number of globals marked constant");
  using namespace llvm;
  
  STATISTIC(NumMarked    , "Number of globals marked constant");
@@ -49,6 +50,8 @@ STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
  STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
  STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
  STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
  STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
  STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
  STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
+STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
+STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
  
  namespace {
    struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass {
  
  namespace {
    struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass {
@@ -56,7 +59,7 @@ namespace {
        AU.addRequired<TargetData>();
      }
      static char ID; // Pass identification, replacement for typeid
        AU.addRequired<TargetData>();
      }
      static char ID; // Pass identification, replacement for typeid
-    GlobalOpt() : ModulePass((intptr_t)&ID) {}
+    GlobalOpt() : ModulePass(&ID) {}
  
      bool runOnModule(Module &M);
  
  
      bool runOnModule(Module &M);
  
@@ -64,16 +67,19 @@ namespace {
      GlobalVariable *FindGlobalCtors(Module &M);
      bool OptimizeFunctions(Module &M);
      bool OptimizeGlobalVars(Module &M);
      GlobalVariable *FindGlobalCtors(Module &M);
      bool OptimizeFunctions(Module &M);
      bool OptimizeGlobalVars(Module &M);
+    bool ResolveAliases(Module &M);
      bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
      bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
    };
      bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
      bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
    };
-
-  char GlobalOpt::ID = 0;
-  RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer");
  }
  
  }
  
+char GlobalOpt::ID = 0;
+static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer");
+
  ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
  
  ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
  
+namespace {
+
  /// GlobalStatus - As we analyze each global, keep track of some information
  /// about it.  If we find out that the address of the global is taken, none of
  /// this info will be accurate.
  /// GlobalStatus - As we analyze each global, keep track of some information
  /// about it.  If we find out that the address of the global is taken, none of
  /// this info will be accurate.
@@ -128,18 +134,26 @@ struct VISIBILITY_HIDDEN GlobalStatus {
                     HasNonInstructionUser(false), HasPHIUser(false) {}
  };
  
                     HasNonInstructionUser(false), HasPHIUser(false) {}
  };
  
-
+}
  
  /// ConstantIsDead - Return true if the specified constant is (transitively)
  
  /// ConstantIsDead - Return true if the specified constant is (transitively)
-/// dead.  The constant may be used by other constants (e.g. constant arrays and
-/// constant exprs) as long as they are dead, but it cannot be used by anything
-/// else.
-static bool ConstantIsDead(Constant *C) {
+/// dead.  The constant may be used by other constants (e.g. constant arrays,
+/// constant exprs, constant global variables) as long as they are dead, 
+/// but it cannot be used by anything else. If DeadGVs is not null then
+/// record dead constant GV users.
+static bool ConstantIsDead(Constant *C, 
+                           SmallPtrSet<GlobalVariable *, 4> *DeadGVs = false) {
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
+    if (GV->hasLocalLinkage() && GV->use_empty()) {
+      if (DeadGVs)
+        DeadGVs->insert(GV);
+      return true;
+    }
    if (isa<GlobalValue>(C)) return false;
  
    for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
      if (Constant *CU = dyn_cast<Constant>(*UI)) {
    if (isa<GlobalValue>(C)) return false;
  
    for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
      if (Constant *CU = dyn_cast<Constant>(*UI)) {
-      if (!ConstantIsDead(CU)) return false;
+      if (!ConstantIsDead(CU, DeadGVs)) return false;
      } else
        return false;
    return true;
      } else
        return false;
    return true;
@@ -151,7 +165,7 @@ static bool ConstantIsDead(Constant *C) {
  /// can't do anything with it.
  ///
  static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
  /// can't do anything with it.
  ///
  static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
-                          std::set<PHINode*> &PHIUsers) {
+                          SmallPtrSet<PHINode*, 16> &PHIUsers) {
    for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
        GS.HasNonInstructionUser = true;
    for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
        GS.HasNonInstructionUser = true;
@@ -209,7 +223,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
        } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
          // PHI nodes we can check just like select or GEP instructions, but we
          // have to be careful about infinite recursion.
        } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
          // PHI nodes we can check just like select or GEP instructions, but we
          // have to be careful about infinite recursion.
-        if (PHIUsers.insert(PN).second)  // Not already visited.
+        if (PHIUsers.insert(PN))  // Not already visited.
            if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
          GS.HasPHIUser = true;
        } else if (isa<CmpInst>(I)) {
            if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
          GS.HasPHIUser = true;
        } else if (isa<CmpInst>(I)) {
@@ -332,7 +346,13 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
      } else if (Constant *C = dyn_cast<Constant>(U)) {
        // If we have a chain of dead constantexprs or other things dangling from
        // us, and if they are all dead, nuke them without remorse.
      } else if (Constant *C = dyn_cast<Constant>(U)) {
        // If we have a chain of dead constantexprs or other things dangling from
        // us, and if they are all dead, nuke them without remorse.
-      if (ConstantIsDead(C)) {
+      SmallPtrSet<GlobalVariable *, 4> DeadGVs;
+      if (ConstantIsDead(C, &DeadGVs)) {
+        for (SmallPtrSet<GlobalVariable *, 4>::iterator TI = DeadGVs.begin(),
+               TE = DeadGVs.end(); TI != TE; ) {
+          GlobalVariable *TGV = *TI; ++TI;
+          TGV->eraseFromParent();
+        }
          C->destroyConstant();
          // This could have invalidated UI, start over from scratch.
          CleanupConstantGlobalUsers(V, Init);
          C->destroyConstant();
          // This could have invalidated UI, start over from scratch.
          CleanupConstantGlobalUsers(V, Init);
@@ -455,20 +475,26 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
  /// behavior of the program in a more fine-grained way.  We have determined that
  /// this transformation is safe already.  We return the first global variable we
  /// insert so that the caller can reprocess it.
  /// behavior of the program in a more fine-grained way.  We have determined that
  /// this transformation is safe already.  We return the first global variable we
  /// insert so that the caller can reprocess it.
-static GlobalVariable *SRAGlobal(GlobalVariable *GV) {
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
    // Make sure this global only has simple uses that we can SRA.
    if (!GlobalUsersSafeToSRA(GV))
      return 0;
    
    // Make sure this global only has simple uses that we can SRA.
    if (!GlobalUsersSafeToSRA(GV))
      return 0;
    
-  assert(GV->hasInternalLinkage() && !GV->isConstant());
+  assert(GV->hasLocalLinkage() && !GV->isConstant());
    Constant *Init = GV->getInitializer();
    const Type *Ty = Init->getType();
  
    std::vector<GlobalVariable*> NewGlobals;
    Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
  
    Constant *Init = GV->getInitializer();
    const Type *Ty = Init->getType();
  
    std::vector<GlobalVariable*> NewGlobals;
    Module::GlobalListType &Globals = GV->getParent()->getGlobalList();
  
+  // Get the alignment of the global, either explicit or target-specific.
+  unsigned StartAlignment = GV->getAlignment();
+  if (StartAlignment == 0)
+    StartAlignment = TD.getABITypeAlignment(GV->getType());
+   
    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
      NewGlobals.reserve(STy->getNumElements());
    if (const StructType *STy = dyn_cast<StructType>(Ty)) {
      NewGlobals.reserve(STy->getNumElements());
+    const StructLayout &Layout = *TD.getStructLayout(STy);
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Constant *In = getAggregateConstantElement(Init,
                                              ConstantInt::get(Type::Int32Ty, i));
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Constant *In = getAggregateConstantElement(Init,
                                              ConstantInt::get(Type::Int32Ty, i));
@@ -477,22 +503,32 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV) {
                                                 GlobalVariable::InternalLinkage,
                                                 In, GV->getName()+"."+utostr(i),
                                                 (Module *)NULL,
                                                 GlobalVariable::InternalLinkage,
                                                 In, GV->getName()+"."+utostr(i),
                                                 (Module *)NULL,
-                                               GV->isThreadLocal());
+                                               GV->isThreadLocal(),
+                                               GV->getType()->getAddressSpace());
        Globals.insert(GV, NGV);
        NewGlobals.push_back(NGV);
        Globals.insert(GV, NGV);
        NewGlobals.push_back(NGV);
+      
+      // Calculate the known alignment of the field.  If the original aggregate
+      // had 256 byte alignment for example, something might depend on that:
+      // propagate info to each field.
+      uint64_t FieldOffset = Layout.getElementOffset(i);
+      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
+      if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i)))
+        NGV->setAlignment(NewAlign);
      }
    } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
      unsigned NumElements = 0;
      if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
        NumElements = ATy->getNumElements();
      }
    } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
      unsigned NumElements = 0;
      if (const ArrayType *ATy = dyn_cast<ArrayType>(STy))
        NumElements = ATy->getNumElements();
-    else if (const VectorType *PTy = dyn_cast<VectorType>(STy))
-      NumElements = PTy->getNumElements();
      else
      else
-      assert(0 && "Unknown aggregate sequential type!");
+      NumElements = cast<VectorType>(STy)->getNumElements();
  
      if (NumElements > 16 && GV->hasNUsesOrMore(16))
        return 0; // It's not worth it.
      NewGlobals.reserve(NumElements);
  
      if (NumElements > 16 && GV->hasNUsesOrMore(16))
        return 0; // It's not worth it.
      NewGlobals.reserve(NumElements);
+    
+    uint64_t EltSize = TD.getTypePaddedSize(STy->getElementType());
+    unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
      for (unsigned i = 0, e = NumElements; i != e; ++i) {
        Constant *In = getAggregateConstantElement(Init,
                                              ConstantInt::get(Type::Int32Ty, i));
      for (unsigned i = 0, e = NumElements; i != e; ++i) {
        Constant *In = getAggregateConstantElement(Init,
                                              ConstantInt::get(Type::Int32Ty, i));
@@ -502,9 +538,17 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV) {
                                                 GlobalVariable::InternalLinkage,
                                                 In, GV->getName()+"."+utostr(i),
                                                 (Module *)NULL,
                                                 GlobalVariable::InternalLinkage,
                                                 In, GV->getName()+"."+utostr(i),
                                                 (Module *)NULL,
-                                               GV->isThreadLocal());
+                                               GV->isThreadLocal(),
+                                               GV->getType()->getAddressSpace());
        Globals.insert(GV, NGV);
        NewGlobals.push_back(NGV);
        Globals.insert(GV, NGV);
        NewGlobals.push_back(NGV);
+      
+      // Calculate the known alignment of the field.  If the original aggregate
+      // had 256 byte alignment for example, something might depend on that:
+      // propagate info to each field.
+      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
+      if (NewAlign > EltAlign)
+        NGV->setAlignment(NewAlign);
      }
    }
  
      }
    }
  
@@ -546,8 +590,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV) {
          Idxs.push_back(NullInt);
          for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
            Idxs.push_back(GEPI->getOperand(i));
          Idxs.push_back(NullInt);
          for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
            Idxs.push_back(GEPI->getOperand(i));
-        NewPtr = new GetElementPtrInst(NewPtr, Idxs.begin(), Idxs.end(),
-                                       GEPI->getName()+"."+utostr(Val), GEPI);
+        NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(),
+                                           GEPI->getName()+"."+utostr(Val), GEPI);
        }
      }
      GEP->replaceAllUsesWith(NewPtr);
        }
      }
      GEP->replaceAllUsesWith(NewPtr);
@@ -679,8 +723,9 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
        // Should handle GEP here.
        SmallVector<Constant*, 8> Idxs;
        Idxs.reserve(GEPI->getNumOperands()-1);
        // Should handle GEP here.
        SmallVector<Constant*, 8> Idxs;
        Idxs.reserve(GEPI->getNumOperands()-1);
-      for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
-        if (Constant *C = dyn_cast<Constant>(GEPI->getOperand(i)))
+      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
+           i != e; ++i)
+        if (Constant *C = dyn_cast<Constant>(*i))
            Idxs.push_back(C);
          else
            break;
            Idxs.push_back(C);
          else
            break;
@@ -704,44 +749,46 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  /// if the loaded value is dynamically null, then we know that they cannot be
  /// reachable with a null optimize away the load.
  static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
  /// if the loaded value is dynamically null, then we know that they cannot be
  /// reachable with a null optimize away the load.
  static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
-  std::vector<LoadInst*> Loads;
    bool Changed = false;
  
    bool Changed = false;
  
+  // Keep track of whether we are able to remove all the uses of the global
+  // other than the store that defines it.
+  bool AllNonStoreUsesGone = true;
+  
    // Replace all uses of loads with uses of uses of the stored value.
    // Replace all uses of loads with uses of uses of the stored value.
-  for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end();
-       GUI != E; ++GUI)
-    if (LoadInst *LI = dyn_cast<LoadInst>(*GUI)) {
-      Loads.push_back(LI);
+  for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
+    User *GlobalUser = *GUI++;
+    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
        Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
        Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
+      // If we were able to delete all uses of the loads
+      if (LI->use_empty()) {
+        LI->eraseFromParent();
+        Changed = true;
+      } else {
+        AllNonStoreUsesGone = false;
+      }
+    } else if (isa<StoreInst>(GlobalUser)) {
+      // Ignore the store that stores "LV" to the global.
+      assert(GlobalUser->getOperand(1) == GV &&
+             "Must be storing *to* the global");
      } else {
      } else {
-      // If we get here we could have stores, selects, or phi nodes whose values
-      // are loaded.
-      assert((isa<StoreInst>(*GUI) || isa<PHINode>(*GUI) ||
-              isa<SelectInst>(*GUI) || isa<ConstantExpr>(*GUI)) &&
-             "Only expect load and stores!");
+      AllNonStoreUsesGone = false;
+
+      // If we get here we could have other crazy uses that are transitively
+      // loaded.
+      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
+              isa<ConstantExpr>(GlobalUser)) && "Only expect load and stores!");
      }
      }
+  }
  
    if (Changed) {
      DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV;
      ++NumGlobUses;
    }
  
  
    if (Changed) {
      DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV;
      ++NumGlobUses;
    }
  
-  // Delete all of the loads we can, keeping track of whether we nuked them all!
-  bool AllLoadsGone = true;
-  while (!Loads.empty()) {
-    LoadInst *L = Loads.back();
-    if (L->use_empty()) {
-      L->eraseFromParent();
-      Changed = true;
-    } else {
-      AllLoadsGone = false;
-    }
-    Loads.pop_back();
-  }
-
    // If we nuked all of the loads, then none of the stores are needed either,
    // nor is the global.
    // If we nuked all of the loads, then none of the stores are needed either,
    // nor is the global.
-  if (AllLoadsGone) {
+  if (AllNonStoreUsesGone) {
      DOUT << "  *** GLOBAL NOW DEAD!\n";
      CleanupConstantGlobalUsers(GV, 0);
      if (GV->use_empty()) {
      DOUT << "  *** GLOBAL NOW DEAD!\n";
      CleanupConstantGlobalUsers(GV, 0);
      if (GV->use_empty()) {
@@ -789,8 +836,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                       MI->getAlignment(), MI->getName(), MI);
      Value* Indices[2];
      Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty);
                       MI->getAlignment(), MI->getName(), MI);
      Value* Indices[2];
      Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty);
-    Value *NewGEP = new GetElementPtrInst(NewMI, Indices, Indices + 2,
-                                          NewMI->getName()+".el0", MI);
+    Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
+                                              NewMI->getName()+".el0", MI);
      MI->replaceAllUsesWith(NewGEP);
      MI->eraseFromParent();
      MI = NewMI;
      MI->replaceAllUsesWith(NewGEP);
      MI->eraseFromParent();
      MI = NewMI;
@@ -804,6 +851,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                               GV->getName()+".body",
                                               (Module *)NULL,
                                               GV->isThreadLocal());
                                               GV->getName()+".body",
                                               (Module *)NULL,
                                               GV->isThreadLocal());
+  // FIXME: This new global should have the alignment returned by malloc.  Code
+  // could depend on malloc returning large alignment (on the mac, 16 bytes) but
+  // this would only guarantee some lower alignment.
    GV->getParent()->getGlobalList().insert(GV, NewGV);
  
    // Anything that used the malloc now uses the global directly.
    GV->getParent()->getGlobalList().insert(GV, NewGV);
  
    // Anything that used the malloc now uses the global directly.
@@ -844,7 +894,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
            case ICmpInst::ICMP_ULE:
            case ICmpInst::ICMP_SLE:
            case ICmpInst::ICMP_EQ:
            case ICmpInst::ICMP_ULE:
            case ICmpInst::ICMP_SLE:
            case ICmpInst::ICMP_EQ:
-            LV = BinaryOperator::createNot(LV, "notinit", CI);
+            LV = BinaryOperator::CreateNot(LV, "notinit", CI);
              break;
            case ICmpInst::ICMP_NE:
            case ICmpInst::ICMP_UGE:
              break;
            case ICmpInst::ICMP_NE:
            case ICmpInst::ICMP_UGE:
@@ -895,26 +945,43 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
  static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,
                                                        GlobalVariable *GV,
                                                SmallPtrSet<PHINode*, 8> &PHIs) {
  static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Instruction *V,
                                                        GlobalVariable *GV,
                                                SmallPtrSet<PHINode*, 8> &PHIs) {
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
-    if (isa<LoadInst>(*UI) || isa<CmpInst>(*UI)) {
-      // Fine, ignore.
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+    Instruction *Inst = dyn_cast<Instruction>(*UI);
+    if (Inst == 0) return false;
+    
+    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
+      continue; // Fine, ignore.
+    }
+    
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
          return false;  // Storing the pointer itself... bad.
        if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
          return false;  // Storing the pointer itself... bad.
-      // Otherwise, storing through it, or storing into GV... fine.
-    } else if (isa<GetElementPtrInst>(*UI)) {
-      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(cast<Instruction>(*UI),
-                                                     GV, PHIs))
+      continue; // Otherwise, storing through it, or storing into GV... fine.
+    }
+    
+    if (isa<GetElementPtrInst>(Inst)) {
+      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
          return false;
          return false;
-    } else if (PHINode *PN = dyn_cast<PHINode>(*UI)) {
+      continue;
+    }
+    
+    if (PHINode *PN = dyn_cast<PHINode>(Inst)) {
        // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
        // cycles.
        if (PHIs.insert(PN))
          if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
            return false;
        // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
        // cycles.
        if (PHIs.insert(PN))
          if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
            return false;
-    } else {
-      return false;
+      continue;
      }
      }
+    
+    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
+      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
+        return false;
+      continue;
+    }
+    
+    return false;
+  }
    return true;
  }
  
    return true;
  }
  
@@ -937,99 +1004,177 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
      } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
        // Insert the load in the corresponding predecessor, not right before the
        // PHI.
      } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
        // Insert the load in the corresponding predecessor, not right before the
        // PHI.
-      unsigned PredNo = Alloc->use_begin().getOperandNo()/2;
-      InsertPt = PN->getIncomingBlock(PredNo)->getTerminator();
+      InsertPt = PN->getIncomingBlock(Alloc->use_begin())->getTerminator();
+    } else if (isa<BitCastInst>(U)) {
+      // Must be bitcast between the malloc and store to initialize the global.
+      ReplaceUsesOfMallocWithGlobal(U, GV);
+      U->eraseFromParent();
+      continue;
+    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+      // If this is a "GEP bitcast" and the user is a store to the global, then
+      // just process it as a bitcast.
+      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
+        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->use_back()))
+          if (SI->getOperand(1) == GV) {
+            // Must be bitcast GEP between the malloc and store to initialize
+            // the global.
+            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
+            GEPI->eraseFromParent();
+            continue;
+          }
      }
      }
-    
+      
      // Insert a load from the global, and use it instead of the malloc.
      Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
      U->replaceUsesOfWith(Alloc, NL);
    }
  }
  
      // Insert a load from the global, and use it instead of the malloc.
      Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
      U->replaceUsesOfWith(Alloc, NL);
    }
  }
  
-/// GlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
+/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
+/// of a load) are simple enough to perform heap SRA on.  This permits GEP's
+/// that index through the array and struct field, icmps of null, and PHIs.
+/// Every PHI user encountered is recorded in LoadUsingPHIs, and its own uses
+/// are checked recursively the first time it is seen.  Returns false as soon
+/// as any use that cannot be handled is found.
+static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
+                                     SmallPtrSet<PHINode*, 32> &LoadUsingPHIs) {
+  // We permit three kinds of users: setcc comparing against the null pointer,
+  // a getelementptr of a specific form, and PHI nodes whose uses are
+  // themselves permitted.
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+    Instruction *User = cast<Instruction>(*UI);
+    
+    // Comparison against null is ok.
+    if (ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
+      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+        return false;
+      continue;
+    }
+    
+    // getelementptr is also ok, but only a simple form.
+    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+      // Must index into the array and into the struct.
+      if (GEPI->getNumOperands() < 3)
+        return false;
+      
+      // Otherwise the GEP is ok.
+      continue;
+    }
+    
+    if (PHINode *PN = dyn_cast<PHINode>(User)) {
+      // insert() returns false when PN was already in the set.  In that case
+      // we have already recursively analyzed this PHI (or are in the middle of
+      // doing so), so it is safe to skip; this also stops infinite recursion
+      // through PHI cycles.
+      if (!LoadUsingPHIs.insert(PN))
+        continue;
+      
+      // First time we see this PHI: make sure all of its uses are simple
+      // enough to transform.
+      if (!LoadUsesSimpleEnoughForHeapSRA(PN, LoadUsingPHIs))
+        return false;
+      
+      continue;
+    }
+    
+    // Otherwise we don't know what this is, not ok.
+    return false;
+  }
+  
+  return true;
+}
+
+
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
  /// GV are simple enough to perform HeapSRA, return true.
  /// GV are simple enough to perform HeapSRA, return true.
-static bool GlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
-                                                 MallocInst *MI) {
+static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
+                                                    MallocInst *MI) {
+  SmallPtrSet<PHINode*, 32> LoadUsingPHIs;
    for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; 
         ++UI)
    for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E; 
         ++UI)
-    if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
-      // We permit two users of the load: setcc comparing against the null
-      // pointer, and a getelementptr of a specific form.
-      for (Value::use_iterator UI = LI->use_begin(), E = LI->use_end(); UI != E; 
-           ++UI) {
-        // Comparison against null is ok.
-        if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
-          if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
-            return false;
-          continue;
-        }
-        
-        // getelementptr is also ok, but only a simple form.
-        if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) {
-          // Must index into the array and into the struct.
-          if (GEPI->getNumOperands() < 3)
-            return false;
-          
-          // Otherwise the GEP is ok.
+    if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
+      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs))
+        return false;
+  
+  // If we reach here, we know that all uses of the loads and transitive uses
+  // (through PHI nodes) are simple enough to transform.  However, we don't know
+  // that all inputs to the PHI nodes are in the same equivalence sets.
+  // Check to verify that all operands of the PHIs are either PHIs that can be
+  // transformed, loads from GV, or MI itself.
+  for (SmallPtrSet<PHINode*, 32>::iterator I = LoadUsingPHIs.begin(),
+       E = LoadUsingPHIs.end(); I != E; ++I) {
+    PHINode *PN = *I;
+    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
+      Value *InVal = PN->getIncomingValue(op);
+      
+      // PHI of the stored value itself is ok.
+      if (InVal == MI) continue;
+      
+      if (PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+        // One of the PHIs in our set is (optimistically) ok.
+        if (LoadUsingPHIs.count(InPN))
            continue;
            continue;
-        }
-        
-        if (PHINode *PN = dyn_cast<PHINode>(*UI)) {
-          // We have a phi of a load from the global.  We can only handle this
-          // if the other PHI'd values are actually the same.  In this case,
-          // the rewriter will just drop the phi entirely.
-          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-            Value *IV = PN->getIncomingValue(i);
-            if (IV == LI) continue;  // Trivial the same.
-            
-            // If the phi'd value is from the malloc that initializes the value,
-            // we can xform it.
-            if (IV == MI) continue;
-            
-            // Otherwise, we don't know what it is.
-            return false;
-          }
-          return true;
-        }
-        
-        // Otherwise we don't know what this is, not ok.
          return false;
        }
          return false;
        }
+      
+      // Load from GV is ok.
+      if (LoadInst *LI = dyn_cast<LoadInst>(InVal))
+        if (LI->getOperand(0) == GV)
+          continue;
+      
+      // UNDEF? NULL?
+      
+      // Anything else is rejected.
+      return false;
      }
      }
+  }
+  
    return true;
  }
  
    return true;
  }
  
-/// GetHeapSROALoad - Return the load for the specified field of the HeapSROA'd
-/// value, lazily creating it on demand.
-static Value *GetHeapSROALoad(Instruction *Load, unsigned FieldNo,
-                              const std::vector<GlobalVariable*> &FieldGlobals,
-                              std::vector<Value *> &InsertedLoadsForPtr) {
-  if (InsertedLoadsForPtr.size() <= FieldNo)
-    InsertedLoadsForPtr.resize(FieldNo+1);
-  if (InsertedLoadsForPtr[FieldNo] == 0)
-    InsertedLoadsForPtr[FieldNo] = new LoadInst(FieldGlobals[FieldNo],
-                                                Load->getName()+".f" + 
-                                                utostr(FieldNo), Load);
-  return InsertedLoadsForPtr[FieldNo];
+static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
+               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
+  
+  if (FieldNo >= FieldVals.size())
+    FieldVals.resize(FieldNo+1);
+  
+  // If we already have this value, just reuse the previously scalarized
+  // version.
+  if (Value *FieldVal = FieldVals[FieldNo])
+    return FieldVal;
+  
+  // Depending on what instruction this is, we have several cases.
+  Value *Result;
+  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
+    // This is a scalarized version of the load from the global.  Just create
+    // a new Load of the scalarized global.
+    Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
+                                           InsertedScalarizedValues,
+                                           PHIsToRewrite),
+                          LI->getName()+".f" + utostr(FieldNo), LI);
+  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
+    // field.
+    const StructType *ST = 
+      cast<StructType>(cast<PointerType>(PN->getType())->getElementType());
+    
+    Result =PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+                            PN->getName()+".f"+utostr(FieldNo), PN);
+    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
+  } else {
+    assert(0 && "Unknown usable value");
+    Result = 0;
+  }
+  
+  return FieldVals[FieldNo] = Result;
  }
  
  /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
  /// the load, rewrite the derived value to use the HeapSRoA'd load.
  }
  
  /// RewriteHeapSROALoadUser - Given a load instruction and a value derived from
  /// the load, rewrite the derived value to use the HeapSRoA'd load.
-static void RewriteHeapSROALoadUser(LoadInst *Load, Instruction *LoadUser, 
-                               const std::vector<GlobalVariable*> &FieldGlobals,
-                                    std::vector<Value *> &InsertedLoadsForPtr) {
+static void RewriteHeapSROALoadUser(Instruction *LoadUser, 
+             DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
    // If this is a comparison against null, handle it.
    if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
      assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
      // If we have a setcc of the loaded pointer, we can use a setcc of any
      // field.
    // If this is a comparison against null, handle it.
    if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
      assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
      // If we have a setcc of the loaded pointer, we can use a setcc of any
      // field.
-    Value *NPtr;
-    if (InsertedLoadsForPtr.empty()) {
-      NPtr = GetHeapSROALoad(Load, 0, FieldGlobals, InsertedLoadsForPtr);
-    } else {
-      NPtr = InsertedLoadsForPtr.back();
-    }
+    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
+                                   InsertedScalarizedValues, PHIsToRewrite);
      
      Value *New = new ICmpInst(SCI->getPredicate(), NPtr,
                                Constant::getNullValue(NPtr->getType()),
      
      Value *New = new ICmpInst(SCI->getPredicate(), NPtr,
                                Constant::getNullValue(NPtr->getType()),
@@ -1039,75 +1184,67 @@ static void RewriteHeapSROALoadUser(LoadInst *Load, Instruction *LoadUser,
      return;
    }
    
      return;
    }
    
-  // Handle 'getelementptr Ptr, Idx, uint FieldNo ...'
+  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
      assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
             && "Unexpected GEPI!");
    
      // Load the pointer for this field.
      unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
      assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
             && "Unexpected GEPI!");
    
      // Load the pointer for this field.
      unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
-    Value *NewPtr = GetHeapSROALoad(Load, FieldNo,
-                                    FieldGlobals, InsertedLoadsForPtr);
+    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
+                                     InsertedScalarizedValues, PHIsToRewrite);
      
      // Create the new GEP idx vector.
      SmallVector<Value*, 8> GEPIdx;
      GEPIdx.push_back(GEPI->getOperand(1));
      GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
      
      
      // Create the new GEP idx vector.
      SmallVector<Value*, 8> GEPIdx;
      GEPIdx.push_back(GEPI->getOperand(1));
      GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());
      
-    Value *NGEPI = new GetElementPtrInst(NewPtr, GEPIdx.begin(), GEPIdx.end(),
-                                         GEPI->getName(), GEPI);
+    Value *NGEPI = GetElementPtrInst::Create(NewPtr,
+                                             GEPIdx.begin(), GEPIdx.end(),
+                                             GEPI->getName(), GEPI);
      GEPI->replaceAllUsesWith(NGEPI);
      GEPI->eraseFromParent();
      return;
    }
      GEPI->replaceAllUsesWith(NGEPI);
      GEPI->eraseFromParent();
      return;
    }
-  
-  // Handle PHI nodes.  PHI nodes must be merging in the same values, plus
-  // potentially the original malloc.  Insert phi nodes for each field, then
-  // process uses of the PHI.
+
+  // Recursively transform the users of PHI nodes.  This will lazily create the
+  // PHIs that are needed for individual elements.  Keep track of what PHIs we
+  // see in InsertedScalarizedValues so that we don't get infinite loops (very
+  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
+  // already been seen first by another load, so its uses have already been
+  // processed.
    PHINode *PN = cast<PHINode>(LoadUser);
    PHINode *PN = cast<PHINode>(LoadUser);
-  std::vector<Value *> PHIsForField;
-  PHIsForField.resize(FieldGlobals.size());
-  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
-    Value *LoadV = GetHeapSROALoad(Load, i, FieldGlobals, InsertedLoadsForPtr);
-
-    PHINode *FieldPN = new PHINode(LoadV->getType(),
-                                   PN->getName()+"."+utostr(i), PN);
-    // Fill in the predecessor values.
-    for (unsigned pred = 0, e = PN->getNumIncomingValues(); pred != e; ++pred) {
-      // Each predecessor either uses the load or the original malloc.
-      Value *InVal = PN->getIncomingValue(pred);
-      BasicBlock *BB = PN->getIncomingBlock(pred);
-      Value *NewVal;
-      if (isa<MallocInst>(InVal)) {
-        // Insert a reload from the global in the predecessor.
-        NewVal = GetHeapSROALoad(BB->getTerminator(), i, FieldGlobals,
-                                 PHIsForField);
-      } else {
-        NewVal = InsertedLoadsForPtr[i];
-      }
-      FieldPN->addIncoming(NewVal, BB);
-    }
-    PHIsForField[i] = FieldPN;
-  }
+  bool Inserted;
+  DenseMap<Value*, std::vector<Value*> >::iterator InsertPos;
+  tie(InsertPos, Inserted) =
+    InsertedScalarizedValues.insert(std::make_pair(PN, std::vector<Value*>()));
+  if (!Inserted) return;
    
    
-  // Since PHIsForField specifies a phi for every input value, the lazy inserter
-  // will never insert a load.
-  while (!PN->use_empty())
-    RewriteHeapSROALoadUser(Load, PN->use_back(), FieldGlobals, PHIsForField);
-  PN->eraseFromParent();
+  // If this is the first time we've seen this PHI, recursively process all
+  // users.
+  for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
+    Instruction *User = cast<Instruction>(*UI++);
+    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+  }
  }
  
  /// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
  /// is a value loaded from the global.  Eliminate all uses of Ptr, making them
  /// use FieldGlobals instead.  All uses of loaded values satisfy
  }
  
  /// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global.  Ptr
  /// is a value loaded from the global.  Eliminate all uses of Ptr, making them
  /// use FieldGlobals instead.  All uses of loaded values satisfy
-/// GlobalLoadUsesSimpleEnoughForHeapSRA.
+/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
  static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 
  static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, 
-                             const std::vector<GlobalVariable*> &FieldGlobals) {
-  std::vector<Value *> InsertedLoadsForPtr;
-  //InsertedLoadsForPtr.resize(FieldGlobals.size());
-  while (!Load->use_empty())
-    RewriteHeapSROALoadUser(Load, Load->use_back(), 
-                            FieldGlobals, InsertedLoadsForPtr);
+               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
+                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
+  for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
+       UI != E; ) {
+    Instruction *User = cast<Instruction>(*UI++);
+    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
+  }
+  
+  if (Load->use_empty()) {
+    Load->eraseFromParent();
+    InsertedScalarizedValues.erase(Load);
+  }
  }
  
  /// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break
  }
  
  /// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break
@@ -1124,7 +1261,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
    
    // Okay, at this point, there are no users of the malloc.  Insert N
    // new mallocs at the same place as MI, and N globals.
    
    // Okay, at this point, there are no users of the malloc.  Insert N
    // new mallocs at the same place as MI, and N globals.
-  std::vector<GlobalVariable*> FieldGlobals;
+  std::vector<Value*> FieldGlobals;
    std::vector<MallocInst*> FieldMallocs;
    
    for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
    std::vector<MallocInst*> FieldMallocs;
    
    for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
@@ -1164,7 +1301,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
      if (!RunningOr)
        RunningOr = Cond;   // First seteq
      else
      if (!RunningOr)
        RunningOr = Cond;   // First seteq
      else
-      RunningOr = BinaryOperator::createOr(RunningOr, Cond, "tmp", MI);
+      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", MI);
    }
  
    // Split the basic block at the old malloc.
    }
  
    // Split the basic block at the old malloc.
@@ -1173,13 +1310,13 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
    
    // Create the block to check the first condition.  Put all these blocks at the
    // end of the function as they are unlikely to be executed.
    
    // Create the block to check the first condition.  Put all these blocks at the
    // end of the function as they are unlikely to be executed.
-  BasicBlock *NullPtrBlock = new BasicBlock("malloc_ret_null",
-                                            OrigBB->getParent());
+  BasicBlock *NullPtrBlock = BasicBlock::Create("malloc_ret_null",
+                                                OrigBB->getParent());
    
    // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
    // branch on RunningOr.
    OrigBB->getTerminator()->eraseFromParent();
    
    // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
    // branch on RunningOr.
    OrigBB->getTerminator()->eraseFromParent();
-  new BranchInst(NullPtrBlock, ContBB, RunningOr, OrigBB);
+  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);
    
    // Within the NullPtrBlock, we need to emit a comparison and branch for each
    // pointer, because some may be null while others are not.
    
    // Within the NullPtrBlock, we need to emit a comparison and branch for each
    // pointer, because some may be null while others are not.
@@ -1188,78 +1325,195 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){
      Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal, 
                                Constant::getNullValue(GVVal->getType()),
                                "tmp", NullPtrBlock);
      Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal, 
                                Constant::getNullValue(GVVal->getType()),
                                "tmp", NullPtrBlock);
-    BasicBlock *FreeBlock = new BasicBlock("free_it", OrigBB->getParent());
-    BasicBlock *NextBlock = new BasicBlock("next", OrigBB->getParent());
-    new BranchInst(FreeBlock, NextBlock, Cmp, NullPtrBlock);
+    BasicBlock *FreeBlock = BasicBlock::Create("free_it", OrigBB->getParent());
+    BasicBlock *NextBlock = BasicBlock::Create("next", OrigBB->getParent());
+    BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);
  
      // Fill in FreeBlock.
      new FreeInst(GVVal, FreeBlock);
      new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
                    FreeBlock);
  
      // Fill in FreeBlock.
      new FreeInst(GVVal, FreeBlock);
      new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
                    FreeBlock);
-    new BranchInst(NextBlock, FreeBlock);
+    BranchInst::Create(NextBlock, FreeBlock);
      
      NullPtrBlock = NextBlock;
    }
    
      
      NullPtrBlock = NextBlock;
    }
    
-  new BranchInst(ContBB, NullPtrBlock);
-  
+  BranchInst::Create(ContBB, NullPtrBlock);
    
    // MI is no longer needed, remove it.
    MI->eraseFromParent();
  
    
    // MI is no longer needed, remove it.
    MI->eraseFromParent();
  
+  /// InsertedScalarizedValues - As we process loads, if we can't immediately
+  /// update all uses of the load, keep track of what scalarized loads are
+  /// inserted for a given load.
+  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
+  InsertedScalarizedValues[GV] = FieldGlobals;
+  
+  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;
    
    // Okay, the malloc site is completely handled.  All of the uses of GV are now
    // loads, and all uses of those loads are simple.  Rewrite them to use loads
    // of the per-field globals instead.
    
    // Okay, the malloc site is completely handled.  All of the uses of GV are now
    // loads, and all uses of those loads are simple.  Rewrite them to use loads
    // of the per-field globals instead.
-  while (!GV->use_empty()) {
-    if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
-      RewriteUsesOfLoadForHeapSRoA(LI, FieldGlobals);
-      LI->eraseFromParent();
-    } else {
-      // Must be a store of null.
-      StoreInst *SI = cast<StoreInst>(GV->use_back());
-      assert(isa<Constant>(SI->getOperand(0)) &&
-             cast<Constant>(SI->getOperand(0))->isNullValue() &&
-             "Unexpected heap-sra user!");
-      
-      // Insert a store of null into each global.
-      for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
-        Constant *Null = 
-          Constant::getNullValue(FieldGlobals[i]->getType()->getElementType());
-        new StoreInst(Null, FieldGlobals[i], SI);
-      }
-      // Erase the original store.
-      SI->eraseFromParent();
+  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) {
+    Instruction *User = cast<Instruction>(*UI++);
+    
+    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
+      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
+      continue;
      }
      }
+    
+    // Must be a store of null.
+    StoreInst *SI = cast<StoreInst>(User);
+    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
+           "Unexpected heap-sra user!");
+    
+    // Insert a store of null into each global.
+    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
+      const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType());
+      Constant *Null = Constant::getNullValue(PT->getElementType());
+      new StoreInst(Null, FieldGlobals[i], SI);
+    }
+    // Erase the original store.
+    SI->eraseFromParent();
    }
  
    }
  
+  // While we have PHIs that are interesting to rewrite, do it.
+  while (!PHIsToRewrite.empty()) {
+    PHINode *PN = PHIsToRewrite.back().first;
+    unsigned FieldNo = PHIsToRewrite.back().second;
+    PHIsToRewrite.pop_back();
+    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
+    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");
+
+    // Add all the incoming values.  This can materialize more phis.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      Value *InVal = PN->getIncomingValue(i);
+      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
+                               PHIsToRewrite);
+      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
+    }
+  }
+  
+  // Drop all inter-phi links and any loads that made it this far.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->dropAllReferences();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->dropAllReferences();
+  }
+  
+  // Delete all the phis and loads now that inter-references are dead.
+  for (DenseMap<Value*, std::vector<Value*> >::iterator
+       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
+       I != E; ++I) {
+    if (PHINode *PN = dyn_cast<PHINode>(I->first))
+      PN->eraseFromParent();
+    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
+      LI->eraseFromParent();
+  }
+  
    // The old global is now dead, remove it.
    GV->eraseFromParent();
  
    ++NumHeapSRA;
    // The old global is now dead, remove it.
    GV->eraseFromParent();
  
    ++NumHeapSRA;
-  return FieldGlobals[0];
+  return cast<GlobalVariable>(FieldGlobals[0]);
  }
  
  }
  
+/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a
+/// pointer global variable with a single value stored to it that is a malloc
+/// or cast of malloc.
+static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
+                                               MallocInst *MI,
+                                               Module::global_iterator &GVI,
+                                               TargetData &TD) {
+  // If this is a malloc of an abstract type, don't touch it.
+  if (!MI->getAllocatedType()->isSized())
+    return false;
+  
+  // We can't optimize this global unless all uses of it are *known* to be
+  // of the malloc value, not of the null initializer value (consider a use
+  // that compares the global's value against zero to see if the malloc has
+  // been reached).  To do this, we check to see if all uses of the global
+  // would trap if the global were null: this proves that they must all
+  // happen after the malloc.
+  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+    return false;
+  
+  // We can't optimize this if the malloc itself is used in a complex way,
+  // for example, being stored into multiple globals.  This allows the
+  // malloc to be stored into the specified global, loaded setcc'd, and
+  // GEP'd.  These are all things we could transform to using the global
+  // for.
+  {
+    SmallPtrSet<PHINode*, 8> PHIs;
+    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs))
+      return false;
+  }
+  
+  
+  // If we have a global that is only initialized with a fixed size malloc,
+  // transform the program to use global memory instead of malloc'd memory.
+  // This eliminates dynamic allocation, avoids an indirection accessing the
+  // data, and exposes the resultant global to further GlobalOpt.
+  if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) {
+    // Restrict this transformation to only working on small allocations
+    // (2048 bytes currently), as we don't want to introduce a 16M global or
+    // something.
+    if (NElements->getZExtValue()*
+        TD.getTypePaddedSize(MI->getAllocatedType()) < 2048) {
+      GVI = OptimizeGlobalAddressOfMalloc(GV, MI);
+      return true;
+    }
+  }
+  
+  // If the allocation is an array of structures, consider transforming this
+  // into multiple malloc'd arrays, one for each field.  This is basically
+  // SRoA for malloc'd memory.
+  const Type *AllocTy = MI->getAllocatedType();
+  
+  // If this is an allocation of a fixed size array of structs, analyze as a
+  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
+  if (!MI->isArrayAllocation())
+    if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
+      AllocTy = AT->getElementType();
+  
+  if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) {
+    // If the structure has an unreasonable number of fields, leave it
+    // alone.
+    if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
+        AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) {
+      
+      // If this is a fixed size array, transform the Malloc to be an alloc of
+      // structs.  malloc [100 x struct],1 -> malloc struct, 100
+      if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) {
+        MallocInst *NewMI = 
+          new MallocInst(AllocSTy, 
+                         ConstantInt::get(Type::Int32Ty, AT->getNumElements()),
+                         "", MI);
+        NewMI->takeName(MI);
+        Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI);
+        MI->replaceAllUsesWith(Cast);
+        MI->eraseFromParent();
+        MI = NewMI;
+      }
+      
+      GVI = PerformHeapAllocSRoA(GV, MI);
+      return true;
+    }
+  }
+  
+  return false;
+}  
  
  // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
  // that only one value (besides its initializer) is ever stored to the global.
  static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                                       Module::global_iterator &GVI,
                                       TargetData &TD) {
  
  // OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
  // that only one value (besides its initializer) is ever stored to the global.
  static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                                       Module::global_iterator &GVI,
                                       TargetData &TD) {
-  if (CastInst *CI = dyn_cast<CastInst>(StoredOnceVal))
-    StoredOnceVal = CI->getOperand(0);
-  else if (GetElementPtrInst *GEPI =dyn_cast<GetElementPtrInst>(StoredOnceVal)){
-    // "getelementptr Ptr, 0, 0, 0" is really just a cast.
-    bool IsJustACast = true;
-    for (unsigned i = 1, e = GEPI->getNumOperands(); i != e; ++i)
-      if (!isa<Constant>(GEPI->getOperand(i)) ||
-          !cast<Constant>(GEPI->getOperand(i))->isNullValue()) {
-        IsJustACast = false;
-        break;
-      }
-    if (IsJustACast)
-      StoredOnceVal = GEPI->getOperand(0);
-  }
+  // Ignore no-op GEPs and bitcasts.
+  StoredOnceVal = StoredOnceVal->stripPointerCasts();
  
    // If we are dealing with a pointer global that is initialized to null and
    // only has one (non-null) value stored into it, then we can optimize any
  
    // If we are dealing with a pointer global that is initialized to null and
    // only has one (non-null) value stored into it, then we can optimize any
@@ -1275,59 +1529,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
        if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
          return true;
      } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) {
        if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
          return true;
      } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) {
-      // If this is a malloc of an abstract type, don't touch it.
-      if (!MI->getAllocatedType()->isSized())
-        return false;
-      
-      // We can't optimize this global unless all uses of it are *known* to be
-      // of the malloc value, not of the null initializer value (consider a use
-      // that compares the global's value against zero to see if the malloc has
-      // been reached).  To do this, we check to see if all uses of the global
-      // would trap if the global were null: this proves that they must all
-      // happen after the malloc.
-      if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
-        return false;
-
-      // We can't optimize this if the malloc itself is used in a complex way,
-      // for example, being stored into multiple globals.  This allows the
-      // malloc to be stored into the specified global, loaded setcc'd, and
-      // GEP'd.  These are all things we could transform to using the global
-      // for.
-      {
-        SmallPtrSet<PHINode*, 8> PHIs;
-        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(MI, GV, PHIs))
-          return false;
-      }
-
-      
-      // If we have a global that is only initialized with a fixed size malloc,
-      // transform the program to use global memory instead of malloc'd memory.
-      // This eliminates dynamic allocation, avoids an indirection accessing the
-      // data, and exposes the resultant global to further GlobalOpt.
-      if (ConstantInt *NElements = dyn_cast<ConstantInt>(MI->getArraySize())) {
-        // Restrict this transformation to only working on small allocations
-        // (2048 bytes currently), as we don't want to introduce a 16M global or
-        // something.
-        if (NElements->getZExtValue()*
-                     TD.getABITypeSize(MI->getAllocatedType()) < 2048) {
-          GVI = OptimizeGlobalAddressOfMalloc(GV, MI);
-          return true;
-        }
-      }
-
-      // If the allocation is an array of structures, consider transforming this
-      // into multiple malloc'd arrays, one for each field.  This is basically
-      // SRoA for malloc'd memory.
-      if (const StructType *AllocTy = 
-                  dyn_cast<StructType>(MI->getAllocatedType())) {
-        // This the structure has an unreasonable number of fields, leave it
-        // alone.
-        if (AllocTy->getNumElements() <= 16 && AllocTy->getNumElements() > 0 &&
-            GlobalLoadUsesSimpleEnoughForHeapSRA(GV, MI)) {
-          GVI = PerformHeapAllocSRoA(GV, MI);
-          return true;
-        }
-      }
+      if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD))
+        return true;
      }
    }
  
      }
    }
  
@@ -1411,7 +1614,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
        if (IsOneZero)
          NSI = new ZExtInst(NLI, LI->getType(), "", LI);
        else
        if (IsOneZero)
          NSI = new ZExtInst(NLI, LI->getType(), "", LI);
        else
-        NSI = new SelectInst(NLI, OtherVal, InitVal, "", LI);
+        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
        NSI->takeName(LI);
        LI->replaceAllUsesWith(NSI);
      }
        NSI->takeName(LI);
        LI->replaceAllUsesWith(NSI);
      }
@@ -1427,7 +1630,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
  /// it if possible.  If we make a change, return true.
  bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
                                        Module::global_iterator &GVI) {
  /// it if possible.  If we make a change, return true.
  bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
                                        Module::global_iterator &GVI) {
-  std::set<PHINode*> PHIUsers;
+  SmallPtrSet<PHINode*, 16> PHIUsers;
    GlobalStatus GS;
    GV->removeDeadConstantUsers();
  
    GlobalStatus GS;
    GV->removeDeadConstantUsers();
  
@@ -1465,11 +1668,11 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
      // this global a local variable) we replace the global with a local alloca
      // in this function.
      //
      // this global a local variable) we replace the global with a local alloca
      // in this function.
      //
-    // NOTE: It doesn't make sense to promote non first class types since we
+    // NOTE: It doesn't make sense to promote non single-value types since we
      // are just replacing static memory to stack memory.
      if (!GS.HasMultipleAccessingFunctions &&
          GS.AccessingFunction && !GS.HasNonInstructionUser &&
      // are just replacing static memory to stack memory.
      if (!GS.HasMultipleAccessingFunctions &&
          GS.AccessingFunction && !GS.HasNonInstructionUser &&
-        GV->getType()->getElementType()->isFirstClassType() &&
+        GV->getType()->getElementType()->isSingleValueType() &&
          GS.AccessingFunction->getName() == "main" &&
          GS.AccessingFunction->hasExternalLinkage()) {
        DOUT << "LOCALIZING GLOBAL: " << *GV;
          GS.AccessingFunction->getName() == "main" &&
          GS.AccessingFunction->hasExternalLinkage()) {
        DOUT << "LOCALIZING GLOBAL: " << *GV;
@@ -1520,15 +1723,16 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
  
        ++NumMarked;
        return true;
  
        ++NumMarked;
        return true;
-    } else if (!GV->getInitializer()->getType()->isFirstClassType()) {
-      if (GlobalVariable *FirstNewGV = SRAGlobal(GV)) {
+    } else if (!GV->getInitializer()->getType()->isSingleValueType()) {
+      if (GlobalVariable *FirstNewGV = SRAGlobal(GV, 
+                                                 getAnalysis<TargetData>())) {
          GVI = FirstNewGV;  // Don't skip the newly produced globals!
          return true;
        }
      } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
        // If the initial value for the global was an undef value, and if only
        // one other value was stored into it, we can just change the
          GVI = FirstNewGV;  // Don't skip the newly produced globals!
          return true;
        }
      } else if (GS.StoredType == GlobalStatus::isStoredOnce) {
        // If the initial value for the global was an undef value, and if only
        // one other value was stored into it, we can just change the
-      // initializer to be an undef value, then delete all stores to the
+      // initializer to be the stored value, then delete all stores to the
        // global.  This allows us to mark it constant.
        if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
          if (isa<UndefValue>(GV->getInitializer())) {
        // global.  This allows us to mark it constant.
        if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
          if (isa<UndefValue>(GV->getInitializer())) {
@@ -1577,8 +1781,9 @@ static bool OnlyCalledDirectly(Function *F) {
      if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false;
  
      // See if the function address is passed as an argument.
      if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false;
  
      // See if the function address is passed as an argument.
-    for (unsigned i = 1, e = User->getNumOperands(); i != e; ++i)
-      if (User->getOperand(i) == F) return false;
+    for (User::op_iterator i = User->op_begin() + 1, e = User->op_end();
+         i != e; ++i)
+      if (*i == F) return false;
    }
    return true;
  }
    }
    return true;
  }
@@ -1592,23 +1797,23 @@ static void ChangeCalleesToFastCall(Function *F) {
    }
  }
  
    }
  }
  
-static PAListPtr StripNest(const PAListPtr &Attrs) {
+static AttrListPtr StripNest(const AttrListPtr &Attrs) {
    for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
    for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
-    if ((Attrs.getSlot(i).Attrs & ParamAttr::Nest) == 0)
+    if ((Attrs.getSlot(i).Attrs & Attribute::Nest) == 0)
        continue;
  
      // There can be only one.
        continue;
  
      // There can be only one.
-    return Attrs.removeAttr(Attrs.getSlot(i).Index, ParamAttr::Nest);
+    return Attrs.removeAttr(Attrs.getSlot(i).Index, Attribute::Nest);
    }
  
    return Attrs;
  }
  
  static void RemoveNestAttribute(Function *F) {
    }
  
    return Attrs;
  }
  
  static void RemoveNestAttribute(Function *F) {
-  F->setParamAttrs(StripNest(F->getParamAttrs()));
+  F->setAttributes(StripNest(F->getAttributes()));
    for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
      CallSite User(cast<Instruction>(*UI));
    for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
      CallSite User(cast<Instruction>(*UI));
-    User.setParamAttrs(StripNest(User.getParamAttrs()));
+    User.setAttributes(StripNest(User.getAttributes()));
    }
  }
  
    }
  }
  
@@ -1618,12 +1823,12 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
    for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
      Function *F = FI++;
      F->removeDeadConstantUsers();
    for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
      Function *F = FI++;
      F->removeDeadConstantUsers();
-    if (F->use_empty() && (F->hasInternalLinkage() ||
+    if (F->use_empty() && (F->hasLocalLinkage() ||
                             F->hasLinkOnceLinkage())) {
        M.getFunctionList().erase(F);
        Changed = true;
        ++NumFnDeleted;
                             F->hasLinkOnceLinkage())) {
        M.getFunctionList().erase(F);
        Changed = true;
        ++NumFnDeleted;
-    } else if (F->hasInternalLinkage()) {
+    } else if (F->hasLocalLinkage()) {
        if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
            OnlyCalledDirectly(F)) {
          // If this function has C calling conventions, is not a varargs
        if (F->getCallingConv() == CallingConv::C && !F->isVarArg() &&
            OnlyCalledDirectly(F)) {
          // If this function has C calling conventions, is not a varargs
@@ -1635,7 +1840,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
          Changed = true;
        }
  
          Changed = true;
        }
  
-      if (F->getParamAttrs().hasAttrSomewhere(ParamAttr::Nest) &&
+      if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
            OnlyCalledDirectly(F)) {
          // The function is not used by a trampoline intrinsic, so it is safe
          // to remove the 'nest' attribute.
            OnlyCalledDirectly(F)) {
          // The function is not used by a trampoline intrinsic, so it is safe
          // to remove the 'nest' attribute.
@@ -1653,7 +1858,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
         GVI != E; ) {
      GlobalVariable *GV = GVI++;
    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
         GVI != E; ) {
      GlobalVariable *GV = GVI++;
-    if (!GV->isConstant() && GV->hasInternalLinkage() &&
+    if (!GV->isConstant() && GV->hasLocalLinkage() &&
          GV->hasInitializer())
        Changed |= ProcessInternalGlobal(GV, GVI);
    }
          GV->hasInitializer())
        Changed |= ProcessInternalGlobal(GV, GVI);
    }
@@ -1683,8 +1888,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
        if (!I->hasInitializer()) return 0;
        ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
        if (!CA) return 0;
        if (!I->hasInitializer()) return 0;
        ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());
        if (!CA) return 0;
-      for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
-        if (ConstantStruct *CS = dyn_cast<ConstantStruct>(CA->getOperand(i))) {
+      for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+        if (ConstantStruct *CS = dyn_cast<ConstantStruct>(*i)) {
            if (isa<ConstantPointerNull>(CS->getOperand(1)))
              continue;
  
            if (isa<ConstantPointerNull>(CS->getOperand(1)))
              continue;
  
@@ -1711,8 +1916,8 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
    ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
    std::vector<Function*> Result;
    Result.reserve(CA->getNumOperands());
    ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
    std::vector<Function*> Result;
    Result.reserve(CA->getNumOperands());
-  for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
-    ConstantStruct *CS = cast<ConstantStruct>(CA->getOperand(i));
+  for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+    ConstantStruct *CS = cast<ConstantStruct>(*i);
      Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
    }
    return Result;
      Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
    }
    return Result;
@@ -1778,7 +1983,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
  }
  
  
  }
  
  
-static Constant *getVal(std::map<Value*, Constant*> &ComputedValues,
+static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
                          Value *V) {
    if (Constant *CV = dyn_cast<Constant>(V)) return CV;
    Constant *R = ComputedValues[V];
                          Value *V) {
    if (Constant *CV = dyn_cast<Constant>(V)) return CV;
    Constant *R = ComputedValues[V];
@@ -1792,7 +1997,7 @@ static Constant *getVal(std::map<Value*, Constant*> &ComputedValues,
  /// globals.  This should be kept up to date with CommitValueTo.
  static bool isSimpleEnoughPointerToCommit(Constant *C) {
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
  /// globals.  This should be kept up to date with CommitValueTo.
  static bool isSimpleEnoughPointerToCommit(Constant *C) {
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
-    if (!GV->hasExternalLinkage() && !GV->hasInternalLinkage())
+    if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage())
        return false;  // do not allow weak/linkonce/dllimport/dllexport linkage.
      return !GV->isDeclaration();  // reject external globals.
    }
        return false;  // do not allow weak/linkonce/dllimport/dllexport linkage.
      return !GV->isDeclaration();  // reject external globals.
    }
@@ -1801,7 +2006,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          isa<GlobalVariable>(CE->getOperand(0))) {
        GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
      if (CE->getOpcode() == Instruction::GetElementPtr &&
          isa<GlobalVariable>(CE->getOperand(0))) {
        GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
-      if (!GV->hasExternalLinkage() && !GV->hasInternalLinkage())
+      if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage())
          return false;  // do not allow weak/linkonce/dllimport/dllexport linkage.
        return GV->hasInitializer() &&
               ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
          return false;  // do not allow weak/linkonce/dllimport/dllexport linkage.
        return GV->hasInitializer() &&
               ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
@@ -1825,8 +2030,8 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
  
      // Break up the constant into its elements.
      if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
  
      // Break up the constant into its elements.
      if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
-      for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i)
-        Elts.push_back(CS->getOperand(i));
+      for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i)
+        Elts.push_back(cast<Constant>(*i));
      } else if (isa<ConstantAggregateZero>(Init)) {
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
          Elts.push_back(Constant::getNullValue(STy->getElementType(i)));
      } else if (isa<ConstantAggregateZero>(Init)) {
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
          Elts.push_back(Constant::getNullValue(STy->getElementType(i)));
@@ -1853,8 +2058,8 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
      // Break up the array into elements.
      std::vector<Constant*> Elts;
      if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
      // Break up the array into elements.
      std::vector<Constant*> Elts;
      if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
-      for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
-        Elts.push_back(CA->getOperand(i));
+      for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+        Elts.push_back(cast<Constant>(*i));
      } else if (isa<ConstantAggregateZero>(Init)) {
        Constant *Elt = Constant::getNullValue(ATy->getElementType());
        Elts.assign(ATy->getNumElements(), Elt);
      } else if (isa<ConstantAggregateZero>(Init)) {
        Constant *Elt = Constant::getNullValue(ATy->getElementType());
        Elts.assign(ATy->getNumElements(), Elt);
@@ -1894,10 +2099,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
  /// P after the stores reflected by 'memory' have been performed.  If we can't
  /// decide, return null.
  static Constant *ComputeLoadResult(Constant *P,
  /// P after the stores reflected by 'memory' have been performed.  If we can't
  /// decide, return null.
  static Constant *ComputeLoadResult(Constant *P,
-                                const std::map<Constant*, Constant*> &Memory) {
+                                const DenseMap<Constant*, Constant*> &Memory) {
    // If this memory location has been recently stored, use the stored value: it
    // is the most up-to-date.
    // If this memory location has been recently stored, use the stored value: it
    // is the most up-to-date.
-  std::map<Constant*, Constant*>::const_iterator I = Memory.find(P);
+  DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
    if (I != Memory.end()) return I->second;
   
    // Access it.
    if (I != Memory.end()) return I->second;
   
    // Access it.
@@ -1925,7 +2130,7 @@ static Constant *ComputeLoadResult(Constant *P,
  static bool EvaluateFunction(Function *F, Constant *&RetVal,
                               const std::vector<Constant*> &ActualArgs,
                               std::vector<Function*> &CallStack,
  static bool EvaluateFunction(Function *F, Constant *&RetVal,
                               const std::vector<Constant*> &ActualArgs,
                               std::vector<Function*> &CallStack,
-                             std::map<Constant*, Constant*> &MutatedMemory,
+                             DenseMap<Constant*, Constant*> &MutatedMemory,
                               std::vector<GlobalVariable*> &AllocaTmps) {
    // Check to see if this function is already executing (recursion).  If so,
    // bail out.  TODO: we might want to accept limited recursion.
                               std::vector<GlobalVariable*> &AllocaTmps) {
    // Check to see if this function is already executing (recursion).  If so,
    // bail out.  TODO: we might want to accept limited recursion.
@@ -1935,7 +2140,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
    CallStack.push_back(F);
    
    /// Values - As we compute SSA register values, we store their contents here.
    CallStack.push_back(F);
    
    /// Values - As we compute SSA register values, we store their contents here.
-  std::map<Value*, Constant*> Values;
+  DenseMap<Value*, Constant*> Values;
    
    // Initialize arguments to the incoming values specified.
    unsigned ArgNo = 0;
    
    // Initialize arguments to the incoming values specified.
    unsigned ArgNo = 0;
@@ -1946,7 +2151,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
    /// ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
    /// we can only evaluate any one basic block at most once.  This set keeps
    /// track of what we have executed so we can detect recursive cases etc.
    /// ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
    /// we can only evaluate any one basic block at most once.  This set keeps
    /// track of what we have executed so we can detect recursive cases etc.
-  std::set<BasicBlock*> ExecutedBlocks;
+  SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
    
    // CurInst - The current instruction we're evaluating.
    BasicBlock::iterator CurInst = F->begin()->begin();
    
    // CurInst - The current instruction we're evaluating.
    BasicBlock::iterator CurInst = F->begin()->begin();
@@ -1982,8 +2187,9 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
      } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
        Constant *P = getVal(Values, GEP->getOperand(0));
        SmallVector<Constant*, 8> GEPOps;
      } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
        Constant *P = getVal(Values, GEP->getOperand(0));
        SmallVector<Constant*, 8> GEPOps;
-      for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i)
-        GEPOps.push_back(getVal(Values, GEP->getOperand(i)));
+      for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
+           i != e; ++i)
+        GEPOps.push_back(getVal(Values, *i));
        InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
      } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
        if (LI->isVolatile()) return false;  // no volatile accesses.
        InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());
      } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
        if (LI->isVolatile()) return false;  // no volatile accesses.
@@ -2007,8 +2213,9 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
        if (!Callee) return false;  // Cannot resolve.
  
        std::vector<Constant*> Formals;
        if (!Callee) return false;  // Cannot resolve.
  
        std::vector<Constant*> Formals;
-      for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
-        Formals.push_back(getVal(Values, CI->getOperand(i)));
+      for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end();
+           i != e; ++i)
+        Formals.push_back(getVal(Values, *i));
        
        if (Callee->isDeclaration()) {
          // If this is a function we can constant fold, do it.
        
        if (Callee->isDeclaration()) {
          // If this is a function we can constant fold, do it.
@@ -2061,7 +2268,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
        // Okay, we succeeded in evaluating this control flow.  See if we have
        // executed the new block before.  If so, we have a looping function,
        // which we cannot evaluate in reasonable time.
        // Okay, we succeeded in evaluating this control flow.  See if we have
        // executed the new block before.  If so, we have a looping function,
        // which we cannot evaluate in reasonable time.
-      if (!ExecutedBlocks.insert(NewBB).second)
+      if (!ExecutedBlocks.insert(NewBB))
          return false;  // looped!
        
        // Okay, we have never been in this block before.  Check to see if there
          return false;  // looped!
        
        // Okay, we have never been in this block before.  Check to see if there
@@ -2094,7 +2301,7 @@ static bool EvaluateStaticConstructor(Function *F) {
    /// MutatedMemory - For each store we execute, we update this map.  Loads
    /// check this to get the most up-to-date value.  If evaluation is successful,
    /// this state is committed to the process.
    /// MutatedMemory - For each store we execute, we update this map.  Loads
    /// check this to get the most up-to-date value.  If evaluation is successful,
    /// this state is committed to the process.
-  std::map<Constant*, Constant*> MutatedMemory;
+  DenseMap<Constant*, Constant*> MutatedMemory;
  
    /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
    /// to represent its body.  This vector is needed so we can delete the
  
    /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
    /// to represent its body.  This vector is needed so we can delete the
@@ -2115,7 +2322,7 @@ static bool EvaluateStaticConstructor(Function *F) {
      DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
           << F->getName() << "' to " << MutatedMemory.size()
           << " stores.\n";
      DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
           << F->getName() << "' to " << MutatedMemory.size()
           << " stores.\n";
-    for (std::map<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
+    for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
           E = MutatedMemory.end(); I != E; ++I)
        CommitValueTo(I->second, I->first);
    }
           E = MutatedMemory.end(); I != E; ++I)
        CommitValueTo(I->second, I->first);
    }
@@ -2178,6 +2385,60 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
    return true;
  }
  
    return true;
  }
  
+bool GlobalOpt::ResolveAliases(Module &M) {
+  bool Changed = false;
+  // Redirect users of each alias to its aliasee; returns true on any change.
+  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
+       I != E;) {
+    Module::alias_iterator J = I++;  // Advance first: J may be erased below.
+    // If the aliasee may change at link time, nothing can be done - skip it.
+    if (J->mayBeOverridden())
+      continue;
+    // Target is the aliasee with any pointer casts stripped.
+    Constant *Aliasee = J->getAliasee();
+    GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
+    Target->removeDeadConstantUsers();
+    bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
+    // (Sampled before the replaceAllUsesWith below can add uses of Aliasee.)
+    // Make all users of the alias use the aliasee instead.
+    if (!J->use_empty()) {
+      J->replaceAllUsesWith(Aliasee);
+      ++NumAliasesResolved;
+      Changed = true;
+    }
+
+    // If the aliasee has local linkage, give it the name and linkage
+    // of the alias, and delete the alias.  This turns:
+    //   define internal ... @f(...)
+    //   @a = alias ... @f
+    // into:
+    //   define ... @a(...)
+    if (!Target->hasLocalLinkage())
+      continue;
+
+    // The transform is only useful if the alias does not have local linkage.
+    if (J->hasLocalLinkage())
+      continue;
+
+    // Do not perform the transform if multiple aliases potentially target the
+    // aliasee.  This check also ensures that it is safe to replace the section
+    // and other attributes of the aliasee with those of the alias.
+    if (!hasOneUse)
+      continue;
+
+    // Give the aliasee the name, linkage and other attributes of the alias.
+    Target->takeName(J);
+    Target->setLinkage(J->getLinkage());
+    Target->GlobalValue::copyAttributesFrom(J);  // Explicitly the base version.
+
+    // Delete the alias.
+    M.getAliasList().erase(J);
+    ++NumAliasesRemoved;
+    Changed = true;
+  }
+
+  return Changed;
+}
  
  bool GlobalOpt::runOnModule(Module &M) {
    bool Changed = false;
  
  bool GlobalOpt::runOnModule(Module &M) {
    bool Changed = false;
@@ -2198,6 +2459,9 @@ bool GlobalOpt::runOnModule(Module &M) {
      
      // Optimize non-address-taken globals.
      LocalChange |= OptimizeGlobalVars(M);
      
      // Optimize non-address-taken globals.
      LocalChange |= OptimizeGlobalVars(M);
+
+    // Resolve aliases, when possible.
+    LocalChange |= ResolveAliases(M);
      Changed |= LocalChange;
    }
    
      Changed |= LocalChange;
    }