#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include <list>
// pessimize the llvm optimizer.
//
// Since we don't have perfect knowledge here, make some assumptions: assume
- // the maximum GPR width is the same size as the pointer size and assume that
- // this width can be stored. If so, check to see whether we will end up
- // actually reducing the number of stores used.
+ // the maximum GPR width is the same size as the largest legal integer
+ // size. If so, check to see whether we will end up actually reducing the
+ // number of stores used.
unsigned Bytes = unsigned(End-Start);
- unsigned AS = cast<StoreInst>(TheStores[0])->getPointerAddressSpace();
- unsigned NumPointerStores = Bytes/TD.getPointerSize(AS);
+ unsigned MaxIntSize = TD.getLargestLegalIntTypeSize();
+ if (MaxIntSize == 0)
+ MaxIntSize = 1;
+ unsigned NumPointerStores = Bytes / MaxIntSize;
// Assume the remaining bytes if any are done a byte at a time.
- unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(AS);
+ unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;
// If we will reduce the # stores (according to this heuristic), do the
// transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetLibraryInfo>();
INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
// Zap all the stores.
- for (SmallVector<Instruction*, 16>::const_iterator
+ for (SmallVectorImpl<Instruction *>::const_iterator
SI = Range.TheStores.begin(),
SE = Range.TheStores.end(); SI != SE; ++SI) {
MD->removeInstruction(*SI);
return false;
Type *StructTy = cast<PointerType>(A->getType())->getElementType();
- uint64_t destSize = TD->getTypeAllocSize(StructTy);
+ if (!StructTy->isSized()) {
+ // The call may never return and hence the copy-instruction may never
+ // be executed, and therefore it's not safe to say "the destination
+ // has at least <cpyLen> bytes, as implied by the copy-instruction",
+ return false;
+ }
+ uint64_t destSize = TD->getTypeAllocSize(StructTy);
if (destSize < srcSize)
return false;
} else {
// Since we're changing the parameter to the callsite, we need to make sure
// that what would be the new parameter dominates the callsite.
- DominatorTree &DT = getAnalysis<DominatorTree>();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
if (!DT.dominates(cpyDestInst, C))
return false;