#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include <list>
using namespace llvm;
if (OpC->isZero()) continue; // No offset.
// Handle struct indices, which add their field offset to the pointer.
- if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
} // end anon namespace
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
- // If we found more than 8 stores to merge or 64 bytes, use memset.
- if (TheStores.size() >= 8 || End-Start >= 64) return true;
+ // If we found more than 4 stores to merge or 16 bytes, use memset.
+ if (TheStores.size() >= 4 || End-Start >= 16) return true;
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
namespace {
class MemCpyOpt : public FunctionPass {
MemoryDependenceAnalysis *MD;
+ TargetLibraryInfo *TLI;
const TargetData *TD;
public:
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
MD = 0;
+ TLI = 0;
+ TD = 0;
}
bool runOnFunction(Function &F);
AU.addRequired<DominatorTree>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
false, false)
/// tryMergingIntoMemset - When scanning forward over instructions, we look for
/// some other patterns to fold away. In particular, this looks for stores to
-/// neighboring locations of memory. If it sees enough consequtive ones, it
+/// neighboring locations of memory. If it sees enough consecutive ones, it
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
// If this is a store, see if we can merge it in.
- if (NextStore->isVolatile()) break;
+ if (!NextStore->isSimple()) break;
// Check to see if this stored value is of the same byte-splattable value.
if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
// Determine alignment
unsigned Alignment = Range.Alignment;
if (Alignment == 0) {
- const Type *EltType =
+ Type *EltType =
cast<PointerType>(StartPtr->getType())->getElementType();
Alignment = TD->getABITypeAlignment(EltType);
}
for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
dbgs() << *Range.TheStores[i] << '\n';
dbgs() << "With: " << *AMemSet << '\n');
-
+
+ if (!Range.TheStores.empty())
+ AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
+
// Zap all the stores.
for (SmallVector<Instruction*, 16>::const_iterator
SI = Range.TheStores.begin(),
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
- if (SI->isVolatile()) return false;
+ if (!SI->isSimple()) return false;
if (TD == 0) return false;
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
- if (!LI->isVolatile() && LI->hasOneUse()) {
- MemDepResult dep = MD->getDependency(LI);
+ if (LI->isSimple() && LI->hasOneUse() &&
+ LI->getParent() == SI->getParent()) {
+ MemDepResult ldep = MD->getDependency(LI);
CallInst *C = 0;
- if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
- C = dyn_cast<CallInst>(dep.getInst());
-
+ if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
+ C = dyn_cast<CallInst>(ldep.getInst());
+
+ if (C) {
+ // Check that nothing touches the dest of the "copy" between
+ // the call and the store.
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+ for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
+ E = C; I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
+ C = 0;
+ break;
+ }
+ }
+ }
+
if (C) {
bool changed = performCallSlotOptzn(LI,
SI->getPointerOperand()->stripPointerCasts(),
if (!A->hasStructRetAttr())
return false;
- const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
+ Type *StructTy = cast<PointerType>(A->getType())->getElementType();
uint64_t destSize = TD->getTypeAllocSize(StructTy);
if (destSize < srcSize)
if (M->getSource() == MDep->getSource())
return false;
- // Second, the length of the memcpy's must be the same, or the preceeding one
+ // Second, the length of the memcpy's must be the same, or the preceding one
// must be larger than the following one.
- ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- if (!C1) return false;
+ ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
+ if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
+ return false;
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
MemDepResult DepInfo = MD->getDependency(M);
- if (!DepInfo.isClobber())
- return false;
-
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
- return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
-
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), C)) {
- MD->removeInstruction(M);
- M->eraseFromParent();
- return true;
+ if (DepInfo.isClobber()) {
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), C)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return true;
+ }
}
}
-
+
+ AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
+ M, M->getParent());
+ if (SrcDepInfo.isClobber()) {
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+ }
+
return false;
}
bool MemCpyOpt::processMemMove(MemMoveInst *M) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ if (!TLI->has(LibFunc::memmove))
+ return false;
+
// See if the pointers alias.
if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M)))
return false;
// If not, then we know we can transform this.
Module *Mod = M->getParent()->getParent()->getParent();
- const Type *ArgTys[3] = { M->getRawDest()->getType(),
- M->getRawSource()->getType(),
- M->getLength()->getType() };
+ Type *ArgTys[3] = { M->getRawDest()->getType(),
+ M->getRawSource()->getType(),
+ M->getLength()->getType() };
M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
- ArgTys, 3));
+ ArgTys));
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
- const Type *ByValTy =cast<PointerType>(ByValArg->getType())->getElementType();
+ Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
MemDepResult DepInfo =
MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
return false;
- // Get the alignment of the byval. If it is greater than the memcpy, then we
- // can't do the substitution. If the call doesn't specify the alignment, then
- // it is some target specific value that we can't know.
+ // Get the alignment of the byval. If the call doesn't specify the alignment,
+ // then it is some target specific value that we can't know.
unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
- if (ByValAlign == 0 || MDep->getAlignment() < ByValAlign)
- return false;
+ if (ByValAlign == 0) return false;
+
+ // If it is greater than the memcpy, then we check to see if we can force the
+ // source of the memcpy to the alignment we need. If we fail, we bail out.
+ if (MDep->getAlignment() < ByValAlign &&
+ getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
+ return false;
// Verify that the copied-from memory doesn't change in between the memcpy and
// the byval call.
RepeatInstruction = processMemMove(M);
else if (CallSite CS = (Value*)I) {
for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
- if (CS.paramHasAttr(i+1, Attribute::ByVal))
+ if (CS.isByValArgument(i))
MadeChange |= processByValArgument(CS, i);
}
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
+ // If we don't have at least memset and memcpy, there is little point of doing
+ // anything here. These are required by a freestanding implementation, so if
+ // even they are disabled, there is no point in trying hard.
+ if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
+ return false;
+
while (1) {
if (!iterateOnFunction(F))
break;