X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FScalar%2FLoopIdiomRecognize.cpp;h=a12f5a7a0334f1be7b99b54e7be3fb10b0dc7281;hb=f425efdbc290cc8af63c2a620699bf13c5df6e57;hp=153fedf2c370513079dc11127b39ecab0fb5bb80;hpb=d04a8d4b33ff316ca4cf961e06c9e312eff8e64f;p=oota-llvm.git diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 153fedf2c37..a12f5a7a033 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -41,25 +41,27 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-idiom" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "loop-idiom" + STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); @@ -78,10 +80,7 @@ namespace { return dyn_cast(BB->getTerminator()); } - /// Return the condition of the branch terminating the given basic block. - static Value *getBrCondtion(BasicBlock *); - - /// Derive the precondition block (i.e the block that guards the loop + /// Derive the precondition block (i.e the block that guards the loop /// preheader) from the given preheader. static BasicBlock *getPrecondBb(BasicBlock *PreHead); }; @@ -108,22 +107,22 @@ namespace { bool preliminaryScreen(); /// Check if the given conditional branch is based on the comparison - /// beween a variable and zero, and if the variable is non-zero, the - /// control yeilds to the loop entry. If the branch matches the behavior, + /// between a variable and zero, and if the variable is non-zero, the + /// control yields to the loop entry. If the branch matches the behavior, /// the variable involved in the comparion is returned. This function will - /// be called to see if the precondition and postcondition of the loop + /// be called to see if the precondition and postcondition of the loop /// are in desirable form. - Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const; + Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const; /// Return true iff the idiom is detected in the loop. and 1) \p CntInst - /// is set to the instruction counting the pupulation bit. 2) \p CntPhi + /// is set to the instruction counting the population bit. 2) \p CntPhi /// is set to the corresponding phi node. 3) \p Var is set to the value /// whose population bits are being counted. bool detectIdiom (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const; /// Insert ctpop intrinsic function and some obviously dead instructions. - void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var); + void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var); /// Create llvm.ctpop.* intrinsic function. CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL); @@ -131,19 +130,19 @@ namespace { class LoopIdiomRecognize : public LoopPass { Loop *CurLoop; - const DataLayout *TD; + const DataLayout *DL; DominatorTree *DT; ScalarEvolution *SE; TargetLibraryInfo *TLI; - const ScalarTargetTransformInfo *STTI; + const TargetTransformInfo *TTI; public: static char ID; explicit LoopIdiomRecognize() : LoopPass(ID) { initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - TD = 0; DT = 0; SE = 0; TLI = 0; STTI = 0; + DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr; } - bool runOnLoop(Loop *L, LPPassManager &LPM); + bool runOnLoop(Loop *L, LPPassManager &LPM) override; bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, SmallVectorImpl &ExitBlocks); @@ -163,7 +162,7 @@ namespace { /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. /// - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); @@ -174,17 +173,23 @@ namespace { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); - AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); AU.addRequired(); + AU.addRequired(); } const DataLayout *getDataLayout() { - return TD ? TD : TD=getAnalysisIfAvailable(); + if (DL) + return DL; + DataLayoutPass *DLP = getAnalysisIfAvailable(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + return DL; } DominatorTree *getDominatorTree() { - return DT ? DT : (DT=&getAnalysis()); + return DT ? DT + : (DT = &getAnalysis().getDomTree()); } ScalarEvolution *getScalarEvolution() { @@ -195,12 +200,8 @@ namespace { return TLI ? TLI : (TLI = &getAnalysis()); } - const ScalarTargetTransformInfo *getScalarTargetTransformInfo() { - if (!STTI) { - TargetTransformInfo *TTI = getAnalysisIfAvailable(); - if (TTI) STTI = TTI->getScalarTargetTransformInfo(); - } - return STTI; + const TargetTransformInfo *getTargetTransformInfo() { + return TTI ? TTI : (TTI = &getAnalysis()); } Loop *getLoop() const { return CurLoop; } @@ -215,12 +216,13 @@ char LoopIdiomRecognize::ID = 0; INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", false, false) @@ -246,7 +248,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); - DeadInst->setOperand(op, 0); + DeadInst->setOperand(op, nullptr); // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; @@ -276,11 +278,11 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE, // //===----------------------------------------------------------------------===// -// This fucntion will return true iff the given block contains nothing but goto. -// A typical usage of this function is to check if the preheader fucntion is -// "almost" empty such that generated intrinsic function can be moved across -// preheader and to be placed at the end of the preconditiona block without -// concerning of breaking data dependence. +// This function will return true iff the given block contains nothing but goto. +// A typical usage of this function is to check if the preheader function is +// "almost" empty such that generated intrinsic functions can be moved across +// the preheader and be placed at the end of the precondition block without +// the concern of breaking data dependence. bool LIRUtil::isAlmostEmpty(BasicBlock *BB) { if (BranchInst *Br = getBranch(BB)) { return Br->isUnconditional() && BB->size() == 1; @@ -288,17 +290,12 @@ bool LIRUtil::isAlmostEmpty(BasicBlock *BB) { return false; } -Value *LIRUtil::getBrCondtion(BasicBlock *BB) { - BranchInst *Br = getBranch(BB); - return Br ? Br->getCondition() : 0; -} - BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { if (BasicBlock *BB = PreHead->getSinglePredecessor()) { BranchInst *Br = getBranch(BB); - return Br && Br->isConditional() ? BB : 0; + return Br && Br->isConditional() ? BB : nullptr; } - return 0; + return nullptr; } //===----------------------------------------------------------------------===// @@ -308,15 +305,15 @@ BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { //===----------------------------------------------------------------------===// NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): - LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) { + LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) { } bool NclPopcountRecognize::preliminaryScreen() { - const ScalarTargetTransformInfo *STTI = LIR.getScalarTargetTransformInfo(); - if (STTI->getPopcntHwSupport(32) != ScalarTargetTransformInfo::Fast) + const TargetTransformInfo *TTI = LIR.getTargetTransformInfo(); + if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware) return false; - // Counting population are usually conducted by few arithmetic instrutions. + // Counting population are usually conducted by few arithmetic instructions. // Such instructions can be easilly "absorbed" by vacant slots in a // non-compact loop. Therefore, recognizing popcount idiom only makes sense // in a compact loop. @@ -341,29 +338,29 @@ bool NclPopcountRecognize::preliminaryScreen() { PreCondBB = LIRUtil::getPrecondBb(PreHead); if (!PreCondBB) return false; - + return true; } -Value *NclPopcountRecognize::matchCondition (BranchInst *Br, - BasicBlock *LoopEntry) const { +Value *NclPopcountRecognize::matchCondition(BranchInst *Br, + BasicBlock *LoopEntry) const { if (!Br || !Br->isConditional()) - return 0; + return nullptr; ICmpInst *Cond = dyn_cast(Br->getCondition()); if (!Cond) - return 0; + return nullptr; ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); if (!CmpZero || !CmpZero->isZero()) - return 0; + return nullptr; ICmpInst::Predicate Pred = Cond->getPredicate(); if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) || (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry)) return Cond->getOperand(0); - return 0; + return nullptr; } bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, @@ -394,9 +391,9 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, Value *VarX1, *VarX0; PHINode *PhiX, *CountPhi; - DefX2 = CountInst = 0; - VarX1 = VarX0 = 0; - PhiX = CountPhi = 0; + DefX2 = CountInst = nullptr; + VarX1 = VarX0 = nullptr; + PhiX = CountPhi = nullptr; LoopEntry = *(CurLoop->block_begin()); // step 1: Check if the loop-back branch is in desirable form. @@ -409,7 +406,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" { - if (DefX2->getOpcode() != Instruction::And) + if (!DefX2 || DefX2->getOpcode() != Instruction::And) return false; BinaryOperator *SubOneOp; @@ -443,7 +440,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 { - CountInst = NULL; + CountInst = nullptr; for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(), IterE = LoopEntry->end(); Iter != IterE; Iter++) { Instruction *Inst = Iter; @@ -455,14 +452,13 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, continue; PHINode *Phi = dyn_cast(Inst->getOperand(0)); - if (!Phi && Phi->getParent() != LoopEntry) + if (!Phi || Phi->getParent() != LoopEntry) continue; // Check if the result of the instruction is live of the loop. bool LiveOutLoop = false; - for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end(); - I != E; I++) { - if ((cast(*I))->getParent() != LoopEntry) { + for (User *U : Inst->users()) { + if ((cast(U))->getParent() != LoopEntry) { LiveOutLoop = true; break; } } @@ -506,10 +502,10 @@ void NclPopcountRecognize::transform(Instruction *CntInst, // Assuming before transformation, the loop is following: // if (x) // the precondition // do { cnt++; x &= x - 1; } while(x); - + // Step 1: Insert the ctpop instruction at the end of the precondition block IRBuilderTy Builder(PreCondBr); - Value *PopCnt, *PopCntZext, *NewCount; + Value *PopCnt, *PopCntZext, *NewCount, *TripCnt; { PopCnt = createPopcntIntrinsic(Builder, Var, DL); NewCount = PopCntZext = @@ -518,11 +514,14 @@ void NclPopcountRecognize::transform(Instruction *CntInst, if (NewCount != PopCnt) (cast(NewCount))->setDebugLoc(DL); - // If the popoulation counter's initial value is not zero, insert Add Inst. + // TripCnt is exactly the number of iterations the loop has + TripCnt = NewCount; + + // If the population counter's initial value is not zero, insert Add Inst. Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead); ConstantInt *InitConst = dyn_cast(CntInitVal); if (!InitConst || !InitConst->isZero()) { - NewCount = Builder.CreateAdd(PopCnt, InitConst); + NewCount = Builder.CreateAdd(NewCount, CntInitVal); (cast(NewCount))->setDebugLoc(DL); } } @@ -570,7 +569,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst, { BranchInst *LbBr = LIRUtil::getBranch(Body); ICmpInst *LbCond = cast(LbBr->getCondition()); - Type *Ty = NewCount->getType(); + Type *Ty = TripCnt->getType(); PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin()); @@ -580,7 +579,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst, Instruction *TcDec = cast(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true)); - TcPhi->addIncoming(NewCount, PreHead); + TcPhi->addIncoming(TripCnt, PreHead); TcPhi->addIncoming(TcDec, Body); CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ? @@ -595,11 +594,9 @@ void NclPopcountRecognize::transform(Instruction *CntInst, // __builtin_ctpop(). { SmallVector CntUses; - for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end(); - I != E; I++) { - if (cast(*I)->getParent() != Body) - CntUses.push_back(*I); - } + for (User *U : CntInst->users()) + if (cast(U)->getParent() != Body) + CntUses.push_back(U); for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) { (cast(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount); } @@ -610,7 +607,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst, SE->forgetLoop(CurLoop); } -CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, +CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, Value *Val, DebugLoc DL) { Value *Ops[] = { Val }; Type *Tys[] = { Val->getType() }; @@ -628,7 +625,7 @@ CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, /// call, and return true; otherwise, return false. bool NclPopcountRecognize::recognize() { - if (!LIR.getScalarTargetTransformInfo()) + if (!LIR.getTargetTransformInfo()) return false; LIR.getScalarEvolution(); @@ -666,12 +663,14 @@ bool LoopIdiomRecognize::runOnCountableLoop() { if (!getDataLayout()) return false; - getDominatorTree(); + // set DT + (void)getDominatorTree(); LoopInfo &LI = getAnalysis(); TLI = &getAnalysis(); - getTargetLibraryInfo(); + // set TLI + (void)getTargetLibraryInfo(); SmallVector ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); @@ -702,6 +701,9 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() { } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipOptnoneFunction(L)) + return false; + CurLoop = L; // If the loop could not be converted to canonical form, it must have an @@ -743,7 +745,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, // If processing the store invalidated our iterator, start over from the // top of the block. - if (InstPtr == 0) + if (!InstPtr) I = BB->begin(); continue; } @@ -756,7 +758,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, // If processing the memset invalidated our iterator, start over from the // top of the block. - if (InstPtr == 0) + if (!InstPtr) I = BB->begin(); continue; } @@ -774,7 +776,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { Value *StorePtr = SI->getPointerOperand(); // Reject stores that are so large that they overflow an unsigned. - uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType()); + uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) return false; @@ -783,7 +785,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { // random store we can't handle. const SCEVAddRecExpr *StoreEv = dyn_cast(SE->getSCEV(StorePtr)); - if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) + if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) return false; // Check to see if the stride matches the size of the store. If so, then we @@ -791,7 +793,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast(StoreEv->getOperand(1)); - if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { + if (!Stride || StoreSize != Stride->getValue()->getValue()) { // TODO: Could also handle negative stride here someday, that will require // the validity check in mayLoopAccessLocation to be updated though. // Enable this to print exact negative strides. @@ -840,7 +842,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. const SCEVAddRecExpr *Ev = dyn_cast(SE->getSCEV(Pointer)); - if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine()) + if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine()) return false; // Reject memsets that are so large that they overflow an unsigned. @@ -854,7 +856,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // TODO: Could also handle negative stride here someday, that will require the // validity check in mayLoopAccessLocation to be updated though. - if (Stride == 0 || MSI->getLength() != Stride->getValue()) + if (!Stride || MSI->getLength() != Stride->getValue()) return false; return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, @@ -902,28 +904,28 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, /// /// Note that we don't ever attempt to use memset_pattern8 or 4, because these /// just replicate their input array and then pass on to memset_pattern16. -static Constant *getMemSetPatternValue(Value *V, const DataLayout &TD) { +static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) { // If the value isn't a constant, we can't promote it to being in a constant // array. We could theoretically do a store to an alloca or something, but // that doesn't seem worthwhile. Constant *C = dyn_cast(V); - if (C == 0) return 0; + if (!C) return nullptr; // Only handle simple values that are a power of two bytes in size. - uint64_t Size = TD.getTypeSizeInBits(V->getType()); + uint64_t Size = DL.getTypeSizeInBits(V->getType()); if (Size == 0 || (Size & 7) || (Size & (Size-1))) - return 0; + return nullptr; // Don't care enough about darwin/ppc to implement this. - if (TD.isBigEndian()) - return 0; + if (DL.isBigEndian()) + return nullptr; // Convert to size in bytes. Size /= 8; // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see // if the top and bottom are the same (e.g. for vectors and large integers). - if (Size > 16) return 0; + if (Size > 16) return nullptr; // If the constant is exactly 16 bytes, just use it. if (Size == 16) return C; @@ -948,7 +950,9 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // are stored. A store of i32 0x01020304 can never be turned into a memset, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); - Constant *PatternValue = 0; + Constant *PatternValue = nullptr; + + unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); // If we're allowed to form a memset, and the stored value would be acceptable // for memset, use it. @@ -957,11 +961,13 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // promote the memset. CurLoop->isLoopInvariant(SplatValue)) { // Keep and use SplatValue. - PatternValue = 0; - } else if (TLI->has(LibFunc::memset_pattern16) && - (PatternValue = getMemSetPatternValue(StoredVal, *TD))) { + PatternValue = nullptr; + } else if (DestAS == 0 && + TLI->has(LibFunc::memset_pattern16) && + (PatternValue = getMemSetPatternValue(StoredVal, *DL))) { + // Don't create memset_pattern16s with address spaces. // It looks like we can use PatternValue! - SplatValue = 0; + SplatValue = nullptr; } else { // Otherwise, this isn't an idiom we can transform. For example, we can't // do anything with a 3-byte store. @@ -975,20 +981,20 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, IRBuilder<> Builder(Preheader->getTerminator()); SCEVExpander Expander(*SE, "loop-idiom"); + Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS); + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read // or write to the aliased location. Check for any overlap by generating the // base pointer and checking the region. - unsigned AddrSpace = cast(DestPtr->getType())->getAddressSpace(); Value *BasePtr = - Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), + Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy, Preheader->getTerminator()); - if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, CurLoop, BECount, - StoreSize, getAnalysis(), TheStore)){ + StoreSize, getAnalysis(), TheStore)) { Expander.clear(); // If we generated new code for the base pointer, clean up. deleteIfDeadInstruction(BasePtr, *SE, TLI); @@ -999,28 +1005,36 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); + Type *IntPtr = Builder.getIntPtrTy(DL, DestAS); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), SCEV::FlagNUW); - if (StoreSize != 1) + if (StoreSize != 1) { NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), SCEV::FlagNUW); + } Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); CallInst *NewCall; - if (SplatValue) - NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment); - else { + if (SplatValue) { + NewCall = Builder.CreateMemSet(BasePtr, + SplatValue, + NumBytes, + StoreAlignment); + } else { + // Everything is emitted in default address space + Type *Int8PtrTy = DestInt8PtrTy; + Module *M = TheStore->getParent()->getParent()->getParent(); Value *MSP = M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), - Builder.getInt8PtrTy(), - Builder.getInt8PtrTy(), IntPtr, - (void*)0); + Int8PtrTy, + Int8PtrTy, + IntPtr, + (void*)nullptr); // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. @@ -1029,7 +1043,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, PatternValue, ".memset_pattern"); GV->setUnnamedAddr(true); // Ok to merge these. GV->setAlignment(16); - Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy()); + Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); } @@ -1105,17 +1119,17 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - Type *IntPtr = TD->getIntPtrType(SI->getContext()); - BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); + Type *IntPtrTy = Builder.getIntPtrTy(DL, SI->getPointerAddressSpace()); + BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); - const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1), SCEV::FlagNUW); if (StoreSize != 1) - NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), + NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW); Value *NumBytes = - Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); + Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); CallInst *NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,