X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=lib%2FAnalysis%2FScalarEvolutionExpander.cpp;h=bcbf35b046bad116441284f350277da90d204199;hp=8dc8eb68ff4d84711b5392e28d31a826cbb84cdb;hb=8cec2f281696a19faee58cd0749a70fbcc0fa218;hpb=64925c55c65f9345a69fb67db07aa62cfb723577 diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 8dc8eb68ff4..bcbf35b046b 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -14,15 +14,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; +using namespace PatternMatch; /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, /// reusing an existing cast if a suitable one exists, moving an existing @@ -31,34 +38,76 @@ using namespace llvm; Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, BasicBlock::iterator IP) { + // This function must be called with the builder having a valid insertion + // point. It doesn't need to be the actual IP where the uses of the returned + // cast will be added, but it must dominate such IP. + // We use this precondition to produce a cast that will dominate all its + // uses. In particular, this is crucial for the case where the builder's + // insertion point *is* the point where we were asked to put the cast. + // Since we don't know the builder's insertion point is actually + // where the uses will be added (only that it dominates it), we are + // not allowed to move it. + BasicBlock::iterator BIP = Builder.GetInsertPoint(); + + Instruction *Ret = nullptr; + // Check to see if there is already a cast! - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - User *U = *UI; + for (User *U : V->users()) if (U->getType() == Ty) if (CastInst *CI = dyn_cast(U)) if (CI->getOpcode() == Op) { - // If the cast isn't where we want it, fix it. - if (BasicBlock::iterator(CI) != IP) { + // If the cast isn't where we want it, create a new cast at IP. + // Likewise, do not reuse a cast at BIP because it must dominate + // instructions that might be inserted before BIP. + if (BasicBlock::iterator(CI) != IP || BIP == IP) { // Create a new cast, and leave the old cast in place in case // it is being used as an insert point. Clear its operand // so that it doesn't hold anything live. - Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP); - NewCI->takeName(CI); - CI->replaceAllUsesWith(NewCI); + Ret = CastInst::Create(Op, V, Ty, "", &*IP); + Ret->takeName(CI); + CI->replaceAllUsesWith(Ret); CI->setOperand(0, UndefValue::get(V->getType())); - rememberInstruction(NewCI); - return NewCI; + break; } - rememberInstruction(CI); - return CI; + Ret = CI; + break; } - } // Create a new cast. - Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP); - rememberInstruction(I); - return I; + if (!Ret) + Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP); + + // We assert at the end of the function since IP might point to an + // instruction with different dominance properties than a cast + // (an invoke for example) and not dominate BIP (but the cast does). + assert(SE.DT.dominates(Ret, &*BIP)); + + rememberInstruction(Ret); + return Ret; +} + +static BasicBlock::iterator findInsertPointAfter(Instruction *I, + BasicBlock *MustDominate) { + BasicBlock::iterator IP = ++I->getIterator(); + if (auto *II = dyn_cast(I)) + IP = II->getNormalDest()->begin(); + + while (isa(IP)) + ++IP; + + while (IP->isEHPad()) { + if (isa(IP) || isa(IP)) { + ++IP; + } else if (auto *TPI = dyn_cast(IP)) { + IP = TPI->getUnwindDest()->getFirstNonPHI()->getIterator(); + } else if (isa(IP)) { + IP = MustDominate->getFirstInsertionPt(); + } else { + llvm_unreachable("unexpected eh pad!"); + } + } + + return IP; } /// InsertNoopCastOfTo - Insert a cast of V to the specified type, @@ -110,20 +159,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { while ((isa(IP) && isa(cast(IP)->getOperand(0)) && cast(IP)->getOperand(0) != A) || - isa(IP) || - isa(IP)) + isa(IP)) ++IP; return ReuseOrCreateCast(A, Ty, Op, IP); } // Cast the instruction immediately after the instruction. Instruction *I = cast(V); - BasicBlock::iterator IP = I; ++IP; - if (InvokeInst *II = dyn_cast(I)) - IP = II->getNormalDest()->begin(); - while (isa(IP) || isa(IP) || - isa(IP)) - ++IP; + BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock()); return ReuseOrCreateCast(I, Ty, Op, IP); } @@ -150,34 +193,30 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, ScanLimit++; if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && IP->getOperand(1) == RHS) - return IP; + return &*IP; if (IP == BlockBegin) break; } } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc(); + BuilderType::InsertPointGuard Guard(Builder); // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) break; // Ok, move up a level. - Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + Builder.SetInsertPoint(Preheader->getTerminator()); } // If we haven't found this binop, insert it. Instruction *BO = cast(Builder.CreateBinOp(Opcode, LHS, RHS)); - BO->setDebugLoc(SaveInsertPt->getDebugLoc()); + BO->setDebugLoc(Loc); rememberInstruction(BO); - // Restore the original insert point. - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); - return BO; } @@ -188,11 +227,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made /// unnecessary; in its place, just signed-divide Ops[i] by the scale and /// check to see if the divide was folded. -static bool FactorOutConstant(const SCEV *&S, - const SCEV *&Remainder, - const SCEV *Factor, - ScalarEvolution &SE, - const TargetData *TD) { +static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, + const SCEV *Factor, ScalarEvolution &SE, + const DataLayout &DL) { // Everything is divisible by one. if (Factor->isOne()) return true; @@ -232,50 +269,32 @@ static bool FactorOutConstant(const SCEV *&S, // In a Mul, check if there is a constant operand which is a multiple // of the given factor. if (const SCEVMulExpr *M = dyn_cast(S)) { - if (TD) { - // With TargetData, the size is known. Check if there is a constant - // operand which is a multiple of the given factor. If so, we can - // factor it. - const SCEVConstant *FC = cast(Factor); - if (const SCEVConstant *C = dyn_cast(M->getOperand(0))) - if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { - SmallVector NewMulOps(M->op_begin(), M->op_end()); - NewMulOps[0] = - SE.getConstant(C->getValue()->getValue().sdiv( - FC->getValue()->getValue())); - S = SE.getMulExpr(NewMulOps); - return true; - } - } else { - // Without TargetData, check if Factor can be factored out of any of the - // Mul's operands. If so, we can just remove it. - for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { - const SCEV *SOp = M->getOperand(i); - const SCEV *Remainder = SE.getConstant(SOp->getType(), 0); - if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && - Remainder->isZero()) { - SmallVector NewMulOps(M->op_begin(), M->op_end()); - NewMulOps[i] = SOp; - S = SE.getMulExpr(NewMulOps); - return true; - } + // Size is known, check if there is a constant operand which is a multiple + // of the given factor. If so, we can factor it. + const SCEVConstant *FC = cast(Factor); + if (const SCEVConstant *C = dyn_cast(M->getOperand(0))) + if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { + SmallVector NewMulOps(M->op_begin(), M->op_end()); + NewMulOps[0] = SE.getConstant( + C->getValue()->getValue().sdiv(FC->getValue()->getValue())); + S = SE.getMulExpr(NewMulOps); + return true; } - } } // In an AddRec, check if both start and step are divisible. if (const SCEVAddRecExpr *A = dyn_cast(S)) { const SCEV *Step = A->getStepRecurrence(SE); const SCEV *StepRem = SE.getConstant(Step->getType(), 0); - if (!FactorOutConstant(Step, StepRem, Factor, SE, TD)) + if (!FactorOutConstant(Step, StepRem, Factor, SE, DL)) return false; if (!StepRem->isZero()) return false; const SCEV *Start = A->getStart(); - if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) + if (!FactorOutConstant(Start, Remainder, Factor, SE, DL)) return false; - // FIXME: can use A->getNoWrapFlags(FlagNW) - S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap); + S = SE.getAddRecExpr(Start, Step, A->getLoop(), + A->getNoWrapFlags(SCEV::FlagNW)); return true; } @@ -328,8 +347,7 @@ static void SplitAddRecs(SmallVectorImpl &Ops, AddRecs.push_back(SE.getAddRecExpr(Zero, A->getStepRecurrence(SE), A->getLoop(), - // FIXME: A->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + A->getNoWrapFlags(SCEV::FlagNW))); if (const SCEVAddExpr *Add = dyn_cast(Start)) { Ops[i] = Zero; Ops.append(Add->op_begin(), Add->op_end()); @@ -378,7 +396,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, PointerType *PTy, Type *Ty, Value *V) { - Type *ElTy = PTy->getElementType(); + Type *OriginalElTy = PTy->getElementType(); + Type *ElTy = OriginalElTy; SmallVector GepIndices; SmallVector Ops(op_begin, op_end); bool AnyNonZeroIndices = false; @@ -387,6 +406,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); + Type *IntPtrTy = DL.getIntPtrType(PTy); + // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. The first index in a GEP // indexes into the array implied by the pointer operand; the rest of @@ -397,13 +418,12 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // array indexing. SmallVector ScaledOps; if (ElTy->isSized()) { - const SCEV *ElSize = SE.getSizeOfExpr(ElTy); + const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy); if (!ElSize->isZero()) { SmallVector NewOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - const SCEV *Op = Ops[i]; + for (const SCEV *Op : Ops) { const SCEV *Remainder = SE.getConstant(Ty, 0); - if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { + if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) { // Op now has ElSize factored out. ScaledOps.push_back(Op); if (!Remainder->isZero()) @@ -412,7 +432,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } else { // The operand was not divisible, so add it to the list of operands // we'll scan next iteration. - NewOps.push_back(Ops[i]); + NewOps.push_back(Op); } } // If we made any changes, update Ops. @@ -437,43 +457,25 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, bool FoundFieldNo = false; // An empty struct has no fields. if (STy->getNumElements() == 0) break; - if (SE.TD) { - // With TargetData, field offsets are known. See if a constant offset - // falls within any of the struct fields. - if (Ops.empty()) break; - if (const SCEVConstant *C = dyn_cast(Ops[0])) - if (SE.getTypeSizeInBits(C->getType()) <= 64) { - const StructLayout &SL = *SE.TD->getStructLayout(STy); - uint64_t FullOffset = C->getValue()->getZExtValue(); - if (FullOffset < SL.getSizeInBytes()) { - unsigned ElIdx = SL.getElementContainingOffset(FullOffset); - GepIndices.push_back( - ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); - ElTy = STy->getTypeAtIndex(ElIdx); - Ops[0] = + // Field offsets are known. See if a constant offset falls within any of + // the struct fields. + if (Ops.empty()) + break; + if (const SCEVConstant *C = dyn_cast(Ops[0])) + if (SE.getTypeSizeInBits(C->getType()) <= 64) { + const StructLayout &SL = *DL.getStructLayout(STy); + uint64_t FullOffset = C->getValue()->getZExtValue(); + if (FullOffset < SL.getSizeInBytes()) { + unsigned ElIdx = SL.getElementContainingOffset(FullOffset); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); + ElTy = STy->getTypeAtIndex(ElIdx); + Ops[0] = SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); - AnyNonZeroIndices = true; - FoundFieldNo = true; - } - } - } else { - // Without TargetData, just check for an offsetof expression of the - // appropriate struct type. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (const SCEVUnknown *U = dyn_cast(Ops[i])) { - Type *CTy; - Constant *FieldNo; - if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { - GepIndices.push_back(FieldNo); - ElTy = - STy->getTypeAtIndex(cast(FieldNo)->getZExtValue()); - Ops[i] = SE.getConstant(Ty, 0); - AnyNonZeroIndices = true; - FoundFieldNo = true; - break; - } + AnyNonZeroIndices = true; + FoundFieldNo = true; } - } + } // If no struct field offsets were found, tentatively assume that // field zero was selected (since the zero offset would obviously // be folded away). @@ -498,13 +500,17 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, V = InsertNoopCastOfTo(V, Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + assert(!isa(V) || + SE.DT.dominates(cast(V), &*Builder.GetInsertPoint())); + // Expand the operands for a plain byte offset. Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast(V)) if (Constant *CRHS = dyn_cast(Idx)) - return ConstantExpr::getGetElementPtr(CLHS, CRHS); + return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()), + CLHS, CRHS); // Do a quick scan to see if we have this GEP nearby. If so, reuse it. unsigned ScanLimit = 6; @@ -520,51 +526,42 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, ScanLimit++; if (IP->getOpcode() == Instruction::GetElementPtr && IP->getOperand(0) == V && IP->getOperand(1) == Idx) - return IP; + return &*IP; if (IP == BlockBegin) break; } } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) break; // Ok, move up a level. - Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + Builder.SetInsertPoint(Preheader->getTerminator()); } // Emit a GEP. - Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); + Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); rememberInstruction(GEP); - // Restore the original insert point. - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); - return GEP; } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPoint SaveInsertPt = Builder.saveIP(); // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { if (!L->isLoopInvariant(V)) break; - bool AnyIndexNotLoopInvariant = false; - for (SmallVectorImpl::const_iterator I = GepIndices.begin(), - E = GepIndices.end(); I != E; ++I) - if (!L->isLoopInvariant(*I)) { - AnyIndexNotLoopInvariant = true; - break; - } + bool AnyIndexNotLoopInvariant = + std::any_of(GepIndices.begin(), GepIndices.end(), + [L](Value *Op) { return !L->isLoopInvariant(Op); }); + if (AnyIndexNotLoopInvariant) break; @@ -572,7 +569,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, if (!Preheader) break; // Ok, move up a level. - Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + Builder.SetInsertPoint(Preheader->getTerminator()); } // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, @@ -581,15 +578,12 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Value *Casted = V; if (V->getType() != PTy) Casted = InsertNoopCastOfTo(Casted, PTy); - Value *GEP = Builder.CreateGEP(Casted, - GepIndices, - "scevgep"); + Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); rememberInstruction(GEP); // Restore the original insert point. - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); + Builder.restoreIP(SaveInsertPt); return expand(SE.getAddExpr(Ops)); } @@ -612,27 +606,25 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, /// expression, according to PickMostRelevantLoop. const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { // Test whether we've already computed the most relevant loop for this SCEV. - std::pair::iterator, bool> Pair = - RelevantLoops.insert(std::make_pair(S, static_cast(0))); + auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr)); if (!Pair.second) return Pair.first->second; if (isa(S)) // A constant has no relevant loops. - return 0; + return nullptr; if (const SCEVUnknown *U = dyn_cast(S)) { if (const Instruction *I = dyn_cast(U->getValue())) - return Pair.first->second = SE.LI->getLoopFor(I->getParent()); + return Pair.first->second = SE.LI.getLoopFor(I->getParent()); // A non-instruction has no relevant loops. - return 0; + return nullptr; } if (const SCEVNAryExpr *N = dyn_cast(S)) { - const Loop *L = 0; + const Loop *L = nullptr; if (const SCEVAddRecExpr *AR = dyn_cast(S)) L = AR->getLoop(); - for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); - I != E; ++I) - L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT); + for (const SCEV *Op : N->operands()) + L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT); return RelevantLoops[N] = L; } if (const SCEVCastExpr *C = dyn_cast(S)) { @@ -640,14 +632,11 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { return RelevantLoops[C] = Result; } if (const SCEVUDivExpr *D = dyn_cast(S)) { - const Loop *Result = - PickMostRelevantLoop(getRelevantLoop(D->getLHS()), - getRelevantLoop(D->getRHS()), - *SE.DT); + const Loop *Result = PickMostRelevantLoop( + getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT); return RelevantLoops[D] = Result; } llvm_unreachable("Unexpected SCEV type!"); - return 0; } namespace { @@ -699,13 +688,12 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Sort by loop. Use a stable sort so that constants follow non-constants and // pointer operands precede non-pointer operands. - std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT)); // Emit instructions to add all the operands. Hoist as much as possible // out of loops, and form meaningful getelementptrs where possible. - Value *Sum = 0; - for (SmallVectorImpl >::iterator - I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + Value *Sum = nullptr; + for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) { const Loop *CurLoop = I->first; const SCEV *Op = I->second; if (!Sum) { @@ -767,31 +755,35 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); // Sort by loop. Use a stable sort so that constants follow non-constants. - std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT)); // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. - Value *Prod = 0; - for (SmallVectorImpl >::iterator - I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { - const SCEV *Op = I->second; + Value *Prod = nullptr; + for (const auto &I : OpsAndLoops) { + const SCEV *Op = I.second; if (!Prod) { // This is the first operand. Just expand it. Prod = expand(Op); - ++I; } else if (Op->isAllOnesValue()) { // Instead of doing a multiply by negative one, just do a negate. Prod = InsertNoopCastOfTo(Prod, Ty); Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod); - ++I; } else { // A simple mul. Value *W = expandCodeFor(Op, Ty); Prod = InsertNoopCastOfTo(Prod, Ty); // Canonicalize a constant to the RHS. if (isa(Prod)) std::swap(Prod, W); - Prod = InsertBinop(Instruction::Mul, Prod, W); - ++I; + const APInt *RHS; + if (match(W, m_Power2(RHS))) { + // Canonicalize Prod*(1<isVectorTy() && "vector types are not SCEVable"); + Prod = InsertBinop(Instruction::Shl, Prod, + ConstantInt::get(Ty, RHS->logBase2())); + } else { + Prod = InsertBinop(Instruction::Mul, Prod, W); + } } } @@ -824,8 +816,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, SE.getAddRecExpr(SE.getConstant(A->getType(), 0), A->getStepRecurrence(SE), A->getLoop(), - // FIXME: A->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + A->getNoWrapFlags(SCEV::FlagNW))); } if (const SCEVAddExpr *A = dyn_cast(Base)) { Base = A->getOperand(A->getNumOperands()-1); @@ -850,7 +841,7 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, for (User::op_iterator OI = IncV->op_begin()+1, OE = IncV->op_end(); OI != OE; ++OI) if (Instruction *OInst = dyn_cast(OI)) - if (!SE.DT->dominates(OInst, IVIncInsertPos)) + if (!SE.DT.dominates(OInst, IVIncInsertPos)) return false; } // Advance to the next instruction. @@ -867,61 +858,110 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, return isNormalAddRecExprPHI(PN, IncV, L); } -/// Determine if this cyclic phi is in a form that would have been generated by -/// LSR. We don't care if the phi was actually expanded in this pass, as long -/// as it is in a low-cost form, for example, no implied multiplication. This -/// should match any patterns generated by getAddRecExprPHILiterally and -/// expandAddtoGEP. -bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, - const Loop *L) { - if (ChainedPhis.count(PN)) - return true; +/// getIVIncOperand returns an induction variable increment's induction +/// variable operand. +/// +/// If allowScale is set, any type of GEP is allowed as long as the nonIV +/// operands dominate InsertPos. +/// +/// If allowScale is not set, ensure that a GEP increment conforms to one of the +/// simple patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. If the pattern isn't recognized, return NULL. +Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, + Instruction *InsertPos, + bool allowScale) { + if (IncV == InsertPos) + return nullptr; switch (IncV->getOpcode()) { + default: + return nullptr; // Check for a simple Add/Sub or GEP of a loop invariant step. case Instruction::Add: - case Instruction::Sub: - return IncV->getOperand(0) == PN - && L->isLoopInvariant(IncV->getOperand(1)); + case Instruction::Sub: { + Instruction *OInst = dyn_cast(IncV->getOperand(1)); + if (!OInst || SE.DT.dominates(OInst, InsertPos)) + return dyn_cast(IncV->getOperand(0)); + return nullptr; + } case Instruction::BitCast: - IncV = dyn_cast(IncV->getOperand(0)); - if (!IncV) - return false; - // fall-thru to GEP handling - case Instruction::GetElementPtr: { - // This must be a pointer addition of constants (pretty) or some number of - // address-size elements (ugly). - for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); - I != E; ++I) { + return dyn_cast(IncV->getOperand(0)); + case Instruction::GetElementPtr: + for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) { if (isa(*I)) continue; - // ugly geps have 2 operands. - // i1* is used by the expander to represent an address-size element. + if (Instruction *OInst = dyn_cast(*I)) { + if (!SE.DT.dominates(OInst, InsertPos)) + return nullptr; + } + if (allowScale) { + // allow any kind of GEP as long as it can be hoisted. + continue; + } + // This must be a pointer addition of constants (pretty), which is already + // handled, or some number of address-size elements (ugly). Ugly geps + // have 2 operands. i1* is used by the expander to represent an + // address-size element. if (IncV->getNumOperands() != 2) - return false; + return nullptr; unsigned AS = cast(IncV->getType())->getAddressSpace(); if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) - return false; - // Ensure the operands dominate the insertion point. I don't know of a - // case when this would not be true, so this is somewhat untested. - if (L == IVIncInsertLoop) { - for (User::op_iterator OI = IncV->op_begin()+1, - OE = IncV->op_end(); OI != OE; ++OI) - if (Instruction *OInst = dyn_cast(OI)) - if (!SE.DT->dominates(OInst, IVIncInsertPos)) - return false; - } + return nullptr; break; } - IncV = dyn_cast(IncV->getOperand(0)); - if (IncV && IncV->getOpcode() == Instruction::BitCast) - IncV = dyn_cast(IncV->getOperand(0)); - return IncV == PN; + return dyn_cast(IncV->getOperand(0)); } - default: +} + +/// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make +/// it available to other uses in this loop. Recursively hoist any operands, +/// until we reach a value that dominates InsertPos. +bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { + if (SE.DT.dominates(IncV, InsertPos)) + return true; + + // InsertPos must itself dominate IncV so that IncV's new position satisfies + // its existing users. + if (isa(InsertPos) || + !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) return false; + + // Check that the chain of IV operands leading back to Phi can be hoisted. + SmallVector IVIncs; + for(;;) { + Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true); + if (!Oper) + return false; + // IncV is safe to hoist. + IVIncs.push_back(IncV); + IncV = Oper; + if (SE.DT.dominates(IncV, InsertPos)) + break; + } + for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) { + (*I)->moveBefore(InsertPos); } + return true; +} + +/// Determine if this cyclic phi is in a form that would have been generated by +/// LSR. We don't care if the phi was actually expanded in this pass, as long +/// as it is in a low-cost form, for example, no implied multiplication. This +/// should match any patterns generated by getAddRecExprPHILiterally and +/// expandAddtoGEP. +bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, + const Loop *L) { + for(Instruction *IVOper = IncV; + (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(), + /*allowScale=*/false));) { + if (IVOper == PN) + return true; + } + return false; } /// expandIVInc - Expand an IV increment at Builder's current InsertPos. @@ -954,6 +994,82 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, return IncV; } +/// \brief Hoist the addrec instruction chain rooted in the loop phi above the +/// position. This routine assumes that this is possible (has been checked). +static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, + Instruction *Pos, PHINode *LoopPhi) { + do { + if (DT->dominates(InstToHoist, Pos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + InstToHoist->moveBefore(Pos); + Pos = InstToHoist; + InstToHoist = cast(InstToHoist->getOperand(0)); + } while (InstToHoist != LoopPhi); +} + +/// \brief Check whether we can cheaply express the requested SCEV in terms of +/// the available PHI SCEV by truncation and/or inversion of the step. +static bool canBeCheaplyTransformed(ScalarEvolution &SE, + const SCEVAddRecExpr *Phi, + const SCEVAddRecExpr *Requested, + bool &InvertStep) { + Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType()); + Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType()); + + if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth()) + return false; + + // Try truncate it if necessary. + Phi = dyn_cast(SE.getTruncateOrNoop(Phi, RequestedTy)); + if (!Phi) + return false; + + // Check whether truncation will help. + if (Phi == Requested) { + InvertStep = false; + return true; + } + + // Check whether inverting will help: {R,+,-1} == R - {0,+,1}. + if (SE.getAddExpr(Requested->getStart(), + SE.getNegativeSCEV(Requested)) == Phi) { + InvertStep = true; + return true; + } + + return false; +} + +static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { + if (!isa(AR->getType())) + return false; + + unsigned BitWidth = cast(AR->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); + const SCEV *Step = AR->getStepRecurrence(SE); + const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy), + SE.getSignExtendExpr(AR, WideTy)); + const SCEV *ExtendAfterOp = + SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy); + return ExtendAfterOp == OpAfterExtend; +} + +static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { + if (!isa(AR->getType())) + return false; + + unsigned BitWidth = cast(AR->getType())->getBitWidth(); + Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); + const SCEV *Step = AR->getStepRecurrence(SE); + const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy), + SE.getZeroExtendExpr(AR, WideTy)); + const SCEV *ExtendAfterOp = + SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy); + return ExtendAfterOp == OpAfterExtend; +} + /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand /// the base addrec, which is the addrec without any non-loop-dominating /// values, and return the PHI. @@ -961,53 +1077,93 @@ PHINode * SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, const Loop *L, Type *ExpandTy, - Type *IntTy) { + Type *IntTy, + Type *&TruncTy, + bool &InvertStep) { assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); // Reuse a previously-inserted PHI, if present. BasicBlock *LatchBlock = L->getLoopLatch(); if (LatchBlock) { - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *PN = dyn_cast(I); ++I) { - if (!SE.isSCEVable(PN->getType()) || - (SE.getEffectiveSCEVType(PN->getType()) != - SE.getEffectiveSCEVType(Normalized->getType())) || - SE.getSCEV(PN) != Normalized) + PHINode *AddRecPhiMatch = nullptr; + Instruction *IncV = nullptr; + TruncTy = nullptr; + InvertStep = false; + + // Only try partially matching scevs that need truncation and/or + // step-inversion if we know this loop is outside the current loop. + bool TryNonMatchingSCEV = + IVIncInsertLoop && + SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); + + for (auto &I : *L->getHeader()) { + auto *PN = dyn_cast(&I); + if (!PN || !SE.isSCEVable(PN->getType())) continue; - Instruction *IncV = - cast(PN->getIncomingValueForBlock(LatchBlock)); + const SCEVAddRecExpr *PhiSCEV = dyn_cast(SE.getSCEV(PN)); + if (!PhiSCEV) + continue; + bool IsMatchingSCEV = PhiSCEV == Normalized; + // We only handle truncation and inversion of phi recurrences for the + // expanded expression if the expanded expression's loop dominates the + // loop we insert to. Check now, so we can bail out early. + if (!IsMatchingSCEV && !TryNonMatchingSCEV) + continue; + + Instruction *TempIncV = + cast(PN->getIncomingValueForBlock(LatchBlock)); + + // Check whether we can reuse this PHI node. if (LSRMode) { - if (!isExpandedAddRecExprPHI(PN, IncV, L)) + if (!isExpandedAddRecExprPHI(PN, TempIncV, L)) continue; - } - else { - if (!isNormalAddRecExprPHI(PN, IncV, L)) + if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos)) + continue; + } else { + if (!isNormalAddRecExprPHI(PN, TempIncV, L)) continue; } + + // Stop if we have found an exact match SCEV. + if (IsMatchingSCEV) { + IncV = TempIncV; + TruncTy = nullptr; + InvertStep = false; + AddRecPhiMatch = PN; + break; + } + + // Try whether the phi can be translated into the requested form + // (truncated and/or offset by a constant). + if ((!TruncTy || InvertStep) && + canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) { + // Record the phi node. But don't stop we might find an exact match + // later. + AddRecPhiMatch = PN; + IncV = TempIncV; + TruncTy = SE.getEffectiveSCEVType(Normalized->getType()); + } + } + + if (AddRecPhiMatch) { + // Potentially, move the increment. We have made sure in + // isExpandedAddRecExprPHI or hoistIVInc that this is possible. + if (L == IVIncInsertLoop) + hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); + // Ok, the add recurrence looks usable. // Remember this PHI, even in post-inc mode. - InsertedValues.insert(PN); + InsertedValues.insert(AddRecPhiMatch); // Remember the increment. rememberInstruction(IncV); - if (L == IVIncInsertLoop) - do { - if (SE.DT->dominates(IncV, IVIncInsertPos)) - break; - // Make sure the increment is where we want it. But don't move it - // down past a potential existing post-inc user. - IncV->moveBefore(IVIncInsertPos); - IVIncInsertPos = IncV; - IncV = cast(IncV->getOperand(0)); - } while (IncV != PN); - return PN; + return AddRecPhiMatch; } } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); // Another AddRec may need to be recursively expanded below. For example, if // this AddRec is quadratic, the StepV may itself be an AddRec in this @@ -1020,13 +1176,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, PostIncLoops.clear(); // Expand code for the start value. - Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, - L->getHeader()->begin()); + Value *StartV = + expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front()); // StartV must be hoisted into L's preheader to dominate the new phi. assert(!isa(StartV) || - SE.DT->properlyDominates(cast(StartV)->getParent(), - L->getHeader())); + SE.DT.properlyDominates(cast(StartV)->getParent(), + L->getHeader())); // Expand code for the step value. Do this before creating the PHI so that PHI // reuse code doesn't see an incomplete PHI. @@ -1038,7 +1194,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (useSubtract) Step = SE.getNegativeSCEV(Step); // Expand the step somewhere that dominates the loop header. - Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); + + // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if + // we actually do emit an addition. It does not apply if we emit a + // subtraction. + bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized); + bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized); // Create the PHI. BasicBlock *Header = L->getHeader(); @@ -1066,13 +1228,15 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, Builder.SetInsertPoint(InsertPos); Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); + if (isa(IncV)) { + if (IncrementIsNUW) + cast(IncV)->setHasNoUnsignedWrap(); + if (IncrementIsNSW) + cast(IncV)->setHasNoSignedWrap(); + } PN->addIncoming(IncV, Pred); } - // Restore the original insert point. - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); - // After expanding subexpressions, restore the PostIncLoops set so the caller // can ensure that IVIncrement dominates the current uses. PostIncLoops = SavedPostIncLoops; @@ -1094,42 +1258,43 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (PostIncLoops.count(L)) { PostIncLoopSet Loops; Loops.insert(L); - Normalized = - cast(TransformForPostIncUse(Normalize, S, 0, 0, - Loops, SE, *SE.DT)); + Normalized = cast(TransformForPostIncUse( + Normalize, S, nullptr, nullptr, Loops, SE, SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. const SCEV *Start = Normalized->getStart(); - const SCEV *PostLoopOffset = 0; + const SCEV *PostLoopOffset = nullptr; if (!SE.properlyDominates(Start, L->getHeader())) { PostLoopOffset = Start; Start = SE.getConstant(Normalized->getType(), 0); Normalized = cast( SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), Normalized->getLoop(), - // FIXME: Normalized->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + Normalized->getNoWrapFlags(SCEV::FlagNW))); } // Strip off any non-loop-dominating component from the addrec step. const SCEV *Step = Normalized->getStepRecurrence(SE); - const SCEV *PostLoopScale = 0; + const SCEV *PostLoopScale = nullptr; if (!SE.dominates(Step, L->getHeader())) { PostLoopScale = Step; Step = SE.getConstant(Normalized->getType(), 1); Normalized = - cast(SE.getAddRecExpr(Start, Step, - Normalized->getLoop(), - // FIXME: Normalized - // ->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + cast(SE.getAddRecExpr( + Start, Step, Normalized->getLoop(), + Normalized->getNoWrapFlags(SCEV::FlagNW))); } // Expand the core addrec. If we need post-loop scaling, force it to // expand to an integer type to avoid the need for additional casting. Type *ExpandTy = PostLoopScale ? IntTy : STy; - PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); + // In some cases, we decide to reuse an existing phi node but need to truncate + // it and/or invert the step. + Type *TruncTy = nullptr; + bool InvertStep = false; + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy, + TruncTy, InvertStep); // Accommodate post-inc mode, if necessary. Value *Result; @@ -1144,9 +1309,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. - if (isa(Result) - && !SE.DT->dominates(cast(Result), - Builder.GetInsertPoint())) { + if (isa(Result) && + !SE.DT.dominates(cast(Result), + &*Builder.GetInsertPoint())) { // The induction variable's postinc expansion does not dominate this use. // IVUsers tries to prevent this case, so it is rare. However, it can // happen when an IVUser outside the loop is not dominated by the latch @@ -1160,19 +1325,39 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); if (useSubtract) Step = SE.getNegativeSCEV(Step); - // Expand the step somewhere that dominates the loop header. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); - Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); - // Restore the insertion point to the place where the caller has - // determined dominates all uses. - restoreInsertPoint(SaveInsertBB, SaveInsertPt); + Value *StepV; + { + // Expand the step somewhere that dominates the loop header. + BuilderType::InsertPointGuard Guard(Builder); + StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); + } Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); } } + // We have decided to reuse an induction variable of a dominating loop. Apply + // truncation and/or invertion of the step. + if (TruncTy) { + Type *ResTy = Result->getType(); + // Normalize the result type. + if (ResTy != SE.getEffectiveSCEVType(ResTy)) + Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy)); + // Truncate the result. + if (TruncTy != Result->getType()) { + Result = Builder.CreateTrunc(Result, TruncTy); + rememberInstruction(Result); + } + // Invert the result. + if (InvertStep) { + Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy), + Result); + rememberInstruction(Result); + } + } + // Re-apply any non-loop-dominating scale. if (PostLoopScale) { + assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateMul(Result, expandCodeFor(PostLoopScale, IntTy)); @@ -1202,7 +1387,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { const Loop *L = S->getLoop(); // First check for an existing canonical IV in a suitable type. - PHINode *CanonicalIV = 0; + PHINode *CanonicalIV = nullptr; if (PHINode *PN = L->getCanonicalInductionVariable()) if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) CanonicalIV = PN; @@ -1216,18 +1401,11 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), - // FIXME: S->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = - llvm::next(BasicBlock::iterator(cast(V))); - while (isa(NewInsertPt) || isa(NewInsertPt) || - isa(NewInsertPt)) - ++NewInsertPt; - V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, - NewInsertPt); - restoreInsertPoint(SaveInsertBB, SaveInsertPt); + findInsertPointAfter(cast(V), Builder.GetInsertBlock()); + V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, + &*NewInsertPt); return V; } @@ -1235,8 +1413,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (!S->getStart()->isZero()) { SmallVector NewOps(S->op_begin(), S->op_end()); NewOps[0] = SE.getConstant(Ty, 0); - // FIXME: can use S->getNoWrapFlags() - const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L, + S->getNoWrapFlags(SCEV::FlagNW)); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. @@ -1268,12 +1446,20 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { BasicBlock *Header = L->getHeader(); pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", - Header->begin()); + &Header->front()); rememberInstruction(CanonicalIV); + SmallSet PredSeen; Constant *One = ConstantInt::get(Ty, 1); for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *HP = *HPI; + if (!PredSeen.insert(HP).second) { + // There must be an incoming value for each predecessor, even the + // duplicates! + CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP); + continue; + } + if (L->contains(HP)) { // Insert a unit add instruction right before the terminator // corresponding to the back-edge. @@ -1404,11 +1590,9 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { } Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, - Instruction *I) { - BasicBlock::iterator IP = I; - while (isInsertedInstruction(IP) || isa(IP)) - ++IP; - Builder.SetInsertPoint(IP->getParent(), IP); + Instruction *IP) { + assert(IP); + Builder.SetInsertPoint(IP); return expandCodeFor(SH, Ty); } @@ -1426,8 +1610,8 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { Value *SCEVExpander::expand(const SCEV *S) { // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. - Instruction *InsertPt = Builder.GetInsertPoint(); - for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; + Instruction *InsertPt = &*Builder.GetInsertPoint(); + for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; L = L->getParentLoop()) if (SE.isLoopInvariant(S, L)) { if (!L) break; @@ -1437,29 +1621,29 @@ Value *SCEVExpander::expand(const SCEV *S) { // LSR sets the insertion point for AddRec start/step values to the // block start to simplify value reuse, even though it's an invalid // position. SCEVExpander must correct for this in all cases. - InsertPt = L->getHeader()->getFirstInsertionPt(); + InsertPt = &*L->getHeader()->getFirstInsertionPt(); } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) - InsertPt = L->getHeader()->getFirstInsertionPt(); - while (isInsertedInstruction(InsertPt) || isa(InsertPt)) - InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); + InsertPt = &*L->getHeader()->getFirstInsertionPt(); + while (InsertPt != Builder.GetInsertPoint() + && (isInsertedInstruction(InsertPt) + || isa(InsertPt))) { + InsertPt = &*std::next(InsertPt->getIterator()); + } break; } // Check to see if we already expanded this here. - std::map, - AssertingVH >::iterator I = - InsertedExpressions.find(std::make_pair(S, InsertPt)); + auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) return I->second; - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); - Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); + BuilderType::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(InsertPt); // Expand the expression into instructions. Value *V = visit(S); @@ -1468,11 +1652,9 @@ Value *SCEVExpander::expand(const SCEV *S) { // // This is independent of PostIncLoops. The mapped value simply materializes // the expression at this insertion point. If the mapped value happened to be - // a postinc expansion, it could be reused by a non postinc user, but only if + // a postinc expansion, it could be reused by a non-postinc user, but only if // its insertion point was already at the head of the loop. InsertedExpressions[std::make_pair(S, InsertPt)] = V; - - restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -1481,24 +1663,6 @@ void SCEVExpander::rememberInstruction(Value *I) { InsertedPostIncValues.insert(I); else InsertedValues.insert(I); - - // If we just claimed an existing instruction and that instruction had - // been the insert point, adjust the insert point forward so that - // subsequently inserted code will be dominated. - if (Builder.GetInsertPoint() == I) { - BasicBlock::iterator It = cast(I); - do { ++It; } while (isInsertedInstruction(It) || - isa(It)); - Builder.SetInsertPoint(Builder.GetInsertBlock(), It); - } -} - -void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { - // If we acquired more instructions since the old insert point was saved, - // advance past them. - while (isInsertedInstruction(I) || isa(I)) ++I; - - Builder.SetInsertPoint(BB, I); } /// getOrInsertCanonicalInductionVariable - This method returns the @@ -1516,60 +1680,13 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); // Emit code for it. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); - PHINode *V = cast(expandCodeFor(H, 0, L->getHeader()->begin())); - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); + BuilderType::InsertPointGuard Guard(Builder); + PHINode *V = + cast(expandCodeFor(H, nullptr, &L->getHeader()->front())); return V; } -/// hoistStep - Attempt to hoist an IV increment above a potential use. -/// -/// To successfully hoist, two criteria must be met: -/// - IncV operands dominate InsertPos and -/// - InsertPos dominates IncV -/// -/// Meeting the second condition means that we don't need to check all of IncV's -/// existing uses (it's moving up in the domtree). -/// -/// This does not yet recursively hoist the operands, although that would -/// not be difficult. -/// -/// This does not require a SCEVExpander instance and could be replaced by a -/// general code-insertion helper. -bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos, - const DominatorTree *DT) { - if (DT->dominates(IncV, InsertPos)) - return true; - - if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) - return false; - - if (IncV->mayHaveSideEffects()) - return false; - - // Attempt to hoist IncV - for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); - OI != OE; ++OI) { - Instruction *OInst = dyn_cast(OI); - if (OInst && (OInst == InsertPos || !DT->dominates(OInst, InsertPos))) - return false; - } - IncV->moveBefore(InsertPos); - return true; -} - -/// Sort values by integer width for replaceCongruentIVs. -static bool width_descending(Value *lhs, Value *rhs) { - // Put pointers at the back and make sure pointer < pointer = false. - if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy()) - return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy(); - return rhs->getType()->getPrimitiveSizeInBits() - < lhs->getType()->getPrimitiveSizeInBits(); -} - /// replaceCongruentIVs - Check for congruent phis in this loop header and /// replace them with their most canonical representative. Return the number of /// phis eliminated. @@ -1578,23 +1695,53 @@ static bool width_descending(Value *lhs, Value *rhs) { /// the same context that SCEVExpander is used. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl &DeadInsts, - const TargetLowering *TLI) { + const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. SmallVector Phis; - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *Phi = dyn_cast(I); ++I) { - Phis.push_back(Phi); + for (auto &I : *L->getHeader()) { + if (auto *PN = dyn_cast(&I)) + Phis.push_back(PN); + else + break; } - if (TLI) - std::sort(Phis.begin(), Phis.end(), width_descending); + + if (TTI) + std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) { + // Put pointers at the back and make sure pointer < pointer = false. + if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) + return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy(); + return RHS->getType()->getPrimitiveSizeInBits() < + LHS->getType()->getPrimitiveSizeInBits(); + }); unsigned NumElim = 0; DenseMap ExprToIVMap; - // Process phis from wide to narrow. Mapping wide phis to the their truncation + // Process phis from wide to narrow. Map wide phis to their truncation // so narrow phis can reuse them. - for (SmallVectorImpl::const_iterator PIter = Phis.begin(), - PEnd = Phis.end(); PIter != PEnd; ++PIter) { - PHINode *Phi = *PIter; + for (PHINode *Phi : Phis) { + auto SimplifyPHINode = [&](PHINode *PN) -> Value * { + if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC)) + return V; + if (!SE.isSCEVable(PN->getType())) + return nullptr; + auto *Const = dyn_cast(SE.getSCEV(PN)); + if (!Const) + return nullptr; + return Const->getValue(); + }; + + // Fold constant phis. They may be congruent to other constant phis and + // would confuse the logic below that expects proper IVs. + if (Value *V = SimplifyPHINode(Phi)) { + if (V->getType() != Phi->getType()) + continue; + Phi->replaceAllUsesWith(V); + DeadInsts.emplace_back(Phi); + ++NumElim; + DEBUG_WITH_TYPE(DebugType, dbgs() + << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); + continue; + } if (!SE.isSCEVable(Phi->getType())) continue; @@ -1602,8 +1749,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; if (!OrigPhiRef) { OrigPhiRef = Phi; - if (Phi->getType()->isIntegerTy() && TLI - && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + if (Phi->getType()->isIntegerTy() && TTI + && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { // This phi can be freely truncated to the narrowest phi type. Map the // truncated expression to it so it will be reused for narrow types. const SCEV *TruncExpr = @@ -1625,10 +1772,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, cast(Phi->getIncomingValueForBlock(LatchBlock)); // If this phi has the same width but is more canonical, replace the - // original with it. + // original with it. As part of the "more canonical" determination, + // respect a prior decision to use an IV chain. if (OrigPhiRef->getType() == Phi->getType() - && !isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L) - && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) { + && !(ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) + && (ChainedPhis.count(Phi) + || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { std::swap(OrigPhiRef, Phi); std::swap(OrigInc, IsomorphicInc); } @@ -1642,19 +1792,26 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, IsomorphicInc->getType()); if (OrigInc != IsomorphicInc && TruncExpr == SE.getSCEV(IsomorphicInc) - && hoistStep(OrigInc, IsomorphicInc, DT)) { + && ((isa(OrigInc) && isa(IsomorphicInc)) + || hoistIVInc(OrigInc, IsomorphicInc))) { DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv.inc: " << *IsomorphicInc << '\n'); Value *NewInc = OrigInc; if (OrigInc->getType() != IsomorphicInc->getType()) { - IRBuilder<> Builder(OrigInc->getNextNode()); + Instruction *IP = nullptr; + if (PHINode *PN = dyn_cast(OrigInc)) + IP = &*PN->getParent()->getFirstInsertionPt(); + else + IP = OrigInc->getNextNode(); + + IRBuilder<> Builder(IP); Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); NewInc = Builder. CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName); } IsomorphicInc->replaceAllUsesWith(NewInc); - DeadInsts.push_back(IsomorphicInc); + DeadInsts.emplace_back(IsomorphicInc); } } DEBUG_WITH_TYPE(DebugType, dbgs() @@ -1662,12 +1819,215 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, ++NumElim; Value *NewIV = OrigPhiRef; if (OrigPhiRef->getType() != Phi->getType()) { - IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt()); + IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt()); Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); } Phi->replaceAllUsesWith(NewIV); - DeadInsts.push_back(Phi); + DeadInsts.emplace_back(Phi); } return NumElim; } + +Value *SCEVExpander::findExistingExpansion(const SCEV *S, + const Instruction *At, Loop *L) { + using namespace llvm::PatternMatch; + + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Look for suitable value in simple conditions at the loop exits. + for (BasicBlock *BB : ExitingBlocks) { + ICmpInst::Predicate Pred; + Instruction *LHS, *RHS; + BasicBlock *TrueBB, *FalseBB; + + if (!match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), + TrueBB, FalseBB))) + continue; + + if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) + return LHS; + + if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) + return RHS; + } + + // There is potential to make this significantly smarter, but this simple + // heuristic already gets some interesting cases. + + // Can not find suitable value. + return nullptr; +} + +bool SCEVExpander::isHighCostExpansionHelper( + const SCEV *S, Loop *L, const Instruction *At, + SmallPtrSetImpl &Processed) { + + // If we can find an existing value for this scev avaliable at the point "At" + // then consider the expression cheap. + if (At && findExistingExpansion(S, At, L) != nullptr) + return false; + + // Zero/One operand expressions + switch (S->getSCEVType()) { + case scUnknown: + case scConstant: + return false; + case scTruncate: + return isHighCostExpansionHelper(cast(S)->getOperand(), + L, At, Processed); + case scZeroExtend: + return isHighCostExpansionHelper(cast(S)->getOperand(), + L, At, Processed); + case scSignExtend: + return isHighCostExpansionHelper(cast(S)->getOperand(), + L, At, Processed); + } + + if (!Processed.insert(S).second) + return false; + + if (auto *UDivExpr = dyn_cast(S)) { + // If the divisor is a power of two and the SCEV type fits in a native + // integer, consider the division cheap irrespective of whether it occurs in + // the user code since it can be lowered into a right shift. + if (auto *SC = dyn_cast(UDivExpr->getRHS())) + if (SC->getValue()->getValue().isPowerOf2()) { + const DataLayout &DL = + L->getHeader()->getParent()->getParent()->getDataLayout(); + unsigned Width = cast(UDivExpr->getType())->getBitWidth(); + return DL.isIllegalInteger(Width); + } + + // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or + // HowManyLessThans produced to compute a precise expression, rather than a + // UDiv from the user's code. If we can't find a UDiv in the code with some + // simple searching, assume the former consider UDivExpr expensive to + // compute. + BasicBlock *ExitingBB = L->getExitingBlock(); + if (!ExitingBB) + return true; + + // At the beginning of this function we already tried to find existing value + // for plain 'S'. Now try to lookup 'S + 1' since it is common pattern + // involving division. This is just a simple search heuristic. + if (!At) + At = &ExitingBB->back(); + if (!findExistingExpansion( + SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L)) + return true; + } + + // HowManyLessThans uses a Max expression whenever the loop is not guarded by + // the exit condition. + if (isa(S) || isa(S)) + return true; + + // Recurse past nary expressions, which commonly occur in the + // BackedgeTakenCount. They may already exist in program code, and if not, + // they are not too expensive rematerialize. + if (const SCEVNAryExpr *NAry = dyn_cast(S)) { + for (auto *Op : NAry->operands()) + if (isHighCostExpansionHelper(Op, L, At, Processed)) + return true; + } + + // If we haven't recognized an expensive SCEV pattern, assume it's an + // expression produced by program code. + return false; +} + +Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, + Instruction *IP) { + assert(IP); + switch (Pred->getKind()) { + case SCEVPredicate::P_Union: + return expandUnionPredicate(cast(Pred), IP); + case SCEVPredicate::P_Equal: + return expandEqualPredicate(cast(Pred), IP); + } + llvm_unreachable("Unknown SCEV predicate type"); +} + +Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred, + Instruction *IP) { + Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP); + Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP); + + Builder.SetInsertPoint(IP); + auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check"); + return I; +} + +Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, + Instruction *IP) { + auto *BoolType = IntegerType::get(IP->getContext(), 1); + Value *Check = ConstantInt::getNullValue(BoolType); + + // Loop over all checks in this set. + for (auto Pred : Union->getPredicates()) { + auto *NextCheck = expandCodeForPredicate(Pred, IP); + Builder.SetInsertPoint(IP); + Check = Builder.CreateOr(Check, NextCheck); + } + + return Check; +} + +namespace { +// Search for a SCEV subexpression that is not safe to expand. Any expression +// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely +// UDiv expressions. We don't know if the UDiv is derived from an IR divide +// instruction, but the important thing is that we prove the denominator is +// nonzero before expansion. +// +// IVUsers already checks that IV-derived expressions are safe. So this check is +// only needed when the expression includes some subexpression that is not IV +// derived. +// +// Currently, we only allow division by a nonzero constant here. If this is +// inadequate, we could easily allow division by SCEVUnknown by using +// ValueTracking to check isKnownNonZero(). +// +// We cannot generally expand recurrences unless the step dominates the loop +// header. The expander handles the special case of affine recurrences by +// scaling the recurrence outside the loop, but this technique isn't generally +// applicable. Expanding a nested recurrence outside a loop requires computing +// binomial coefficients. This could be done, but the recurrence has to be in a +// perfectly reduced form, which can't be guaranteed. +struct SCEVFindUnsafe { + ScalarEvolution &SE; + bool IsUnsafe; + + SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {} + + bool follow(const SCEV *S) { + if (const SCEVUDivExpr *D = dyn_cast(S)) { + const SCEVConstant *SC = dyn_cast(D->getRHS()); + if (!SC || SC->getValue()->isZero()) { + IsUnsafe = true; + return false; + } + } + if (const SCEVAddRecExpr *AR = dyn_cast(S)) { + const SCEV *Step = AR->getStepRecurrence(SE); + if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { + IsUnsafe = true; + return false; + } + } + return true; + } + bool isDone() const { return IsUnsafe; } +}; +} + +namespace llvm { +bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { + SCEVFindUnsafe Search(SE); + visitAll(S, Search); + return !Search.IsUnsafe; +} +}