diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index e7fe672ad31..bee36857a2c 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -14,13 +14,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/STLExtras.h"
 
 using namespace llvm;
 
@@ -37,17 +40,15 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
   // We use this precondition to produce a cast that will dominate all its
   // uses. In particular, this is crucial for the case where the builder's
   // insertion point *is* the point where we were asked to put the cast.
-  // Since we don't know the the builder's insertion point is actually
+  // Since we don't know the builder's insertion point is actually
   // where the uses will be added (only that it dominates it), we are
   // not allowed to move it.
   BasicBlock::iterator BIP = Builder.GetInsertPoint();
 
-  Instruction *Ret = NULL;
+  Instruction *Ret = nullptr;
 
   // Check to see if there is already a cast!
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
-       UI != E; ++UI) {
-    User *U = *UI;
+  for (User *U : V->users())
     if (U->getType() == Ty)
       if (CastInst *CI = dyn_cast<CastInst>(U))
         if (CI->getOpcode() == Op) {
@@ -67,7 +68,6 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
           Ret = CI;
           break;
         }
-  }
 
   // Create a new cast.
   if (!Ret)
@@ -176,8 +176,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
   }
 
   // Save the original insertion point so we can restore it when we're done.
-  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
-  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
+  BuilderType::InsertPointGuard Guard(Builder);
 
   // Move the insertion point out of as many loops as we can.
   while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -191,13 +191,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
 
   // If we haven't found this binop, insert it.
   Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
-  BO->setDebugLoc(SaveInsertPt->getDebugLoc());
+  BO->setDebugLoc(Loc);
   rememberInstruction(BO);
 
-  // Restore the original insert point.
-  if (SaveInsertBB)
-    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
   return BO;
 }
 
@@ -212,7 +208,7 @@ static bool FactorOutConstant(const SCEV *&S,
                               const SCEV *&Remainder,
                               const SCEV *Factor,
                               ScalarEvolution &SE,
-                              const TargetData *TD) {
+                              const DataLayout *DL) {
   // Everything is divisible by one.
   if (Factor->isOne())
     return true;
@@ -252,8 +248,8 @@ static bool FactorOutConstant(const SCEV *&S,
   // In a Mul, check if there is a constant operand which is a multiple
   // of the given factor.
   if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
-    if (TD) {
-      // With TargetData, the size is known. Check if there is a constant
+    if (DL) {
+      // With DataLayout, the size is known. Check if there is a constant
       // operand which is a multiple of the given factor. If so, we can
       // factor it.
       const SCEVConstant *FC = cast<SCEVConstant>(Factor);
@@ -267,12 +263,12 @@ static bool FactorOutConstant(const SCEV *&S,
           return true;
         }
     } else {
-      // Without TargetData, check if Factor can be factored out of any of the
+      // Without DataLayout, check if Factor can be factored out of any of the
       // Mul's operands. If so, we can just remove it.
       for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
         const SCEV *SOp = M->getOperand(i);
         const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
-        if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
+        if (FactorOutConstant(SOp, Remainder, Factor, SE, DL) &&
             Remainder->isZero()) {
           SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
           NewMulOps[i] = SOp;
@@ -287,15 +283,15 @@ static bool FactorOutConstant(const SCEV *&S,
   if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
     const SCEV *Step = A->getStepRecurrence(SE);
     const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
-    if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
+    if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
       return false;
     if (!StepRem->isZero())
       return false;
     const SCEV *Start = A->getStart();
-    if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
+    if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
       return false;
-    // FIXME: can use A->getNoWrapFlags(FlagNW)
-    S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap);
+    S = SE.getAddRecExpr(Start, Step, A->getLoop(),
+                         A->getNoWrapFlags(SCEV::FlagNW));
     return true;
   }
 
@@ -348,8 +344,7 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
         AddRecs.push_back(SE.getAddRecExpr(Zero,
                                            A->getStepRecurrence(SE),
                                            A->getLoop(),
-                                           // FIXME: A->getNoWrapFlags(FlagNW)
-                                           SCEV::FlagAnyWrap));
+                                           A->getNoWrapFlags(SCEV::FlagNW)));
       if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
         Ops[i] = Zero;
         Ops.append(Add->op_begin(), Add->op_end());
@@ -407,6 +402,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   // without the other.
   SplitAddRecs(Ops, Ty, SE);
 
+  Type *IntPtrTy = SE.DL
+                 ? SE.DL->getIntPtrType(PTy)
+                 : Type::getInt64Ty(PTy->getContext());
+
   // Descend down the pointer's type and attempt to convert the other
   // operands into GEP indices, at each level. The first index in a GEP
   // indexes into the array implied by the pointer operand; the rest of
@@ -417,13 +416,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     // array indexing.
     SmallVector<const SCEV *, 8> ScaledOps;
     if (ElTy->isSized()) {
-      const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
+      const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
       if (!ElSize->isZero()) {
         SmallVector<const SCEV *, 8> NewOps;
         for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
           const SCEV *Op = Ops[i];
           const SCEV *Remainder = SE.getConstant(Ty, 0);
-          if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
+          if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.DL)) {
             // Op now has ElSize factored out.
             ScaledOps.push_back(Op);
             if (!Remainder->isZero())
@@ -457,13 +456,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
         bool FoundFieldNo = false;
         // An empty struct has no fields.
         if (STy->getNumElements() == 0) break;
-        if (SE.TD) {
-          // With TargetData, field offsets are known. See if a constant offset
+        if (SE.DL) {
+          // With DataLayout, field offsets are known. See if a constant offset
           // falls within any of the struct fields.
           if (Ops.empty()) break;
           if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
             if (SE.getTypeSizeInBits(C->getType()) <= 64) {
-              const StructLayout &SL = *SE.TD->getStructLayout(STy);
+              const StructLayout &SL = *SE.DL->getStructLayout(STy);
               uint64_t FullOffset = C->getValue()->getZExtValue();
               if (FullOffset < SL.getSizeInBytes()) {
                 unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
@@ -477,7 +476,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
             }
           }
         } else {
-          // Without TargetData, just check for an offsetof expression of the
+          // Without DataLayout, just check for an offsetof expression of the
           // appropriate struct type.
           for (unsigned i = 0, e = Ops.size(); i != e; ++i)
             if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
@@ -549,8 +548,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     }
 
     // Save the original insertion point so we can restore it when we're done.
-    BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
-    BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+    BuilderType::InsertPointGuard Guard(Builder);
 
     // Move the insertion point out of as many loops as we can.
     while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -566,16 +564,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
     rememberInstruction(GEP);
 
-    // Restore the original insert point.
-    if (SaveInsertBB)
-      restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
     return GEP;
   }
 
   // Save the original insertion point so we can restore it when we're done.
-  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
-  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+  BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
 
   // Move the insertion point out of as many loops as we can.
   while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -611,8 +604,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   rememberInstruction(GEP);
 
   // Restore the original insert point.
-  if (SaveInsertBB)
-    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+  Builder.restoreIP(SaveInsertPt);
 
   return expand(SE.getAddExpr(Ops));
 }
@@ -636,21 +628,21 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
 const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
   // Test whether we've already computed the most relevant loop for this SCEV.
   std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
-    RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+    RelevantLoops.insert(std::make_pair(S, nullptr));
   if (!Pair.second)
     return Pair.first->second;
 
   if (isa<SCEVConstant>(S))
     // A constant has no relevant loops.
-    return 0;
+    return nullptr;
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
       return Pair.first->second = SE.LI->getLoopFor(I->getParent());
     // A non-instruction has no relevant loops.
-    return 0;
+    return nullptr;
   }
   if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
-    const Loop *L = 0;
+    const Loop *L = nullptr;
     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
       L = AR->getLoop();
     for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
@@ -725,7 +717,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
 
   // Emit instructions to add all the operands. Hoist as much as possible
   // out of loops, and form meaningful getelementptrs where possible.
-  Value *Sum = 0;
+  Value *Sum = nullptr;
   for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
        I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
     const Loop *CurLoop = I->first;
@@ -793,7 +785,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
 
   // Emit instructions to mul all the operands. Hoist as much as possible
   // out of loops.
-  Value *Prod = 0;
+  Value *Prod = nullptr;
   for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
        I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
     const SCEV *Op = I->second;
@@ -846,8 +838,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
                     SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
                                      A->getStepRecurrence(SE),
                                      A->getLoop(),
-                                     // FIXME: A->getNoWrapFlags(FlagNW)
-                                     SCEV::FlagAnyWrap));
+                                     A->getNoWrapFlags(SCEV::FlagNW)));
   }
   if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
     Base = A->getOperand(A->getNumOperands()-1);
@@ -902,18 +893,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
                                            Instruction *InsertPos,
                                            bool allowScale) {
   if (IncV == InsertPos)
-    return NULL;
+    return nullptr;
 
   switch (IncV->getOpcode()) {
   default:
-    return NULL;
+    return nullptr;
   // Check for a simple Add/Sub or GEP of a loop invariant step.
   case Instruction::Add:
   case Instruction::Sub: {
     Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
     if (!OInst || SE.DT->dominates(OInst, InsertPos))
       return dyn_cast<Instruction>(IncV->getOperand(0));
-    return NULL;
+    return nullptr;
   }
   case Instruction::BitCast:
     return dyn_cast<Instruction>(IncV->getOperand(0));
@@ -924,7 +915,7 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
         continue;
       if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
         if (!SE.DT->dominates(OInst, InsertPos))
-          return NULL;
+          return nullptr;
       }
       if (allowScale) {
         // allow any kind of GEP as long as it can be hoisted.
@@ -935,11 +926,11 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
       // have 2 operands. i1* is used by the expander to represent an
       // address-size element.
       if (IncV->getNumOperands() != 2)
-        return NULL;
+        return nullptr;
       unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
       if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) &&
           IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
-        return NULL;
+        return nullptr;
       break;
     }
     return dyn_cast<Instruction>(IncV->getOperand(0));
@@ -1024,6 +1015,54 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
   return IncV;
 }
 
+/// \brief Hoist the addrec instruction chain rooted in the loop phi above the
+/// position. This routine assumes that this is possible (has been checked).
+static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
+                           Instruction *Pos, PHINode *LoopPhi) {
+  do {
+    if (DT->dominates(InstToHoist, Pos))
+      break;
+    // Make sure the increment is where we want it. But don't move it
+    // down past a potential existing post-inc user.
+    InstToHoist->moveBefore(Pos);
+    Pos = InstToHoist;
+    InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
+  } while (InstToHoist != LoopPhi);
+}
+
+/// \brief Check whether we can cheaply express the requested SCEV in terms of
+/// the available PHI SCEV by truncation and/or inversion of the step.
+static bool canBeCheaplyTransformed(ScalarEvolution &SE,
+                                    const SCEVAddRecExpr *Phi,
+                                    const SCEVAddRecExpr *Requested,
+                                    bool &InvertStep) {
+  Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
+  Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
+
+  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
+    return false;
+
+  // Try to truncate it if necessary.
+  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
+  if (!Phi)
+    return false;
+
+  // Check whether truncation will help.
+  if (Phi == Requested) {
+    InvertStep = false;
+    return true;
+  }
+
+  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
+  if (SE.getAddExpr(Requested->getStart(),
+                    SE.getNegativeSCEV(Requested)) == Phi) {
+    InvertStep = true;
+    return true;
+  }
+
+  return false;
+}
+
 /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
 /// the base addrec, which is the addrec without any non-loop-dominating
 /// values, and return the PHI.
@@ -1031,55 +1070,92 @@ PHINode *
 SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                         const Loop *L,
                                         Type *ExpandTy,
-                                        Type *IntTy) {
+                                        Type *IntTy,
+                                        Type *&TruncTy,
+                                        bool &InvertStep) {
   assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
 
   // Reuse a previously-inserted PHI, if present.
   BasicBlock *LatchBlock = L->getLoopLatch();
   if (LatchBlock) {
+    PHINode *AddRecPhiMatch = nullptr;
+    Instruction *IncV = nullptr;
+    TruncTy = nullptr;
+    InvertStep = false;
+
+    // Only try partially matching scevs that need truncation and/or
+    // step-inversion if we know this loop is outside the current loop.
+    bool TryNonMatchingSCEV = IVIncInsertLoop &&
+      SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
+
     for (BasicBlock::iterator I = L->getHeader()->begin();
          PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-      if (!SE.isSCEVable(PN->getType()) ||
-          (SE.getEffectiveSCEVType(PN->getType()) !=
-           SE.getEffectiveSCEVType(Normalized->getType())) ||
-          SE.getSCEV(PN) != Normalized)
+      if (!SE.isSCEVable(PN->getType()))
         continue;
 
-      Instruction *IncV =
-        cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
+      if (!PhiSCEV)
+        continue;
+
+      bool IsMatchingSCEV = PhiSCEV == Normalized;
+      // We only handle truncation and inversion of phi recurrences for the
+      // expanded expression if the expanded expression's loop dominates the
+      // loop we insert to. Check now, so we can bail out early.
+      if (!IsMatchingSCEV && !TryNonMatchingSCEV)
+          continue;
 
+      Instruction *TempIncV =
+          cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+
+      // Check whether we can reuse this PHI node.
       if (LSRMode) {
-        if (!isExpandedAddRecExprPHI(PN, IncV, L))
+        if (!isExpandedAddRecExprPHI(PN, TempIncV, L))
          continue;
-        if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos))
+        if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
           continue;
-      }
-      else {
-        if (!isNormalAddRecExprPHI(PN, IncV, L))
+      } else {
+        if (!isNormalAddRecExprPHI(PN, TempIncV, L))
           continue;
-        if (L == IVIncInsertLoop)
-          do {
-            if (SE.DT->dominates(IncV, IVIncInsertPos))
-              break;
-            // Make sure the increment is where we want it. But don't move it
-            // down past a potential existing post-inc user.
-            IncV->moveBefore(IVIncInsertPos);
-            IVIncInsertPos = IncV;
-            IncV = cast<Instruction>(IncV->getOperand(0));
-          } while (IncV != PN);
       }
+
+      // Stop if we have found an exact match SCEV.
+      if (IsMatchingSCEV) {
+        IncV = TempIncV;
+        TruncTy = nullptr;
+        InvertStep = false;
+        AddRecPhiMatch = PN;
+        break;
+      }
+
+      // Try whether the phi can be translated into the requested form
+      // (truncated and/or offset by a constant).
+      if ((!TruncTy || InvertStep) &&
+          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
+        // Record the phi node. But don't stop, we might find an exact match
+        // later.
+        AddRecPhiMatch = PN;
+        IncV = TempIncV;
+        TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
+      }
+    }
+
+    if (AddRecPhiMatch) {
+      // Potentially, move the increment. We have made sure in
+      // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
+      if (L == IVIncInsertLoop)
+        hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
+
       // Ok, the add recurrence looks usable.
       // Remember this PHI, even in post-inc mode.
-      InsertedValues.insert(PN);
+      InsertedValues.insert(AddRecPhiMatch);
       // Remember the increment.
       rememberInstruction(IncV);
-      return PN;
+      return AddRecPhiMatch;
     }
   }
 
   // Save the original insertion point so we can restore it when we're done.
-  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
-  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+  BuilderType::InsertPointGuard Guard(Builder);
 
   // Another AddRec may need to be recursively expanded below. For example, if
   // this AddRec is quadratic, the StepV may itself be an AddRec in this
@@ -1137,14 +1213,15 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                      IVIncInsertPos : Pred->getTerminator();
     Builder.SetInsertPoint(InsertPos);
     Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
-
+    if (isa<OverflowingBinaryOperator>(IncV)) {
+      if (Normalized->getNoWrapFlags(SCEV::FlagNUW))
+        cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
+      if (Normalized->getNoWrapFlags(SCEV::FlagNSW))
+        cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
+    }
     PN->addIncoming(IncV, Pred);
   }
 
-  // Restore the original insert point.
-  if (SaveInsertBB)
-    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
   // After expanding subexpressions, restore the PostIncLoops set so the caller
   // can ensure that IVIncrement dominates the current uses.
   PostIncLoops = SavedPostIncLoops;
@@ -1167,41 +1244,43 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
     PostIncLoopSet Loops;
     Loops.insert(L);
     Normalized =
-      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
-                                                  Loops, SE, *SE.DT));
+      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, nullptr,
+                                                  nullptr, Loops, SE, *SE.DT));
   }
 
   // Strip off any non-loop-dominating component from the addrec start.
   const SCEV *Start = Normalized->getStart();
-  const SCEV *PostLoopOffset = 0;
+  const SCEV *PostLoopOffset = nullptr;
   if (!SE.properlyDominates(Start, L->getHeader())) {
     PostLoopOffset = Start;
     Start = SE.getConstant(Normalized->getType(), 0);
     Normalized = cast<SCEVAddRecExpr>(
       SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
                        Normalized->getLoop(),
-                       // FIXME: Normalized->getNoWrapFlags(FlagNW)
-                       SCEV::FlagAnyWrap));
+                       Normalized->getNoWrapFlags(SCEV::FlagNW)));
   }
 
   // Strip off any non-loop-dominating component from the addrec step.
   const SCEV *Step = Normalized->getStepRecurrence(SE);
-  const SCEV *PostLoopScale = 0;
+  const SCEV *PostLoopScale = nullptr;
   if (!SE.dominates(Step, L->getHeader())) {
     PostLoopScale = Step;
     Step = SE.getConstant(Normalized->getType(), 1);
     Normalized =
-      cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
-                                            Normalized->getLoop(),
-                                            // FIXME: Normalized
-                                            // ->getNoWrapFlags(FlagNW)
-                                            SCEV::FlagAnyWrap));
+      cast<SCEVAddRecExpr>(SE.getAddRecExpr(
+          Start, Step, Normalized->getLoop(),
+          Normalized->getNoWrapFlags(SCEV::FlagNW)));
   }
 
   // Expand the core addrec. If we need post-loop scaling, force it to
   // expand to an integer type to avoid the need for additional casting.
   Type *ExpandTy = PostLoopScale ? IntTy : STy;
-  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+  // In some cases, we decide to reuse an existing phi node but need to truncate
+  // it and/or invert the step.
+  Type *TruncTy = nullptr;
+  bool InvertStep = false;
+  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
+                                          TruncTy, InvertStep);
 
   // Accommodate post-inc mode, if necessary.
   Value *Result;
@@ -1232,19 +1311,39 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
                       !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
       if (useSubtract)
         Step = SE.getNegativeSCEV(Step);
-      // Expand the step somewhere that dominates the loop header.
-      BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
-      BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
-      Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
-      // Restore the insertion point to the place where the caller has
-      // determined dominates all uses.
-      restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+      Value *StepV;
+      {
+        // Expand the step somewhere that dominates the loop header.
+        BuilderType::InsertPointGuard Guard(Builder);
+        StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+      }
       Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
     }
   }
 
+  // We have decided to reuse an induction variable of a dominating loop. Apply
+  // truncation and/or inversion of the step.
+  if (TruncTy) {
+    Type *ResTy = Result->getType();
+    // Normalize the result type.
+    if (ResTy != SE.getEffectiveSCEVType(ResTy))
+      Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
+    // Truncate the result.
+    if (TruncTy != Result->getType()) {
+      Result = Builder.CreateTrunc(Result, TruncTy);
+      rememberInstruction(Result);
+    }
+    // Invert the result.
+    if (InvertStep) {
+      Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
+                                 Result);
+      rememberInstruction(Result);
+    }
+  }
+
   // Re-apply any non-loop-dominating scale.
   if (PostLoopScale) {
+    assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
     Result = InsertNoopCastOfTo(Result, IntTy);
     Result = Builder.CreateMul(Result,
                                expandCodeFor(PostLoopScale, IntTy));
@@ -1274,7 +1373,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   const Loop *L = S->getLoop();
 
   // First check for an existing canonical IV in a suitable type.
-  PHINode *CanonicalIV = 0;
+  PHINode *CanonicalIV = nullptr;
   if (PHINode *PN = L->getCanonicalInductionVariable())
     if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
       CanonicalIV = PN;
@@ -1288,18 +1387,15 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
       NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
     Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
-                                       // FIXME: S->getNoWrapFlags(FlagNW)
-                                       SCEV::FlagAnyWrap));
-    BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
-    BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+                                       S->getNoWrapFlags(SCEV::FlagNW)));
     BasicBlock::iterator NewInsertPt =
-      llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+        std::next(BasicBlock::iterator(cast<Instruction>(V)));
+    BuilderType::InsertPointGuard Guard(Builder);
     while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
            isa<LandingPadInst>(NewInsertPt))
       ++NewInsertPt;
-    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
+    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
                       NewInsertPt);
-    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
     return V;
   }
 
@@ -1307,8 +1403,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
   if (!S->getStart()->isZero()) {
     SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
     NewOps[0] = SE.getConstant(Ty, 0);
-    // FIXME: can use S->getNoWrapFlags()
-    const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap);
+    const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
+                                        S->getNoWrapFlags(SCEV::FlagNW));
 
     // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
     // comments on expandAddToGEP for details.
@@ -1343,9 +1439,17 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
                                   Header->begin());
     rememberInstruction(CanonicalIV);
 
+    SmallSet<BasicBlock *, 4> PredSeen;
     Constant *One = ConstantInt::get(Ty, 1);
     for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
       BasicBlock *HP = *HPI;
+      if (!PredSeen.insert(HP).second) {
+        // There must be an incoming value for each predecessor, even the
+        // duplicates!
+        CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
+        continue;
+      }
+
       if (L->contains(HP)) {
         // Insert a unit add instruction right before the terminator
         // corresponding to the back-edge.
@@ -1517,20 +1621,18 @@ Value *SCEVExpander::expand(const SCEV *S) {
     while (InsertPt != Builder.GetInsertPoint()
            && (isInsertedInstruction(InsertPt)
                || isa<DbgInfoIntrinsic>(InsertPt))) {
-      InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
+      InsertPt = std::next(BasicBlock::iterator(InsertPt));
     }
     break;
   }
 
   // Check to see if we already expanded this here.
-  std::map<std::pair<const SCEV *, Instruction *>,
-           AssertingVH<Value> >::iterator I =
-    InsertedExpressions.find(std::make_pair(S, InsertPt));
+  std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator
+      I = InsertedExpressions.find(std::make_pair(S, InsertPt));
   if (I != InsertedExpressions.end())
     return I->second;
 
-  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
-  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+  BuilderType::InsertPointGuard Guard(Builder);
   Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
 
   // Expand the expression into instructions.
@@ -1540,11 +1642,9 @@ Value *SCEVExpander::expand(const SCEV *S) {
   //
   // This is independent of PostIncLoops. The mapped value simply materializes
   // the expression at this insertion point. If the mapped value happened to be
-  // a postinc expansion, it could be reused by a non postinc user, but only if
+  // a postinc expansion, it could be reused by a non-postinc user, but only if
   // its insertion point was already at the head of the loop.
   InsertedExpressions[std::make_pair(S, InsertPt)] = V;
-
-  restoreInsertPoint(SaveInsertBB, SaveInsertPt);
   return V;
 }
 
@@ -1555,10 +1655,6 @@ void SCEVExpander::rememberInstruction(Value *I) {
     InsertedValues.insert(I);
 }
 
-void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
-  Builder.SetInsertPoint(BB, I);
-}
-
 /// getOrInsertCanonicalInductionVariable - This method returns the
 /// canonical induction variable of the specified type for the specified
 /// loop (inserting one if there is none). A canonical induction variable
@@ -1574,24 +1670,13 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
                                    SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
 
   // Emit code for it.
-  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
-  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
-  PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
-  if (SaveInsertBB)
-    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+  BuilderType::InsertPointGuard Guard(Builder);
+  PHINode *V = cast<PHINode>(expandCodeFor(H, nullptr,
+                                           L->getHeader()->begin()));
 
   return V;
 }
 
-/// Sort values by integer width for replaceCongruentIVs.
-static bool width_descending(Value *lhs, Value *rhs) {
-  // Put pointers at the back and make sure pointer < pointer = false.
-  if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy())
-    return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy();
-  return rhs->getType()->getPrimitiveSizeInBits()
-       < lhs->getType()->getPrimitiveSizeInBits();
-}
-
 /// replaceCongruentIVs - Check for congruent phis in this loop header and
 /// replace them with their most canonical representative. Return the number of
 /// phis eliminated.
@@ -1600,15 +1685,21 @@ static bool width_descending(Value *lhs, Value *rhs) {
 /// the same context that SCEVExpander is used.
 unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
                                            SmallVectorImpl<WeakVH> &DeadInsts,
-                                           const TargetLowering *TLI) {
+                                           const TargetTransformInfo *TTI) {
   // Find integer phis in order of increasing width.
   SmallVector<PHINode*, 8> Phis;
   for (BasicBlock::iterator I = L->getHeader()->begin();
       PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
    Phis.push_back(Phi);
  }
-  if (TLI)
-    std::sort(Phis.begin(), Phis.end(), width_descending);
+  if (TTI)
+    std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
+      // Put pointers at the back and make sure pointer < pointer = false.
+      if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+        return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
+      return RHS->getType()->getPrimitiveSizeInBits() <
+             LHS->getType()->getPrimitiveSizeInBits();
+    });
 
   unsigned NumElim = 0;
   DenseMap<const SCEV *, PHINode *> ExprToIVMap;
@@ -1618,14 +1709,25 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
        PEnd = Phis.end(); PIter != PEnd; ++PIter) {
     PHINode *Phi = *PIter;
 
+    // Fold constant phis. They may be congruent to other constant phis and
+    // would confuse the logic below that expects proper IVs.
+    if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT, SE.AT)) {
+      Phi->replaceAllUsesWith(V);
+      DeadInsts.push_back(Phi);
+      ++NumElim;
+      DEBUG_WITH_TYPE(DebugType, dbgs()
+                      << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
+      continue;
+    }
+
     if (!SE.isSCEVable(Phi->getType()))
       continue;
 
     PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
     if (!OrigPhiRef) {
       OrigPhiRef = Phi;
-      if (Phi->getType()->isIntegerTy() && TLI
-          && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+      if (Phi->getType()->isIntegerTy() && TTI
+          && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
         // This phi can be freely truncated to the narrowest phi type. Map the
         // truncated expression to it so it will be reused for narrow types.
         const SCEV *TruncExpr =
@@ -1700,3 +1802,59 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
   }
   return NumElim;
 }
+
+namespace {
+// Search for a SCEV subexpression that is not safe to expand. Any expression
+// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
+// UDiv expressions. We don't know if the UDiv is derived from an IR divide
+// instruction, but the important thing is that we prove the denominator is
+// nonzero before expansion.
+//
+// IVUsers already checks that IV-derived expressions are safe. So this check
+// is only needed when the expression includes some subexpression that is not
+// IV derived.
+//
+// Currently, we only allow division by a nonzero constant here. If this is
+// inadequate, we could easily allow division by SCEVUnknown by using
+// ValueTracking to check isKnownNonZero().
+//
+// We cannot generally expand recurrences unless the step dominates the loop
+// header. The expander handles the special case of affine recurrences by
+// scaling the recurrence outside the loop, but this technique isn't generally
+// applicable. Expanding a nested recurrence outside a loop requires computing
+// binomial coefficients. This could be done, but the recurrence has to be in
+// a perfectly reduced form, which can't be guaranteed.
+struct SCEVFindUnsafe {
+  ScalarEvolution &SE;
+  bool IsUnsafe;
+
+  SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
+
+  bool follow(const SCEV *S) {
+    if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+      const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+      if (!SC || SC->getValue()->isZero()) {
+        IsUnsafe = true;
+        return false;
+      }
+    }
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+      const SCEV *Step = AR->getStepRecurrence(SE);
+      if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
+        IsUnsafe = true;
+        return false;
+      }
+    }
+    return true;
+  }
+  bool isDone() const { return IsUnsafe; }
+};
+}
+
+namespace llvm {
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
+  SCEVFindUnsafe Search(SE);
+  visitAll(S, Search);
+  return !Search.IsUnsafe;
+}
+}