X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FAnalysis%2FScalarEvolution.cpp;h=8c2ba6ae83a5e4da913c877ff52ee21191505e57;hb=ff18310274e872429cd06d679b1c8c8a14166328;hp=bcaefd0a4ff95b321e6635191132393855192a41;hpb=618c1dbd293d15ee19f61b1156ab8086ad28311a;p=oota-llvm.git diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index bcaefd0a4ff..8c2ba6ae83a 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -73,7 +73,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" @@ -105,6 +105,11 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, "derived loop"), cl::init(100)); +// FIXME: Enable this with XDEBUG when the test suite is clean. +static cl::opt +VerifySCEV("verify-scev", + cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); + INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", "Scalar Evolution Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) @@ -122,10 +127,12 @@ char ScalarEvolution::ID = 0; // Implementation of the SCEV class. // +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif void SCEV::print(raw_ostream &OS) const { switch (getSCEVType()) { @@ -259,11 +266,9 @@ Type *SCEV::getType() const { return cast(this)->getType(); case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return 0; - default: break; + default: + llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return 0; } bool SCEV::isZero() const { @@ -284,6 +289,20 @@ bool SCEV::isAllOnesValue() const { return false; } +/// isNonConstantNegative - Return true if the specified scev is negated, but +/// not a constant. +bool SCEV::isNonConstantNegative() const { + const SCEVMulExpr *Mul = dyn_cast(this); + if (!Mul) return false; + + // If there is a constant factor, it will be first. + const SCEVConstant *SC = dyn_cast(Mul->getOperand(0)); + if (!SC) return false; + + // Return true if the value is negative, this matches things like (-42 * V). + return SC->getValue()->getValue().isNegative(); +} + SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} @@ -597,11 +616,8 @@ namespace { } default: - break; + llvm_unreachable("Unknown SCEV kind!"); } - - llvm_unreachable("Unknown SCEV kind!"); - return 0; } }; } @@ -817,8 +833,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getTrunc(SC->getValue(), - getEffectiveSCEVType(Ty)))); + cast(ConstantExpr::getTrunc(SC->getValue(), Ty))); // trunc(trunc(x)) --> trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) @@ -870,13 +885,6 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } - // As a special case, fold trunc(undef) to undef. We don't want to - // know too much about SCEVUnknowns, but this special case is handy - // and harmless. - if (const SCEVUnknown *U = dyn_cast(Op)) - if (isa(U->getValue())) - return getSCEV(UndefValue::get(Ty)); - // The cast wasn't folded; create an explicit cast node. We can reuse // the existing insert position since if we get here, we won't have // made any changes which would invalidate it. @@ -897,8 +905,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getZExt(SC->getValue(), - getEffectiveSCEVType(Ty)))); + cast(ConstantExpr::getZExt(SC->getValue(), Ty))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) @@ -967,12 +974,15 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *Add = getAddExpr(Start, ZMul); + const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy); + const SCEV *WideStart = getZeroExtendExpr(Start, WideTy); + const SCEV *WideMaxBECount = + getZeroExtendExpr(CastedMaxBECount, WideTy); const SCEV *OperandExtendedAdd = - getAddExpr(getZeroExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy))); - if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NUW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. @@ -982,13 +992,11 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. - const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); - Add = getAddExpr(Start, SMul); OperandExtendedAdd = - getAddExpr(getZeroExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy))); - if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (ZAdd == OperandExtendedAdd) { // Cache knowledge of AR NW, which is propagated to this AddRec. // Negative step causes unsigned wrap, but it still can't self-wrap. const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); @@ -1155,8 +1163,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getSExt(SC->getValue(), - getEffectiveSCEVType(Ty)))); + cast(ConstantExpr::getSExt(SC->getValue(), Ty))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) @@ -1233,12 +1240,15 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); - const SCEV *Add = getAddExpr(Start, SMul); + const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy); + const SCEV *WideStart = getSignExtendExpr(Start, WideTy); + const SCEV *WideMaxBECount = + getZeroExtendExpr(CastedMaxBECount, WideTy); const SCEV *OperandExtendedAdd = - getAddExpr(getSignExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getSignExtendExpr(Step, WideTy))); - if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. @@ -1248,13 +1258,11 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. - const SCEV *UMul = getMulExpr(CastedMaxBECount, Step); - Add = getAddExpr(Start, UMul); OperandExtendedAdd = - getAddExpr(getSignExtendExpr(Start, WideTy), - getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getAddExpr(WideStart, + getMulExpr(WideMaxBECount, getZeroExtendExpr(Step, WideTy))); - if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + if (SAdd == OperandExtendedAdd) { // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. @@ -1336,13 +1344,6 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } - // As a special case, fold anyext(undef) to undef. We don't want to - // know too much about SCEVUnknowns, but this special case is handy - // and harmless. - if (const SCEVUnknown *U = dyn_cast(Op)) - if (isa(U->getValue())) - return getSCEV(UndefValue::get(Ty)); - // If the expression is obviously signed, use the sext cast value. if (isa(Op)) return SExt; @@ -1830,7 +1831,7 @@ static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { /// Compute the result of "n choose k", the binomial coefficient. If an /// intermediate computation overflows, Overflow will be set and the return will -/// be garbage. Overflow is not cleared on absense of overflow. +/// be garbage. Overflow is not cleared on absence of overflow. static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { // We use the multiplicative formula: // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 . @@ -2029,63 +2030,67 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, for (unsigned OtherIdx = Idx+1; OtherIdx < Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) { - if (AddRecLoop == cast(Ops[OtherIdx])->getLoop()) { - // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} - // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ - // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z - // ]]],+,...up to x=2n}. - // Note that the arguments to choose() are always integers with values - // known at compile time, never SCEV objects. - // - // The implementation avoids pointless extra computations when the two - // addrec's are of different length (mathematically, it's equivalent to - // an infinite stream of zeros on the right). - bool OpsModified = false; - for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); - ++OtherIdx) - if (const SCEVAddRecExpr *OtherAddRec = - dyn_cast(Ops[OtherIdx])) - if (OtherAddRec->getLoop() == AddRecLoop) { - bool Overflow = false; - Type *Ty = AddRec->getType(); - bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; - SmallVector AddRecOps; - for (int x = 0, xe = AddRec->getNumOperands() + - OtherAddRec->getNumOperands() - 1; - x != xe && !Overflow; ++x) { - const SCEV *Term = getConstant(Ty, 0); - for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { - uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); - for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), - ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); - z < ze && !Overflow; ++z) { - uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); - uint64_t Coeff; - if (LargerThan64Bits) - Coeff = umul_ov(Coeff1, Coeff2, Overflow); - else - Coeff = Coeff1*Coeff2; - const SCEV *CoeffTerm = getConstant(Ty, Coeff); - const SCEV *Term1 = AddRec->getOperand(y-z); - const SCEV *Term2 = OtherAddRec->getOperand(z); - Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); - } - } - AddRecOps.push_back(Term); - } - if (!Overflow) { - const SCEV *NewAddRec = getAddRecExpr(AddRecOps, - AddRec->getLoop(), - SCEV::FlagAnyWrap); - if (Ops.size() == 2) return NewAddRec; - Ops[Idx] = AddRec = cast(NewAddRec); - Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; - OpsModified = true; - } + if (AddRecLoop != cast(Ops[OtherIdx])->getLoop()) + continue; + + // {A1,+,A2,+,...,+,An} * {B1,+,B2,+,...,+,Bn} + // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ + // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z + // ]]],+,...up to x=2n}. + // Note that the arguments to choose() are always integers with values + // known at compile time, never SCEV objects. + // + // The implementation avoids pointless extra computations when the two + // addrec's are of different length (mathematically, it's equivalent to + // an infinite stream of zeros on the right). + bool OpsModified = false; + for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); + ++OtherIdx) { + const SCEVAddRecExpr *OtherAddRec = + dyn_cast(Ops[OtherIdx]); + if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) + continue; + + bool Overflow = false; + Type *Ty = AddRec->getType(); + bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; + SmallVector AddRecOps; + for (int x = 0, xe = AddRec->getNumOperands() + + OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { + const SCEV *Term = getConstant(Ty, 0); + for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { + uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); + for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), + ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); + z < ze && !Overflow; ++z) { + uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); + uint64_t Coeff; + if (LargerThan64Bits) + Coeff = umul_ov(Coeff1, Coeff2, Overflow); + else + Coeff = Coeff1*Coeff2; + const SCEV *CoeffTerm = getConstant(Ty, Coeff); + const SCEV *Term1 = AddRec->getOperand(y-z); + const SCEV *Term2 = OtherAddRec->getOperand(z); + Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1,Term2)); } - if (OpsModified) - return getMulExpr(Ops); + } + AddRecOps.push_back(Term); + } + if (!Overflow) { + const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(), + SCEV::FlagAnyWrap); + if (Ops.size() == 2) return NewAddRec; + Ops[Idx] = NewAddRec; + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + OpsModified = true; + AddRec = dyn_cast(NewAddRec); + if (!AddRec) + break; + } } + if (OpsModified) + return getMulExpr(Ops); } // Otherwise couldn't fold anything into this recurrence. Move onto the @@ -2581,17 +2586,16 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { - // If we have TargetData, we can bypass creating a target-independent +const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy, Type *IntPtrTy) { + // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. if (TD) - return getConstant(TD->getIntPtrType(getContext()), - TD->getTypeAllocSize(AllocTy)); + return getConstant(IntPtrTy, TD->getTypeAllocSize(AllocTy)); Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2600,24 +2604,24 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { Constant *C = ConstantExpr::getAlignOf(AllocTy); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, +const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, Type *IntPtrTy, unsigned FieldNo) { - // If we have TargetData, we can bypass creating a target-independent + // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. if (TD) - return getConstant(TD->getIntPtrType(getContext()), + return getConstant(IntPtrTy, TD->getStructLayout(STy)->getElementOffset(FieldNo)); Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2627,7 +2631,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy, Constant *FieldNo) { Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); @@ -2673,7 +2677,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const { uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - // If we have a TargetData, use it! + // If we have a DataLayout, use it! if (TD) return TD->getTypeSizeInBits(Ty); @@ -2681,7 +2685,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { if (Ty->isIntegerTy()) return Ty->getPrimitiveSizeInBits(); - // The only other support type is pointer. Without TargetData, conservatively + // The only other support type is pointer. Without DataLayout, conservatively // assume pointers are 64-bit. assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!"); return 64; @@ -2699,9 +2703,9 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - if (TD) return TD->getIntPtrType(getContext()); + if (TD) return TD->getIntPtrType(Ty); - // Without TargetData, conservatively assume pointers are 64-bit. + // Without DataLayout, conservatively assume pointers are 64-bit. return Type::getInt64Ty(getContext()); } @@ -2714,7 +2718,7 @@ const SCEV *ScalarEvolution::getCouldNotCompute() { const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); - ValueExprMapType::const_iterator I = ValueExprMap.find(V); + ValueExprMapType::const_iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) return I->second; const SCEV *S = createSCEV(V); @@ -2951,7 +2955,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { if (!Visited.insert(I)) continue; ValueExprMapType::iterator It = - ValueExprMap.find(static_cast(I)); + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { const SCEV *Old = It->second; @@ -3008,7 +3012,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (BEValueV && StartValueV) { // While we are analyzing this PHI node, handle its value symbolically. const SCEV *SymbolicName = getUnknown(PN); - assert(ValueExprMap.find(PN) == ValueExprMap.end() && + assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && "PHI node already processed?"); ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); @@ -3152,13 +3156,13 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { if (StructType *STy = dyn_cast(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast(Index)->getZExtValue(); - const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); + const SCEV *FieldOffset = getOffsetOfExpr(STy, IntPtrTy, FieldNo); // Add the field offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, FieldOffset); } else { // For an array, add the element offset, explicitly scaled. - const SCEV *ElementSize = getSizeOfExpr(*GTI); + const SCEV *ElementSize = getSizeOfExpr(*GTI, IntPtrTy); const SCEV *IndexS = getSCEV(Index); // Getelementptr indices are signed. IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); @@ -3252,9 +3256,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones); + ComputeMaskedBits(U->getValue(), Zeros, Ones); return Zeros.countTrailingOnes(); } @@ -3392,9 +3395,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); + ComputeMaskedBits(U->getValue(), Zeros, Ones, TD); if (Ones == ~Zeros + 1) return setUnsignedRange(U, ConservativeResult); return setUnsignedRange(U, @@ -3651,9 +3653,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // knew about to reconstruct a low-bits mask value. unsigned LZ = A.countLeadingZeros(); unsigned BitWidth = A.getBitWidth(); - APInt AllOnes = APInt::getAllOnesValue(BitWidth); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD); + ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); @@ -3925,13 +3926,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // /// getSmallConstantTripCount - Returns the maximum trip count of this loop as a -/// normal unsigned value, if possible. Returns 0 if the trip count is unknown -/// or not constant. Will also return 0 if the maximum trip count is very large -/// (>= 2^32) -unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, - BasicBlock *ExitBlock) { +/// normal unsigned value. Returns 0 if the trip count is unknown or not +/// constant. Will also return 0 if the maximum trip count is very large (>= +/// 2^32). +/// +/// This "trip count" assumes that control exits via ExitingBlock. More +/// precisely, it is the number of times that control may reach ExitingBlock +/// before taking the branch. For loops with multiple exits, it may not be the +/// number times that the loop header executes because the loop may exit +/// prematurely via another branch. +unsigned ScalarEvolution:: +getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) { const SCEVConstant *ExitCount = - dyn_cast(getExitCount(L, ExitBlock)); + dyn_cast(getExitCount(L, ExitingBlock)); if (!ExitCount) return 0; @@ -3954,9 +3961,12 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L, /// multiple of a constant (which is also the case if the trip count is simply /// constant, use getSmallConstantTripCount for that case), Will also return 1 /// if the trip count is very large (>= 2^32). -unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L, - BasicBlock *ExitBlock) { - const SCEV *ExitCount = getExitCount(L, ExitBlock); +/// +/// As explained in the comments for getSmallConstantTripCount, this assumes +/// that control exits the loop via ExitingBlock. +unsigned ScalarEvolution:: +getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) { + const SCEV *ExitCount = getExitCount(L, ExitingBlock); if (ExitCount == getCouldNotCompute()) return 1; @@ -3974,8 +3984,11 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L, ConstantInt *Result = MulC->getValue(); - // Guard against huge trip counts. - if (!Result || Result->getValue().getActiveBits() > 32) + // Guard against huge trip counts (this requires checking + // for zero to handle the case where the trip count == -1 and the + // addition wraps). + if (!Result || Result->getValue().getActiveBits() > 32 || + Result->getValue().getActiveBits() == 0) return 1; return (unsigned)Result->getZExtValue(); @@ -4066,7 +4079,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { if (!Visited.insert(I)) continue; ValueExprMapType::iterator It = - ValueExprMap.find(static_cast(I)); + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { const SCEV *Old = It->second; @@ -4117,7 +4130,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - ValueExprMapType::iterator It = ValueExprMap.find(static_cast(I)); + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { forgetMemoizedResults(It->second); ValueExprMap.erase(It); @@ -4150,7 +4164,8 @@ void ScalarEvolution::forgetValue(Value *V) { I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; - ValueExprMapType::iterator It = ValueExprMap.find(static_cast(I)); + ValueExprMapType::iterator It = + ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { forgetMemoizedResults(It->second); ValueExprMap.erase(It); @@ -4562,40 +4577,6 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, return cast(Val)->getValue(); } -/// GetAddressedElementFromGlobal - Given a global variable with an initializer -/// and a GEP expression (missing the pointer index) indexing into it, return -/// the addressed element of the initializer or null if the index expression is -/// invalid. -static Constant * -GetAddressedElementFromGlobal(GlobalVariable *GV, - const std::vector &Indices) { - Constant *Init = GV->getInitializer(); - for (unsigned i = 0, e = Indices.size(); i != e; ++i) { - uint64_t Idx = Indices[i]->getZExtValue(); - if (ConstantStruct *CS = dyn_cast(Init)) { - assert(Idx < CS->getNumOperands() && "Bad struct index!"); - Init = cast(CS->getOperand(Idx)); - } else if (ConstantArray *CA = dyn_cast(Init)) { - if (Idx >= CA->getNumOperands()) return 0; // Bogus program - Init = cast(CA->getOperand(Idx)); - } else if (isa(Init)) { - if (StructType *STy = dyn_cast(Init->getType())) { - assert(Idx < STy->getNumElements() && "Bad struct index!"); - Init = Constant::getNullValue(STy->getElementType(Idx)); - } else if (ArrayType *ATy = dyn_cast(Init->getType())) { - if (Idx >= ATy->getNumElements()) return 0; // Bogus program - Init = Constant::getNullValue(ATy->getElementType()); - } else { - llvm_unreachable("Unknown constant aggregate type!"); - } - return 0; - } else { - return 0; // Unknown initializer type - } - } - return Init; -} - /// ComputeLoadConstantCompareExitLimit - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. @@ -4623,7 +4604,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( // Okay, we allow one non-constant index into the GEP instruction. Value *VarIdx = 0; - std::vector Indexes; + std::vector Indexes; unsigned VarIdxNum = 0; for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) if (ConstantInt *CI = dyn_cast(GEP->getOperand(i))) { @@ -4635,6 +4616,10 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( Indexes.push_back(0); } + // Loop-invariant loads may be a byproduct of loop optimization. Skip them. + if (!VarIdx) + return getCouldNotCompute(); + // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. // Check to see if X is a loop variant variable value now. const SCEV *Idx = getSCEV(VarIdx); @@ -4657,7 +4642,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( // Form the GEP offset. Indexes[VarIdxNum] = Val; - Constant *Result = GetAddressedElementFromGlobal(GV, Indexes); + Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), + Indexes); if (Result == 0) break; // Cannot compute! // Evaluate the condition for this iteration. @@ -4772,7 +4758,8 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { /// reason, return null. static Constant *EvaluateExpression(Value *V, const Loop *L, DenseMap &Vals, - const TargetData *TD) { + const DataLayout *TD, + const TargetLibraryInfo *TLI) { // Convenient constant check, but redundant for recursive calls. if (Constant *C = dyn_cast(V)) return C; Instruction *I = dyn_cast(V); @@ -4798,7 +4785,7 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, if (!Operands[i]) return 0; continue; } - Constant *C = EvaluateExpression(Operand, L, Vals, TD); + Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI); Vals[Operand] = C; if (!C) return 0; Operands[i] = C; @@ -4806,12 +4793,13 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, if (CmpInst *CI = dyn_cast(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], - Operands[1], TD); + Operands[1], TD, TLI); if (LoadInst *LI = dyn_cast(I)) { if (!LI->isVolatile()) return ConstantFoldLoadFromConstPtr(Operands[0], TD); } - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD); + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD, + TLI); } /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is @@ -4866,7 +4854,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // Compute the value of the PHIs for the next iteration. // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap NextIterVals; - Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, + TLI); if (NextPHI == 0) return 0; // Couldn't evaluate! NextIterVals[PN] = NextPHI; @@ -4891,7 +4880,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, Constant *&NextPHI = NextIterVals[PHI]; if (!NextPHI) { // Not already computed. Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); } if (NextPHI != I->second) StoppedEvolving = false; @@ -4946,8 +4935,8 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ ConstantInt *CondVal = - dyn_cast_or_null(EvaluateExpression(Cond, L, - CurrentIterVals, TD)); + dyn_cast_or_null(EvaluateExpression(Cond, L, CurrentIterVals, + TD, TLI)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4977,7 +4966,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, if (NextPHI) continue; // Already computed! Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI); } CurrentIterVals.swap(NextIterVals); } @@ -5169,13 +5158,14 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { Constant *C = 0; if (const CmpInst *CI = dyn_cast(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), - Operands[0], Operands[1], TD); + Operands[0], Operands[1], TD, + TLI); else if (const LoadInst *LI = dyn_cast(I)) { if (!LI->isVolatile()) C = ConstantFoldLoadFromConstPtr(Operands[0], TD); } else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - Operands, TD); + Operands, TD, TLI); if (!C) return V; return getSCEV(C); } @@ -5293,7 +5283,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { } llvm_unreachable("Unknown SCEV type!"); - return 0; } /// getSCEVAtScope - This is a convenience function which does @@ -5390,6 +5379,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { SqrtTerm *= B; SqrtTerm -= Four * (A * C); + if (SqrtTerm.isNegative()) { + // The loop is provably infinite. + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest // integer value or else APInt::sqrt() will assert. APInt SqrtVal(SqrtTerm.sqrt()); @@ -5492,7 +5487,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast(Step); - if (StepC == 0) + if (StepC == 0 || StepC->getValue()->equalsInt(0)) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): @@ -5613,9 +5608,14 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) { /// predicate Pred. Return true iff any changes were made. /// bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, - const SCEV *&LHS, const SCEV *&RHS) { + const SCEV *&LHS, const SCEV *&RHS, + unsigned Depth) { bool Changed = false; + // If we hit the max recursion limit bail out. + if (Depth >= 3) + return false; + // Canonicalize a constant to the right side. if (const SCEVConstant *LHSC = dyn_cast(LHS)) { // Check for both operands constant. @@ -5653,6 +5653,16 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_NE: + // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b. + if (!RA) + if (const SCEVAddExpr *AE = dyn_cast(LHS)) + if (const SCEVMulExpr *ME = dyn_cast(AE->getOperand(0))) + if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 && + ME->getOperand(0)->isAllOnesValue()) { + RHS = AE->getOperand(1); + LHS = ME->getOperand(1); + Changed = true; + } break; case ICmpInst::ICMP_UGE: if ((RA - 1).isMinValue()) { @@ -5854,6 +5864,11 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // TODO: More simplifications are possible here. + // Recursively simplify until we either hit a recursion limit or nothing + // changes. + if (Changed) + return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1); + return Changed; trivially_true: @@ -5924,7 +5939,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); - break; case ICmpInst::ICMP_SGT: Pred = ICmpInst::ICMP_SLT; std::swap(LHS, RHS); @@ -6052,12 +6066,34 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, return false; } +/// RAII wrapper to prevent recursive application of isImpliedCond. +/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are +/// currently evaluating isImpliedCond. +struct MarkPendingLoopPredicate { + Value *Cond; + DenseSet &LoopPreds; + bool Pending; + + MarkPendingLoopPredicate(Value *C, DenseSet &LP) + : Cond(C), LoopPreds(LP) { + Pending = !LoopPreds.insert(Cond).second; + } + ~MarkPendingLoopPredicate() { + if (!Pending) + LoopPreds.erase(Cond); + } +}; + /// isImpliedCond - Test whether the condition described by Pred, LHS, /// and RHS is true whenever the given Cond value evaluates to true. bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Value *FoundCondValue, bool Inverse) { + MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates); + if (Mark.Pending) + return false; + // Recursively handle And and Or conditions. if (BinaryOperator *BO = dyn_cast(FoundCondValue)) { if (BO->getOpcode() == Instruction::And) { @@ -6561,7 +6597,7 @@ ScalarEvolution::ScalarEvolution() bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; LI = &getAnalysis(); - TD = getAnalysisIfAvailable(); + TD = getAnalysisIfAvailable(); TLI = &getAnalysis(); DT = &getAnalysis(); return false; @@ -6584,6 +6620,8 @@ void ScalarEvolution::releaseMemory() { I->second.clear(); } + assert(PendingLoopPredicates.empty() && "isImpliedCond garbage"); + BackedgeTakenCounts.clear(); ConstantEvolutionLoopExitValue.clear(); ValuesAtScopes.clear(); @@ -6775,11 +6813,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { return LoopInvariant; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return LoopVariant; - default: break; + default: llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return LoopVariant; } bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { @@ -6861,11 +6896,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { return ProperlyDominatesBlock; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return DoesNotDominateBlock; - default: break; + default: + llvm_unreachable("Unknown SCEV kind!"); } - llvm_unreachable("Unknown SCEV kind!"); - return DoesNotDominateBlock; } bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { @@ -6876,46 +6909,27 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) == ProperlyDominatesBlock; } -bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { - switch (S->getSCEVType()) { - case scConstant: - return false; - case scTruncate: - case scZeroExtend: - case scSignExtend: { - const SCEVCastExpr *Cast = cast(S); - const SCEV *CastOp = Cast->getOperand(); - return Op == CastOp || hasOperand(CastOp, Op); - } - case scAddRecExpr: - case scAddExpr: - case scMulExpr: - case scUMaxExpr: - case scSMaxExpr: { - const SCEVNAryExpr *NAry = cast(S); - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); - I != E; ++I) { - const SCEV *NAryOp = *I; - if (NAryOp == Op || hasOperand(NAryOp, Op)) - return true; - } - return false; - } - case scUDivExpr: { - const SCEVUDivExpr *UDiv = cast(S); - const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); - return LHS == Op || hasOperand(LHS, Op) || - RHS == Op || hasOperand(RHS, Op); - } - case scUnknown: - return false; - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - return false; - default: break; +namespace { +// Search for a SCEV expression node within an expression tree. +// Implements SCEVTraversal::Visitor. +struct SCEVSearch { + const SCEV *Node; + bool IsFound; + + SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} + + bool follow(const SCEV *S) { + IsFound |= (S == Node); + return !IsFound; } - llvm_unreachable("Unknown SCEV kind!"); - return false; + bool isDone() const { return IsFound; } +}; +} + +bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { + SCEVSearch Search(Op); + visitAll(S, Search); + return Search.IsFound; } void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { @@ -6925,3 +6939,66 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { UnsignedRanges.erase(S); SignedRanges.erase(S); } + +typedef DenseMap VerifyMap; +/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis. +static void +getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) { + for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) { + getLoopBackedgeTakenCounts(*I, Map, SE); // recurse. + + std::string &S = Map[L]; + if (S.empty()) { + raw_string_ostream OS(S); + SE.getBackedgeTakenCount(L)->print(OS); + } + } +} + +void ScalarEvolution::verifyAnalysis() const { + if (!VerifySCEV) + return; + + ScalarEvolution &SE = *const_cast(this); + + // Gather stringified backedge taken counts for all loops using SCEV's caches. + // FIXME: It would be much better to store actual values instead of strings, + // but SCEV pointers will change if we drop the caches. + VerifyMap BackedgeDumpsOld, BackedgeDumpsNew; + for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) + getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE); + + // Gather stringified backedge taken counts for all loops without using + // SCEV's caches. + SE.releaseMemory(); + for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) + getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE); + + // Now compare whether they're the same with and without caches. This allows + // verifying that no pass changed the cache. + assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() && + "New loops suddenly appeared!"); + + for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(), + OldE = BackedgeDumpsOld.end(), + NewI = BackedgeDumpsNew.begin(); + OldI != OldE; ++OldI, ++NewI) { + assert(OldI->first == NewI->first && "Loop order changed!"); + + // Compare the stringified SCEVs. We don't care if undef backedgetaken count + // changes. + // FIXME: We currently ignore SCEV changes towards CouldNotCompute. This + // means that a pass is buggy or SCEV has to learn a new pattern but is + // usually not harmful. + if (OldI->second != NewI->second && + OldI->second.find("undef") == std::string::npos && + NewI->second != "***COULDNOTCOMPUTE***") { + dbgs() << "SCEVValidator: SCEV for Loop '" + << OldI->first->getHeader()->getName() + << "' from '" << OldI->second << "' to '" << NewI->second << "'!"; + std::abort(); + } + } + + // TODO: Verify more things. +}