-//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===//
+//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scalar-evolution"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "scalar-evolution"
+
STATISTIC(NumArrayLenItCounts,
"Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
          "Number of loops with predictable loop counts");

static cl::opt<unsigned>
MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                        cl::desc("Maximum number of iterations SCEV will "
                                 "symbolically execute a constant "
                                 "derived loop"),
                        cl::init(100));
+// FIXME: Enable this with XDEBUG when the test suite is clean.
+static cl::opt<bool>
+VerifySCEV("verify-scev",
+ cl::desc("Verify ScalarEvolution's backedge taken counts (slow)"));
+
INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;
//===----------------------------------------------------------------------===//
// Implementation of the SCEV class.
//
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void SCEV::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void SCEV::print(raw_ostream &OS) const {
- switch (getSCEVType()) {
+ switch (static_cast<SCEVTypes>(getSCEVType())) {
case scConstant:
- WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
+ cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
return;
case scTruncate: {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
if (AR->getNoWrapFlags(FlagNW) &&
!AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
OS << "nw><";
- WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
+ AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ">";
return;
}
case scUMaxExpr:
case scSMaxExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
- const char *OpStr = 0;
+ const char *OpStr = nullptr;
switch (NAry->getSCEVType()) {
case scAddExpr: OpStr = " + "; break;
case scMulExpr: OpStr = " * "; break;
for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
I != E; ++I) {
OS << **I;
- if (llvm::next(I) != E)
+ if (std::next(I) != E)
OS << OpStr;
}
OS << ")";
+ switch (NAry->getSCEVType()) {
+ case scAddExpr:
+ case scMulExpr:
+ if (NAry->getNoWrapFlags(FlagNUW))
+ OS << "<nuw>";
+ if (NAry->getNoWrapFlags(FlagNSW))
+ OS << "<nsw>";
+ }
return;
}
case scUDivExpr: {
}
case scUnknown: {
const SCEVUnknown *U = cast<SCEVUnknown>(this);
- const Type *AllocTy;
+ Type *AllocTy;
if (U->isSizeOf(AllocTy)) {
OS << "sizeof(" << *AllocTy << ")";
return;
return;
}
- const Type *CTy;
+ Type *CTy;
Constant *FieldNo;
if (U->isOffsetOf(CTy, FieldNo)) {
OS << "offsetof(" << *CTy << ", ";
- WriteAsOperand(OS, FieldNo, false);
+ FieldNo->printAsOperand(OS, false);
OS << ")";
return;
}
// Otherwise just print it normally.
- WriteAsOperand(OS, U->getValue(), false);
+ U->getValue()->printAsOperand(OS, false);
return;
}
case scCouldNotCompute:
OS << "***COULDNOTCOMPUTE***";
return;
- default: break;
}
llvm_unreachable("Unknown SCEV kind!");
}
-const Type *SCEV::getType() const {
- switch (getSCEVType()) {
+Type *SCEV::getType() const {
+ switch (static_cast<SCEVTypes>(getSCEVType())) {
case scConstant:
return cast<SCEVConstant>(this)->getType();
case scTruncate:
return cast<SCEVUnknown>(this)->getType();
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return 0;
- default: break;
}
llvm_unreachable("Unknown SCEV kind!");
- return 0;
}
bool SCEV::isZero() const {
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
    return SC->getValue()->isZero();
  return false;
}
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+bool SCEV::isNonConstantNegative() const {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
+ if (!Mul) return false;
+
+ // If there is a constant factor, it will be first.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+ if (!SC) return false;
+
+  // Return true if the value is negative; this matches things like (-42 * V).
+ return SC->getValue()->getValue().isNegative();
+}
+
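+// For example, the SCEV (-42 * %V) is a SCEVMulExpr whose first operand is
+// the constant -42, so isNonConstantNegative() returns true; a lone
+// SCEVConstant such as -42 is rejected up front because it is not a multiply.
+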
SCEVCouldNotCompute::SCEVCouldNotCompute() :
SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
FoldingSetNodeID ID;
ID.AddInteger(scConstant);
ID.AddPointer(V);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
-const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
+const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
return getConstant(ConstantInt::get(getContext(), Val));
}
const SCEV *
-ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
- const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
+ IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
return getConstant(ConstantInt::get(ITy, V, isSigned));
}
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID,
- unsigned SCEVTy, const SCEV *op, const Type *ty)
+ unsigned SCEVTy, const SCEV *op, Type *ty)
: SCEV(ID, SCEVTy), Op(op), Ty(ty) {}
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID,
- const SCEV *op, const Type *ty)
+ const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scTruncate, op, ty) {
assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
}
SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID,
- const SCEV *op, const Type *ty)
+ const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scZeroExtend, op, ty) {
assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
}
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID,
- const SCEV *op, const Type *ty)
+ const SCEV *op, Type *ty)
: SCEVCastExpr(ID, scSignExtend, op, ty) {
assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
SE->UniqueSCEVs.RemoveNode(this);
// Release the value.
- setValPtr(0);
+ setValPtr(nullptr);
}
void SCEVUnknown::allUsesReplacedWith(Value *New) {
setValPtr(New);
}
-bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
+bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
return false;
}
-bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
+bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue()) {
- const Type *Ty =
+ Type *Ty =
cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
- if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked() &&
CE->getNumOperands() == 3 &&
CE->getOperand(1)->isNullValue()) {
return false;
}
-bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
+bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
CE->getNumOperands() == 3 &&
CE->getOperand(0)->isNullValue() &&
CE->getOperand(1)->isNullValue()) {
- const Type *Ty =
+ Type *Ty =
cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
// Ignore vector types here so that ScalarEvolutionExpander doesn't
// emit getelementptrs that index into vectors.
// Aside from the getSCEVType() ordering, the particular ordering
// isn't very important except that it's beneficial to be consistent,
// so that (a + b) and (b + a) don't end up as different expressions.
- switch (LType) {
+ switch (static_cast<SCEVTypes>(LType)) {
case scUnknown: {
const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
// Lexicographically compare n-ary expressions.
unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
for (unsigned i = 0; i != LNumOps; ++i) {
if (i >= RNumOps)
return 1;
return compare(LC->getOperand(), RC->getOperand());
}
- default:
- break;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
}
-
llvm_unreachable("Unknown SCEV kind!");
- return 0;
}
};
}
}
}
+static const APInt urem(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.zext(ABW);
+ else if (ABW < BBW)
+ A = A.zext(BBW);
+
+ return APIntOps::urem(A, B);
+}
+
+static const APInt udiv(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.zext(ABW);
+ else if (ABW < BBW)
+ A = A.zext(BBW);
+
+ return APIntOps::udiv(A, B);
+}
+
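+// A worked example for the two helpers above: dividing an i16 constant 100
+// by an i8 constant 7 first zero-extends 7 to i16; udiv then yields 14 and
+// urem yields 2, since 100 = 7 * 14 + 2.
+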
+namespace {
+struct FindSCEVSize {
+ int Size;
+ FindSCEVSize() : Size(0) {}
+
+ bool follow(const SCEV *S) {
+ ++Size;
+ // Keep looking at all operands of S.
+ return true;
+ }
+ bool isDone() const {
+ return false;
+ }
+};
+}
+
+// Returns the size of the SCEV S.
+static inline int sizeOfSCEV(const SCEV *S) {
+ FindSCEVSize F;
+ SCEVTraversal<FindSCEVSize> ST(F);
+ ST.visitAll(S);
+ return F.Size;
+}
+
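+// For example, for the SCEV (%a + (2 * %b)) the traversal visits the add,
+// the mul, the constant 2, and the unknowns %a and %b, so sizeOfSCEV
+// returns 5.
+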
+namespace {
+
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
+public:
+ // Computes the Quotient and Remainder of the division of Numerator by
+ // Denominator.
+ static void divide(ScalarEvolution &SE, const SCEV *Numerator,
+ const SCEV *Denominator, const SCEV **Quotient,
+ const SCEV **Remainder) {
+ assert(Numerator && Denominator && "Uninitialized SCEV");
+
+ SCEVDivision D(SE, Numerator, Denominator);
+
+ // Check for the trivial case here to avoid having to check for it in the
+ // rest of the code.
+ if (Numerator == Denominator) {
+ *Quotient = D.One;
+ *Remainder = D.Zero;
+ return;
+ }
+
+ if (Numerator->isZero()) {
+ *Quotient = D.Zero;
+ *Remainder = D.Zero;
+ return;
+ }
+
+ // Split the Denominator when it is a product.
+    if (const SCEVMulExpr *T = dyn_cast<SCEVMulExpr>(Denominator)) {
+ const SCEV *Q, *R;
+ *Quotient = Numerator;
+ for (const SCEV *Op : T->operands()) {
+ divide(SE, *Quotient, Op, &Q, &R);
+ *Quotient = Q;
+
+ // Bail out when the Numerator is not divisible by one of the terms of
+ // the Denominator.
+ if (!R->isZero()) {
+ *Quotient = D.Zero;
+ *Remainder = Numerator;
+ return;
+ }
+ }
+ *Remainder = D.Zero;
+ return;
+ }
+
+ D.visit(Numerator);
+ *Quotient = D.Quotient;
+ *Remainder = D.Remainder;
+ }
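+
+  // An illustrative trace: dividing the numerator (4 * %n * %m) by the
+  // denominator (2 * %m) splits the denominator into its factors; dividing
+  // by 2 strips the constant coefficient and dividing by %m strips the
+  // matching product operand, leaving the quotient (2 * %n) and a zero
+  // remainder.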
+
+  SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,
+               const SCEV *Denominator)
+    : SE(S), Denominator(Denominator) {
+ Zero = SE.getConstant(Denominator->getType(), 0);
+ One = SE.getConstant(Denominator->getType(), 1);
+
+ // By default, we don't know how to divide Expr by Denominator.
+ // Providing the default here simplifies the rest of the code.
+ Quotient = Zero;
+ Remainder = Numerator;
+ }
+
+ // Except in the trivial case described above, we do not know how to divide
+ // Expr by Denominator for the following functions with empty implementation.
+ void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
+ void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
+ void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
+ void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
+ void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
+ void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
+ void visitUnknown(const SCEVUnknown *Numerator) {}
+ void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
+
+ void visitConstant(const SCEVConstant *Numerator) {
+ if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
+ Quotient = SE.getConstant(udiv(Numerator, D));
+ Remainder = SE.getConstant(urem(Numerator, D));
+ return;
+ }
+ }
+
+ void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
+ const SCEV *StartQ, *StartR, *StepQ, *StepR;
+ assert(Numerator->isAffine() && "Numerator should be affine");
+ divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
+ divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
+ Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
+ Numerator->getNoWrapFlags());
+ Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
+ Numerator->getNoWrapFlags());
+ }
+
+ void visitAddExpr(const SCEVAddExpr *Numerator) {
+ SmallVector<const SCEV *, 2> Qs, Rs;
+ Type *Ty = Denominator->getType();
+
+ for (const SCEV *Op : Numerator->operands()) {
+ const SCEV *Q, *R;
+ divide(SE, Op, Denominator, &Q, &R);
+
+ // Bail out if types do not match.
+ if (Ty != Q->getType() || Ty != R->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ Qs.push_back(Q);
+ Rs.push_back(R);
+ }
+
+ if (Qs.size() == 1) {
+ Quotient = Qs[0];
+ Remainder = Rs[0];
+ return;
+ }
+
+ Quotient = SE.getAddExpr(Qs);
+ Remainder = SE.getAddExpr(Rs);
+ }
+
+ void visitMulExpr(const SCEVMulExpr *Numerator) {
+ SmallVector<const SCEV *, 2> Qs;
+ Type *Ty = Denominator->getType();
+
+ bool FoundDenominatorTerm = false;
+ for (const SCEV *Op : Numerator->operands()) {
+ // Bail out if types do not match.
+ if (Ty != Op->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ if (FoundDenominatorTerm) {
+ Qs.push_back(Op);
+ continue;
+ }
+
+ // Check whether Denominator divides one of the product operands.
+ const SCEV *Q, *R;
+ divide(SE, Op, Denominator, &Q, &R);
+ if (!R->isZero()) {
+ Qs.push_back(Op);
+ continue;
+ }
+
+ // Bail out if types do not match.
+ if (Ty != Q->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ FoundDenominatorTerm = true;
+ Qs.push_back(Q);
+ }
+
+ if (FoundDenominatorTerm) {
+ Remainder = Zero;
+ if (Qs.size() == 1)
+ Quotient = Qs[0];
+ else
+ Quotient = SE.getMulExpr(Qs);
+ return;
+ }
+
+ if (!isa<SCEVUnknown>(Denominator)) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ // The Remainder is obtained by replacing Denominator by 0 in Numerator.
+ ValueToValueMap RewriteMap;
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+ cast<SCEVConstant>(Zero)->getValue();
+ Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+
+ if (Remainder->isZero()) {
+ // The Quotient is obtained by replacing Denominator by 1 in Numerator.
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+ cast<SCEVConstant>(One)->getValue();
+ Quotient =
+ SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+ return;
+ }
+
+ // Quotient is (Numerator - Remainder) divided by Denominator.
+ const SCEV *Q, *R;
+ const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
+ if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
+ // This SCEV does not seem to simplify: fail the division here.
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+ divide(SE, Diff, Denominator, &Q, &R);
+    assert(R == Zero &&
+           "Denominator should evenly divide (Numerator - Remainder)");
+ Quotient = Q;
+ }
+
+private:
+ ScalarEvolution &SE;
+ const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
+};
+}
+
//===----------------------------------------------------------------------===//
/// BinomialCoefficient - Compute BC(It, K). The result has width W.
/// Assume K > 0.
static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
ScalarEvolution &SE,
- const Type* ResultTy) {
+ Type *ResultTy) {
// Handle the simplest case efficiently.
if (K == 1)
return SE.getTruncateOrZeroExtend(It, ResultTy);
unsigned CalculationBits = W + T;
// Calculate 2^T, at width T+W.
- APInt DivFactor = APInt(CalculationBits, 1).shl(T);
+ APInt DivFactor = APInt::getOneBitSet(CalculationBits, T);
// Calculate the multiplicative inverse of K! / 2^T;
// this multiplication factor will perform the exact division by
MultiplyFactor = MultiplyFactor.trunc(W);
// Calculate the product, at width T+W
- const IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+  IntegerType *CalculationTy = IntegerType::get(SE.getContext(),
+                                                CalculationBits);
const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy);
for (unsigned i = 1; i != K; ++i) {
//===----------------------------------------------------------------------===//
const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
- const Type *Ty) {
+ Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) &&
"This is not a truncating conversion!");
assert(isSCEVable(Ty) &&
ID.AddInteger(scTruncate);
ID.AddPointer(Op);
ID.AddPointer(Ty);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
- cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(),
- getEffectiveSCEVType(Ty))));
+ cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
// trunc(trunc(x)) --> trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
- // As a special case, fold trunc(undef) to undef. We don't want to
- // know too much about SCEVUnknowns, but this special case is handy
- // and harmless.
- if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
- if (isa<UndefValue>(U->getValue()))
- return getSCEV(UndefValue::get(Ty));
-
// The cast wasn't folded; create an explicit cast node. We can reuse
// the existing insert position since if we get here, we won't have
// made any changes which would invalidate it.
}
const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
- const Type *Ty) {
+ Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
- cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(),
- getEffectiveSCEVType(Ty))));
+ cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty)));
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
ID.AddInteger(scZeroExtend);
ID.AddPointer(Op);
ID.AddPointer(Ty);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// zext(trunc(x)) --> zext(x) or x or trunc(x)
if (AR->getNoWrapFlags(SCEV::FlagNUW))
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getZeroExtendExpr(Step, Ty),
- // FIXME: Can use SCEV::FlagNUW
- L, SCEV::FlagAnyWrap);
+ L, AR->getNoWrapFlags());
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
const SCEV *RecastedMaxBECount =
getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
if (MaxBECount == RecastedMaxBECount) {
- const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
- const SCEV *Add = getAddExpr(Start, ZMul);
+ const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul), WideTy);
+ const SCEV *WideStart = getZeroExtendExpr(Start, WideTy);
+ const SCEV *WideMaxBECount =
+ getZeroExtendExpr(CastedMaxBECount, WideTy);
const SCEV *OperandExtendedAdd =
- getAddExpr(getZeroExtendExpr(Start, WideTy),
- getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy)));
- if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd)
+ if (ZAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getZeroExtendExpr(Step, Ty),
- // FIXME: can use FlagNUW
- L, SCEV::FlagAnyWrap);
-
+ L, AR->getNoWrapFlags());
+ }
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
- const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
- Add = getAddExpr(Start, SMul);
OperandExtendedAdd =
- getAddExpr(getZeroExtendExpr(Start, WideTy),
- getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
getSignExtendExpr(Step, WideTy)));
- if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd)
+ if (ZAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getSignExtendExpr(Step, Ty),
- // FIXME: can use FlagNW
- L, SCEV::FlagAnyWrap);
+ L, AR->getNoWrapFlags());
+ }
}
// If the backedge is guarded by a comparison with the pre-inc value
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) ||
(isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT,
- AR->getPostIncExpr(*this), N)))
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NUW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW);
// Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getZeroExtendExpr(Step, Ty),
- // FIXME: can use FlagNUW
- L, SCEV::FlagAnyWrap);
+ L, AR->getNoWrapFlags());
+ }
} else if (isKnownNegative(Step)) {
const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) -
getSignedRange(Step).getSignedMin());
if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) ||
(isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) &&
isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT,
- AR->getPostIncExpr(*this), N)))
- // Return the expression with the addrec on the outside. The
- // negative step causes unsigned wrap, but it still can't self-wrap.
+ AR->getPostIncExpr(*this), N))) {
+ // Cache knowledge of AR NW, which is propagated to this AddRec.
+ // Negative step causes unsigned wrap, but it still can't self-wrap.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW);
+ // Return the expression with the addrec on the outside.
return getAddRecExpr(getZeroExtendExpr(Start, Ty),
getSignExtendExpr(Step, Ty),
- // FIXME: can use FlagNW
- L, SCEV::FlagAnyWrap);
+ L, AR->getNoWrapFlags());
+ }
}
}
}
return S;
}
+// Get the limit of a recurrence such that incrementing by Step cannot cause
+// signed overflow as long as the value of the recurrence within the loop does
+// not exceed this limit before incrementing.
+static const SCEV *getOverflowLimitForStep(const SCEV *Step,
+ ICmpInst::Predicate *Pred,
+ ScalarEvolution *SE) {
+ unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
+ if (SE->isKnownPositive(Step)) {
+ *Pred = ICmpInst::ICMP_SLT;
+ return SE->getConstant(APInt::getSignedMinValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMax());
+ }
+ if (SE->isKnownNegative(Step)) {
+ *Pred = ICmpInst::ICMP_SGT;
+ return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
+ SE->getSignedRange(Step).getSignedMin());
+ }
+ return nullptr;
+}
+
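+// For example, for an i8 recurrence whose step is known to lie in [1, 3],
+// *Pred becomes ICMP_SLT and the limit is getSignedMinValue(8) - 3, which
+// wraps to 125: a value that is signed-less-than 125 can grow by at most 3
+// without exceeding 127, so the increment cannot signed-overflow.
+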
+// The recurrence AR has been shown to have no signed wrap. Typically, if we
+// can prove NSW for AR, then we can just as easily prove NSW for its
+// preincrement or postincrement sibling. This allows normalizing a sign
+// extended AddRec as such: {sext(Step + Start),+,Step} =>
+// {Step + sext(Start),+,Step}. As a result, the expression
+// "Step + sext(PreIncAR)" is congruent with "sext(PostIncAR)".
+static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
+ Type *Ty,
+ ScalarEvolution *SE) {
+ const Loop *L = AR->getLoop();
+ const SCEV *Start = AR->getStart();
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // Check for a simple looking step prior to loop entry.
+ const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
+ if (!SA)
+ return nullptr;
+
+ // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
+ // subtraction is expensive. For this purpose, perform a quick and dirty
+ // difference, by checking for Step in the operand list.
+ SmallVector<const SCEV *, 4> DiffOps;
+ for (const SCEV *Op : SA->operands())
+ if (Op != Step)
+ DiffOps.push_back(Op);
+
+ if (DiffOps.size() == SA->getNumOperands())
+ return nullptr;
+
+  // This is a postinc AR. Check for overflow on the preinc recurrence using
+  // the same three conditions that getSignExtendExpr checks.
+
+ // 1. NSW flags on the step increment.
+ const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags());
+ const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
+ SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));
+
+ if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW))
+ return PreStart;
+
+ // 2. Direct overflow check on the step operation's expression.
+ unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
+ Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
+ const SCEV *OperandExtendedStart =
+ SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy),
+ SE->getSignExtendExpr(Step, WideTy));
+ if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) {
+ // Cache knowledge of PreAR NSW.
+ if (PreAR)
+ const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW);
+ // FIXME: this optimization needs a unit test
+ DEBUG(dbgs() << "SCEV: untested prestart overflow check\n");
+ return PreStart;
+ }
+
+ // 3. Loop precondition.
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE);
+
+ if (OverflowLimit &&
+ SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
+ return PreStart;
+ }
+ return nullptr;
+}
+
+// Get the normalized sign-extended expression for this AddRec's Start.
+static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR,
+ Type *Ty,
+ ScalarEvolution *SE) {
+ const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE);
+ if (!PreStart)
+ return SE->getSignExtendExpr(AR->getStart(), Ty);
+
+ return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty),
+ SE->getSignExtendExpr(PreStart, Ty));
+}
+
const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
- const Type *Ty) {
+ Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
- cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(),
- getEffectiveSCEVType(Ty))));
+ cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty)));
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
ID.AddInteger(scSignExtend);
ID.AddPointer(Op);
ID.AddPointer(Ty);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// If the input value is provably positive, build a zext instead.
return getTruncateOrSignExtend(X, Ty);
}
+ // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2
+ if (auto SA = dyn_cast<SCEVAddExpr>(Op)) {
+ if (SA->getNumOperands() == 2) {
+ auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
+ auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
+ if (SMul && SC1) {
+ if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
+ const APInt &C1 = SC1->getValue()->getValue();
+ const APInt &C2 = SC2->getValue()->getValue();
+ if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
+ C2.ugt(C1) && C2.isPowerOf2())
+ return getAddExpr(getSignExtendExpr(SC1, Ty),
+ getSignExtendExpr(SMul, Ty));
+ }
+ }
+ }
+ }
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
// operands (often constants). This allows analysis of something like
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->getNoWrapFlags(SCEV::FlagNSW))
- return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getSignExtendExpr(Step, Ty),
- // FIXME: can use SCEV::FlagNSW
- L, SCEV::FlagAnyWrap);
+ L, SCEV::FlagNSW);
// Check whether the backedge-taken count is SCEVCouldNotCompute.
// Note that this serves two purposes: It filters out loops that are
const SCEV *RecastedMaxBECount =
getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType());
if (MaxBECount == RecastedMaxBECount) {
- const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
+ Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
- const SCEV *Add = getAddExpr(Start, SMul);
+ const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul), WideTy);
+ const SCEV *WideStart = getSignExtendExpr(Start, WideTy);
+ const SCEV *WideMaxBECount =
+ getZeroExtendExpr(CastedMaxBECount, WideTy);
const SCEV *OperandExtendedAdd =
- getAddExpr(getSignExtendExpr(Start, WideTy),
- getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
getSignExtendExpr(Step, WideTy)));
- if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
+ if (SAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getSignExtendExpr(Step, Ty),
- // FIXME: can use SCEV::FlagNSW
- L, SCEV::FlagAnyWrap);
-
+ L, AR->getNoWrapFlags());
+ }
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
- const SCEV *UMul = getMulExpr(CastedMaxBECount, Step);
- Add = getAddExpr(Start, UMul);
OperandExtendedAdd =
- getAddExpr(getSignExtendExpr(Start, WideTy),
- getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy),
+ getAddExpr(WideStart,
+ getMulExpr(WideMaxBECount,
getZeroExtendExpr(Step, WideTy)));
- if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd)
+ if (SAdd == OperandExtendedAdd) {
+ // Cache knowledge of AR NSW, which is propagated to this AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
// Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendExpr(Start, Ty),
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
getZeroExtendExpr(Step, Ty),
- // FIXME: can use SCEV::FlagNSW
- L, SCEV::FlagAnyWrap);
+ L, AR->getNoWrapFlags());
+ }
}
// If the backedge is guarded by a comparison with the pre-inc value
// the addrec is safe. Also, if the entry is guarded by a comparison
// with the start value and the backedge is guarded by a comparison
// with the post-inc value, the addrec is safe.
- if (isKnownPositive(Step)) {
- const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) -
- getSignedRange(Step).getSignedMax());
- if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) ||
- (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) &&
- isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT,
- AR->getPostIncExpr(*this), N)))
- // Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendExpr(Start, Ty),
- getSignExtendExpr(Step, Ty),
- // FIXME: can use SCEV::FlagNSW
- L, SCEV::FlagAnyWrap);
- } else if (isKnownNegative(Step)) {
- const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) -
- getSignedRange(Step).getSignedMin());
- if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) ||
- (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) &&
- isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT,
- AR->getPostIncExpr(*this), N)))
- // Return the expression with the addrec on the outside.
- return getAddRecExpr(getSignExtendExpr(Start, Ty),
- getSignExtendExpr(Step, Ty),
- // FIXME: can use SCEV::FlagNSW
- L, SCEV::FlagAnyWrap);
+ ICmpInst::Predicate Pred;
+ const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this);
+ if (OverflowLimit &&
+ (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) ||
+ (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) &&
+ isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this),
+ OverflowLimit)))) {
+ // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec.
+ const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW);
+ return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this),
+ getSignExtendExpr(Step, Ty),
+ L, AR->getNoWrapFlags());
+ }
+ }
+ // If Start and Step are constants, check if we can apply this
+ // transformation:
+ // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2
+ auto SC1 = dyn_cast<SCEVConstant>(Start);
+ auto SC2 = dyn_cast<SCEVConstant>(Step);
+ if (SC1 && SC2) {
+ const APInt &C1 = SC1->getValue()->getValue();
+ const APInt &C2 = SC2->getValue()->getValue();
+ if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
+ C2.isPowerOf2()) {
+ Start = getSignExtendExpr(Start, Ty);
+ const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step,
+ L, AR->getNoWrapFlags());
+ return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
}
}
}
/// unspecified bits out to the given type.
///
const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
- const Type *Ty) {
+ Type *Ty) {
assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) &&
"This is not an extending conversion!");
assert(isSCEVable(Ty) &&
// Force the cast to be folded into the operands of an addrec.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Ops;
- for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
- I != E; ++I)
- Ops.push_back(getAnyExtendExpr(*I, Ty));
- // FIXME: can use AR->getNoWrapFlags(SCEV::FlagNW)
- return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagAnyWrap);
+ for (const SCEV *Op : AR->operands())
+ Ops.push_back(getAnyExtendExpr(Op, Ty));
+ return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
}
- // As a special case, fold anyext(undef) to undef. We don't want to
- // know too much about SCEVUnknowns, but this special case is handy
- // and harmless.
- if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
- if (isa<UndefValue>(U->getValue()))
- return getSCEV(UndefValue::get(Ty));
-
// If the expression is obviously signed, use the sext cast value.
if (isa<SCEVSMaxExpr>(Op))
return SExt;
/// what it does, given a sequence of operands that would form an add
/// expression like this:
///
-/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
+/// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
///
/// where A and B are constants, update the map with these values:
///
///
static bool
CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
- SmallVector<const SCEV *, 8> &NewOps,
+ SmallVectorImpl<const SCEV *> &NewOps,
APInt &AccumulatedConstant,
const SCEV *const *Ops, size_t NumOperands,
const APInt &Scale,
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
- const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVAddExpr operand types don't match!");
#endif
// If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- if (!(Flags & SCEV::FlagNUW) && (Flags & SCEV::FlagNSW)) {
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
bool All = true;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
E = Ops.end(); I != E; ++I)
All = false;
break;
}
- if (All) Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
}
// Sort by complexity, this groups all similar expression types together.
// Okay, check to see if the same value occurs in the operand list more than
// once. If so, merge them together into an multiply expression. Since we
// sorted the list, these values are required to be adjacent.
- const Type *Ty = Ops[0]->getType();
+ Type *Ty = Ops[0]->getType();
bool FoundMatch = false;
for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
// if the contents of the resulting outer trunc fold to something simple.
for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
- const Type *DstType = Trunc->getType();
- const Type *SrcType = Trunc->getOperand()->getType();
+ Type *DstType = Trunc->getType();
+ Type *SrcType = Trunc->getOperand()->getType();
SmallVector<const SCEV *, 8> LargeOps;
bool Ok = true;
// Check all the operands to see if they can be represented in the
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
- for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(),
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(),
E = NewOps.end(); I != E; ++I)
MulOpLists[M.find(*I)->second].push_back(*I);
// Re-generate the operands list.
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer add and the inner addrec are guaranteed to have no overflow.
- // FIXME: Always propagate NW
- // AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW))
- Flags = AddRec->getNoWrapFlags(Flags);
+ // Always propagate NW.
+ Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
- // Otherwise, add the folded AddRec by the non-liv parts.
+ // Otherwise, add the folded AddRec by the non-invariant parts.
for (unsigned i = 0;; ++i)
if (Ops[i] == AddRec) {
Ops[i] = NewRec;
ID.AddInteger(scAddExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
- void *IP = 0;
+ void *IP = nullptr;
SCEVAddExpr *S =
static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
return S;
}
+static uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) {
+ uint64_t k = i*j;
+ if (j > 1 && k / j != i) Overflow = true;
+ return k;
+}
+
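+// For example, umul_ov(1ULL << 33, 1ULL << 33, Overflow) wraps to 0, and
+// since 0 / (1ULL << 33) != 1ULL << 33 the Overflow flag is set; the flag is
+// only ever set, never cleared, so callers can accumulate it across calls.
+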
+/// Compute the result of "n choose k", the binomial coefficient. If an
+/// intermediate computation overflows, Overflow will be set and the return will
+/// be garbage. Overflow is not cleared on absence of overflow.
+static uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) {
+ // We use the multiplicative formula:
+ // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 .
+  // At each iteration, we multiply by the next term of the numerator and
+  // divide by the loop index. This division will always produce an
+ // integral result, and helps reduce the chance of overflow in the
+ // intermediate computations. However, we can still overflow even when the
+ // final result would fit.
+
+ if (n == 0 || n == k) return 1;
+ if (k > n) return 0;
+
+ if (k > n/2)
+ k = n-k;
+
+ uint64_t r = 1;
+ for (uint64_t i = 1; i <= k; ++i) {
+ r = umul_ov(r, n-(i-1), Overflow);
+ r /= i;
+ }
+ return r;
+}
+
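+// A worked example: Choose(6, 2, Overflow) computes r = (1 * 6) / 1 = 6 and
+// then r = (6 * 5) / 2 = 15, matching C(6, 2) = 15 without setting Overflow.
+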
/// getMulExpr - Get a canonical multiply expression, or something simpler if
/// possible.
const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
assert(!Ops.empty() && "Cannot get empty mul!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
- const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVMulExpr operand types don't match!");
#endif
// If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- if (!(Flags & SCEV::FlagNUW) && (Flags & SCEV::FlagNSW)) {
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
bool All = true;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
E = Ops.end(); I != E; ++I)
All = false;
break;
}
- if (All) Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
}
// Sort by complexity, this groups all similar expression types together.
if (AnyFolded)
return getAddExpr(NewOps);
}
+ else if (const SCEVAddRecExpr *
+ AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {
+ // Negation preserves a recurrence's no self-wrap property.
+ SmallVector<const SCEV *, 4> Operands;
+ for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(),
+ E = AddRec->op_end(); I != E; ++I) {
+ Operands.push_back(getMulExpr(Ops[0], *I));
+ }
+ return getAddRecExpr(Operands, AddRec->getLoop(),
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ }
}
}
// outer mul and the inner addrec are guaranteed to have no overflow.
//
// No self-wrap cannot be guaranteed after changing the step size, but
- // will be infered if either NUW or NSW is true.
+ // will be inferred if either NUW or NSW is true.
Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW));
const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags);
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
- // Otherwise, multiply the folded AddRec by the non-liv parts.
+ // Otherwise, multiply the folded AddRec by the non-invariant parts.
for (unsigned i = 0;; ++i)
if (Ops[i] == AddRec) {
Ops[i] = NewRec;
// Okay, if there weren't any loop invariants to be folded, check to see if
// there are multiple AddRec's with the same loop induction variable being
// multiplied together. If so, we can fold them.
+
+ // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L>
+ // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [
+  //   choose(x, 2x-y)*choose(2x-y, x-z)*A_{y-z}*B_z
+ // ]]],+,...up to x=2n}.
+ // Note that the arguments to choose() are always integers with values
+ // known at compile time, never SCEV objects.
+ //
+ // The implementation avoids pointless extra computations when the two
+ // addrec's are of different length (mathematically, it's equivalent to
+ // an infinite stream of zeros on the right).
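+  //
+  // For example, for two affine recurrences this reduces to
+  //   {A,+,B}<L> * {C,+,D}<L> = {A*C,+,B*C+A*D+B*D,+,2*B*D}<L>,
+  // matching the expansion of (A + B*i) * (C + D*i) rewritten in the
+  // binomial basis {1, i, i*(i-1)/2} that addrec operands use.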
+ bool OpsModified = false;
for (unsigned OtherIdx = Idx+1;
- OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
- ++OtherIdx)
- if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
- // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> -->
- // {A*C,+,F*D + G*B + B*D}<L>
- for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
- ++OtherIdx)
- if (const SCEVAddRecExpr *OtherAddRec =
- dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
- if (OtherAddRec->getLoop() == AddRecLoop) {
- const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
- const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart());
- const SCEV *B = F->getStepRecurrence(*this);
- const SCEV *D = G->getStepRecurrence(*this);
- const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
- getMulExpr(G, B),
- getMulExpr(B, D));
- const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
- F->getLoop(),
- SCEV::FlagAnyWrap);
- if (Ops.size() == 2) return NewAddRec;
- Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
- Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
- }
- return getMulExpr(Ops);
+ OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx) {
+ const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
+ if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
+ continue;
+
+ bool Overflow = false;
+ Type *Ty = AddRec->getType();
+ bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64;
+ SmallVector<const SCEV*, 7> AddRecOps;
+ for (int x = 0, xe = AddRec->getNumOperands() +
+ OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) {
+ const SCEV *Term = getConstant(Ty, 0);
+ for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {
+ uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);
+ for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1),
+ ze = std::min(x+1, (int)OtherAddRec->getNumOperands());
+ z < ze && !Overflow; ++z) {
+ uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow);
+ uint64_t Coeff;
+ if (LargerThan64Bits)
+ Coeff = umul_ov(Coeff1, Coeff2, Overflow);
+ else
+ Coeff = Coeff1*Coeff2;
+ const SCEV *CoeffTerm = getConstant(Ty, Coeff);
+ const SCEV *Term1 = AddRec->getOperand(y-z);
+ const SCEV *Term2 = OtherAddRec->getOperand(z);
+            Term = getAddExpr(Term, getMulExpr(CoeffTerm, Term1, Term2));
+ }
+ }
+ AddRecOps.push_back(Term);
+ }
+ if (!Overflow) {
+ const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
+ SCEV::FlagAnyWrap);
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = NewAddRec;
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ OpsModified = true;
+ AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec);
+ if (!AddRec)
+ break;
}
+ }
+ if (OpsModified)
+ return getMulExpr(Ops);
// Otherwise couldn't fold anything into this recurrence. Move onto the
// next one.
ID.AddInteger(scMulExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
- void *IP = 0;
+ void *IP = nullptr;
SCEVMulExpr *S =
static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
// Determine if the division can be folded into the operands of
// its operands.
// TODO: Generalize this to non-constants by using known-bits information.
- const Type *Ty = LHS->getType();
+ Type *Ty = LHS->getType();
unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
// For non-power-of-two values, effectively round the value up to the
// nearest power of two.
if (!RHSC->getValue()->getValue().isPowerOf2())
++MaxShiftAmt;
- const IntegerType *ExtTy =
+ IntegerType *ExtTy =
IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt);
- // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
if (const SCEVConstant *Step =
- dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this)))
- if (!Step->getValue()->getValue()
- .urem(RHSC->getValue()->getValue()) &&
+ dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {
+ // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded.
+ const APInt &StepInt = Step->getValue()->getValue();
+ const APInt &DivInt = RHSC->getValue()->getValue();
+ if (!StepInt.urem(DivInt) &&
getZeroExtendExpr(AR, ExtTy) ==
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
return getAddRecExpr(Operands, AR->getLoop(),
- // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
- SCEV::FlagAnyWrap);
+ SCEV::FlagNW);
+ }
+  // Get a canonical UDivExpr for a recurrence.
+  // {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
+  // We can currently only fold X%N if X is constant.
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
+ if (StartC && !DivInt.urem(StepInt) &&
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy),
+ AR->getLoop(), SCEV::FlagAnyWrap)) {
+ const APInt &StartInt = StartC->getValue()->getValue();
+ const APInt &StartRem = StartInt.urem(StepInt);
+ if (StartRem != 0)
+ LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step,
+ AR->getLoop(), SCEV::FlagNW);
}
+ }
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
}
}
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
- if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {
SmallVector<const SCEV *, 4> Operands;
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
ID.AddInteger(scUDivExpr);
ID.AddPointer(LHS);
ID.AddPointer(RHS);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
LHS, RHS);
return S;
}
+static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue().abs();
+ APInt B = C2->getValue()->getValue().abs();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.zext(ABW);
+ else if (ABW < BBW)
+ A = A.zext(BBW);
+
+ return APIntOps::GreatestCommonDivisor(A, B);
+}
+
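+// For example, taking the gcd of an i8 constant -12 and an i16 constant 8
+// uses the absolute value 12, zero-extends it to i16, and returns 4.
+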
+/// getUDivExactExpr - Get a canonical unsigned division expression, or
+/// something simpler if possible. There is no representation for an exact udiv
+/// in SCEV IR, but we can attempt to remove factors from the LHS and RHS.
+/// We can't do this when it's not exact because the udiv may be clearing bits.
+const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
+ const SCEV *RHS) {
+ // TODO: we could try to find factors in all sorts of things, but for now we
+ // just deal with u/exact (multiply, constant). See SCEVDivision towards the
+ // end of this file for inspiration.
+
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
+ if (!Mul)
+ return getUDivExpr(LHS, RHS);
+
+ if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
+ // If the mulexpr multiplies by a constant, then that constant must be the
+ // first element of the mulexpr.
+ if (const SCEVConstant *LHSCst =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ if (LHSCst == RHSCst) {
+ SmallVector<const SCEV *, 2> Operands;
+ Operands.append(Mul->op_begin() + 1, Mul->op_end());
+ return getMulExpr(Operands);
+ }
+
+ // We can't just assume that LHSCst divides RHSCst cleanly, it could be
+ // that there's a factor provided by one of the other terms. We need to
+ // check.
+ APInt Factor = gcd(LHSCst, RHSCst);
+ if (!Factor.isIntN(1)) {
+ LHSCst = cast<SCEVConstant>(
+ getConstant(LHSCst->getValue()->getValue().udiv(Factor)));
+ RHSCst = cast<SCEVConstant>(
+ getConstant(RHSCst->getValue()->getValue().udiv(Factor)));
+ SmallVector<const SCEV *, 2> Operands;
+ Operands.push_back(LHSCst);
+ Operands.append(Mul->op_begin() + 1, Mul->op_end());
+ LHS = getMulExpr(Operands);
+ RHS = RHSCst;
+ Mul = dyn_cast<SCEVMulExpr>(LHS);
+ if (!Mul)
+ return getUDivExactExpr(LHS, RHS);
+ }
+ }
+ }
+
+ for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
+ if (Mul->getOperand(i) == RHS) {
+ SmallVector<const SCEV *, 2> Operands;
+ Operands.append(Mul->op_begin(), Mul->op_begin() + i);
+ Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
+ return getMulExpr(Operands);
+ }
+ }
+
+ return getUDivExpr(LHS, RHS);
+}
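+
+// A usage sketch: getUDivExactExpr(2 * %x, 2) hits the LHSCst == RHSCst case
+// and returns %x, while getUDivExactExpr(6 * %x, 4) cancels the common factor
+// gcd(6, 4) = 2 and simplifies to the plain udiv (3 * %x) /u 2.
+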
/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
if (StepChrec->getLoop() == L) {
Operands.append(StepChrec->op_begin(), StepChrec->op_end());
- // FIXME: can use maskFlags(Flags, SCEV::FlagNW)
- return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
+ return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW));
}
Operands.push_back(Step);
const Loop *L, SCEV::NoWrapFlags Flags) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
- const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
+ Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
for (unsigned i = 1, e = Operands.size(); i != e; ++i)
assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
"SCEVAddRecExpr operand types don't match!");
// with a SCEVCouldNotCompute as the cached BE count).
// If FlagNSW is true and all the operands are non-negative, infer FlagNUW.
- if (!(Flags & SCEV::FlagNUW) && (Flags & SCEV::FlagNSW)) {
+ // And vice-versa.
+ int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;
+ SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask);
+ if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) {
bool All = true;
for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
E = Operands.end(); I != E; ++I)
All = false;
break;
}
- if (All) Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask);
}
// Canonicalize nested AddRecs by nesting them in order of loop depth.
if (AllInvariant) {
// Create a recurrence for the outer loop with the same step size.
//
- // FIXME:
// The outer recurrence keeps its NW flag but only keeps NUW/NSW if the
// inner recurrence has the same property.
- // maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
- SCEV::NoWrapFlags OuterFlags = SCEV::FlagAnyWrap;
+ SCEV::NoWrapFlags OuterFlags =
+ maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());
NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags);
AllInvariant = true;
if (AllInvariant) {
// Ok, both add recurrences are valid after the transformation.
//
- // FIXME:
// The inner recurrence keeps its NW flag but only keeps NUW/NSW if
// the outer recurrence has the same property.
- // maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
- SCEV::NoWrapFlags InnerFlags = SCEV::FlagAnyWrap;
+ SCEV::NoWrapFlags InnerFlags =
+ maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags);
return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags);
}
}
for (unsigned i = 0, e = Operands.size(); i != e; ++i)
ID.AddPointer(Operands[i]);
ID.AddPointer(L);
- void *IP = 0;
+ void *IP = nullptr;
SCEVAddRecExpr *S =
static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
assert(!Ops.empty() && "Cannot get empty smax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
- const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVSMaxExpr operand types don't match!");
ID.AddInteger(scSMaxExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
assert(!Ops.empty() && "Cannot get empty umax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
- const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+ Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVUMaxExpr operand types don't match!");
ID.AddInteger(scUMaxExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
-const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
- // If we have TargetData, we can bypass creating a target-independent
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
+ // If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
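+  // For example (illustration): with a typical 64-bit DataLayout,
+  // getSizeOfExpr(i64, {i32, i64}) folds directly to the SCEVConstant 16.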
- if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
- TD->getTypeAllocSize(AllocTy));
+ if (DL)
+ return getConstant(IntTy, DL->getTypeAllocSize(AllocTy));
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
- C = Folded;
- const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-
-const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
- Constant *C = ConstantExpr::getAlignOf(AllocTy);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
C = Folded;
- const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ assert(Ty == IntTy && "Effective SCEV type doesn't match");
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
-const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
+ StructType *STy,
unsigned FieldNo) {
- // If we have TargetData, we can bypass creating a target-independent
+ // If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
- TD->getStructLayout(STy)->getElementOffset(FieldNo));
+ if (DL) {
+ return getConstant(IntTy,
+ DL->getStructLayout(STy)->getElementOffset(FieldNo));
+ }
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
C = Folded;
- const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
- Constant *FieldNo) {
- Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
- C = Folded;
- const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
FoldingSetNodeID ID;
ID.AddInteger(scUnknown);
ID.AddPointer(V);
- void *IP = 0;
+ void *IP = nullptr;
if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
assert(cast<SCEVUnknown>(S)->getValue() == V &&
"Stale SCEVUnknown in uniquing map!");
/// the SCEV framework. This primarily includes integer types, and it
/// can optionally include pointer types if the ScalarEvolution class
/// has access to target-specific information.
-bool ScalarEvolution::isSCEVable(const Type *Ty) const {
+bool ScalarEvolution::isSCEVable(Type *Ty) const {
// Integers and pointers are always SCEVable.
return Ty->isIntegerTy() || Ty->isPointerTy();
}
/// getTypeSizeInBits - Return the size in bits of the specified type,
/// for which isSCEVable must return true.
-uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
+uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
- // If we have a TargetData, use it!
- if (TD)
- return TD->getTypeSizeInBits(Ty);
+ // If we have a DataLayout, use it!
+ if (DL)
+ return DL->getTypeSizeInBits(Ty);
// Integer types have fixed sizes.
if (Ty->isIntegerTy())
return Ty->getPrimitiveSizeInBits();
- // The only other support type is pointer. Without TargetData, conservatively
+  // The only other supported type is pointer. Without DataLayout,
+  // conservatively assume pointers are 64-bit.
assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
return 64;
/// the given type and which represents how SCEV will treat the given
/// type, for which isSCEVable must return true. For pointer types,
/// this is the pointer-sized integer type.
-const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
+Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
   if (Ty->isIntegerTy())
     return Ty;
   // The only other supported type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
- if (TD) return TD->getIntPtrType(getContext());
- // Without TargetData, conservatively assume pointers are 64-bit.
+ if (DL)
+ return DL->getIntPtrType(Ty);
+
+ // Without DataLayout, conservatively assume pointers are 64-bit.
return Type::getInt64Ty(getContext());
}
return &CouldNotCompute;
}
+namespace {
+  // Helper class working with SCEVTraversal to figure out if a SCEV contains
+  // a SCEVUnknown with a null value-pointer. FindInvalidSCEVUnknown::FindOne
+  // is set iff we find such a SCEVUnknown. (A SCEVUnknown's value pointer is
+  // nulled when the underlying Value is deleted, leaving stale map entries.)
+ //
+ struct FindInvalidSCEVUnknown {
+ bool FindOne;
+ FindInvalidSCEVUnknown() { FindOne = false; }
+ bool follow(const SCEV *S) {
+ switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+ case scConstant:
+ return false;
+ case scUnknown:
+ if (!cast<SCEVUnknown>(S)->getValue())
+ FindOne = true;
+ return false;
+ default:
+ return true;
+ }
+ }
+ bool isDone() const { return FindOne; }
+ };
+}
+
+bool ScalarEvolution::checkValidity(const SCEV *S) const {
+ FindInvalidSCEVUnknown F;
+ SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
+ ST.visitAll(S);
+
+ return !F.FindOne;
+}
+
/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
/// expression and create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
- ValueExprMapType::const_iterator I = ValueExprMap.find(V);
- if (I != ValueExprMap.end()) return I->second;
+ ValueExprMapType::iterator I = ValueExprMap.find_as(V);
+ if (I != ValueExprMap.end()) {
+ const SCEV *S = I->second;
+ if (checkValidity(S))
+ return S;
+ else
+ ValueExprMap.erase(I);
+ }
const SCEV *S = createSCEV(V);
// The process of creating a SCEV for V may have caused other SCEVs
return getConstant(
cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
- const Type *Ty = V->getType();
+ Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
return getMulExpr(V,
getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
return getConstant(
cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
- const Type *Ty = V->getType();
+ Type *Ty = V->getType();
Ty = getEffectiveSCEVType(Ty);
const SCEV *AllOnes =
getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
}
/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
-///
-/// FIXME: prohibit FlagNUW here, as soon as getMinusSCEVForExitTest goes.
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
SCEV::NoWrapFlags Flags) {
+ assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW");
+
// Fast path: X - X --> 0.
if (LHS == RHS)
return getConstant(LHS->getType(), 0);
/// input value to the specified type. If the type must be extended, it is zero
/// extended.
const SCEV *
-ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
- const Type *SrcTy = V->getType();
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or zero extend with non-integer arguments!");
/// extended.
const SCEV *
ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
- const Type *Ty) {
- const Type *SrcTy = V->getType();
+ Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or zero extend with non-integer arguments!");
/// input value to the specified type. If the type must be extended, it is zero
/// extended. The conversion must not be narrowing.
const SCEV *
-ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
- const Type *SrcTy = V->getType();
+ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or zero extend with non-integer arguments!");
/// input value to the specified type. If the type must be extended, it is sign
/// extended. The conversion must not be narrowing.
const SCEV *
-ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
- const Type *SrcTy = V->getType();
+ScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or sign extend with non-integer arguments!");
/// it is extended with unspecified bits. The conversion must not be
/// narrowing.
const SCEV *
-ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
- const Type *SrcTy = V->getType();
+ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or any extend with non-integer arguments!");
/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the
/// input value to the specified type. The conversion must not be widening.
const SCEV *
-ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
- const Type *SrcTy = V->getType();
+ScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) {
+ Type *SrcTy = V->getType();
assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
(Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or noop with non-integer arguments!");
return getUMinExpr(PromotedLHS, PromotedRHS);
}
+/// getPointerBase - Transitively follow the chain of pointer-type operands
+/// until reaching a SCEV that does not have a single pointer operand. This
+/// returns a SCEVUnknown pointer for well-formed pointer-type expressions,
+/// but corner cases do exist.
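+///
+/// For example (illustration): for the SCEV ((4 * %i) + %base), the single
+/// pointer operand is %base, so getPointerBase returns its SCEVUnknown.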
+const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
+ // A pointer operand may evaluate to a nonpointer expression, such as null.
+ if (!V->getType()->isPointerTy())
+ return V;
+
+ if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
+ return getPointerBase(Cast->getOperand());
+  } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
+ const SCEV *PtrOp = nullptr;
+ for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+ I != E; ++I) {
+ if ((*I)->getType()->isPointerTy()) {
+ // Cannot find the base of an expression with multiple pointer operands.
+ if (PtrOp)
+ return V;
+ PtrOp = *I;
+ }
+ }
+ if (!PtrOp)
+ return V;
+ return getPointerBase(PtrOp);
+ }
+ return V;
+}
+
/// PushDefUseChildren - Push users of the given Instruction
/// onto the given Worklist.
static void
PushDefUseChildren(Instruction *I,
SmallVectorImpl<Instruction *> &Worklist) {
// Push the def-use children onto the Worklist stack.
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI)
- Worklist.push_back(cast<Instruction>(*UI));
+ for (User *U : I->users())
+ Worklist.push_back(cast<Instruction>(U));
}
/// ForgetSymbolicValue - This looks up computed SCEV values for all
if (!Visited.insert(I)) continue;
ValueExprMapType::iterator It =
- ValueExprMap.find(static_cast<Value *>(I));
+ ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
const SCEV *Old = It->second;
// The loop may have multiple entrances or multiple exits; we can analyze
// this phi as an addrec if it has a unique entry value and a unique
// backedge value.
- Value *BEValueV = 0, *StartValueV = 0;
+ Value *BEValueV = nullptr, *StartValueV = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
if (L->contains(PN->getIncomingBlock(i))) {
if (!BEValueV) {
BEValueV = V;
} else if (BEValueV != V) {
- BEValueV = 0;
+ BEValueV = nullptr;
break;
}
} else if (!StartValueV) {
StartValueV = V;
} else if (StartValueV != V) {
- StartValueV = 0;
+ StartValueV = nullptr;
break;
}
}
if (BEValueV && StartValueV) {
// While we are analyzing this PHI node, handle its value symbolically.
const SCEV *SymbolicName = getUnknown(PN);
- assert(ValueExprMap.find(PN) == ValueExprMap.end() &&
+ assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
"PHI node already processed?");
ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
Flags = setFlags(Flags, SCEV::FlagNUW);
if (OBO->hasNoSignedWrap())
Flags = setFlags(Flags, SCEV::FlagNSW);
- } else if (const GEPOperator *GEP =
- dyn_cast<GEPOperator>(BEValueV)) {
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
// about signed or unsigned overflow because pointers are
// unsigned but we may have a negative index from the base
- // pointer.
- if (GEP->isInBounds())
- // FIXME: should be SCEV::FlagNW
+ // pointer. We can guarantee that no unsigned wrap occurs if the
+ // indices form a positive value.
+ if (GEP->isInBounds()) {
+ Flags = setFlags(Flags, SCEV::FlagNW);
+
+ const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+ if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ }
+ } else if (const SubOperator *OBO =
+ dyn_cast<SubOperator>(BEValueV)) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
Flags = setFlags(Flags, SCEV::FlagNSW);
}
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, TD, DT))
+ if (Value *V = SimplifyInstruction(PN, DL, TLI, DT, AT))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
/// operations. This allows them to be analyzed by regular SCEV code.
///
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+ Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!Base->getType()->getPointerElementType()->isSized())
+ return getUnknown(GEP);
// Don't blindly transfer the inbounds flag from the GEP instruction to the
// Add expression, because the Instruction may be guarded by control flow
// and the no-overflow bits may not be valid for the expression in any
// context.
- bool isInBounds = GEP->isInBounds();
+  SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW
+                                             : SCEV::FlagAnyWrap;
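+  // For example (sketch): "getelementptr inbounds %struct.S* %p, i64 %i,
+  // i32 2" becomes (%p + (%i * sizeof(%struct.S)) + offsetof(%struct.S, 2)),
+  // with nsw on the multiply and adds because the GEP is inbounds.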
- const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
- Value *Base = GEP->getOperand(0);
- // Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
- return getUnknown(GEP);
const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
gep_type_iterator GTI = gep_type_begin(GEP);
- for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
+ for (GetElementPtrInst::op_iterator I = std::next(GEP->op_begin()),
E = GEP->op_end();
I != E; ++I) {
Value *Index = *I;
// Compute the (potentially symbolic) offset in bytes for this index.
- if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+ const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
// Add the field offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, FieldOffset);
} else {
// For an array, add the element offset, explicitly scaled.
- const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI);
const SCEV *IndexS = getSCEV(Index);
// Getelementptr indices are signed.
IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
// Multiply the index by the element size to compute the element offset.
- const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
- isInBounds ? SCEV::FlagNSW :
- SCEV::FlagAnyWrap);
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap);
// Add the element offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, LocalOffset);
const SCEV *BaseS = getSCEV(Base);
// Add the total offset from all the GEP indices to the base.
- return getAddExpr(BaseS, TotalOffset,
- isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+ return getAddExpr(BaseS, TotalOffset, Wrap);
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT);
return Zeros.countTrailingOnes();
}
return 0;
}
+/// GetRangeFromMetadata - Helper method to assign a range to V from
+/// metadata present in the IR.
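+///
+/// For example (illustration): a load tagged with
+/// !range !{i32 0, i32 10, i32 20, i32 30} produces the union of [0, 10) and
+/// [20, 30), i.e. the smallest ConstantRange covering both, here [0, 30).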
+static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) {
+ ConstantRange TotalRange(
+ cast<IntegerType>(I->getType())->getBitWidth(), false);
+
+ unsigned NumRanges = MD->getNumOperands() / 2;
+ assert(NumRanges >= 1);
+
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Lower = cast<ConstantInt>(MD->getOperand(2*i + 0));
+ ConstantInt *Upper = cast<ConstantInt>(MD->getOperand(2*i + 1));
+ ConstantRange Range(Lower->getValue(), Upper->getValue());
+ TotalRange = TotalRange.unionWith(Range);
+ }
+
+ return TotalRange;
+ }
+ }
+
+ return None;
+}
+
/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
///
ConstantRange
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
// If there's no unsigned wrap, the value will never be less than its
// initial value.
- // FIXME: can broaden to FlagNW?
if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
if (!C->getValue()->isZero())
// TODO: non-affine addrec
if (AddRec->isAffine()) {
- const Type *Ty = AddRec->getType();
+ Type *Ty = AddRec->getType();
const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // Check if the IR explicitly contains !range metadata.
+ Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
+ if (MDRange.hasValue())
+ ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
+
// For a SCEVUnknown, ask ValueTracking.
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AT, nullptr, DT);
if (Ones == ~Zeros + 1)
return setUnsignedRange(U, ConservativeResult);
return setUnsignedRange(U,
// TODO: non-affine addrec
if (AddRec->isAffine()) {
- const Type *Ty = AddRec->getType();
+ Type *Ty = AddRec->getType();
const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
}
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // Check if the IR explicitly contains !range metadata.
+ Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
+ if (MDRange.hasValue())
+ ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue());
+
// For a SCEVUnknown, ask ValueTracking.
- if (!U->getValue()->getType()->isIntegerTy() && !TD)
+ if (!U->getValue()->getType()->isIntegerTy() && !DL)
return setSignedRange(U, ConservativeResult);
- unsigned NS = ComputeNumSignBits(U->getValue(), TD);
- if (NS == 1)
+ unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AT, nullptr, DT);
+ if (NS <= 1)
return setSignedRange(U, ConservativeResult);
return setSignedRange(U, ConservativeResult.intersectWith(
ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
+ //
+ // Don't apply this instruction's NSW or NUW flags to the new
+ // expression. The instruction may be guarded by control flow that the
+ // no-wrap behavior depends on. Non-control-equivalent instructions can be
+ // mapped to the same SCEV expression, and it would be incorrect to transfer
+ // NSW/NUW semantics to those operations.
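+  // For example (illustration): an "add nsw" reached only under a guard and
+  // a plain "add" of the same operands elsewhere both map to one (%a + %b)
+  // expression, so attaching nsw to it would be unsound.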
SmallVector<const SCEV *, 4> AddOps;
AddOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
return getAddExpr(AddOps);
}
case Instruction::Mul: {
- // See the Add code above.
+ // Don't transfer NSW/NUW for the same reason as AddExpr.
SmallVector<const SCEV *, 4> MulOps;
MulOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0);
// Instcombine's ShrinkDemandedConstant may strip bits out of
// constants, obscuring what would otherwise be a low-bits mask.
- // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
+ // Use computeKnownBits to compute what ShrinkDemandedConstant
// knew about to reconstruct a low-bits mask value.
unsigned LZ = A.countLeadingZeros();
+ unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);
-
- APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
-
- if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
- return
- getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
- IntegerType::get(getContext(), BitWidth - LZ)),
- U->getType());
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL,
+ 0, AT, nullptr, DT);
+
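+      // For example (illustration): with A == 0xF0 on i8, LZ is 0 and TZ is
+      // 4, so EffectiveMask below is 0xF0 and (%x & 0xF0) is rewritten to
+      // (zext i4 (trunc (%x /u 16)) to i8) * 16.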
+ APInt EffectiveMask =
+ APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
+ if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
+ const SCEV *MulCount = getConstant(
+ ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ)));
+ return getMulExpr(
+ getZeroExtendExpr(
+ getTruncateExpr(
+ getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount),
+ IntegerType::get(getContext(), BitWidth - LZ - TZ)),
+ U->getType()),
+ MulCount);
+ }
}
break;
LCI->getValue() == CI->getValue())
if (const SCEVZeroExtendExpr *Z =
dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
- const Type *UTy = U->getType();
+ Type *UTy = U->getType();
const SCEV *Z0 = Z->getOperand();
- const Type *Z0Ty = Z0->getType();
+ Type *Z0Ty = Z0->getType();
unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
// If C is a low-bits mask, the zero extend is serving to
break;
Constant *X = ConstantInt::get(getContext(),
- APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
break;
break;
Constant *X = ConstantInt::get(getContext(),
- APInt(BitWidth, 1).shl(SA->getZExtValue()));
+ APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
}
break;
// Iteration Count Computation Code
//
-/// getBackedgeTakenCount - If the specified loop has a predictable
-/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
-/// object. The backedge-taken count is the number of times the loop header
-/// will be branched to from within the loop. This is one less than the
-/// trip count of the loop, since it doesn't count the first iteration,
-/// when the header is branched to from outside the loop.
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
+ if (BasicBlock *ExitingBB = L->getExitingBlock())
+ return getSmallConstantTripCount(L, ExitingBB);
+
+ // No trip count information for multiple exits.
+ return 0;
+}
+
+/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
+/// normal unsigned value. Returns 0 if the trip count is unknown or not
+/// constant. Will also return 0 if the maximum trip count is very large (>=
+/// 2^32).
+///
+/// This "trip count" assumes that control exits via ExitingBlock. More
+/// precisely, it is the number of times that control may reach ExitingBlock
+/// before taking the branch. For loops with multiple exits, it may not be the
+/// number of times that the loop header executes, because the loop may exit
+/// prematurely via another branch.
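+///
+/// For example (illustration): an exit count of 9 via ExitingBlock yields a
+/// small constant trip count of 10; any count wider than 32 bits yields 0.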
+unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
+ BasicBlock *ExitingBlock) {
+ assert(ExitingBlock && "Must pass a non-null exiting block!");
+ assert(L->isLoopExiting(ExitingBlock) &&
+ "Exiting block must actually branch out of the loop!");
+ const SCEVConstant *ExitCount =
+ dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
+ if (!ExitCount)
+ return 0;
+
+ ConstantInt *ExitConst = ExitCount->getValue();
+
+ // Guard against huge trip counts.
+ if (ExitConst->getValue().getActiveBits() > 32)
+ return 0;
+
+ // In case of integer overflow, this returns 0, which is correct.
+ return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
+
+unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
+ if (BasicBlock *ExitingBB = L->getExitingBlock())
+ return getSmallConstantTripMultiple(L, ExitingBB);
+
+ // No trip multiple information for multiple exits.
+ return 0;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
+/// trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be the
+/// multiple of a constant (which is also the case if the trip count is simply
+/// constant; use getSmallConstantTripCount for that case). It will also
+/// return 1 if the trip count is very large (>= 2^32).
+///
+/// As explained in the comments for getSmallConstantTripCount, this assumes
+/// that control exits the loop via ExitingBlock.
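+///
+/// For example (illustration): a backedge-taken count of (-1 + (4 * %n))
+/// gives trip count (4 * %n), whose largest known constant divisor is 4.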
+unsigned
+ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
+ BasicBlock *ExitingBlock) {
+ assert(ExitingBlock && "Must pass a non-null exiting block!");
+ assert(L->isLoopExiting(ExitingBlock) &&
+ "Exiting block must actually branch out of the loop!");
+ const SCEV *ExitCount = getExitCount(L, ExitingBlock);
+ if (ExitCount == getCouldNotCompute())
+ return 1;
+
+ // Get the trip count from the BE count by adding 1.
+ const SCEV *TCMul = getAddExpr(ExitCount,
+ getConstant(ExitCount->getType(), 1));
+ // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
+ // to factor simple cases.
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
+ TCMul = Mul->getOperand(0);
+
+ const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
+ if (!MulC)
+ return 1;
+
+ ConstantInt *Result = MulC->getValue();
+
+ // Guard against huge trip counts (this requires checking
+ // for zero to handle the case where the trip count == -1 and the
+ // addition wraps).
+ if (!Result || Result->getValue().getActiveBits() > 32 ||
+ Result->getValue().getActiveBits() == 0)
+ return 1;
+
+ return (unsigned)Result->getZExtValue();
+}
+
+/// getExitCount - Get the expression for the number of loop iterations for
+/// which this loop is guaranteed not to exit via ExitingBlock. Otherwise
+/// return SCEVCouldNotCompute.
+const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
+ return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
+}
+
+/// getBackedgeTakenCount - If the specified loop has a predictable
+/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
+/// object. The backedge-taken count is the number of times the loop header
+/// will be branched to from within the loop. This is one less than the
+/// trip count of the loop, since it doesn't count the first iteration,
+/// when the header is branched to from outside the loop.
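+/// For example, a loop whose body runs 10 times has a backedge-taken count
+/// of 9: the header is reached once from outside the loop and 9 times from
+/// the backedge.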
///
/// Note that it is not valid to call this method on a loop without a
/// loop-invariant backedge-taken count (see
/// hasLoopInvariantBackedgeTakenCount).
///
const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
- return getBackedgeTakenInfo(L).Exact;
+ return getBackedgeTakenInfo(L).getExact(this);
}
/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
/// return the least SCEV value that is known never to be less than the
/// actual backedge taken count.
const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
- return getBackedgeTakenInfo(L).Max;
+ return getBackedgeTakenInfo(L).getMax(this);
}
/// PushLoopPHIs - Push PHI nodes in the header of the given loop
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
- // Initially insert a CouldNotCompute for this loop. If the insertion
+ // Initially insert an invalid entry for this loop. If the insertion
// succeeds, proceed to actually compute a backedge-taken count and
// update the value. The temporary CouldNotCompute value tells SCEV
// code elsewhere that it shouldn't attempt to request a new
// backedge-taken count, which could result in infinite recursion.
- std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
- BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+ std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+ BackedgeTakenCounts.insert(std::make_pair(L, BackedgeTakenInfo()));
if (!Pair.second)
return Pair.first->second;
- BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L);
- if (BECount.Exact != getCouldNotCompute()) {
- assert(isLoopInvariant(BECount.Exact, L) &&
- isLoopInvariant(BECount.Max, L) &&
+ // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it
+ // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
+ // must be cleared in this scope.
+ BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L);
+
+ if (Result.getExact(this) != getCouldNotCompute()) {
+ assert(isLoopInvariant(Result.getExact(this), L) &&
+ isLoopInvariant(Result.getMax(this), L) &&
"Computed backedge-taken count isn't loop invariant for loop!");
++NumTripCountsComputed;
-
- // Update the value in the map.
- Pair.first->second = BECount;
- } else {
- if (BECount.Max != getCouldNotCompute())
- // Update the value in the map.
- Pair.first->second = BECount;
- if (isa<PHINode>(L->getHeader()->begin()))
- // Only count loops that have phi nodes as not being computable.
- ++NumTripCountsNotComputed;
+  } else if (Result.getMax(this) == getCouldNotCompute() &&
+             isa<PHINode>(L->getHeader()->begin())) {
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
}
// Now that we know more about the trip count for this loop, forget any
// conservative estimates made without the benefit of trip count
// information. This is similar to the code in forgetLoop, except that
// it handles SCEVUnknown PHI nodes specially.
- if (BECount.hasAnyInfo()) {
+ if (Result.hasAnyInfo()) {
SmallVector<Instruction *, 16> Worklist;
PushLoopPHIs(L, Worklist);
if (!Visited.insert(I)) continue;
ValueExprMapType::iterator It =
- ValueExprMap.find(static_cast<Value *>(I));
+ ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
const SCEV *Old = It->second;
PushDefUseChildren(I, Worklist);
}
}
- return Pair.first->second;
+
+  // Re-lookup the insert position, since the call to
+  // ComputeBackedgeTakenCount above could result in a recursive call to
+  // getBackedgeTakenInfo (on a different loop), which would invalidate the
+  // iterator computed earlier.
+ return BackedgeTakenCounts.find(L)->second = Result;
}
/// forgetLoop - This method should be called by the client when it has
/// compute a trip count, or if the loop is deleted.
void ScalarEvolution::forgetLoop(const Loop *L) {
// Drop any stored trip count value.
- BackedgeTakenCounts.erase(L);
+ DenseMap<const Loop*, BackedgeTakenInfo>::iterator BTCPos =
+ BackedgeTakenCounts.find(L);
+ if (BTCPos != BackedgeTakenCounts.end()) {
+ BTCPos->second.clear();
+ BackedgeTakenCounts.erase(BTCPos);
+ }
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
- ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
forgetMemoizedResults(It->second);
ValueExprMap.erase(It);
I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
- ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
forgetMemoizedResults(It->second);
ValueExprMap.erase(It);
}
}
+/// getExact - Get the exact loop backedge taken count considering all loop
+/// exits. A computable result can only be returned for loops with a single
+/// exit. Returning the minimum taken count among all exits is incorrect
+/// because one of the loop's exit limits may have been skipped. HowFarToZero
+/// assumes that the limit of each loop test is never skipped. This is a valid
+/// assumption as long as the loop exits via that test. For precise results,
+/// it is the caller's responsibility to specify the relevant loop exit using
+/// getExact(ExitingBlock, SE).
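+///
+/// For example (illustration): two computable exits with not-taken counts of
+/// 5 and 7 yield CouldNotCompute; all computable exits must agree exactly.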
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
+ // If any exits were not computable, the loop is not computable.
+ if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
+
+ // We need exactly one computable exit.
+ if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
+ assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
+
+ const SCEV *BECount = nullptr;
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != nullptr; ENT = ENT->getNextExit()) {
+
+ assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
+
+ if (!BECount)
+ BECount = ENT->ExactNotTaken;
+ else if (BECount != ENT->ExactNotTaken)
+ return SE->getCouldNotCompute();
+ }
+ assert(BECount && "Invalid not taken count for loop exit");
+ return BECount;
+}
+
+/// getExact - Get the exact not taken count for this loop exit.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
+ ScalarEvolution *SE) const {
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != nullptr; ENT = ENT->getNextExit()) {
+
+ if (ENT->ExitingBlock == ExitingBlock)
+ return ENT->ExactNotTaken;
+ }
+ return SE->getCouldNotCompute();
+}
+
+/// getMax - Get the max backedge taken count for the loop.
+const SCEV *
+ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
+ return Max ? Max : SE->getCouldNotCompute();
+}
+
+bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
+ ScalarEvolution *SE) const {
+ if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
+ return true;
+
+ if (!ExitNotTaken.ExitingBlock)
+ return false;
+
+ for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
+ ENT != nullptr; ENT = ENT->getNextExit()) {
+
+ if (ENT->ExactNotTaken != SE->getCouldNotCompute()
+ && SE->hasOperand(ENT->ExactNotTaken, S)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
+/// computable exit into a persistent ExitNotTakenInfo array.
+ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
+ SmallVectorImpl< std::pair<BasicBlock *, const SCEV *> > &ExitCounts,
+ bool Complete, const SCEV *MaxCount) : Max(MaxCount) {
+
+ if (!Complete)
+ ExitNotTaken.setIncomplete();
+
+ unsigned NumExits = ExitCounts.size();
+ if (NumExits == 0) return;
+
+ ExitNotTaken.ExitingBlock = ExitCounts[0].first;
+ ExitNotTaken.ExactNotTaken = ExitCounts[0].second;
+ if (NumExits == 1) return;
+
+ // Handle the rare case of multiple computable exits.
+ ExitNotTakenInfo *ENT = new ExitNotTakenInfo[NumExits-1];
+
+ ExitNotTakenInfo *PrevENT = &ExitNotTaken;
+ for (unsigned i = 1; i < NumExits; ++i, PrevENT = ENT, ++ENT) {
+ PrevENT->setNextExit(ENT);
+ ENT->ExitingBlock = ExitCounts[i].first;
+ ENT->ExactNotTaken = ExitCounts[i].second;
+ }
+}
+
+/// clear - Invalidate this result and free the ExitNotTakenInfo array.
+void ScalarEvolution::BackedgeTakenInfo::clear() {
+ ExitNotTaken.ExitingBlock = nullptr;
+ ExitNotTaken.ExactNotTaken = nullptr;
+ delete[] ExitNotTaken.getNextExit();
+}
+
/// ComputeBackedgeTakenCount - Compute the number of times the backedge
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- // Examine all exits and pick the most conservative values.
- const SCEV *BECount = getCouldNotCompute();
- const SCEV *MaxBECount = getCouldNotCompute();
- bool CouldNotComputeBECount = false;
+ SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
+ bool CouldComputeBECount = true;
+ BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
+ const SCEV *MustExitMaxBECount = nullptr;
+ const SCEV *MayExitMaxBECount = nullptr;
+
+ // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
+ // and compute maxBECount.
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- BackedgeTakenInfo NewBTI =
- ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
+ BasicBlock *ExitBB = ExitingBlocks[i];
+ ExitLimit EL = ComputeExitLimit(L, ExitBB);
- if (NewBTI.Exact == getCouldNotCompute()) {
+ // 1. For each exit that can be computed, add an entry to ExitCounts.
+ // CouldComputeBECount is true only if all exits can be computed.
+ if (EL.Exact == getCouldNotCompute())
// We couldn't compute an exact value for this exit, so
// we won't be able to compute an exact value for the loop.
- CouldNotComputeBECount = true;
- BECount = getCouldNotCompute();
- } else if (!CouldNotComputeBECount) {
- if (BECount == getCouldNotCompute())
- BECount = NewBTI.Exact;
- else
- BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
+ CouldComputeBECount = false;
+ else
+ ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact));
+
+ // 2. Derive the loop's MaxBECount from each exit's max number of
+ // non-exiting iterations. Partition the loop exits into two kinds:
+ // LoopMustExits and LoopMayExits.
+ //
+    // If the exit dominates the loop latch, it is a LoopMustExit. Otherwise
+    // it
+ // is a LoopMayExit. If any computable LoopMustExit is found, then
+ // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
+ // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
+ // considered greater than any computable EL.Max.
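+    // For example (illustration): a latch-dominating exit with EL.Max == 100
+    // bounds MaxBECount by 100 even if another, conditionally reached exit
+    // has an unknown maximum.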
+ if (EL.Max != getCouldNotCompute() && Latch &&
+ DT->dominates(ExitBB, Latch)) {
+ if (!MustExitMaxBECount)
+ MustExitMaxBECount = EL.Max;
+ else {
+ MustExitMaxBECount =
+ getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
+ }
+ } else if (MayExitMaxBECount != getCouldNotCompute()) {
+ if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
+ MayExitMaxBECount = EL.Max;
+ else {
+ MayExitMaxBECount =
+ getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
+ }
}
- if (MaxBECount == getCouldNotCompute())
- MaxBECount = NewBTI.Max;
- else if (NewBTI.Max != getCouldNotCompute())
- MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
}
-
- return BackedgeTakenInfo(BECount, MaxBECount);
+ const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
+ (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
+ return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
}
-/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
-/// of the specified loop will execute if it exits via the specified block.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
- BasicBlock *ExitingBlock) {
+/// ComputeExitLimit - Compute the number of times the backedge of the
+/// specified loop will execute if it exits via the specified block.
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
// Okay, we've chosen an exiting block. See what condition causes us to
- // exit at this block.
- //
- // FIXME: we should be able to handle switch instructions (with a single exit)
- BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (ExitBr == 0) return getCouldNotCompute();
- assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+  // exit at this block and remember the exit block and whether all other
+  // targets lead to the loop header.
+ bool MustExecuteLoopHeader = true;
+ BasicBlock *Exit = nullptr;
+ for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
+ SI != SE; ++SI)
+ if (!L->contains(*SI)) {
+ if (Exit) // Multiple exit successors.
+ return getCouldNotCompute();
+ Exit = *SI;
+ } else if (*SI != L->getHeader()) {
+ MustExecuteLoopHeader = false;
+ }
// At this point, we know we have a conditional branch that determines whether
// the loop is exited. However, we don't know if the branch is executed each
//
// More extensive analysis could be done to handle more cases here.
//
- if (ExitBr->getSuccessor(0) != L->getHeader() &&
- ExitBr->getSuccessor(1) != L->getHeader() &&
- ExitBr->getParent() != L->getHeader()) {
+ if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
// The simple checks failed, try climbing the unique predecessor chain
// up to the header.
bool Ok = false;
- for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+ for (BasicBlock *BB = ExitingBlock; BB; ) {
BasicBlock *Pred = BB->getUniquePredecessor();
if (!Pred)
return getCouldNotCompute();
return getCouldNotCompute();
}
- // Proceed to the next level to examine the exit condition expression.
- return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
- ExitBr->getSuccessor(0),
- ExitBr->getSuccessor(1));
+ bool IsOnlyExit = (L->getExitingBlock() != nullptr);
+ TerminatorInst *Term = ExitingBlock->getTerminator();
+ if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
+ assert(BI->isConditional() && "If unconditional, it can't be in loop!");
+ // Proceed to the next level to examine the exit condition expression.
+ return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
+ BI->getSuccessor(1),
+ /*ControlsExit=*/IsOnlyExit);
+ }
+
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
+ return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
+ /*ControlsExit=*/IsOnlyExit);
+
+ return getCouldNotCompute();
}
-/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+/// ComputeExitLimitFromCond - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of ExitCond, TBB, and FBB.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
- Value *ExitCond,
- BasicBlock *TBB,
- BasicBlock *FBB) {
+///
+/// @param ControlsExit is true if ExitCond directly controls the exit
+/// branch. In this case, we can assume that the loop exits only if the
+/// condition is true and can infer that failing to meet the condition prior to
+/// integer wraparound results in undefined behavior.
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
+ Value *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB,
+ bool ControlsExit) {
// Check if the controlling expression for this loop is an And or Or.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
if (BO->getOpcode() == Instruction::And) {
// Recurse on the operands of the and.
- BackedgeTakenInfo BTI0 =
- ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
- BackedgeTakenInfo BTI1 =
- ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ bool EitherMayExit = L->contains(TBB);
+ ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+ ControlsExit && !EitherMayExit);
+ ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+ ControlsExit && !EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
- if (L->contains(TBB)) {
+ if (EitherMayExit) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
- if (BTI0.Exact == getCouldNotCompute() ||
- BTI1.Exact == getCouldNotCompute())
+ if (EL0.Exact == getCouldNotCompute() ||
+ EL1.Exact == getCouldNotCompute())
BECount = getCouldNotCompute();
else
- BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
- if (BTI0.Max == getCouldNotCompute())
- MaxBECount = BTI1.Max;
- else if (BTI1.Max == getCouldNotCompute())
- MaxBECount = BTI0.Max;
+ BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+ if (EL0.Max == getCouldNotCompute())
+ MaxBECount = EL1.Max;
+ else if (EL1.Max == getCouldNotCompute())
+ MaxBECount = EL0.Max;
else
- MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
} else {
// Both conditions must be true at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(FBB) && "Loop block has no successor in loop!");
- if (BTI0.Max == BTI1.Max)
- MaxBECount = BTI0.Max;
- if (BTI0.Exact == BTI1.Exact)
- BECount = BTI0.Exact;
+ if (EL0.Max == EL1.Max)
+ MaxBECount = EL0.Max;
+ if (EL0.Exact == EL1.Exact)
+ BECount = EL0.Exact;
}
- return BackedgeTakenInfo(BECount, MaxBECount);
+ return ExitLimit(BECount, MaxBECount);
}
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
- BackedgeTakenInfo BTI0 =
- ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
- BackedgeTakenInfo BTI1 =
- ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ bool EitherMayExit = L->contains(FBB);
+ ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+ ControlsExit && !EitherMayExit);
+ ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+ ControlsExit && !EitherMayExit);
const SCEV *BECount = getCouldNotCompute();
const SCEV *MaxBECount = getCouldNotCompute();
- if (L->contains(FBB)) {
+ if (EitherMayExit) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
- if (BTI0.Exact == getCouldNotCompute() ||
- BTI1.Exact == getCouldNotCompute())
+ if (EL0.Exact == getCouldNotCompute() ||
+ EL1.Exact == getCouldNotCompute())
BECount = getCouldNotCompute();
else
- BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
- if (BTI0.Max == getCouldNotCompute())
- MaxBECount = BTI1.Max;
- else if (BTI1.Max == getCouldNotCompute())
- MaxBECount = BTI0.Max;
+ BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
+ if (EL0.Max == getCouldNotCompute())
+ MaxBECount = EL1.Max;
+ else if (EL1.Max == getCouldNotCompute())
+ MaxBECount = EL0.Max;
else
- MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
} else {
// Both conditions must be false at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(TBB) && "Loop block has no successor in loop!");
- if (BTI0.Max == BTI1.Max)
- MaxBECount = BTI0.Max;
- if (BTI0.Exact == BTI1.Exact)
- BECount = BTI0.Exact;
+ if (EL0.Max == EL1.Max)
+ MaxBECount = EL0.Max;
+ if (EL0.Exact == EL1.Exact)
+ BECount = EL0.Exact;
}
- return BackedgeTakenInfo(BECount, MaxBECount);
+ return ExitLimit(BECount, MaxBECount);
}
}
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
- return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
+ return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);
// Check for a constant condition. These are normally stripped out by
// SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
}
// If it's not an integer or pointer comparison then compute it the hard way.
- return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
-}
-
-static const SCEVAddRecExpr *
-isSimpleUnwrappingAddRec(const SCEV *S, const Loop *L) {
- const SCEVAddRecExpr *SA = dyn_cast<SCEVAddRecExpr>(S);
-
- // The SCEV must be an addrec of this loop.
- if (!SA || SA->getLoop() != L || !SA->isAffine())
- return 0;
-
- // The SCEV must be known to not wrap in some way to be interesting.
- if (!SA->getNoWrapFlags(SCEV::FlagNW))
- return 0;
-
- // The stride must be a constant so that we know if it is striding up or down.
- if (!isa<SCEVConstant>(SA->getOperand(1)))
- return 0;
- return SA;
-}
-
-/// getMinusSCEVForExitTest - When considering an exit test for a loop with a
-/// "x != y" exit test, we turn this into a computation that evaluates x-y != 0,
-/// and this function returns the expression to use for x-y. We know and take
-/// advantage of the fact that this subtraction is only being used in a
-/// comparison by zero context.
-///
-/// FIXME: this can be completely removed once AddRec FlagNWs are propagated.
-static const SCEV *getMinusSCEVForExitTest(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, ScalarEvolution &SE) {
- // If either LHS or RHS is an AddRec SCEV (of this loop) that is known to not
- // self-wrap, then we know that the value will either become the other one
- // (and thus the loop terminates), that the loop will terminate through some
- // other exit condition first, or that the loop has undefined behavior. This
- // information is useful when the addrec has a stride that is != 1 or -1,
- // because it means we can't "miss" the exit value.
- //
- // In any of these three cases, it is safe to turn the exit condition into a
- // "counting down" AddRec (to zero) by subtracting the two inputs as normal,
- // but since we know that the "end cannot be missed" we can force the
- // resulting AddRec to be a NUW addrec. Since it is counting down, this means
- // that the AddRec *cannot* pass zero.
-
- // See if LHS and RHS are addrec's we can handle.
- const SCEVAddRecExpr *LHSA = isSimpleUnwrappingAddRec(LHS, L);
- const SCEVAddRecExpr *RHSA = isSimpleUnwrappingAddRec(RHS, L);
-
- // If neither addrec is interesting, just return a minus.
- if (RHSA == 0 && LHSA == 0)
- return SE.getMinusSCEV(LHS, RHS);
-
- // If only one of LHS and RHS are an AddRec of this loop, make sure it is LHS.
- if (RHSA && LHSA == 0) {
- // Safe because a-b === b-a for comparisons against zero.
- std::swap(LHS, RHS);
- std::swap(LHSA, RHSA);
- }
-
- // Handle the case when only one is advancing in a non-overflowing way.
- if (RHSA == 0) {
- // If RHS is loop varying, then we can't predict when LHS will cross it.
- if (!SE.isLoopInvariant(RHS, L))
- return SE.getMinusSCEV(LHS, RHS);
-
- // If LHS has a positive stride, then we compute RHS-LHS, because the loop
- // is counting up until it crosses RHS (which must be larger than LHS). If
- // it is negative, we compute LHS-RHS because we're counting down to RHS.
- const ConstantInt *Stride =
- cast<SCEVConstant>(LHSA->getOperand(1))->getValue();
- if (Stride->getValue().isNegative())
- std::swap(LHS, RHS);
-
- return SE.getMinusSCEV(RHS, LHS, SCEV::FlagNUW);
- }
-
- // If both LHS and RHS are interesting, we have something like:
- // a+i*4 != b+i*8.
- const ConstantInt *LHSStride =
- cast<SCEVConstant>(LHSA->getOperand(1))->getValue();
- const ConstantInt *RHSStride =
- cast<SCEVConstant>(RHSA->getOperand(1))->getValue();
-
- // If the strides are equal, then this is just a (complex) loop invariant
- // comparison of a and b.
- if (LHSStride == RHSStride)
- return SE.getMinusSCEV(LHSA->getStart(), RHSA->getStart());
-
- // If the signs of the strides differ, then the negative stride is counting
- // down to the positive stride.
- if (LHSStride->getValue().isNegative() != RHSStride->getValue().isNegative()){
- if (RHSStride->getValue().isNegative())
- std::swap(LHS, RHS);
- } else {
- // If LHS's stride is smaller than RHS's stride, then "b" must be less than
- // "a" and "b" is RHS is counting up (catching up) to LHS. This is true
- // whether the strides are positive or negative.
- if (RHSStride->getValue().slt(LHSStride->getValue()))
- std::swap(LHS, RHS);
- }
-
- return SE.getMinusSCEV(LHS, RHS, SCEV::FlagNUW);
+ return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
}
-/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
+/// ComputeExitLimitFromICmp - Compute the number of times the
/// backedge of the specified loop will execute if its exit condition
/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
- ICmpInst *ExitCond,
- BasicBlock *TBB,
- BasicBlock *FBB) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
+ ICmpInst *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB,
+ bool ControlsExit) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Cond;
// Handle common loops like: for (X = "string"; *X; ++X)
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
- BackedgeTakenInfo ItCnt =
- ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
+ ExitLimit ItCnt =
+ ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond);
if (ItCnt.hasAnyInfo())
return ItCnt;
}
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- // FIXME: Once AddRec FlagNW are propagated, should be:
- // BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
- BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEVForExitTest(LHS, RHS, L,
- *this), L);
- if (BTI.hasAnyInfo()) return BTI;
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
+ if (EL.hasAnyInfo()) return EL;
break;
}
case ICmpInst::ICMP_EQ: { // while (X == Y)
// Convert to: while (X-Y == 0)
- BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
- if (BTI.hasAnyInfo()) return BTI;
- break;
- }
- case ICmpInst::ICMP_SLT: {
- BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
- if (BTI.hasAnyInfo()) return BTI;
- break;
- }
- case ICmpInst::ICMP_SGT: {
- BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, true);
- if (BTI.hasAnyInfo()) return BTI;
+ ExitLimit EL = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (EL.hasAnyInfo()) return EL;
break;
}
- case ICmpInst::ICMP_ULT: {
- BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
- if (BTI.hasAnyInfo()) return BTI;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: { // while (X < Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SLT;
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, ControlsExit);
+ if (EL.hasAnyInfo()) return EL;
break;
}
- case ICmpInst::ICMP_UGT: {
- BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, false);
- if (BTI.hasAnyInfo()) return BTI;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: { // while (X > Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SGT;
+ ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit);
+ if (EL.hasAnyInfo()) return EL;
break;
}
default:
#endif
break;
}
- return
- ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+ return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
+ SwitchInst *Switch,
+ BasicBlock *ExitingBlock,
+ bool ControlsExit) {
+ assert(!L->contains(ExitingBlock) && "Not an exiting block!");
+
+ // Give up if the exit is the default dest of a switch.
+ if (Switch->getDefaultDest() == ExitingBlock)
+ return getCouldNotCompute();
+
+ assert(L->contains(Switch->getDefaultDest()) &&
+ "Default case must not exit the loop!");
+ const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
+ const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
+
+ // while (X != Y) --> while (X-Y != 0)
+ ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
+ if (EL.hasAnyInfo())
+ return EL;
+
+ return getCouldNotCompute();
}
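+
+// Informal example of the pattern handled above:
+//   for (i = 0; ; ++i)
+//     switch (i) { case 42: goto exit; default: break; }
+// The case whose destination leaves the loop gives RHS = 42, so the exit
+// limit reduces to HowFarToZero({0,+,1} - 42, L), a backedge-taken count
+// of 42.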
static ConstantInt *
return cast<SCEVConstant>(Val)->getValue();
}
-/// GetAddressedElementFromGlobal - Given a global variable with an initializer
-/// and a GEP expression (missing the pointer index) indexing into it, return
-/// the addressed element of the initializer or null if the index expression is
-/// invalid.
-static Constant *
-GetAddressedElementFromGlobal(GlobalVariable *GV,
- const std::vector<ConstantInt*> &Indices) {
- Constant *Init = GV->getInitializer();
- for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
- uint64_t Idx = Indices[i]->getZExtValue();
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
- assert(Idx < CS->getNumOperands() && "Bad struct index!");
- Init = cast<Constant>(CS->getOperand(Idx));
- } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
- if (Idx >= CA->getNumOperands()) return 0; // Bogus program
- Init = cast<Constant>(CA->getOperand(Idx));
- } else if (isa<ConstantAggregateZero>(Init)) {
- if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
- assert(Idx < STy->getNumElements() && "Bad struct index!");
- Init = Constant::getNullValue(STy->getElementType(Idx));
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
- if (Idx >= ATy->getNumElements()) return 0; // Bogus program
- Init = Constant::getNullValue(ATy->getElementType());
- } else {
- llvm_unreachable("Unknown constant aggregate type!");
- }
- return 0;
- } else {
- return 0; // Unknown initializer type
- }
- }
- return Init;
-}
-
-/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
+/// ComputeLoadConstantCompareExitLimit - Given an exit condition of
/// 'icmp op load X, cst', try to see if we can compute the backedge
/// execution count.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
- LoadInst *LI,
- Constant *RHS,
- const Loop *L,
- ICmpInst::Predicate predicate) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeLoadConstantCompareExitLimit(
+ LoadInst *LI,
+ Constant *RHS,
+ const Loop *L,
+ ICmpInst::Predicate predicate) {
+
if (LI->isVolatile()) return getCouldNotCompute();
// Check to see if the loaded pointer is a getelementptr of a global.
return getCouldNotCompute();
// Okay, we allow one non-constant index into the GEP instruction.
- Value *VarIdx = 0;
- std::vector<ConstantInt*> Indexes;
+ Value *VarIdx = nullptr;
+ std::vector<Constant*> Indexes;
unsigned VarIdxNum = 0;
for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
VarIdx = GEP->getOperand(i);
VarIdxNum = i-2;
- Indexes.push_back(0);
+ Indexes.push_back(nullptr);
}
+ // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
+ if (!VarIdx)
+ return getCouldNotCompute();
+
// Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
// Check to see if X is a loop variant variable value now.
const SCEV *Idx = getSCEV(VarIdx);
// Form the GEP offset.
Indexes[VarIdxNum] = Val;
- Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
- if (Result == 0) break; // Cannot compute!
+ Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
+ Indexes);
+ if (!Result) break; // Cannot compute!
// Evaluate the condition for this iteration.
Result = ConstantExpr::getICmp(predicate, Result, RHS);
/// specified type, assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
- isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+ isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<LoadInst>(I))
return true;
if (const CallInst *CI = dyn_cast<CallInst>(I))
return false;
}
-/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
-/// in the loop that V is derived from. We allow arbitrary operations along the
-/// way, but the operands of an operation must either be constants or a value
-/// derived from a constant PHI. If this expression does not fit with these
-/// constraints, return null.
-static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
- // If this is not an instruction, or if this is an instruction outside of the
- // loop, it can't be derived from a loop PHI.
- Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0 || !L->contains(I)) return 0;
+/// Determine whether this instruction can constant evolve within this loop
+/// assuming its operands can all constant evolve.
+static bool canConstantEvolve(Instruction *I, const Loop *L) {
+ // An instruction outside of the loop can't be derived from a loop PHI.
+ if (!L->contains(I)) return false;
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ if (isa<PHINode>(I)) {
if (L->getHeader() == I->getParent())
- return PN;
+ return true;
else
// We don't currently keep track of the control flow needed to evaluate
// PHIs, so we cannot handle PHIs inside of loops.
- return 0;
+ return false;
}
// If we won't be able to constant fold this expression even if the operands
- // are constants, return early.
- if (!CanConstantFold(I)) return 0;
+ // are constants, bail early.
+ return CanConstantFold(I);
+}
+
+/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by
+/// recursing through each instruction operand until reaching a loop header phi.
+static PHINode *
+getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
+ DenseMap<Instruction *, PHINode *> &PHIMap) {
// Otherwise, we can evaluate this instruction if all of its operands are
// constant or derived from a PHI node themselves.
- PHINode *PHI = 0;
- for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
- if (!isa<Constant>(I->getOperand(Op))) {
- PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
- if (P == 0) return 0; // Not evolving from PHI
- if (PHI == 0)
- PHI = P;
- else if (PHI != P)
- return 0; // Evolving from multiple different PHIs.
+ PHINode *PHI = nullptr;
+ for (Instruction::op_iterator OpI = UseInst->op_begin(),
+ OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
+
+ if (isa<Constant>(*OpI)) continue;
+
+ Instruction *OpInst = dyn_cast<Instruction>(*OpI);
+ if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
+
+ PHINode *P = dyn_cast<PHINode>(OpInst);
+ if (!P)
+ // If this operand is already visited, reuse the prior result.
+ // We may have P != PHI if this is the deepest point at which the
+ // inconsistent paths meet.
+ P = PHIMap.lookup(OpInst);
+ if (!P) {
+ // Recurse and memoize the results, whether a phi is found or not.
+ // This recursive call invalidates pointers into PHIMap.
+ P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
+ PHIMap[OpInst] = P;
}
-
+ if (!P)
+ return nullptr; // Not evolving from PHI
+ if (PHI && PHI != P)
+ return nullptr; // Evolving from multiple different PHIs.
+ PHI = P;
+ }
  // This is an expression evolving from a constant PHI!
return PHI;
}
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from. We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI. If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || !canConstantEvolve(I, L)) return nullptr;
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ return PN;
+ }
+
+ // Record non-constant instructions contained by the loop.
+ DenseMap<Instruction *, PHINode *> PHIMap;
+ return getConstantEvolvingPHIOperands(I, L, PHIMap);
+}
+
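+// Informal example: given
+//   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
+//   %iv.next = add i32 %iv, 1
+//   %cmp = icmp ne i32 %iv.next, 100
+// getConstantEvolvingPHI(%cmp, L) returns %iv, since every non-constant
+// operand chains back to that single header phi.
+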
/// EvaluateExpression - Given an expression that passes the
/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
/// in the loop has the value PHIVal. If we can't fold this expression for some
/// reason, return null.
-static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
- const TargetData *TD) {
- if (isa<PHINode>(V)) return PHIVal;
+static Constant *EvaluateExpression(Value *V, const Loop *L,
+ DenseMap<Instruction *, Constant *> &Vals,
+ const DataLayout *DL,
+ const TargetLibraryInfo *TLI) {
+ // Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
- Instruction *I = cast<Instruction>(V);
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return nullptr;
+
+ if (Constant *C = Vals.lookup(I)) return C;
+
+ // An instruction inside the loop depends on a value outside the loop that we
+ // weren't given a mapping for, or a value such as a call inside the loop.
+ if (!canConstantEvolve(I, L)) return nullptr;
+
+ // An unmapped PHI can be due to a branch or another loop inside this loop,
+ // or due to this not being the initial iteration through a loop where we
+ // couldn't compute the evolution of this particular PHI last time.
+ if (isa<PHINode>(I)) return nullptr;
std::vector<Constant*> Operands(I->getNumOperands());
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
- if (Operands[i] == 0) return 0;
+ Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
+ if (!Operand) {
+ Operands[i] = dyn_cast<Constant>(I->getOperand(i));
+ if (!Operands[i]) return nullptr;
+ continue;
+ }
+ Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
+ Vals[Operand] = C;
+ if (!C) return nullptr;
+ Operands[i] = C;
}
- if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
- Operands[1], TD);
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size(), TD);
+ Operands[1], DL, TLI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(Operands[0], DL);
+ }
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, DL,
+ TLI);
}
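+
+// Informal example: with Vals = { %iv -> i32 7 } and V = (%iv * 2) + 1, the
+// recursion folds the multiply to 14 (memoizing it in Vals) and then the
+// add to 15, which is returned.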
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
const APInt &BEs,
const Loop *L) {
- std::map<PHINode*, Constant*>::const_iterator I =
+ DenseMap<PHINode*, Constant*>::const_iterator I =
ConstantEvolutionLoopExitValue.find(PN);
if (I != ConstantEvolutionLoopExitValue.end())
return I->second;
if (BEs.ugt(MaxBruteForceIterations))
- return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it.
+    // Not going to evaluate it.
+    return ConstantEvolutionLoopExitValue[PN] = nullptr;
Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+ DenseMap<Instruction *, Constant *> CurrentIterVals;
+ BasicBlock *Header = L->getHeader();
+ assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
+
// Since the loop is canonicalized, the PHI node must have two entries. One
// entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- Constant *StartCST =
- dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
- if (StartCST == 0)
- return RetVal = 0; // Must be a constant.
+ PHINode *PHI = nullptr;
+ for (BasicBlock::iterator I = Header->begin();
+ (PHI = dyn_cast<PHINode>(I)); ++I) {
+ Constant *StartCST =
+ dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ if (!StartCST) continue;
+ CurrentIterVals[PHI] = StartCST;
+ }
+ if (!CurrentIterVals.count(PN))
+ return RetVal = nullptr;
Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
- if (getConstantEvolvingPHI(BEValue, L) != PN &&
- !isa<Constant>(BEValue))
- return RetVal = 0; // Not derived from same PHI.
// Execute the loop symbolically to determine the exit value.
if (BEs.getActiveBits() >= 32)
- return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+ return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
- for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+ for (; ; ++IterationNum) {
if (IterationNum == NumIterations)
- return RetVal = PHIVal; // Got exit value!
+ return RetVal = CurrentIterVals[PN]; // Got exit value!
+
+ // Compute the value of the PHIs for the next iteration.
+ // EvaluateExpression adds non-phi values to the CurrentIterVals map.
+ DenseMap<Instruction *, Constant *> NextIterVals;
+ Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
+ TLI);
+ if (!NextPHI)
+ return nullptr; // Couldn't evaluate!
+ NextIterVals[PN] = NextPHI;
+
+ bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
+
+ // Also evaluate the other PHI nodes. However, we don't get to stop if we
+ // cease to be able to evaluate one of them or if they stop evolving,
+ // because that doesn't necessarily prevent us from computing PN.
+ SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
+ for (DenseMap<Instruction *, Constant *>::const_iterator
+ I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
+ PHINode *PHI = dyn_cast<PHINode>(I->first);
+ if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
+ PHIsToCompute.push_back(std::make_pair(PHI, I->second));
+ }
+ // We use two distinct loops because EvaluateExpression may invalidate any
+ // iterators into CurrentIterVals.
+ for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
+ I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
+ PHINode *PHI = I->first;
+ Constant *&NextPHI = NextIterVals[PHI];
+ if (!NextPHI) { // Not already computed.
+ Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+ }
+ if (NextPHI != I->second)
+ StoppedEvolving = false;
+ }
- // Compute the value of the PHI node for the next iteration.
- Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
- if (NextPHI == PHIVal)
- return RetVal = NextPHI; // Stopped evolving!
- if (NextPHI == 0)
- return 0; // Couldn't evaluate!
- PHIVal = NextPHI;
+ // If all entries in CurrentIterVals == NextIterVals then we can stop
+ // iterating, the loop can't continue to change.
+ if (StoppedEvolving)
+ return RetVal = CurrentIterVals[PN];
+
+ CurrentIterVals.swap(NextIterVals);
}
}
-/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// ComputeExitCountExhaustively - If the loop is known to execute a
/// constant number of times (the condition evolves only from constants),
/// try to evaluate a few iterations of the loop until the exit
/// condition gets a value of ExitWhen (true or false). If we cannot
/// evaluate the trip count of the loop, return getCouldNotCompute().
-const SCEV *
-ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
- Value *Cond,
- bool ExitWhen) {
+const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
+ Value *Cond,
+ bool ExitWhen) {
PHINode *PN = getConstantEvolvingPHI(Cond, L);
- if (PN == 0) return getCouldNotCompute();
+ if (!PN) return getCouldNotCompute();
// If the loop is canonicalized, the PHI will have exactly two entries.
// That's the only form we support here.
if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+ DenseMap<Instruction *, Constant *> CurrentIterVals;
+ BasicBlock *Header = L->getHeader();
+ assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
+
// One entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- Constant *StartCST =
- dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
- if (StartCST == 0) return getCouldNotCompute(); // Must be a constant.
-
- Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
- if (getConstantEvolvingPHI(BEValue, L) != PN &&
- !isa<Constant>(BEValue))
- return getCouldNotCompute(); // Not derived from same PHI.
+ PHINode *PHI = nullptr;
+ for (BasicBlock::iterator I = Header->begin();
+ (PHI = dyn_cast<PHINode>(I)); ++I) {
+ Constant *StartCST =
+ dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ if (!StartCST) continue;
+ CurrentIterVals[PHI] = StartCST;
+ }
+ if (!CurrentIterVals.count(PN))
+ return getCouldNotCompute();
  // Okay, we found a PHI node that defines the trip count of this loop. Execute
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
- unsigned IterationNum = 0;
+
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
- for (Constant *PHIVal = StartCST;
- IterationNum != MaxIterations; ++IterationNum) {
+  for (unsigned IterationNum = 0; IterationNum != MaxIterations;
+       ++IterationNum) {
ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
+ DL, TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
return getConstant(Type::getInt32Ty(getContext()), IterationNum);
}
- // Compute the value of the PHI node for the next iteration.
- Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
- if (NextPHI == 0 || NextPHI == PHIVal)
- return getCouldNotCompute();// Couldn't evaluate or not making progress...
- PHIVal = NextPHI;
+ // Update all the PHI nodes for the next iteration.
+ DenseMap<Instruction *, Constant *> NextIterVals;
+
+ // Create a list of which PHIs we need to compute. We want to do this before
+ // calling EvaluateExpression on them because that may invalidate iterators
+ // into CurrentIterVals.
+ SmallVector<PHINode *, 8> PHIsToCompute;
+ for (DenseMap<Instruction *, Constant *>::const_iterator
+ I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
+ PHINode *PHI = dyn_cast<PHINode>(I->first);
+ if (!PHI || PHI->getParent() != Header) continue;
+ PHIsToCompute.push_back(PHI);
+ }
+ for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
+ E = PHIsToCompute.end(); I != E; ++I) {
+ PHINode *PHI = *I;
+ Constant *&NextPHI = NextIterVals[PHI];
+ if (NextPHI) continue; // Already computed!
+
+ Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
+ }
+ CurrentIterVals.swap(NextIterVals);
}
// Too many iterations were needed to evaluate.
/// original value V is returned.
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if we've folded this expression at this loop before.
- std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
- std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
- if (!Pair.second)
- return Pair.first->second ? Pair.first->second : V;
-
+  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
+      ValuesAtScopes[V];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == L)
+ return Values[u].second ? Values[u].second : V;
+ }
+ Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr)));
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
- ValuesAtScopes[V][L] = C;
+  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 =
+      ValuesAtScopes[V];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == L) {
+ Values2[u - 1].second = C;
+ break;
+ }
+ }
return C;
}
+/// This builds up a Constant using the ConstantExpr interface. That way, we
+/// will return Constants for objects which aren't represented by a
+/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
+/// Returns NULL if the SCEV isn't representable as a Constant.
+static Constant *BuildConstantFromSCEV(const SCEV *V) {
+ switch (static_cast<SCEVTypes>(V->getSCEVType())) {
+ case scCouldNotCompute:
+ case scAddRecExpr:
+ break;
+ case scConstant:
+ return cast<SCEVConstant>(V)->getValue();
+ case scUnknown:
+ return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
+ return ConstantExpr::getSExt(CastOp, SS->getType());
+ break;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
+ return ConstantExpr::getZExt(CastOp, SZ->getType());
+ break;
+ }
+ case scTruncate: {
+ const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
+ return ConstantExpr::getTrunc(CastOp, ST->getType());
+ break;
+ }
+ case scAddExpr: {
+ const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ unsigned AS = PTy->getAddressSpace();
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
+ }
+ for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
+ if (!C2) return nullptr;
+
+ // First pointer!
+ if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+ unsigned AS = C2->getType()->getPointerAddressSpace();
+ std::swap(C, C2);
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+ // The offsets have been converted to bytes. We can add bytes to an
+ // i8* by GEP with the byte count in the first index.
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
+ }
+
+ // Don't bother trying to sum two pointers. We probably can't
+ // statically compute a load that results from it anyway.
+ if (C2->getType()->isPointerTy())
+ return nullptr;
+
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ if (PTy->getElementType()->isStructTy())
+ C2 = ConstantExpr::getIntegerCast(
+ C2, Type::getInt32Ty(C->getContext()), true);
+ C = ConstantExpr::getGetElementPtr(C, C2);
+ } else
+ C = ConstantExpr::getAdd(C, C2);
+ }
+ return C;
+ }
+ break;
+ }
+ case scMulExpr: {
+ const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
+ // Don't bother with pointers at all.
+ if (C->getType()->isPointerTy()) return nullptr;
+ for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
+ if (!C2 || C2->getType()->isPointerTy()) return nullptr;
+ C = ConstantExpr::getMul(C, C2);
+ }
+ return C;
+ }
+ break;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
+ if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
+ if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
+ if (LHS->getType() == RHS->getType())
+ return ConstantExpr::getUDiv(LHS, RHS);
+ break;
+ }
+ case scSMaxExpr:
+ case scUMaxExpr:
+ break; // TODO: smax, umax.
+ }
+ return nullptr;
+}
+
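+// Informal example: for the SCEV (8 + %gv) with %gv a global pointer, the
+// scAddExpr case above moves the pointer to the front, bitcasts it to i8*,
+// and emits a one-index GEP: the ConstantExpr equivalent of "%gv plus 8
+// bytes".
+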
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (isa<SCEVConstant>(V)) return V;
const SCEV *OpV = getSCEVAtScope(OrigV, L);
MadeImprovement |= OrigV != OpV;
- Constant *C = 0;
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV))
- C = SC->getValue();
- if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV))
- C = dyn_cast<Constant>(SU->getValue());
+ Constant *C = BuildConstantFromSCEV(OpV);
if (!C) return V;
if (C->getType() != Op->getType())
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
// Check to see if getSCEVAtScope actually made an improvement.
if (MadeImprovement) {
- Constant *C = 0;
+ Constant *C = nullptr;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- Operands[0], Operands[1], TD);
- else
+ Operands[0], Operands[1], DL,
+ TLI);
+ else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
+ } else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size(), TD);
+ Operands, DL, TLI);
if (!C) return V;
return getSCEV(C);
}
for (++i; i != e; ++i)
NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
- AddRec = cast<SCEVAddRecExpr>(
+ const SCEV *FoldedRec =
getAddRecExpr(NewOps, AddRec->getLoop(),
- // FIXME: AddRec->getNoWrapFlags(SCEV::FlagNW)
- SCEV::FlagAnyWrap));
+ AddRec->getNoWrapFlags(SCEV::FlagNW));
+ AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
+ // The addrec may be folded to a nonrecurrence, for example, if the
+ // induction variable is multiplied by zero after constant folding. Go
+ // ahead and return the folded value.
+ if (!AddRec)
+ return FoldedRec;
break;
}
}
llvm_unreachable("Unknown SCEV type!");
- return 0;
}
/// getSCEVAtScope - This is a convenience function which does
SqrtTerm *= B;
SqrtTerm -= Four * (A * C);
+ if (SqrtTerm.isNegative()) {
+ // The loop is provably infinite.
+ const SCEV *CNC = SE.getCouldNotCompute();
+ return std::make_pair(CNC, CNC);
+ }
+
// Compute sqrt(B^2-4ac). This is guaranteed to be the nearest
// integer value or else APInt::sqrt() will assert.
APInt SqrtVal(SqrtTerm.sqrt());
// Compute the two solutions for the quadratic formula.
// The divisions must be performed as signed divisions.
APInt NegB(-B);
- APInt TwoA( A << 1 );
+ APInt TwoA(A << 1);
if (TwoA.isMinValue()) {
const SCEV *CNC = SE.getCouldNotCompute();
return std::make_pair(CNC, CNC);
return std::make_pair(SE.getConstant(Solution1),
SE.getConstant(Solution2));
- } // end APIntOps namespace
+ } // end APIntOps namespace
}
/// HowFarToZero - Return the number of times a backedge comparing the specified
/// now expressed as a single expression, V = x-y. So the exit test is
/// effectively V != 0. We know and take advantage of the fact that this
/// expression is only used in a comparison-with-zero context.
-ScalarEvolution::BackedgeTakenInfo
-ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
- // If the AddRec is NUW, then (in an unsigned sense) it cannot be counting up
- // to wrap to 0, it must be counting down to equal 0. Also, while counting
- // down, it cannot "miss" 0 (which would cause it to wrap), regardless of what
- // the stride is. As such, NUW addrec's will always become zero in
- // "start / -stride" steps, and we know that the division is exact.
- if (AddRec->getNoWrapFlags(SCEV::FlagNUW))
- // FIXME: We really want an "isexact" bit for udiv.
- return getUDivExpr(Start, getNegativeSCEV(Step));
-
// For now we handle only constant steps.
//
// TODO: Handle a nonconstant Step given AddRec<NUW>. If the
// to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
// We have not yet seen any such cases.
const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
- if (StepC == 0)
+ if (!StepC || StepC->getValue()->equalsInt(0))
return getCouldNotCompute();
// For positive steps (counting up until unsigned overflow):
// For negative steps (counting down to zero):
// N = Start/-Step
// First compute the unsigned distance from zero in the direction of Step.
- const SCEV *Distance = StepC->getValue()->getValue().isNonNegative() ?
- getNegativeSCEV(Start) : Start;
+ bool CountDown = StepC->getValue()->getValue().isNegative();
+ const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
// Handle unitary steps, which cannot wraparound.
- if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so:
- return Distance; // N = -Start (as unsigned)
+ // 1*N = -Start; -1*N = Start (mod 2^BW), so:
+ // N = Distance (as unsigned)
+ if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) {
+ ConstantRange CR = getUnsignedRange(Start);
+ const SCEV *MaxBECount;
+ if (!CountDown && CR.getUnsignedMin().isMinValue())
+ // When counting up, the worst starting value is 1, not 0.
+ MaxBECount = CR.getUnsignedMax().isMinValue()
+ ? getConstant(APInt::getMinValue(CR.getBitWidth()))
+ : getConstant(APInt::getMaxValue(CR.getBitWidth()));
+ else
+ MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
+ : -CR.getUnsignedMin());
+ return ExitLimit(Distance, MaxBECount);
+ }
+
+ // If the step exactly divides the distance then unsigned divide computes the
+ // backedge count.
+ const SCEV *Q, *R;
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+ SCEVDivision::divide(SE, Distance, Step, &Q, &R);
+ if (R->isZero()) {
+ const SCEV *Exact =
+ getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+ return ExitLimit(Exact, Exact);
+ }
- if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so:
- return Distance; // N = Start (as unsigned)
+ // If the condition controls loop exit (the loop exits only if the expression
+ // is true) and the addition is no-wrap we can use unsigned divide to
+ // compute the backedge count. In this case, the step may not divide the
+ // distance, but we don't care because if the condition is "missed" the loop
+ // will have undefined behavior due to wrapping.
+ if (ControlsExit && AddRec->getNoWrapFlags(SCEV::FlagNW)) {
+ const SCEV *Exact =
+ getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+ return ExitLimit(Exact, Exact);
+ }
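+
+  // Informal example: for the IV {100,+,-25} tested against zero, CountDown
+  // is true, Distance is Start = 100, and 25 divides it exactly, so both
+  // paths above yield an exact count of 100 /u 25 = 4.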
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
/// HowFarToNonZero - Return the number of times a backedge checking the
/// specified value for nonzero will execute. If not computable, return
/// CouldNotCompute
-ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ExitLimit
ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
// Loops that look like: while (X == 0) are very strange indeed. We don't
// handle them yet except for the trivial case. This could be expanded in the
/// predicate Pred. Return true iff any changes were made.
///
bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
- const SCEV *&LHS, const SCEV *&RHS) {
+ const SCEV *&LHS, const SCEV *&RHS,
+ unsigned Depth) {
bool Changed = false;
+  // If we hit the max recursion limit, bail out.
+ if (Depth >= 3)
+ return false;
+
// Canonicalize a constant to the right side.
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
// Check for both operands constant.
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_NE:
+ // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
+ if (!RA)
+ if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
+ if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
+ if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
+ ME->getOperand(0)->isAllOnesValue()) {
+ RHS = AE->getOperand(1);
+ LHS = ME->getOperand(1);
+ Changed = true;
+ }
break;
case ICmpInst::ICMP_UGE:
if ((RA - 1).isMinValue()) {
// TODO: More simplifications are possible here.
+ // Recursively simplify until we either hit a recursion limit or nothing
+ // changes.
+ if (Changed)
+ return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1);
+
return Changed;
trivially_true:
// If LHS or RHS is an addrec, check to see if the condition is true in
// every iteration of the loop.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
- if (isLoopEntryGuardedByCond(
- AR->getLoop(), Pred, AR->getStart(), RHS) &&
- isLoopBackedgeGuardedByCond(
- AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
- return true;
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
- if (isLoopEntryGuardedByCond(
- AR->getLoop(), Pred, LHS, AR->getStart()) &&
- isLoopBackedgeGuardedByCond(
- AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
- return true;
+ // If LHS and RHS are both addrec, both conditions must be true in
+ // every iteration of the loop.
+ const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+ bool LeftGuarded = false;
+ bool RightGuarded = false;
+ if (LAR) {
+ const Loop *L = LAR->getLoop();
+ if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
+ isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
+ if (!RAR) return true;
+ LeftGuarded = true;
+ }
+ }
+ if (RAR) {
+ const Loop *L = RAR->getLoop();
+ if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
+ isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
+ if (!LAR) return true;
+ RightGuarded = true;
+ }
+ }
+ if (LeftGuarded && RightGuarded)
+ return true;
// Otherwise see what can be done with known constant ranges.
return isKnownPredicateWithRanges(Pred, LHS, RHS);
switch (Pred) {
default:
llvm_unreachable("Unexpected ICmpInst::Predicate value!");
- break;
case ICmpInst::ICMP_SGT:
- Pred = ICmpInst::ICMP_SLT;
std::swap(LHS, RHS);
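+    // fall through: handled as the swapped predicate below.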
case ICmpInst::ICMP_SLT: {
ConstantRange LHSRange = getSignedRange(LHS);
break;
}
case ICmpInst::ICMP_SGE:
- Pred = ICmpInst::ICMP_SLE;
std::swap(LHS, RHS);
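+    // fall through: handled as the swapped predicate below.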
case ICmpInst::ICMP_SLE: {
ConstantRange LHSRange = getSignedRange(LHS);
break;
}
case ICmpInst::ICMP_UGT:
- Pred = ICmpInst::ICMP_ULT;
std::swap(LHS, RHS);
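+    // fall through: handled as the swapped predicate below.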
case ICmpInst::ICMP_ULT: {
ConstantRange LHSRange = getUnsignedRange(LHS);
break;
}
case ICmpInst::ICMP_UGE:
- Pred = ICmpInst::ICMP_ULE;
std::swap(LHS, RHS);
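+    // fall through: handled as the swapped predicate below.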
case ICmpInst::ICMP_ULE: {
ConstantRange LHSRange = getUnsignedRange(LHS);
// (interprocedural conditions notwithstanding).
if (!L) return true;
+ if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
+
BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return false;
BranchInst *LoopContinuePredicate =
dyn_cast<BranchInst>(Latch->getTerminator());
- if (!LoopContinuePredicate ||
- LoopContinuePredicate->isUnconditional())
- return false;
+ if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
+ isImpliedCond(Pred, LHS, RHS,
+ LoopContinuePredicate->getCondition(),
+ LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
+ return true;
+
+ // Check conditions due to any @llvm.assume intrinsics.
+ for (auto &CI : AT->assumptions(F)) {
+ if (!DT->dominates(CI, Latch->getTerminator()))
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+ return true;
+ }
- return isImpliedCond(Pred, LHS, RHS,
- LoopContinuePredicate->getCondition(),
- LoopContinuePredicate->getSuccessor(0) != L->getHeader());
+ return false;
}
/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected
// (interprocedural conditions notwithstanding).
if (!L) return false;
+ if (isKnownPredicateWithRanges(Pred, LHS, RHS)) return true;
+
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
return true;
}
+ // Check conditions due to any @llvm.assume intrinsics.
+ for (auto &CI : AT->assumptions(F)) {
+ if (!DT->dominates(CI, L->getHeader()))
+ continue;
+
+ if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
+ return true;
+ }
+
return false;
}
+/// RAII wrapper to prevent recursive application of isImpliedCond.
+/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
+/// currently evaluating isImpliedCond.
+struct MarkPendingLoopPredicate {
+ Value *Cond;
+ DenseSet<Value*> &LoopPreds;
+ bool Pending;
+
+ MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
+ : Cond(C), LoopPreds(LP) {
+ Pending = !LoopPreds.insert(Cond).second;
+ }
+ ~MarkPendingLoopPredicate() {
+ if (!Pending)
+ LoopPreds.erase(Cond);
+ }
+};
+
/// isImpliedCond - Test whether the condition described by Pred, LHS,
/// and RHS is true whenever the given Cond value evaluates to true.
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
Value *FoundCondValue,
bool Inverse) {
+ MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
+ if (Mark.Pending)
+ return false;
+
// Recursively handle And and Or conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
if (BO->getOpcode() == Instruction::And) {
getTypeSizeInBits(ICI->getOperand(0)->getType()))
return false;
- // Now that we found a conditional branch that dominates the loop, check to
- // see if it is the comparison we are looking for.
+ // Now that we found a conditional branch that dominates the loop or controls
+  // the loop latch, check to see if it is the comparison we are looking for.
ICmpInst::Predicate FoundPred;
if (Inverse)
FoundPred = ICI->getInversePredicate();
// LHS' type is checked for above.
if (getTypeSizeInBits(LHS->getType()) >
getTypeSizeInBits(FoundLHS->getType())) {
- if (CmpInst::isSigned(Pred)) {
+ if (CmpInst::isSigned(FoundPred)) {
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
} else {
return CmpInst::isTrueWhenEqual(Pred);
if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
if (FoundLHS == FoundRHS)
- return CmpInst::isFalseWhenEqual(Pred);
+ return CmpInst::isFalseWhenEqual(FoundPred);
// Check to see if we can make the LHS or RHS match.
if (LHS == FoundRHS || RHS == FoundLHS) {
RHS, LHS, FoundLHS, FoundRHS);
}
+ // Check if we can make progress by sharpening ranges.
+ if (FoundPred == ICmpInst::ICMP_NE &&
+ (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
+
+ const SCEVConstant *C = nullptr;
+ const SCEV *V = nullptr;
+
+ if (isa<SCEVConstant>(FoundLHS)) {
+ C = cast<SCEVConstant>(FoundLHS);
+ V = FoundRHS;
+ } else {
+ C = cast<SCEVConstant>(FoundRHS);
+ V = FoundLHS;
+ }
+
+ // The guarding predicate tells us that C != V. If the known range
+ // of V is [C, t), we can sharpen the range to [C + 1, t). The
+      // range we consider has to correspond to the same signedness as the
+ // predicate we're interested in folding.
+
+ APInt Min = ICmpInst::isSigned(Pred) ?
+ getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin();
+
+ if (Min == C->getValue()->getValue()) {
+ // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
+ // This is true even if (Min + 1) wraps around -- in case of
+ // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
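+        //
+        // Informal example: for an i8 V with unsigned range [0, 16) guarded
+        // by V != 0, Min is 0 and SharperMin is 1, so a u>= query below may
+        // use V >= 1.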
+
+ APInt SharperMin = Min + 1;
+
+ switch (Pred) {
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE:
+ // We know V `Pred` SharperMin. If this implies LHS `Pred`
+ // RHS, we're done.
+ if (isImpliedCondOperands(Pred, LHS, RHS, V,
+ getConstant(SharperMin)))
+ return true;
+
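+          // fall through: for >= predicates, also try the weaker bound Min.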
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT:
+ // We know from the range information that (V `Pred` Min ||
+ // V == Min). We know from the guarding condition that !(V
+ // == Min). This gives us
+ //
+ // V `Pred` Min || V == Min && !(V == Min)
+ // => V `Pred` Min
+ //
+ // If V `Pred` Min implies LHS `Pred` RHS, we're done.
+
+ if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
+ return true;
+
+ default:
+ // No change
+ break;
+ }
+ }
+ }
+
// Check whether the actual condition is beyond sufficient.
if (FoundPred == ICmpInst::ICMP_EQ)
if (ICmpInst::isTrueWhenEqual(Pred))
return false;
}
-/// getBECount - Subtract the end and start values and divide by the step,
-/// rounding up, to get the number of times the backedge is executed. Return
-/// CouldNotCompute if an intermediate computation overflows.
-const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
- const SCEV *End,
- const SCEV *Step,
- bool NoWrap) {
- assert(!isKnownNegative(Step) &&
- "This code doesn't handle negative strides yet!");
-
- const Type *Ty = Start->getType();
-
- // When Start == End, we have an exact BECount == 0. Short-circuit this case
- // here because SCEV may not be able to determine that the unsigned division
- // after rounding is zero.
- if (Start == End)
- return getConstant(Ty, 0);
-
- const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
- const SCEV *Diff = getMinusSCEV(End, Start);
- const SCEV *RoundUp = getAddExpr(Step, NegOne);
-
- // Add an adjustment to the difference between End and Start so that
- // the division will effectively round up.
- const SCEV *Add = getAddExpr(Diff, RoundUp);
-
- if (!NoWrap) {
- // Check Add for unsigned overflow.
- // TODO: More sophisticated things could be done here.
- const Type *WideTy = IntegerType::get(getContext(),
- getTypeSizeInBits(Ty) + 1);
- const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
- const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
- const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
- if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
- return getCouldNotCompute();
+// Verify if a linear IV with positive stride can overflow when in a
+// less-than comparison, knowing the invariant term of the comparison, the
+// stride and the knowledge of NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getConstant(Stride->getType(), 1);
+
+ if (IsSigned) {
+ APInt MaxRHS = getSignedRange(RHS).getSignedMax();
+ APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
+ }
+
+ APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
+ APInt MaxValue = APInt::getMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
+}
+
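+// Informal example: for an i8 IV compared signed, with SMaxRHS = 120 and
+// Stride = 20, MaxStrideMinusOne is 19 and 127 - 19 = 108 < 120, so the IV
+// may step past INT8_MAX: possible overflow.
+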
+// Verify if a linear IV with negative stride can overflow when in a
+// greater-than comparison, knowing the invariant term of the comparison,
+// the stride and the knowledge of NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getConstant(Stride->getType(), 1);
+
+ if (IsSigned) {
+ APInt MinRHS = getSignedRange(RHS).getSignedMin();
+ APInt MinValue = APInt::getSignedMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
}
- return getUDivExpr(Add, Step);
+ APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
+ APInt MinValue = APInt::getMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
+}
+
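+// Informal example: for an i8 IV compared signed, with SMinRHS = -120 and
+// Stride = 20, -128 + 19 = -109 > -120, so the IV may step past INT8_MIN:
+// possible overflow.
+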
+// Compute the backedge taken count knowing the interval difference, the
+// stride and presence of the equality in the comparison.
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+ bool Equality) {
+ const SCEV *One = getConstant(Step->getType(), 1);
+ Delta = Equality ? getAddExpr(Delta, Step)
+ : getAddExpr(Delta, getMinusSCEV(Step, One));
+ return getUDivExpr(Delta, Step);
}
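+
+// Informal example: Delta = 10, Step = 3 and Equality = false yield
+// (10 + (3 - 1)) /u 3 = 4, i.e. ceil(10/3); with Equality set (a <= style
+// exit test), it is (10 + 3) /u 3 = 4.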
/// HowManyLessThans - Return the number of times a backedge containing the
/// specified less-than comparison will execute. If not computable, return
/// CouldNotCompute.
-ScalarEvolution::BackedgeTakenInfo
+///
+/// @param ControlsExit is true when the LHS < RHS condition directly controls
+/// the branch (the loop exits only if condition is true). In this case, we can use
+/// NoWrapFlags to skip overflow checks.
+ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, bool isSigned) {
- // Only handle: "ADDREC < LoopInvariant".
- if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+ const Loop *L, bool IsSigned,
+ bool ControlsExit) {
+ // We handle only IV < Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
- if (!AddRec || AddRec->getLoop() != L)
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+  // We only handle affine AddRecs defined in this loop.
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
- // Check to see if we have a flag which makes analysis easy.
- bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) :
- AddRec->getNoWrapFlags(SCEV::FlagNUW);
+ bool NoWrap = ControlsExit &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
- if (AddRec->isAffine()) {
- unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
- const SCEV *Step = AddRec->getStepRecurrence(*this);
+ const SCEV *Stride = IV->getStepRecurrence(*this);
- if (Step->isZero())
- return getCouldNotCompute();
- if (Step->isOne()) {
- // With unit stride, the iteration never steps past the limit value.
- } else if (isKnownPositive(Step)) {
- // Test whether a positive iteration can step past the limit
- // value and past the maximum value for its type in a single step.
- // Note that it's not sufficient to check NoWrap here, because even
- // though the value after a wrap is undefined, it's not undefined
- // behavior, so if wrap does occur, the loop could either terminate or
- // loop infinitely, but in either case, the loop is guaranteed to
- // iterate at least until the iteration where the wrapping occurs.
- const SCEV *One = getConstant(Step->getType(), 1);
- if (isSigned) {
- APInt Max = APInt::getSignedMaxValue(BitWidth);
- if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
- .slt(getSignedRange(RHS).getSignedMax()))
- return getCouldNotCompute();
- } else {
- APInt Max = APInt::getMaxValue(BitWidth);
- if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
- .ult(getUnsignedRange(RHS).getUnsignedMax()))
- return getCouldNotCompute();
- }
- } else
- // TODO: Handle negative strides here and below.
- return getCouldNotCompute();
+  // Bail out unless the stride is known to be positive.
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
+
+ // Avoid proven overflow cases: this will ensure that the backedge taken count
+ // will not generate any unsigned overflow. Relaxed no-overflow conditions
+  // exploit NoWrapFlags, allowing us to optimize in the presence of
+  // undefined behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
- // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
- // m. So, we count the number of iterations in which {n,+,s} < m is true.
- // Note that we cannot simply return max(m-n,0)/s because it's not safe to
- // treat m-n as signed nor unsigned due to overflow possibility.
-
- // First, we get the value of the LHS in the first iteration: n
- const SCEV *Start = AddRec->getOperand(0);
-
- // Determine the minimum constant start value.
- const SCEV *MinStart = getConstant(isSigned ?
- getSignedRange(Start).getSignedMin() :
- getUnsignedRange(Start).getUnsignedMin());
-
- // If we know that the condition is true in order to enter the loop,
- // then we know that it will run exactly (m-n)/s times. Otherwise, we
- // only know that it will execute (max(m,n)-n)/s times. In both cases,
- // the division must round up.
- const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L,
- isSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT,
- getMinusSCEV(Start, Step), RHS))
- End = isSigned ? getSMaxExpr(RHS, Start)
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
+ : ICmpInst::ICMP_ULT;
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) {
+ const SCEV *Diff = getMinusSCEV(RHS, Start);
+ // If we have NoWrap set, then we can assume that the increment won't
+    // overflow; in that case, if RHS - Start is a constant, we don't need a
+    // max operation, since we can compute the result statically.
+ if (NoWrap && isa<SCEVConstant>(Diff)) {
+      APInt D = cast<SCEVConstant>(Diff)->getValue()->getValue();
+ if (D.isNegative())
+ End = Start;
+ } else
+ End = IsSigned ? getSMaxExpr(RHS, Start)
: getUMaxExpr(RHS, Start);
+ }
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+
+ APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
+ : getUnsignedRange(Start).getUnsignedMin();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
+ : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+
+  // Although End can be a MAX expression, we estimate MaxEnd considering only
+ // the case End = RHS. This is safe because in the other case (End - Start)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MaxEnd =
+ IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
+ : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+
+ const SCEV *MaxBECount;
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
+ getConstant(MinStride), false);
- // Determine the maximum constant end value.
- const SCEV *MaxEnd = getConstant(isSigned ?
- getSignedRange(End).getSignedMax() :
- getUnsignedRange(End).getUnsignedMax());
-
- // If MaxEnd is within a step of the maximum integer value in its type,
- // adjust it down to the minimum value which would produce the same effect.
- // This allows the subsequent ceiling division of (N+(step-1))/step to
- // compute the correct value.
- const SCEV *StepMinusOne = getMinusSCEV(Step,
- getConstant(Step->getType(), 1));
- MaxEnd = isSigned ?
- getSMinExpr(MaxEnd,
- getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
- StepMinusOne)) :
- getUMinExpr(MaxEnd,
- getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
- StepMinusOne));
-
- // Finally, we subtract these two values and divide, rounding up, to get
- // the number of times the backedge is executed.
- const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
-
- // The maximum backedge count is similar, except using the minimum start
- // value and the maximum end value.
- // If we already have an exact constant BECount, use it instead.
- const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
- : getBECount(MinStart, MaxEnd, Step, NoWrap);
-
- // If the stride is nonconstant, and NoWrap == true, then
- // getBECount(MinStart, MaxEnd) may not compute. This would result in an
- // exact BECount and invalid MaxBECount, which should be avoided to catch
- // more optimization opportunities.
- if (isa<SCEVCouldNotCompute>(MaxBECount))
- MaxBECount = BECount;
-
- return BackedgeTakenInfo(BECount, MaxBECount);
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
+}
+
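+// Informal example: for an exit controlled by {0,+,3}<nsw> <s 10, BECount is
+// ceil((10 - 0) / 3) = 4: the IV takes the values 0, 3, 6 and 9, and the
+// compare first fails at 12, after the fourth backedge.
+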
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool IsSigned,
+ bool ControlsExit) {
+ // We handle only IV > Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
+
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+  // We only handle affine AddRecs defined in this loop.
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
+ return getCouldNotCompute();
+
+ bool NoWrap = ControlsExit &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
+
+ const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
+
+  // Bail out unless the stride is known to be positive.
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
+
+ // Avoid proven overflow cases: this will ensure that the backedge taken count
+ // will not generate any unsigned overflow. Relaxed no-overflow conditions
+  // exploit NoWrapFlags, allowing us to optimize in the presence of
+  // undefined behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
+ : ICmpInst::ICMP_UGT;
+
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) {
+ const SCEV *Diff = getMinusSCEV(RHS, Start);
+ // If we have NoWrap set, then we can assume that the increment won't
+    // overflow; in that case, if RHS - Start is a constant, we don't need a
+    // max operation, since we can compute the result statically.
+ if (NoWrap && isa<SCEVConstant>(Diff)) {
+      APInt D = cast<SCEVConstant>(Diff)->getValue()->getValue();
+ if (!D.isNegative())
+ End = Start;
+ } else
+ End = IsSigned ? getSMinExpr(RHS, Start)
+ : getUMinExpr(RHS, Start);
}
- return getCouldNotCompute();
+ const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
+
+ APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
+ : getUnsignedRange(Start).getUnsignedMax();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
+ : APInt::getMinValue(BitWidth) + (MinStride - 1);
+
+  // Although End can be a MIN expression, we estimate MinEnd considering only
+ // the case End = RHS. This is safe because in the other case (Start - End)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MinEnd =
+ IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
+ : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
+
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
}
/// getNumIterationsInRange - Return the number of iterations of this loop that
SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
Operands[0] = SE.getConstant(SC->getType(), 0);
const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
- // FIXME: getNoWrapFlags(FlagNW)
- FlagAnyWrap);
+ getNoWrapFlags(FlagNW));
if (const SCEVAddRecExpr *ShiftedAddRec =
dyn_cast<SCEVAddRecExpr>(Shifted))
return ShiftedAddRec->getNumIterationsInRange(
return SE.getCouldNotCompute();
}
+namespace {
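+// A visitor for SCEVTraversal: follow() is called on every node and returns
+// true when the node's operands should also be visited; isDone() allows the
+// traversal to terminate early.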
+struct FindUndefs {
+ bool Found;
+ FindUndefs() : Found(false) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) {
+ if (isa<UndefValue>(C->getValue()))
+ Found = true;
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (isa<UndefValue>(C->getValue()))
+ Found = true;
+ }
+
+ // Keep looking if we haven't found it yet.
+ return !Found;
+ }
+ bool isDone() const {
+ // Stop recursion if we have found an undef.
+ return Found;
+ }
+};
+}
+
+// Return true when S contains at least one undef value.
+static inline bool
+containsUndefs(const SCEV *S) {
+ FindUndefs F;
+ SCEVTraversal<FindUndefs> ST(F);
+ ST.visitAll(S);
+
+ return F.Found;
+}
+
+namespace {
+// Collect all steps of SCEV expressions.
+struct SCEVCollectStrides {
+ ScalarEvolution &SE;
+ SmallVectorImpl<const SCEV *> &Strides;
+
+ SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
+ : SE(SE), Strides(S) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ Strides.push_back(AR->getStepRecurrence(SE));
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+
+// Collect all SCEVUnknown and SCEVMulExpr expressions.
+struct SCEVCollectTerms {
+ SmallVectorImpl<const SCEV *> &Terms;
+
+ SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T)
+ : Terms(T) {}
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) {
+ if (!containsUndefs(S))
+ Terms.push_back(S);
+
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+}
+
+/// Find parametric terms in this SCEVAddRecExpr.
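+///
+/// For example (reusing the AddRec from the delinearize example below), for
+/// {{{%A,+,(8 * %m * %o)},+,(8 * %o)},+,8} the strides are (8 * %m * %o),
+/// (8 * %o) and 8; the parametric terms collected from them are
+/// (8 * %m * %o) and (8 * %o), while the constant stride 8 yields no term.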
+void SCEVAddRecExpr::collectParametricTerms(
+ ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms) const {
+ SmallVector<const SCEV *, 4> Strides;
+ SCEVCollectStrides StrideCollector(SE, Strides);
+ visitAll(this, StrideCollector);
+
+ DEBUG({
+ dbgs() << "Strides:\n";
+ for (const SCEV *S : Strides)
+ dbgs() << *S << "\n";
+ });
+
+ for (const SCEV *S : Strides) {
+ SCEVCollectTerms TermCollector(Terms);
+ visitAll(S, TermCollector);
+ }
+
+ DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
+}
+
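+// Computes the array dimensions by recursively dividing by the smallest term:
+// for example (a sketch), with Terms == {(%m * %o), %o} sorted larger first,
+// Step is %o; dividing both terms by it gives {%m, 1}, the constant is
+// removed, and the recursion on {%m} pushes %m then %o, so Sizes == {%m, %o}.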
+static bool findArrayDimensionsRec(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes) {
+ int Last = Terms.size() - 1;
+ const SCEV *Step = Terms[Last];
+
+ // End of recursion.
+ if (Last == 0) {
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
+ SmallVector<const SCEV *, 2> Qs;
+ for (const SCEV *Op : M->operands())
+ if (!isa<SCEVConstant>(Op))
+ Qs.push_back(Op);
+ Step = SE.getMulExpr(Qs);
+ }
+
+ Sizes.push_back(Step);
+ return true;
+ }
+
+ for (const SCEV *&Term : Terms) {
+ // Normalize the terms before the next call to findArrayDimensionsRec.
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Term, Step, &Q, &R);
+
+ // Bail out when GCD does not evenly divide one of the terms.
+ if (!R->isZero())
+ return false;
+
+ Term = Q;
+ }
+
+ // Remove all SCEVConstants.
+ Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) {
+ return isa<SCEVConstant>(E);
+ }),
+ Terms.end());
+
+  if (!Terms.empty())
+ if (!findArrayDimensionsRec(SE, Terms, Sizes))
+ return false;
+
+ Sizes.push_back(Step);
+ return true;
+}
+
+namespace {
+struct FindParameter {
+ bool FoundParameter;
+ FindParameter() : FoundParameter(false) {}
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVUnknown>(S)) {
+ FoundParameter = true;
+ // Stop recursion: we found a parameter.
+ return false;
+ }
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const {
+ // Stop recursion if we have found a parameter.
+ return FoundParameter;
+ }
+};
+}
+
+// Returns true when S contains at least one SCEVUnknown parameter.
+static inline bool
+containsParameters(const SCEV *S) {
+ FindParameter F;
+ SCEVTraversal<FindParameter> ST(F);
+ ST.visitAll(S);
+
+ return F.FoundParameter;
+}
+
+// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
+static inline bool
+containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
+ for (const SCEV *T : Terms)
+ if (containsParameters(T))
+ return true;
+ return false;
+}
+
+// Return the number of product terms in S.
+static inline int numberOfTerms(const SCEV *S) {
+ if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
+ return Expr->getNumOperands();
+ return 1;
+}
+
+static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
+ if (isa<SCEVConstant>(T))
+ return nullptr;
+
+ if (isa<SCEVUnknown>(T))
+ return T;
+
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
+ SmallVector<const SCEV *, 2> Factors;
+ for (const SCEV *Op : M->operands())
+ if (!isa<SCEVConstant>(Op))
+ Factors.push_back(Op);
+
+ return SE.getMulExpr(Factors);
+ }
+
+ return T;
+}
+
+/// Return the size of an element read or written by Inst.
+const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
+ Type *Ty;
+ if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+ Ty = Store->getValueOperand()->getType();
+ else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
+ Ty = Load->getType();
+ else
+ return nullptr;
+
+ Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
+ return getSizeOfExpr(ETy, Ty);
+}
+
+/// Second step of delinearization: compute the array dimensions Sizes from the
+/// set of Terms extracted from the memory access function of this SCEVAddRec.
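+///
+/// For example (following the delinearize example below), the terms
+/// {(8 * %m * %o), (8 * %o)} with ElementSize 8 produce Sizes == {%m, %o, 8}.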
+void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) const {
+  if (Terms.empty() || !ElementSize)
+ return;
+
+  // Early return when Terms do not contain parameters: we do not delinearize
+  // non-parametric SCEVs.
+ if (!containsParameters(Terms))
+ return;
+
+ DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
+
+ // Remove duplicates.
+ std::sort(Terms.begin(), Terms.end());
+ Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
+
+ // Put larger terms first.
+ std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
+ return numberOfTerms(LHS) > numberOfTerms(RHS);
+ });
+
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+ // Divide all terms by the element size.
+ for (const SCEV *&Term : Terms) {
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
+ Term = Q;
+ }
+
+ SmallVector<const SCEV *, 4> NewTerms;
+
+ // Remove constant factors.
+ for (const SCEV *T : Terms)
+ if (const SCEV *NewT = removeConstantFactors(SE, T))
+ NewTerms.push_back(NewT);
+
+ DEBUG({
+ dbgs() << "Terms after sorting:\n";
+ for (const SCEV *T : NewTerms)
+ dbgs() << *T << "\n";
+ });
+
+ if (NewTerms.empty() ||
+ !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
+ Sizes.clear();
+ return;
+ }
+
+ // The last element to be pushed into Sizes is the size of an element.
+ Sizes.push_back(ElementSize);
+
+ DEBUG({
+ dbgs() << "Sizes:\n";
+ for (const SCEV *S : Sizes)
+ dbgs() << *S << "\n";
+ });
+}
+
+/// Third step of delinearization: compute the access functions for the
+/// Subscripts based on the dimensions in Sizes.
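+///
+/// For example (a sketch, continuing the delinearize example below): with
+/// Sizes == {%m, %o, 8}, the AddRec is divided by 8, then %o, then %m; the
+/// remainders of the last two divisions are the access functions of the inner
+/// dimensions, and the final quotient, pushed last and reversed into first
+/// position, is the access function of the outermost dimension.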
+void SCEVAddRecExpr::computeAccessFunctions(
+ ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes) const {
+ // Early exit in case this SCEV is not an affine multivariate function.
+ if (Sizes.empty() || !this->isAffine())
+ return;
+
+ const SCEV *Res = this;
+ int Last = Sizes.size() - 1;
+ for (int i = Last; i >= 0; i--) {
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
+
+ DEBUG({
+ dbgs() << "Res: " << *Res << "\n";
+ dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
+ dbgs() << "Res divided by Sizes[i]:\n";
+ dbgs() << "Quotient: " << *Q << "\n";
+ dbgs() << "Remainder: " << *R << "\n";
+ });
+
+ Res = Q;
+
+ // Do not record the last subscript corresponding to the size of elements in
+ // the array.
+ if (i == Last) {
+
+ // Bail out if the remainder is too complex.
+ if (isa<SCEVAddRecExpr>(R)) {
+ Subscripts.clear();
+ Sizes.clear();
+ return;
+ }
+
+ continue;
+ }
+
+ // Record the access function for the current subscript.
+ Subscripts.push_back(R);
+ }
+
+  // Also push in last position the quotient left after the divisions: once the
+  // vector is reversed below, it becomes the access function of the outermost
+  // dimension.
+ Subscripts.push_back(Res);
+
+ std::reverse(Subscripts.begin(), Subscripts.end());
+
+ DEBUG({
+ dbgs() << "Subscripts:\n";
+ for (const SCEV *S : Subscripts)
+ dbgs() << *S << "\n";
+ });
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access; what remains after the subscripts have been
+/// peeled off is the base offset of the array. The delinearization algorithm
+/// computes the multiples of SCEV coefficients: that is a pattern matching of
+/// sub expressions in the stride and base of a SCEV corresponding to the
+/// computation of a GCD (greatest common divisor) of base and stride. When
+/// delinearization fails, Subscripts and Sizes are left empty.
+///
+/// For example: when analyzing the memory access A[i][j][k] in this loop nest
+///
+/// void foo(long n, long m, long o, double A[n][m][o]) {
+///
+/// for (long i = 0; i < n; i++)
+/// for (long j = 0; j < m; j++)
+/// for (long k = 0; k < o; k++)
+/// A[i][j][k] = 1.0;
+/// }
+///
+/// the delinearization input is the following AddRec SCEV:
+///
+/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+///
+/// From this SCEV, we are able to say that the base offset of the access is %A
+/// because it appears as an offset that does not divide any of the strides in
+/// the loops:
+///
+/// CHECK: Base offset: %A
+///
+/// and then SCEV->delinearize determines the size of some of the dimensions of
+/// the array as these are the multiples by which the strides are happening:
+///
+/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+///
+/// Note that the outermost dimension remains of UnknownSize because there are
+/// no strides that would help identify the size of this dimension: when the
+/// array has been statically allocated, one could compute the size of that
+/// dimension by dividing the overall size of the array by the size of the
+/// known dimensions: %m * %o * 8.
+///
+/// Finally delinearize provides the access functions for the array reference
+/// that corresponds to A[i][j][k] in the above C testcase:
+///
+/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// The testcases check the output of a function pass, DelinearizationPass,
+/// that walks through all loads and stores of a function asking for the SCEV
+/// of each memory access with respect to all enclosing loops, calling
+/// delinearize on that and printing the results.
+void SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) const {
+ // First step: collect parametric terms.
+ SmallVector<const SCEV *, 4> Terms;
+ collectParametricTerms(SE, Terms);
+
+ if (Terms.empty())
+ return;
+
+ // Second step: find subscript sizes.
+ SE.findArrayDimensions(Terms, Sizes, ElementSize);
+
+ if (Sizes.empty())
+ return;
+
+ // Third step: compute the access functions for each subscript.
+ computeAccessFunctions(SE, Subscripts, Sizes);
+
+ if (Subscripts.empty())
+ return;
+
+ DEBUG({
+ dbgs() << "succeeded to delinearize " << *this << "\n";
+ dbgs() << "ArrayDecl[UnknownSize]";
+ for (const SCEV *S : Sizes)
+ dbgs() << "[" << *S << "]";
+
+ dbgs() << "\nArrayRef";
+ for (const SCEV *S : Subscripts)
+ dbgs() << "[" << *S << "]";
+ dbgs() << "\n";
+ });
+}
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
// so that future queries will recompute the expressions using the new
// value.
Value *Old = getValPtr();
- SmallVector<User *, 16> Worklist;
+ SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
SmallPtrSet<User *, 8> Visited;
- for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
- UI != UE; ++UI)
- Worklist.push_back(*UI);
while (!Worklist.empty()) {
User *U = Worklist.pop_back_val();
// Deleting the Old value will cause this to dangle. Postpone
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
SE->ValueExprMap.erase(U);
- for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
- UI != UE; ++UI)
- Worklist.push_back(*UI);
+ Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
}
// Delete the Old value.
if (PHINode *PN = dyn_cast<PHINode>(Old))
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), FirstUnknown(0) {
+ : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
+ BlockDispositions(64), FirstUnknown(nullptr) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
+ AT = &getAnalysis<AssumptionTracker>();
LI = &getAnalysis<LoopInfo>();
- TD = getAnalysisIfAvailable<TargetData>();
- DT = &getAnalysis<DominatorTree>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+ TLI = &getAnalysis<TargetLibraryInfo>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return false;
}
// destructors, so that they release their references to their values.
for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
U->~SCEVUnknown();
- FirstUnknown = 0;
+ FirstUnknown = nullptr;
ValueExprMap.clear();
+
+ // Free any extra memory created for ExitNotTakenInfo in the unlikely event
+ // that a loop had multiple computable exits.
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+ BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
+ I != E; ++I) {
+ I->second.clear();
+ }
+
+ assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
+
BackedgeTakenCounts.clear();
ConstantEvolutionLoopExitValue.clear();
ValuesAtScopes.clear();
void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
+ AU.addRequired<AssumptionTracker>();
AU.addRequiredTransitive<LoopInfo>();
- AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfo>();
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
PrintLoopInfo(OS, SE, *I);
OS << "Loop ";
- WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
SmallVector<BasicBlock *, 8> ExitBlocks;
OS << "\n"
"Loop ";
- WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+ L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
OS << ": ";
if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
OS << "Classifying expressions for: ";
- WriteAsOperand(OS, F, /*PrintType=*/false);
+ F->printAsOperand(OS, /*PrintType=*/false);
OS << "\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
}
OS << "Determining loop execution counts for: ";
- WriteAsOperand(OS, F, /*PrintType=*/false);
+ F->printAsOperand(OS, /*PrintType=*/false);
OS << "\n";
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
PrintLoopInfo(OS, &SE, *I);
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
- std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
- std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, LoopVariant));
- if (!Pair.second)
- return Pair.first->second;
-
+  SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values =
+      LoopDispositions[S];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == L)
+ return Values[u].second;
+ }
+ Values.push_back(std::make_pair(L, LoopVariant));
LoopDisposition D = computeLoopDisposition(S, L);
- return LoopDispositions[S][L] = D;
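+  // Re-query LoopDispositions: computeLoopDisposition may have recursed into
+  // getLoopDisposition and grown the map, invalidating the Values reference.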
+  SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 =
+      LoopDispositions[S];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == L) {
+ Values2[u - 1].second = D;
+ break;
+ }
+ }
+ return D;
}
ScalarEvolution::LoopDisposition
ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
- switch (S->getSCEVType()) {
+ switch (static_cast<SCEVTypes>(S->getSCEVType())) {
case scConstant:
return LoopInvariant;
case scTruncate:
return LoopInvariant;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return LoopVariant;
- default: break;
}
llvm_unreachable("Unknown SCEV kind!");
- return LoopVariant;
}
bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
- std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
- std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
- Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
- if (!Pair.second)
- return Pair.first->second;
-
+  SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values =
+      BlockDispositions[S];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == BB)
+ return Values[u].second;
+ }
+ Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
BlockDisposition D = computeBlockDisposition(S, BB);
- return BlockDispositions[S][BB] = D;
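+  // Re-query BlockDispositions: computeBlockDisposition may have recursed into
+  // getBlockDisposition and grown the map, invalidating the Values reference.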
+  SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 =
+      BlockDispositions[S];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == BB) {
+ Values2[u - 1].second = D;
+ break;
+ }
+ }
+ return D;
}
ScalarEvolution::BlockDisposition
ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
- switch (S->getSCEVType()) {
+ switch (static_cast<SCEVTypes>(S->getSCEVType())) {
case scConstant:
return ProperlyDominatesBlock;
case scTruncate:
return ProperlyDominatesBlock;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return DoesNotDominateBlock;
- default: break;
}
llvm_unreachable("Unknown SCEV kind!");
- return DoesNotDominateBlock;
}
bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
}
-bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
- switch (S->getSCEVType()) {
- case scConstant:
- return false;
- case scTruncate:
- case scZeroExtend:
- case scSignExtend: {
- const SCEVCastExpr *Cast = cast<SCEVCastExpr>(S);
- const SCEV *CastOp = Cast->getOperand();
- return Op == CastOp || hasOperand(CastOp, Op);
- }
- case scAddRecExpr:
- case scAddExpr:
- case scMulExpr:
- case scUMaxExpr:
- case scSMaxExpr: {
- const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
- for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
- I != E; ++I) {
- const SCEV *NAryOp = *I;
- if (NAryOp == Op || hasOperand(NAryOp, Op))
- return true;
- }
- return false;
- }
- case scUDivExpr: {
- const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
- const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
- return LHS == Op || hasOperand(LHS, Op) ||
- RHS == Op || hasOperand(RHS, Op);
- }
- case scUnknown:
- return false;
- case scCouldNotCompute:
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return false;
- default: break;
+namespace {
+// Search for a SCEV expression node within an expression tree.
+// Implements SCEVTraversal::Visitor.
+struct SCEVSearch {
+ const SCEV *Node;
+ bool IsFound;
+
+ SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
+
+ bool follow(const SCEV *S) {
+ IsFound |= (S == Node);
+ return !IsFound;
}
- llvm_unreachable("Unknown SCEV kind!");
- return false;
+ bool isDone() const { return IsFound; }
+};
+}
+
+bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+ SCEVSearch Search(Op);
+ visitAll(S, Search);
+ return Search.IsFound;
}
void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
BlockDispositions.erase(S);
UnsignedRanges.erase(S);
SignedRanges.erase(S);
+
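+  // Drop any cached backedge taken counts whose expression refers to S. The
+  // erase(I++) idiom advances the iterator before the element is removed.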
+ for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I =
+ BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) {
+ BackedgeTakenInfo &BEInfo = I->second;
+ if (BEInfo.hasOperand(S, this)) {
+ BEInfo.clear();
+ BackedgeTakenCounts.erase(I++);
+    } else
+      ++I;
+ }
+}
+
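+// Maps each loop to its stringified backedge taken count, so verifyAnalysis
+// can textually compare counts computed with and without SCEV's caches.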
+typedef DenseMap<const Loop *, std::string> VerifyMap;
+
+/// replaceSubString - Replaces all occurrences of From in Str with To.
+static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
+ size_t Pos = 0;
+ while ((Pos = Str.find(From, Pos)) != std::string::npos) {
+ Str.replace(Pos, From.size(), To.data(), To.size());
+ Pos += To.size();
+ }
+}
+
+/// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
+static void
+getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
+  // Recurse into subloops first.
+  for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I)
+    getLoopBackedgeTakenCounts(*I, Map, SE);
+
+  // Record the stringified backedge taken count for L itself, including when
+  // L has no subloops.
+  std::string &S = Map[L];
+  if (S.empty()) {
+    raw_string_ostream OS(S);
+    SE.getBackedgeTakenCount(L)->print(OS);
+
+    // false and 0 are semantically equivalent. This can happen in dead loops.
+    replaceSubString(OS.str(), "false", "0");
+    // Remove wrap flags; their use in SCEV is highly fragile.
+    // FIXME: Remove this when SCEV gets smarter about them.
+    replaceSubString(OS.str(), "<nw>", "");
+    replaceSubString(OS.str(), "<nsw>", "");
+    replaceSubString(OS.str(), "<nuw>", "");
+  }
+}
+
+void ScalarEvolution::verifyAnalysis() const {
+ if (!VerifySCEV)
+ return;
+
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
+
+ // Gather stringified backedge taken counts for all loops using SCEV's caches.
+ // FIXME: It would be much better to store actual values instead of strings,
+ // but SCEV pointers will change if we drop the caches.
+ VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
+ for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+ getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
+
+ // Gather stringified backedge taken counts for all loops without using
+ // SCEV's caches.
+ SE.releaseMemory();
+ for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+ getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
+
+  // Now compare whether they're the same with and without caches. This allows
+  // verifying that no pass left stale results in SCEV's caches.
+ assert(BackedgeDumpsOld.size() == BackedgeDumpsNew.size() &&
+ "New loops suddenly appeared!");
+
+ for (VerifyMap::iterator OldI = BackedgeDumpsOld.begin(),
+ OldE = BackedgeDumpsOld.end(),
+ NewI = BackedgeDumpsNew.begin();
+ OldI != OldE; ++OldI, ++NewI) {
+ assert(OldI->first == NewI->first && "Loop order changed!");
+
+    // Compare the stringified SCEVs. We don't care if an undef backedge-taken
+    // count changes.
+    // FIXME: We currently ignore SCEV changes from/to CouldNotCompute. Such a
+    // change can mean that a pass is buggy or that SCEV has to learn a new
+    // pattern, but it is usually not harmful.
+ if (OldI->second != NewI->second &&
+ OldI->second.find("undef") == std::string::npos &&
+ NewI->second.find("undef") == std::string::npos &&
+ OldI->second != "***COULDNOTCOMPUTE***" &&
+ NewI->second != "***COULDNOTCOMPUTE***") {
+ dbgs() << "SCEVValidator: SCEV for loop '"
+ << OldI->first->getHeader()->getName()
+ << "' changed from '" << OldI->second
+ << "' to '" << NewI->second << "'!\n";
+ std::abort();
+ }
+ }
+
+ // TODO: Verify more things.
}