return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
case LSRUse::Special:
- // Only handle -1 scales, or no scale.
- return AM.Scale == 0 || AM.Scale == -1;
+ // Special case Basic to handle -1 scales.
+ return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
}
llvm_unreachable("Invalid LSRUse Kind!");
return &LU;
// This is the formula where all the registers and symbols matched;
// there aren't going to be any others. Since we declined it, we
- // can skip the rest of the formulae and procede to the next LSRUse.
+ // can skip the rest of the formulae and proceed to the next LSRUse.
break;
}
}
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
- if (SE.isLoopInvariant(N, L)) {
+ if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,
/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
-static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
- SmallVectorImpl<const SCEV *> &Ops,
- const Loop *L,
- ScalarEvolution &SE) {
+///
+/// Return remainder expression after factoring the subexpressions captured by
+/// Ops. If Ops is complete, return NULL.
+static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
+ SmallVectorImpl<const SCEV *> &Ops,
+ const Loop *L,
+ ScalarEvolution &SE,
+ unsigned Depth = 0) {
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return S;
+
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- CollectSubexprs(*I, C, Ops, L, SE);
- return;
+ I != E; ++I) {
+ const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ }
+ return NULL;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
- if (!AR->getStart()->isZero()) {
- CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
- AR->getStepRecurrence(SE),
- AR->getLoop(),
- //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
- SCEV::FlagAnyWrap),
- C, Ops, L, SE);
- CollectSubexprs(AR->getStart(), C, Ops, L, SE);
- return;
+ if (AR->getStart()->isZero())
+ return S;
+
+ const SCEV *Remainder = CollectSubexprs(AR->getStart(),
+ C, Ops, L, SE, Depth+1);
+ // Split the non-zero AddRec unless it is part of a nested recurrence that
+ // does not pertain to this loop.
+ if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
+ Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
+ Remainder = NULL;
+ }
+ if (Remainder != AR->getStart()) {
+ if (!Remainder)
+ Remainder = SE.getConstant(AR->getType(), 0);
+ return SE.getAddRecExpr(Remainder,
+ AR->getStepRecurrence(SE),
+ AR->getLoop(),
+ //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
+ SCEV::FlagAnyWrap);
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
// Break (C * (a + b + c)) into C*a + C*b + C*c.
- if (Mul->getNumOperands() == 2)
- if (const SCEVConstant *Op0 =
- dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
- CollectSubexprs(Mul->getOperand(1),
- C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, L, SE);
- return;
- }
+ if (Mul->getNumOperands() != 2)
+ return S;
+ if (const SCEVConstant *Op0 =
+ dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+ C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
+ const SCEV *Remainder =
+ CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
+ if (Remainder)
+ Ops.push_back(SE.getMulExpr(C, Remainder));
+ return NULL;
+ }
}
-
- // Otherwise use the value itself, optionally with a scale applied.
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ return S;
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
const SCEV *BaseReg = Base.BaseRegs[i];
SmallVector<const SCEV *, 8> AddOps;
- CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
if (AddOps.size() == 1) continue;
// Attempt to find an insert position in the middle of the block,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
- (!BetterPos || DT.dominates(BetterPos, Inst)))
+ (!BetterPos || !DT.dominates(Inst, BetterPos)))
BetterPos = llvm::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
- // Flush the operand list to suppress SCEVExpander hoisting.
- if (!Ops.empty()) {
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
- }
-
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
if (F.AM.Scale != 0) {
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
+
+ // Flush the operand list to suppress SCEVExpander hoisting address modes.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
ScaledS = SE.getMulExpr(ScaledS,
SE.getConstant(ScaledS->getType(), F.AM.Scale));
Ops.push_back(ScaledS);
-
- // Flush the operand list to suppress SCEVExpander hoisting.
- Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
- Ops.clear();
- Ops.push_back(SE.getUnknown(FullV));
}
}
// Expand the GV portion.
if (F.AM.BaseGV) {
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+ }
- // Flush the operand list to suppress SCEVExpander hoisting.
+ // Flush the operand list to suppress SCEVExpander hoisting of both folded and
+ // unfolded offsets. LSR assumes they both live next to their uses.
+ if (!Ops.empty()) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));