// Don't allow an estimate of size zero. This would allow unrolling of loops
// with huge iteration counts, which is a compile time problem even if it's
- // not a problem for code quality.
- if (LoopSize == 0) LoopSize = 1;
+ // not a problem for code quality. Also, the code using this size may assume
+ // that each loop has at least three instructions (likely a conditional
+ // branch, a comparison feeding that branch, and some kind of loop increment
+ // feeding that comparison instruction).
+ LoopSize = std::max(LoopSize, 3u);
return LoopSize;
}
unsigned LoopSize =
ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, &AC);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
- uint64_t UnrolledSize = (uint64_t)LoopSize * Count;
+
+ // When computing the unrolled size, note that the conditional branch on the
+ // backedge and the comparison feeding it are not replicated like the rest of
+ // the loop body (which is why 2 is subtracted).
+ uint64_t UnrolledSize = (uint64_t)(LoopSize-2) * Count + 2;
if (notDuplicatable) {
DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
<< " instructions.\n");
}
if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
// Reduce the unroll count so that it evenly divides TripCount for partial unrolling.
- Count = PartialThreshold / LoopSize;
+ Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2);
while (Count != 0 && TripCount % Count != 0)
Count--;
}
// the original count which satisfies the threshold limit.
while (Count != 0 && UnrolledSize > PartialThreshold) {
Count >>= 1;
- UnrolledSize = LoopSize * Count;
+ UnrolledSize = (LoopSize-2) * Count + 2;
}
if (Count > UP.MaxCount)
Count = UP.MaxCount;
; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s
; Loop size = 3, when the function has the optsize attribute, the
; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled
-; by 16 times because 3 * 16 < 50.
+; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not).
define void @unroll_opt_for_size() nounwind optsize {
entry:
br label %loop
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
; CHECK-NEXT: icmp
+