From fdefc694cd20958bbeb10bd66767fda371452c7c Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 27 Jan 2015 21:38:12 +0000 Subject: [PATCH] Teach IRCE to look at branch weights when recognizing range checks Splitting a loop to make range checks redundant is profitable only if the range check "never" fails. Make this fact a part of recognizing a range check -- a branch is a range check only if it is expected to pass (via branch_weights metadata). Differential Revision: http://reviews.llvm.org/D7192 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227249 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/InductiveRangeCheckElimination.cpp | 17 ++++++-- .../IRCE/multiple-access-no-preloop.ll | 5 ++- test/Transforms/IRCE/not-likely-taken.ll | 40 +++++++++++++++++++ .../IRCE/single-access-no-preloop.ll | 5 ++- .../IRCE/single-access-with-preloop.ll | 3 +- test/Transforms/IRCE/with-parent-loops.ll | 19 ++++----- 6 files changed, 72 insertions(+), 17 deletions(-) create mode 100644 test/Transforms/IRCE/not-likely-taken.ll diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index ea621ba6642..327d4fea25d 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -43,6 +43,7 @@ #include "llvm/ADT/Optional.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -169,7 +170,8 @@ public: /// Create an inductive range check out of BI if possible, else return /// nullptr. static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI, - Loop *L, ScalarEvolution &SE); + Loop *L, ScalarEvolution &SE, + BranchProbabilityInfo &BPI); }; class InductiveRangeCheckElimination : public LoopPass { @@ -187,6 +189,7 @@ public: AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addRequired(); + AU.addRequired(); } bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -354,13 +357,20 @@ static bool SplitRangeCheckCondition(Loop *L, ScalarEvolution &SE, return true; } + InductiveRangeCheck * InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, - Loop *L, ScalarEvolution &SE) { + Loop *L, ScalarEvolution &SE, + BranchProbabilityInfo &BPI) { if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch()) return nullptr; + BranchProbability LikelyTaken(15, 16); + + if (BPI.getEdgeProbability(BI->getParent(), (unsigned) 0) < LikelyTaken) + return nullptr; + Value *Length = nullptr; const SCEV *IndexSCEV = nullptr; @@ -1175,11 +1185,12 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { InductiveRangeCheck::AllocatorTy IRCAlloc; SmallVector RangeChecks; ScalarEvolution &SE = getAnalysis(); + BranchProbabilityInfo &BPI = getAnalysis(); for (auto BBI : L->getBlocks()) if (BranchInst *TBI = dyn_cast(BBI->getTerminator())) if (InductiveRangeCheck *IRC = - InductiveRangeCheck::create(IRCAlloc, TBI, L, SE)) + InductiveRangeCheck::create(IRCAlloc, TBI, L, SE, BPI)) RangeChecks.push_back(IRC); if (RangeChecks.empty()) diff --git a/test/Transforms/IRCE/multiple-access-no-preloop.ll b/test/Transforms/IRCE/multiple-access-no-preloop.ll index 56b7b7b6c65..b8d2f140bab 100644 --- a/test/Transforms/IRCE/multiple-access-no-preloop.ll +++ b/test/Transforms/IRCE/multiple-access-no-preloop.ll @@ -13,13 +13,13 @@ define void @multiple_access_no_preloop( %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds.b ] %idx.next = add i32 %idx, 1 %abc.a = icmp slt i32 %idx, %len.a - br i1 %abc.a, label %in.bounds.a, label %out.of.bounds + br i1 %abc.a, label %in.bounds.a, label %out.of.bounds, !prof !1 in.bounds.a: %addr.a = getelementptr i32* %arr_a, i32 %idx store i32 0, i32* %addr.a %abc.b = icmp slt i32 %idx, %len.b - br i1 %abc.b, label %in.bounds.b, label %out.of.bounds + br i1 %abc.b, label %in.bounds.b, label %out.of.bounds, !prof !1 in.bounds.b: %addr.b = getelementptr i32* %arr_b, i32 %idx @@ -57,3 +57,4 @@ define void @multiple_access_no_preloop( ; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit !0 = !{i32 0, i32 2147483647} +!1 = !{!"branch_weights", i32 64, i32 4} diff --git a/test/Transforms/IRCE/not-likely-taken.ll b/test/Transforms/IRCE/not-likely-taken.ll new file mode 100644 index 00000000000..c044530b4b9 --- /dev/null +++ b/test/Transforms/IRCE/not-likely-taken.ll @@ -0,0 +1,40 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce < %s 2>&1 | FileCheck %s + +; CHECK-NOT: constrained Loop + +define void @multiple_access_no_preloop( + i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) { + + entry: + %len.a = load i32* %a_len_ptr, !range !0 + %len.b = load i32* %b_len_ptr, !range !0 + %first.itr.check = icmp sgt i32 %n, 0 + br i1 %first.itr.check, label %loop, label %exit + + loop: + %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds.b ] + %idx.next = add i32 %idx, 1 + %abc.a = icmp slt i32 %idx, %len.a + br i1 %abc.a, label %in.bounds.a, label %out.of.bounds, !prof !1 + + in.bounds.a: + %addr.a = getelementptr i32* %arr_a, i32 %idx + store i32 0, i32* %addr.a + %abc.b = icmp slt i32 %idx, %len.b + br i1 %abc.b, label %in.bounds.b, label %out.of.bounds, !prof !1 + + in.bounds.b: + %addr.b = getelementptr i32* %arr_b, i32 %idx + store i32 -1, i32* %addr.b + %next = icmp slt i32 %idx.next, %n + br i1 %next, label %loop, label %exit + + out.of.bounds: + ret void + + exit: + ret void +} + +!0 = !{i32 0, i32 2147483647} +!1 = !{!"branch_weights", i32 1, i32 1} \ No newline at end of file diff --git a/test/Transforms/IRCE/single-access-no-preloop.ll b/test/Transforms/IRCE/single-access-no-preloop.ll index cf073b359a7..34d8cd64b1c 100644 --- a/test/Transforms/IRCE/single-access-no-preloop.ll +++ b/test/Transforms/IRCE/single-access-no-preloop.ll @@ -10,7 +10,7 @@ define void @single_access_no_preloop_no_offset(i32 *%arr, i32 *%a_len_ptr, i32 %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ] %idx.next = add i32 %idx, 1 %abc = icmp slt i32 %idx, %len - br i1 %abc, label %in.bounds, label %out.of.bounds + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 in.bounds: %addr = getelementptr i32* %arr, i32 %idx @@ -65,7 +65,7 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3 %idx.next = add i32 %idx, 1 %idx.for.abc = add i32 %idx, 4 %abc = icmp slt i32 %idx.for.abc, %len - br i1 %abc, label %in.bounds, label %out.of.bounds + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 in.bounds: %addr = getelementptr i32* %arr, i32 %idx.for.abc @@ -108,3 +108,4 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3 ; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit !0 = !{i32 0, i32 2147483647} +!1 = !{!"branch_weights", i32 64, i32 4} diff --git a/test/Transforms/IRCE/single-access-with-preloop.ll b/test/Transforms/IRCE/single-access-with-preloop.ll index 6775d335a04..dacf697e98a 100644 --- a/test/Transforms/IRCE/single-access-with-preloop.ll +++ b/test/Transforms/IRCE/single-access-with-preloop.ll @@ -13,7 +13,7 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 %abc.high = icmp slt i32 %array.idx, %len %abc.low = icmp sge i32 %array.idx, 0 %abc = and i1 %abc.low, %abc.high - br i1 %abc, label %in.bounds, label %out.of.bounds + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 in.bounds: %addr = getelementptr i32* %arr, i32 %array.idx @@ -57,3 +57,4 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 ; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit !0 = !{i32 0, i32 2147483647} +!1 = !{!"branch_weights", i32 64, i32 4} diff --git a/test/Transforms/IRCE/with-parent-loops.ll b/test/Transforms/IRCE/with-parent-loops.ll index 25dfb133f54..f8d6c83ecae 100644 --- a/test/Transforms/IRCE/with-parent-loops.ll +++ b/test/Transforms/IRCE/with-parent-loops.ll @@ -16,7 +16,7 @@ loop: ; preds = %in.bounds, %entry %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] %idx.next = add i32 %idx, 1 %abc = icmp slt i32 %idx, %len - br i1 %abc, label %in.bounds, label %out.of.bounds + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 in.bounds: ; preds = %loop %addr = getelementptr i32* %arr, i32 %idx @@ -50,7 +50,7 @@ loop.i: ; preds = %in.bounds.i, %loop %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ] %idx.next.i = add i32 %idx.i, 1 %abc.i = icmp slt i32 %idx.i, %len.i - br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i + br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i, !prof !1 in.bounds.i: ; preds = %loop.i %addr.i = getelementptr i32* %arr, i32 %idx.i @@ -96,7 +96,7 @@ loop.i.i: ; preds = %in.bounds.i.i, %loo %idx.i.i = phi i32 [ 0, %loop.i ], [ %idx.next.i.i, %in.bounds.i.i ] %idx.next.i.i = add i32 %idx.i.i, 1 %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i - br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i + br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i, !prof !1 in.bounds.i.i: ; preds = %loop.i.i %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i @@ -140,7 +140,7 @@ loop.i: ; preds = %in.bounds.i, %loop %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ] %idx.next.i = add i32 %idx.i, 1 %abc.i = icmp slt i32 %idx.i, %len.i - br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i + br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i, !prof !1 in.bounds.i: ; preds = %loop.i %addr.i = getelementptr i32* %arr, i32 %idx.i @@ -163,7 +163,7 @@ loop.i6: ; preds = %in.bounds.i9, %inne %idx.i3 = phi i32 [ 0, %inner_loop.exit ], [ %idx.next.i4, %in.bounds.i9 ] %idx.next.i4 = add i32 %idx.i3, 1 %abc.i5 = icmp slt i32 %idx.i3, %len.i1 - br i1 %abc.i5, label %in.bounds.i9, label %out.of.bounds.i10 + br i1 %abc.i5, label %in.bounds.i9, label %out.of.bounds.i10, !prof !1 in.bounds.i9: ; preds = %loop.i6 %addr.i7 = getelementptr i32* %arr, i32 %idx.i3 @@ -210,7 +210,7 @@ loop.i.i: ; preds = %in.bounds.i.i, %loo %idx.i.i = phi i32 [ 0, %loop.i ], [ %idx.next.i.i, %in.bounds.i.i ] %idx.next.i.i = add i32 %idx.i.i, 1 %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i - br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i + br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i, !prof !1 in.bounds.i.i: ; preds = %loop.i.i %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i @@ -242,7 +242,7 @@ loop.i.i10: ; preds = %in.bounds.i.i13, %l %idx.i.i7 = phi i32 [ 0, %loop.i6 ], [ %idx.next.i.i8, %in.bounds.i.i13 ] %idx.next.i.i8 = add i32 %idx.i.i7, 1 %abc.i.i9 = icmp slt i32 %idx.i.i7, %len.i.i4 - br i1 %abc.i.i9, label %in.bounds.i.i13, label %out.of.bounds.i.i14 + br i1 %abc.i.i9, label %in.bounds.i.i13, label %out.of.bounds.i.i14, !prof !1 in.bounds.i.i13: ; preds = %loop.i.i10 %addr.i.i11 = getelementptr i32* %arr, i32 %idx.i.i7 @@ -286,7 +286,7 @@ loop.i: ; preds = %in.bounds.i, %loop %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ] %idx.next.i = add i32 %idx.i, 1 %abc.i = icmp slt i32 %idx.i, %len.i - br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i + br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i, !prof !1 in.bounds.i: ; preds = %loop.i %addr.i = getelementptr i32* %arr, i32 %idx.i @@ -315,7 +315,7 @@ loop.i.i: ; preds = %in.bounds.i.i, %loo %idx.i.i = phi i32 [ 0, %loop.i4 ], [ %idx.next.i.i, %in.bounds.i.i ] %idx.next.i.i = add i32 %idx.i.i, 1 %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i - br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i + br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i, !prof !1 in.bounds.i.i: ; preds = %loop.i.i %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i @@ -342,3 +342,4 @@ exit: ; preds = %with_parent.exit attributes #0 = { alwaysinline } !0 = !{i32 0, i32 2147483647} +!1 = !{!"branch_weights", i32 64, i32 4} -- 2.34.1