From: Cong Hou Date: Fri, 18 Sep 2015 18:19:40 +0000 (+0000) Subject: Scaling up values in ARMBaseInstrInfo::isProfitableToIfCvt() before they are scaled... X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=759d988de368e4adf535390b0416c5264620419e Scaling up values in ARMBaseInstrInfo::isProfitableToIfCvt() before they are scaled by a probability to avoid precision issue. In ARMBaseInstrInfo::isProfitableToIfCvt(), there is a simple cost model in which the number of cycles is scaled by a probability to estimate the cost. However, when the number of cycles is small (which is usually the case), there is a precision issue after the computation. To avoid this issue, this patch scales those cycles by 1024 (chosen to make the multiplication a litter faster) before they are scaled by the probability. Other variables are also scaled up for the final comparison. Differential Revision: http://reviews.llvm.org/D12742 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248018 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 79ab692c76a..abb1fdad080 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1670,11 +1670,14 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, } // Attempt to estimate the relative costs of predication versus branching. - unsigned UnpredCost = Probability.scale(NumCycles); - UnpredCost += 1; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() / 10; - - return (NumCycles + ExtraPredCycles) <= UnpredCost; + // Here we scale up each component of UnpredCost to avoid precision issue when + // scaling NumCycles by Probability. + const unsigned ScalingUpFactor = 1024; + unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor); + UnpredCost += ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; + + return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost; } bool ARMBaseInstrInfo:: @@ -1687,13 +1690,17 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, return false; // Attempt to estimate the relative costs of predication versus branching. - unsigned TUnpredCost = Probability.scale(TCycles); - unsigned FUnpredCost = Probability.getCompl().scale(FCycles); + // Here we scale up each component of UnpredCost to avoid precision issue when + // scaling TCycles/FCycles by Probability. + const unsigned ScalingUpFactor = 1024; + unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); + unsigned FUnpredCost = + Probability.getCompl().scale(FCycles * ScalingUpFactor); unsigned UnpredCost = TUnpredCost + FUnpredCost; - UnpredCost += 1; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() / 10; + UnpredCost += 1 * ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; - return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; + return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost; } bool diff --git a/test/CodeGen/ARM/2013-10-11-select-stalls.ll b/test/CodeGen/ARM/2013-10-11-select-stalls.ll index d6045c7b8c8..c41fe905aa8 100644 --- a/test/CodeGen/ARM/2013-10-11-select-stalls.ll +++ b/test/CodeGen/ARM/2013-10-11-select-stalls.ll @@ -1,8 +1,13 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=thumbv7-apple-ios -stats 2>&1 | not grep "Number of pipeline stalls" +; RUN: llc < %s -mtriple=thumbv7-apple-ios -disable-ifcvt-diamond -stats 2>&1 | FileCheck %s ; Evaluate the two vld1.8 instructions in separate MBB's, ; instead of stalling on one and conditionally overwriting its result. +; +; Update: After if-conversion the two vld1.8 instructions are in the same MBB +; again. So we disable this if-conversion to eliminate its influence to this +; test. +; CHECK-NOT: Number of pipeline stalls define <16 x i8> @multiselect(i32 %avail, i8* %foo, i8* %bar) { entry: %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %foo, i32 1) diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll index 8c6825aeda9..0a6b99fb89b 100644 --- a/test/CodeGen/ARM/ifcvt4.ll +++ b/test/CodeGen/ARM/ifcvt4.ll @@ -1,10 +1,8 @@ ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s -; Do not if-convert when branches go to the different loops. ; CHECK-LABEL: t: -; CHECK-NOT: subgt -; CHECK-NOT: suble -; Don't use +; CHECK: subgt +; CHECK: suble define i32 @t(i32 %a, i32 %b) { entry: %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] diff --git a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll index c7f47b0962d..a1abef9605c 100644 --- a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll +++ b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll @@ -77,8 +77,8 @@ declare void @terminatev() ; CHECK: blx __Znwm ; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge ; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]] -; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]] ; CHECK: {{.*}}@ %do.body.i.i.i +; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]] ; CHECK: cbz [[R0]] %"class.std::__1::basic_string" = type { %"class.std::__1::__compressed_pair" } diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll index 77d0f30485d..db32f18d82c 100644 --- a/test/CodeGen/ARM/test-sharedidx.ll +++ b/test/CodeGen/ARM/test-sharedidx.ll @@ -59,9 +59,6 @@ for.body.1: ; preds = %for.body br i1 %cmp.1, label %for.body.2, label %for.end for.body.2: ; preds = %for.body.1 -; CHECK: %for.body.2 -; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]! -; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]! %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1 %4 = load i8, i8* %arrayidx.2, align 1 %conv6.2 = zext i8 %4 to i32 diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll index da1057b8bb4..09def7372f5 100644 --- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll +++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll @@ -24,11 +24,10 @@ cond_next: define i32 @t2(i32 %a, i32 %b) nounwind { entry: -; Do not if-convert when branches go to the different loops. ; CHECK-LABEL: t2: -; CHECK-NOT: ite gt -; CHECK-NOT: subgt -; CHECK-NOT: suble +; CHECK: ite gt +; CHECK: subgt +; CHECK: suble %tmp1434 = icmp eq i32 %a, %b ; [#uses=1] br i1 %tmp1434, label %bb17, label %bb.outer