Scaling up values in ARMBaseInstrInfo::isProfitableToIfCvt() before they are scaled by a probability to avoid precision issue.

author Cong Hou <congh@google.com>

Fri, 18 Sep 2015 18:19:40 +0000 (18:19 +0000)

committer Cong Hou <congh@google.com>

Fri, 18 Sep 2015 18:19:40 +0000 (18:19 +0000)
author Cong Hou <congh@google.com>
Fri, 18 Sep 2015 18:19:40 +0000 (18:19 +0000)
committer Cong Hou <congh@google.com>
Fri, 18 Sep 2015 18:19:40 +0000 (18:19 +0000)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp

index 79ab692c76a44ca35a4fbb2b26ea43a1b5f23df8..abb1fdad080f43b933e71160d68c4248112d08e2 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1670,11 +1670,14 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
    }
  
    // Attempt to estimate the relative costs of predication versus branching.
-  unsigned UnpredCost = Probability.scale(NumCycles);
-  UnpredCost += 1; // The branch itself
-  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
-
-  return (NumCycles + ExtraPredCycles) <= UnpredCost;
+  // Here we scale up each component of UnpredCost to avoid precision issue when
+  // scaling NumCycles by Probability.
+  const unsigned ScalingUpFactor = 1024;
+  unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
+  UnpredCost += ScalingUpFactor; // The branch itself
+  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+
+  return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
  }
  
  bool ARMBaseInstrInfo::
@@ -1687,13 +1690,17 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
      return false;
  
    // Attempt to estimate the relative costs of predication versus branching.
-  unsigned TUnpredCost = Probability.scale(TCycles);
-  unsigned FUnpredCost = Probability.getCompl().scale(FCycles);
+  // Here we scale up each component of UnpredCost to avoid precision issue when
+  // scaling TCycles/FCycles by Probability.
+  const unsigned ScalingUpFactor = 1024;
+  unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+  unsigned FUnpredCost =
+      Probability.getCompl().scale(FCycles * ScalingUpFactor);
    unsigned UnpredCost = TUnpredCost + FUnpredCost;
-  UnpredCost += 1; // The branch itself
-  UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+  UnpredCost += 1 * ScalingUpFactor; // The branch itself
+  UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
  
-  return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
+  return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
  }
  
  bool
diff --git a/test/CodeGen/ARM/2013-10-11-select-stalls.ll b/test/CodeGen/ARM/2013-10-11-select-stalls.ll

index d6045c7b8c8c798b95202ff8ed8fee73ac375e43..c41fe905aa831bef796c6e987cf54d8c4b814e5f 100644 (file)
--- a/test/CodeGen/ARM/2013-10-11-select-stalls.ll
+++ b/test/CodeGen/ARM/2013-10-11-select-stalls.ll
@@ -1,8 +1,13 @@
  ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -stats 2>&1 | not grep "Number of pipeline stalls"
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -disable-ifcvt-diamond -stats 2>&1 | FileCheck %s
  ; Evaluate the two vld1.8 instructions in separate MBB's,
  ; instead of stalling on one and conditionally overwriting its result.
+;
+; Update: After if-conversion the two vld1.8 instructions are in the same MBB
+; again. So we disable this if-conversion to eliminate its influence to this
+; test.
  
+; CHECK-NOT: Number of pipeline stalls
  define <16 x i8> @multiselect(i32 %avail, i8* %foo, i8* %bar) {
  entry:
    %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %foo, i32 1)
diff --git a/test/CodeGen/ARM/ifcvt4.ll b/test/CodeGen/ARM/ifcvt4.ll

index 8c6825aeda9732e4740491c36193cbcbe8015b00..0a6b99fb89b36c9f969a83ddd2bce2801b1f1e60 100644 (file)
--- a/test/CodeGen/ARM/ifcvt4.ll
+++ b/test/CodeGen/ARM/ifcvt4.ll
@@ -1,10 +1,8 @@
  ; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
  
-; Do not if-convert when branches go to the different loops.
  ; CHECK-LABEL: t:
-; CHECK-NOT: subgt
-; CHECK-NOT: suble
-; Don't use
+; CHECK: subgt
+; CHECK: suble
  define i32 @t(i32 %a, i32 %b) {
  entry:
         %tmp1434 = icmp eq i32 %a, %b           ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll

index c7f47b0962dc65b4eb368af55e4e29efb296dd7e..a1abef9605ca184c9bccad2e1ffc81cfbad6a09f 100644 (file)
--- a/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
+++ b/test/CodeGen/ARM/sjlj-prepare-critical-edge.ll
@@ -77,8 +77,8 @@ declare void @terminatev()
  ; CHECK: blx __Znwm
  ; CHECK: {{.*}}@ %entry.do.body.i.i.i_crit_edge
  ; CHECK: str r0, [sp, [[OFFSET:#[0-9]+]]]
-; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
  ; CHECK: {{.*}}@ %do.body.i.i.i
+; CHECK: ldr [[R0:r[0-9]+]], [sp, [[OFFSET]]]
  ; CHECK: cbz [[R0]]
  
  %"class.std::__1::basic_string" = type { %"class.std::__1::__compressed_pair" }
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll

index 77d0f30485df9e10cf4145f9c7aab9eda6cda155..db32f18d82c00a152ceec7965cfe73e60ff5b10a 100644 (file)
--- a/test/CodeGen/ARM/test-sharedidx.ll
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -59,9 +59,6 @@ for.body.1:                                       ; preds = %for.body
    br i1 %cmp.1, label %for.body.2, label %for.end
  
  for.body.2:                                       ; preds = %for.body.1
-; CHECK: %for.body.2
-; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
-; CHECK: ldrb {{r[0-9]+|lr}}, [{{r[0-9]+|lr}}, {{r[0-9]+|lr}}]!
    %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
    %4 = load i8, i8* %arrayidx.2, align 1
    %conv6.2 = zext i8 %4 to i32
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll

index da1057b8bb4ab84f90eef4e3d2bc513175b836f0..09def7372f5bad22659bd5e59c9a5a071cba4123 100644 (file)
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -24,11 +24,10 @@ cond_next:
  
  define i32 @t2(i32 %a, i32 %b) nounwind {
  entry:
-; Do not if-convert when branches go to the different loops.
  ; CHECK-LABEL: t2:
-; CHECK-NOT: ite gt
-; CHECK-NOT: subgt
-; CHECK-NOT: suble
+; CHECK: ite gt
+; CHECK: subgt
+; CHECK: suble
         %tmp1434 = icmp eq i32 %a, %b           ; <i1> [#uses=1]
         br i1 %tmp1434, label %bb17, label %bb.outer
author	Cong Hou <congh@google.com>
	Fri, 18 Sep 2015 18:19:40 +0000 (18:19 +0000)
committer	Cong Hou <congh@google.com>
	Fri, 18 Sep 2015 18:19:40 +0000 (18:19 +0000)
lib/Target/ARM/ARMBaseInstrInfo.cpp		patch | blob | history
test/CodeGen/ARM/2013-10-11-select-stalls.ll		patch | blob | history
test/CodeGen/ARM/ifcvt4.ll		patch | blob | history
test/CodeGen/ARM/sjlj-prepare-critical-edge.ll		patch | blob | history
test/CodeGen/ARM/test-sharedidx.ll		patch | blob | history
test/CodeGen/Thumb2/thumb2-ifcvt1.ll		patch | blob | history