From: Andrew Trick <atrick@apple.com>
Date: Fri, 22 Jun 2012 02:50:31 +0000 (+0000)
Subject: ARM scheduling fix: compute predicated implicit use properly.
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=ef2d9e59aba381c42e018df9c26f9025c1995a64;p=oota-llvm.git

ARM scheduling fix: compute predicated implicit use properly.

Minor drive by fix to cleanup latency computation. Calling
getOperandLatency with a deliberately incorrect operand index does not
give you the latency you want.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158959 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 6bed1371feb..cceb05e4615 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2998,9 +2998,7 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
     return 1;
 
   // If the second MI is predicated, then there is an implicit use dependency.
-  int Latency = getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
-                                  DepMI->getNumOperands());
-  return (Latency <= 0) ? 1 : Latency;
+  return getInstrLatency(ItinData, DefMI);
 }
 
 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
index de48feeb9ec..4a82c36676f 100644
--- a/test/CodeGen/ARM/neon_div.ll
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source -disable-post-ra | FileCheck %s
 
 define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vrecpe.f32
-;CHECK: vrecpe.f32
 ;CHECK: vmovn.i32
+;CHECK: vrecpe.f32
 ;CHECK: vmovn.i32
 ;CHECK: vmovn.i16
 	%tmp1 = load <8 x i8>* %A
@@ -15,10 +15,10 @@ define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vrecpe.f32
 ;CHECK: vrecps.f32
+;CHECK: vmovn.i32
 ;CHECK: vrecpe.f32
 ;CHECK: vrecps.f32
 ;CHECK: vmovn.i32
-;CHECK: vmovn.i32
 ;CHECK: vqmovun.s16
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B