Add pseudo dependency to force two-address instruction to be scheduled after
author Evan Cheng <evan.cheng@apple.com>
Tue, 6 Nov 2007 08:44:59 +0000 (08:44 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Tue, 6 Nov 2007 08:44:59 +0000 (08:44 +0000)
other uses. There was an overly restrictive check that prevented some obvious
cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43762 91177308-0d34-0410-b5e6-96231b3b80d8

lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
test/CodeGen/X86/2007-11-06-InstrSched.ll [new file with mode: 0644]

index 5a2f200303678c2c6a011c5bb3ee424310a7021a..9c03b4e5b0f0ae0bc8b908f7ef9da9561a691db9 100644 (file)
@@ -1308,8 +1308,11 @@ void BURegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
           // plus it may increase register pressures.
           if (SuccSU == SU || SuccSU->hasPhysRegDefs)
             continue;
-          // Be conservative. Ignore if nodes aren't at the same depth.
-          if (SuccSU->Depth != SU->Depth)
+          // Be conservative. Ignore if nodes aren't at roughly the same
+          // depth and height.
+          if (SuccSU->Height < SU->Height && (SU->Height - SuccSU->Height) > 1)
+            continue;
+          if (SuccSU->Depth > SU->Depth && (SuccSU->Depth - SU->Depth) > 1)
             continue;
           if (!SuccSU->Node || !SuccSU->Node->isTargetOpcode())
             continue;
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
new file mode 100644 (file)
index 0000000..f2542ea
--- /dev/null
@@ -0,0 +1,25 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep lea
+
+define float @foo(i32* %x, float* %y, i32 %c) {
+entry:
+       %tmp2132 = icmp eq i32 %c, 0            ; <i1> [#uses=1]
+       br i1 %tmp2132, label %bb23, label %bb18
+
+bb18:          ; preds = %bb18, %entry
+       %i.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp17, %bb18 ]               ; <i32> [#uses=3]
+       %res.0.reg2mem.0 = phi float [ 0.000000e+00, %entry ], [ %tmp14, %bb18 ]                ; <float> [#uses=1]
+       %tmp3 = getelementptr i32* %x, i32 %i.0.reg2mem.0               ; <i32*> [#uses=1]
+       %tmp4 = load i32* %tmp3, align 4                ; <i32> [#uses=1]
+       %tmp45 = sitofp i32 %tmp4 to float              ; <float> [#uses=1]
+       %tmp8 = getelementptr float* %y, i32 %i.0.reg2mem.0             ; <float*> [#uses=1]
+       %tmp9 = load float* %tmp8, align 4              ; <float> [#uses=1]
+       %tmp11 = mul float %tmp9, %tmp45                ; <float> [#uses=1]
+       %tmp14 = add float %tmp11, %res.0.reg2mem.0             ; <float> [#uses=2]
+       %tmp17 = add i32 %i.0.reg2mem.0, 1              ; <i32> [#uses=2]
+       %tmp21 = icmp ult i32 %tmp17, %c                ; <i1> [#uses=1]
+       br i1 %tmp21, label %bb18, label %bb23
+
+bb23:          ; preds = %bb18, %entry
+       %res.0.reg2mem.1 = phi float [ 0.000000e+00, %entry ], [ %tmp14, %bb18 ]                ; <float> [#uses=1]
+       ret float %res.0.reg2mem.1
+}