From: Quentin Colombet <qcolombet@apple.com>
Date: Wed, 1 Jul 2015 23:12:13 +0000 (+0000)
Subject: [TwoAddressInstructionPass] Try 3 Addr Conversion After Commuting.
X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=a1a323c6376719271f7f1506bd38062189328ef6

[TwoAddressInstructionPass] Try 3 Addr Conversion After Commuting.

TwoAddressInstructionPass stops after successfully commuting an instruction,
but a subsequent 3 Addr conversion can produce better code in some cases.

Consider:

int foo(int a, int b) {
  return a + b;
}

Before this commit, we emit:

addl	%esi, %edi
movl	%edi, %eax
ret

After this commit, we try 3 Addr conversion:

leal	(%rsi,%rdi), %eax
ret

Patch by Volkan Keles <vkeles@apple.com>!

Differential Revision: http://reviews.llvm.org/D10851


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241206 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 6bceccca778..e84bea63995 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1207,12 +1207,24 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
     }
   }
 
+  // If the instruction is convertible to 3 Addr, do not return right
+  // after commuting: aggressively try the 3 Addr transformation as well,
+  // and use this variable to check later, because it might be better.
+  // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret`
+  // instead of the following code.
+  //   addl	%esi, %edi
+  //   movl	%edi, %eax
+  //   ret
+  bool commuted = false;
+
   // If it's profitable to commute, try to do so.
   if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) {
+    commuted = true;
     ++NumCommuted;
     if (AggressiveCommute)
       ++NumAggrCommuted;
-    return false;
+    if (!MI.isConvertibleTo3Addr())
+      return false;
   }
 
   if (shouldOnlyCommute)
@@ -1220,7 +1232,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
 
   // If there is one more use of regB later in the same MBB, consider
   // re-schedule this MI below it.
-  if (EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
+  if (!commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
     ++NumReSchedDowns;
     return true;
   }
@@ -1237,6 +1249,10 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
     }
   }
 
+  // Return if the instruction was commuted but 3 addr conversion failed.
+  if (commuted)
+    return false;
+
   // If there is one more use of regB later in the same MBB, consider
   // re-schedule it before this MI if it's legal.
   if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) {
diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll
index 656c385e2bc..5b01e2f4e90 100644
--- a/test/CodeGen/X86/commute-two-addr.ll
+++ b/test/CodeGen/X86/commute-two-addr.ll
@@ -39,7 +39,7 @@ define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8
 entry:
 ; DARWIN-LABEL: t3:
 ; DARWIN: shlq $32, %rcx
-; DARWIN-NEXT: orq %rcx, %rax
+; DARWIN-NEXT: leaq (%rax,%rcx), %rax
 ; DARWIN-NEXT: shll $8
 ; DARWIN-NOT: leaq
   %tmp21 = zext i32 %lb to i64
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index b5ca0275d8d..5779cf33ac8 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -25,8 +25,7 @@ define i32 @test2(i32 inreg %a, i32 inreg %b, i32 %c, i32 %d) nounwind {
 entry:
 ; CHECK-LABEL: test2:
 ; CHECK: leal
-; CHECK-NOT: leal
-; CHECK-NOT: mov
+; CHECK-NEXT: addl
 ; CHECK-NEXT: addl
 ; CHECK-NEXT: ret
  %add = add i32 %b, %a
diff --git a/test/CodeGen/X86/win64_params.ll b/test/CodeGen/X86/win64_params.ll
index 9718c86300c..a0b552d4d58 100644
--- a/test/CodeGen/X86/win64_params.ll
+++ b/test/CodeGen/X86/win64_params.ll
@@ -7,8 +7,7 @@ define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind re
 entry:
 ; CHECK: movl    48(%rsp), %eax
 ; CHECK: addl    40(%rsp), %eax
-; LINUX: addl    %r9d, %r8d
-; LINUX: movl    %r8d, %eax
+; LINUX: leal    (%r8,%r9), %eax
   %add = add nsw i32 %p6, %p5
   ret i32 %add
 }
@@ -27,10 +26,8 @@ entry:
 ; on other platforms here (note the x86_64_sysvcc calling convention).
 define x86_64_sysvcc i32 @f8(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
 entry:
-; CHECK: addl    %r9d, %r8d
-; CHECK: movl    %r8d, %eax
-; LINUX: addl    %r9d, %r8d
-; LINUX: movl    %r8d, %eax
+; CHECK: leal    (%r8,%r9), %eax
+; LINUX: leal    (%r8,%r9), %eax
   %add = add nsw i32 %p6, %p5
   ret i32 %add
 }
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
index 7925bf01020..24be0dc42d6 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
@@ -23,7 +23,7 @@
 ; X32: add
 ; X32: add
 ; X32: add
-; X32: add
+; X32: leal
 ; X32: %for.body.3
 define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
 entry: