X86 Peephole: fold loads to the source register operand if possible.

author Manman Ren <mren@apple.com>

Thu, 2 Aug 2012 19:37:32 +0000 (19:37 +0000)

committer Manman Ren <mren@apple.com>

Thu, 2 Aug 2012 19:37:32 +0000 (19:37 +0000)
author Manman Ren <mren@apple.com>
Thu, 2 Aug 2012 19:37:32 +0000 (19:37 +0000)
committer Manman Ren <mren@apple.com>
Thu, 2 Aug 2012 19:37:32 +0000 (19:37 +0000)
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h

index cfb9dd7de2496c49cd040296e418a5722f2e7fc6..850eccc7f3c1c16a129f13b8dbf2fed5077009c6 100644 (file)
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -696,7 +696,11 @@ public:
  
    /// optimizeLoadInstr - Try to remove the load by folding it to a register
    /// operand at the use. We fold the load instructions if and only if the
-  /// def and use are in the same BB.
+  /// def and use are in the same BB. We only look at one load and see
+  /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register
+  /// defined by the load we are trying to fold. DefMI returns the machine
+  /// instruction that defines FoldAsLoadDefReg, and the function returns
+  /// the machine instruction generated due to folding.
    virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
                          const MachineRegisterInfo *MRI,
                          unsigned &FoldAsLoadDefReg,
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp

index d9474bf2400badbcc135a01c90313627de9526f5..6bc7e37e3d879a3752a0638bc9ece62ad8f402ac 100644 (file)
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -391,20 +391,21 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
  /// register defined has a single use.
  bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
                                         unsigned &FoldAsLoadDefReg) {
-  if (MI->canFoldAsLoad()) {
-    const MCInstrDesc &MCID = MI->getDesc();
-    if (MCID.getNumDefs() == 1) {
-      unsigned Reg = MI->getOperand(0).getReg();
-      // To reduce compilation time, we check MRI->hasOneUse when inserting
-      // loads. It should be checked when processing uses of the load, since
-      // uses can be removed during peephole.
-      if (!MI->getOperand(0).getSubReg() &&
-          TargetRegisterInfo::isVirtualRegister(Reg) &&
-          MRI->hasOneUse(Reg)) {
-        FoldAsLoadDefReg = Reg;
-        return true;
-      }
-    }
+  if (!MI->canFoldAsLoad() || !MI->mayLoad())
+    return false;
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MCID.getNumDefs() != 1)
+    return false;
+
+  unsigned Reg = MI->getOperand(0).getReg();
+  // To reduce compilation time, we check MRI->hasOneUse when inserting
+  // loads. It should be checked when processing uses of the load, since
+  // uses can be removed during peephole.
+  if (!MI->getOperand(0).getSubReg() &&
+      TargetRegisterInfo::isVirtualRegister(Reg) &&
+      MRI->hasOneUse(Reg)) {
+    FoldAsLoadDefReg = Reg;
+    return true;
    }
    return false;
  }
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h

index 9ed5210b22ff39c5dd70b0921ac828e8a5a1e89a..b6f69af037c2901c88d9bd89bcee833175f91992 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -389,7 +389,11 @@ public:
  
    /// optimizeLoadInstr - Try to remove the load by folding it to a register
    /// operand at the use. We fold the load instructions if and only if the
-  /// def and use are in the same BB.
+  /// def and use are in the same BB. We only look at one load and see
+  /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register
+  /// defined by the load we are trying to fold. DefMI returns the machine
+  /// instruction that defines FoldAsLoadDefReg, and the function returns
+  /// the machine instruction generated due to folding.
    virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
                          const MachineRegisterInfo *MRI,
                          unsigned &FoldAsLoadDefReg,
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll

index 5d3dbce1df911f30fecdad6ca7d302248ac12d50..3839e875615f63d2be4938222616bbf8c1d931fc 100644 (file)
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -137,13 +137,16 @@ define double @ole_inverse(double %x, double %y) nounwind {
  }
  
  ; CHECK:      ogt_x:
-; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
  ; CHECK-NEXT: ret
  ; UNSAFE:      ogt_x:
-; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
  ; UNSAFE-NEXT: ret
  ; FINITE:      ogt_x:
-; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
  ; FINITE-NEXT: ret
  define double @ogt_x(double %x) nounwind {
    %c = fcmp ogt double %x, 0.000000e+00
@@ -152,13 +155,16 @@ define double @ogt_x(double %x) nounwind {
  }
  
  ; CHECK:      olt_x:
-; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
  ; CHECK-NEXT: ret
  ; UNSAFE:      olt_x:
-; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
  ; UNSAFE-NEXT: ret
  ; FINITE:      olt_x:
-; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
  ; FINITE-NEXT: ret
  define double @olt_x(double %x) nounwind {
    %c = fcmp olt double %x, 0.000000e+00
@@ -211,10 +217,12 @@ define double @olt_inverse_x(double %x) nounwind {
  ; CHECK:      oge_x:
  ; CHECK:      ucomisd %xmm1, %xmm0
  ; UNSAFE:      oge_x:
-; UNSAFE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
  ; UNSAFE-NEXT: ret
  ; FINITE:      oge_x:
-; FINITE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
+; FINITE-NEXT: maxsd   %xmm1, %xmm0
  ; FINITE-NEXT: ret
  define double @oge_x(double %x) nounwind {
    %c = fcmp oge double %x, 0.000000e+00
@@ -225,10 +233,12 @@ define double @oge_x(double %x) nounwind {
  ; CHECK:      ole_x:
  ; CHECK:      ucomisd %xmm0, %xmm1
  ; UNSAFE:      ole_x:
-; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
  ; UNSAFE-NEXT: ret
  ; FINITE:      ole_x:
-; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
  ; FINITE-NEXT: ret
  define double @ole_x(double %x) nounwind {
    %c = fcmp ole double %x, 0.000000e+00
@@ -401,10 +411,12 @@ define double @ule_inverse(double %x, double %y) nounwind {
  ; CHECK:      ugt_x:
  ; CHECK:      ucomisd %xmm0, %xmm1
  ; UNSAFE:      ugt_x:
-; UNSAFE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
  ; UNSAFE-NEXT: ret
  ; FINITE:      ugt_x:
-; FINITE-NEXT: maxsd   LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
+; FINITE-NEXT: maxsd   %xmm1, %xmm0
  ; FINITE-NEXT: ret
  define double @ugt_x(double %x) nounwind {
    %c = fcmp ugt double %x, 0.000000e+00
@@ -415,10 +427,12 @@ define double @ugt_x(double %x) nounwind {
  ; CHECK:      ult_x:
  ; CHECK:      ucomisd %xmm1, %xmm0
  ; UNSAFE:      ult_x:
-; UNSAFE-NEXT: minsd   LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
+; UNSAFE-NEXT: minsd   %xmm1, %xmm0
  ; UNSAFE-NEXT: ret
  ; FINITE:      ult_x:
-; FINITE-NEXT: minsd   LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
+; FINITE-NEXT: minsd   %xmm1, %xmm0
  ; FINITE-NEXT: ret
  define double @ult_x(double %x) nounwind {
    %c = fcmp ult double %x, 0.000000e+00
@@ -468,10 +482,12 @@ define double @ult_inverse_x(double %x) nounwind {
  ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
  ; CHECK-NEXT: ret
  ; UNSAFE:      uge_x:
-; UNSAFE-NEXT: maxsd  LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd  %xmm1, %xmm0
  ; UNSAFE-NEXT: ret
  ; FINITE:      uge_x:
-; FINITE-NEXT: maxsd  LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
+; FINITE-NEXT: maxsd  %xmm1, %xmm0
  ; FINITE-NEXT: ret
  define double @uge_x(double %x) nounwind {
    %c = fcmp uge double %x, 0.000000e+00
@@ -485,10 +501,12 @@ define double @uge_x(double %x) nounwind {
  ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
  ; CHECK-NEXT: ret
  ; UNSAFE:      ule_x:
-; UNSAFE-NEXT: minsd  LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
+; UNSAFE-NEXT: minsd  %xmm1, %xmm0
  ; UNSAFE-NEXT: ret
  ; FINITE:      ule_x:
-; FINITE-NEXT: minsd  LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
+; FINITE-NEXT: minsd  %xmm1, %xmm0
  ; FINITE-NEXT: ret
  define double @ule_x(double %x) nounwind {
    %c = fcmp ule double %x, 0.000000e+00
@@ -497,7 +515,8 @@ define double @ule_x(double %x) nounwind {
  }
  
  ; CHECK:      uge_inverse_x:
-; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
  ; CHECK-NEXT: ret
  ; UNSAFE:      uge_inverse_x:
  ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
@@ -516,7 +535,8 @@ define double @uge_inverse_x(double %x) nounwind {
  }
  
  ; CHECK:      ule_inverse_x:
-; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
  ; CHECK-NEXT: ret
  ; UNSAFE:      ule_inverse_x:
  ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll

index 1e04f19ee892a6d4a91bb24a6c4b5380700a6e38..367dd27f3076999169187967039cf6c1ad5235c8 100644 (file)
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -14,8 +14,8 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
  define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
  ; CHECK: test2:
  ; CHECK: pcmp
-; CHECK: pxor LCP
-; CHECK: movdqa
+; CHECK: pcmp
+; CHECK: pxor
  ; CHECK: ret
         %C = icmp sge <4 x i32> %A, %B
          %D = sext <4 x i1> %C to <4 x i32>
author	Manman Ren <mren@apple.com>
	Thu, 2 Aug 2012 19:37:32 +0000 (19:37 +0000)
committer	Manman Ren <mren@apple.com>
	Thu, 2 Aug 2012 19:37:32 +0000 (19:37 +0000)
include/llvm/Target/TargetInstrInfo.h		patch \| blob \| history
lib/CodeGen/PeepholeOptimizer.cpp		patch \| blob \| history
lib/Target/X86/X86InstrInfo.h		patch \| blob \| history
test/CodeGen/X86/sse-minmax.ll		patch \| blob \| history
test/CodeGen/X86/vec_compare.ll		patch \| blob \| history