From 127eea87d666ccc9fe7025f41148c33af0f8c84b Mon Sep 17 00:00:00 2001
From: Manman Ren
Date: Thu, 2 Aug 2012 19:37:32 +0000
Subject: [PATCH] X86 Peephole: fold loads to the source register operand if possible.

Add more comments and use early returns to reduce nesting in isLoadFoldable.
Also disable folding for V_SET0 to avoid introducing a const pool entry and
a const pool load.

rdar://10554090 and rdar://11873276

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161207 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetInstrInfo.h |  6 ++-
 lib/CodeGen/PeepholeOptimizer.cpp     | 29 ++++++-------
 lib/Target/X86/X86InstrInfo.h         |  6 ++-
 test/CodeGen/X86/sse-minmax.ll        | 60 ++++++++++++++++++---------
 test/CodeGen/X86/vec_compare.ll       |  4 +-
 5 files changed, 67 insertions(+), 38 deletions(-)

diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index cfb9dd7de24..850eccc7f3c 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -696,7 +696,11 @@ public:
 
   /// optimizeLoadInstr - Try to remove the load by folding it to a register
   /// operand at the use. We fold the load instructions if and only if the
-  /// def and use are in the same BB.
+  /// def and use are in the same BB. We only look at one load and see
+  /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register
+  /// defined by the load we are trying to fold. DefMI returns the machine
+  /// instruction that defines FoldAsLoadDefReg, and the function returns
+  /// the machine instruction generated due to folding.
   virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
                                           const MachineRegisterInfo *MRI,
                                           unsigned &FoldAsLoadDefReg,
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index d9474bf2400..6bc7e37e3d8 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -391,20 +391,21 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
 /// register defined has a single use.
 bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
                                        unsigned &FoldAsLoadDefReg) {
-  if (MI->canFoldAsLoad()) {
-    const MCInstrDesc &MCID = MI->getDesc();
-    if (MCID.getNumDefs() == 1) {
-      unsigned Reg = MI->getOperand(0).getReg();
-      // To reduce compilation time, we check MRI->hasOneUse when inserting
-      // loads. It should be checked when processing uses of the load, since
-      // uses can be removed during peephole.
-      if (!MI->getOperand(0).getSubReg() &&
-          TargetRegisterInfo::isVirtualRegister(Reg) &&
-          MRI->hasOneUse(Reg)) {
-        FoldAsLoadDefReg = Reg;
-        return true;
-      }
-    }
+  if (!MI->canFoldAsLoad() || !MI->mayLoad())
+    return false;
+  const MCInstrDesc &MCID = MI->getDesc();
+  if (MCID.getNumDefs() != 1)
+    return false;
+
+  unsigned Reg = MI->getOperand(0).getReg();
+  // To reduce compilation time, we check MRI->hasOneUse when inserting
+  // loads. It should be checked when processing uses of the load, since
+  // uses can be removed during peephole.
+  if (!MI->getOperand(0).getSubReg() &&
+      TargetRegisterInfo::isVirtualRegister(Reg) &&
+      MRI->hasOneUse(Reg)) {
+    FoldAsLoadDefReg = Reg;
+    return true;
   }
   return false;
 }
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 9ed5210b22f..b6f69af037c 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -389,7 +389,11 @@ public:
 
   /// optimizeLoadInstr - Try to remove the load by folding it to a register
   /// operand at the use. We fold the load instructions if and only if the
-  /// def and use are in the same BB.
+  /// def and use are in the same BB. We only look at one load and see
+  /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register
+  /// defined by the load we are trying to fold. DefMI returns the machine
+  /// instruction that defines FoldAsLoadDefReg, and the function returns
+  /// the machine instruction generated due to folding.
   virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI,
                                           const MachineRegisterInfo *MRI,
                                           unsigned &FoldAsLoadDefReg,
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index 5d3dbce1df9..3839e875615 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -137,13 +137,16 @@ define double @ole_inverse(double %x, double %y) nounwind {
 }
 
 ; CHECK: ogt_x:
-; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: ogt_x:
-; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ogt_x:
-; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ogt_x(double %x) nounwind {
   %c = fcmp ogt double %x, 0.000000e+00
@@ -152,13 +155,16 @@ define double @ogt_x(double %x) nounwind {
 }
 
 ; CHECK: olt_x:
-; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: olt_x:
-; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: olt_x:
-; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @olt_x(double %x) nounwind {
   %c = fcmp olt double %x, 0.000000e+00
@@ -211,10 +217,12 @@ define double @olt_inverse_x(double %x) nounwind {
 ; CHECK: oge_x:
 ; CHECK: ucomisd %xmm1, %xmm0
 ; UNSAFE: oge_x:
-; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: oge_x:
-; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @oge_x(double %x) nounwind {
   %c = fcmp oge double %x, 0.000000e+00
@@ -225,10 +233,12 @@ define double @oge_x(double %x) nounwind {
 ; CHECK: ole_x:
 ; CHECK: ucomisd %xmm0, %xmm1
 ; UNSAFE: ole_x:
-; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ole_x:
-; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ole_x(double %x) nounwind {
   %c = fcmp ole double %x, 0.000000e+00
@@ -401,10 +411,12 @@ define double @ule_inverse(double %x, double %y) nounwind {
 ; CHECK: ugt_x:
 ; CHECK: ucomisd %xmm0, %xmm1
 ; UNSAFE: ugt_x:
-; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ugt_x:
-; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ugt_x(double %x) nounwind {
   %c = fcmp ugt double %x, 0.000000e+00
@@ -415,10 +427,12 @@ define double @ugt_x(double %x) nounwind {
 ; CHECK: ult_x:
 ; CHECK: ucomisd %xmm1, %xmm0
 ; UNSAFE: ult_x:
-; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ult_x:
-; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ult_x(double %x) nounwind {
   %c = fcmp ult double %x, 0.000000e+00
@@ -468,10 +482,12 @@ define double @ult_inverse_x(double %x) nounwind {
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: uge_x:
-; UNSAFE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: uge_x:
-; FINITE-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: maxsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @uge_x(double %x) nounwind {
   %c = fcmp uge double %x, 0.000000e+00
@@ -485,10 +501,12 @@ define double @uge_x(double %x) nounwind {
 ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: ule_x:
-; UNSAFE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE: ule_x:
-; FINITE-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; FINITE-NEXT: minsd %xmm1, %xmm0
 ; FINITE-NEXT: ret
 define double @ule_x(double %x) nounwind {
   %c = fcmp ule double %x, 0.000000e+00
@@ -497,7 +515,8 @@ define double @ule_x(double %x) nounwind {
 }
 
 ; CHECK: uge_inverse_x:
-; CHECK-NEXT: minsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: uge_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
@@ -516,7 +535,8 @@ define double @uge_inverse_x(double %x) nounwind {
 }
 
 ; CHECK: ule_inverse_x:
-; CHECK-NEXT: maxsd LCP{{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
 ; CHECK-NEXT: ret
 ; UNSAFE: ule_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index 1e04f19ee89..367dd27f307 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -14,8 +14,8 @@ define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK: test2:
 ; CHECK: pcmp
-; CHECK: pxor LCP
-; CHECK: movdqa
+; CHECK: pcmp
+; CHECK: pxor
 ; CHECK: ret
   %C = icmp sge <4 x i32> %A, %B
   %D = sext <4 x i1> %C to <4 x i32>
-- 
2.34.1
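For context (not part of the patch): below is a condensed sketch of how the per-block loop in PeepholeOptimizer::runOnMachineFunction is expected to drive the two hooks touched here, based on the contract spelled out in the optimizeLoadInstr comment. The surrounding details are assumptions for illustration only: the LocalMIs set, the NumLoadFold statistic, and the exact iteration/reset logic are illustrative, not a verbatim excerpt of the tree at this revision.

// Sketch (assumed shape): isLoadFoldable records a candidate load whose
// single-use vreg may be foldable; optimizeLoadInstr later tries to fold it
// into a use in the same block and reports the defining load via DefMI.
unsigned FoldAsLoadDefReg = 0;          // reset when entering each basic block
for (MachineBasicBlock::iterator MII = MBB->begin(), MIE = MBB->end();
     MII != MIE; ) {
  MachineInstr *MI = &*MII;
  ++MII;                                // advance first: MI may be erased below

  // Remember a load whose single-use virtual register might be foldable
  // into a later instruction in the same block.
  if (isLoadFoldable(MI, FoldAsLoadDefReg))
    continue;

  // If a candidate is pending, ask the target to fold it into MI.  On
  // success the hook returns the newly created (folded) instruction and
  // reports the defining load through DefMI so both originals can be erased.
  if (FoldAsLoadDefReg) {
    MachineInstr *DefMI = 0;
    MachineInstr *FoldMI =
        TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefReg, DefMI);
    if (FoldMI) {
      LocalMIs.erase(MI);               // bookkeeping; illustrative names
      LocalMIs.erase(DefMI);
      LocalMIs.insert(FoldMI);
      MI->eraseFromParent();
      DefMI->eraseFromParent();
      FoldAsLoadDefReg = 0;             // the candidate has been consumed
      ++NumLoadFold;
    }
  }
}

The sse-minmax.ll changes above reflect the other half of the commit message: with folding disabled for V_SET0, the zero operand is materialized with xorps/xorpd instead of being turned into a constant-pool load, so maxsd/minsd now take a register operand rather than an LCP memory operand.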