From 426921ffc7c9c31a5ffe6e767d867d5e414dd898 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein
Date: Wed, 12 Aug 2015 10:14:58 +0000
Subject: [PATCH] [X86] Allow x86 call frame optimization to fold more loads
 into pushes

This abstracts away the test for "when can we fold across a
MachineInstruction" into the MI interface, and changes call-frame
optimization to use the same test the peephole optimizer uses.

Differential Revision: http://reviews.llvm.org/D11945

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244729 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/MachineInstr.h         |  3 +++
 lib/CodeGen/MachineInstr.cpp                |  4 ++++
 lib/CodeGen/PeepholeOptimizer.cpp           |  6 +++---
 lib/Target/X86/X86CallFrameOptimization.cpp |  9 +++-----
 test/CodeGen/X86/movtopush.ll               | 23 +++++++++++++++++++++
 5 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 1a4d750a941..8c0b567c708 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -1100,6 +1100,9 @@ public:
   ///
   bool hasUnmodeledSideEffects() const;
 
+  /// Returns true if it is illegal to fold a load across this instruction.
+  bool isLoadFoldBarrier() const;
+
   /// Return true if all the defs of this instruction are dead.
   bool allDefsAreDead() const;
 
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index c3d58426277..e072ee1d6d5 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1503,6 +1503,10 @@ bool MachineInstr::hasUnmodeledSideEffects() const {
   return false;
 }
 
+bool MachineInstr::isLoadFoldBarrier() const {
+  return mayStore() || isCall() || hasUnmodeledSideEffects();
+}
+
 /// allDefsAreDead - Return true if all the defs of this instruction are dead.
 ///
 bool MachineInstr::allDefsAreDead() const {
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 3dc73bac5b8..4d593543806 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1234,9 +1234,9 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
       if (MI->isDebugValue())
           continue;
 
-      // If there exists an instruction which belongs to the following
-      // categories, we will discard the load candidates.
-      if (MI->mayStore() || MI->isCall() || MI->hasUnmodeledSideEffects())
+      // If we run into an instruction we can't fold across, discard
+      // the load candidates.
+      if (MI->isLoadFoldBarrier())
         FoldAsLoadDefCandidates.clear();
 
       if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 4fdb89631a9..dd33c2e54b8 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -528,13 +528,10 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
       DefMI->getParent() != FrameSetup->getParent())
     return nullptr;
 
-  // Now, make sure everything else up until the ADJCALLSTACK is a sequence
-  // of MOVs. To be less conservative would require duplicating a lot of the
-  // logic from PeepholeOptimizer.
-  // FIXME: A possibly better approach would be to teach the PeepholeOptimizer
-  // to be smarter about folding into pushes.
+  // Make sure we don't have any instructions between DefMI and the
+  // push that make folding the load illegal.
   for (auto I = DefMI; I != FrameSetup; ++I)
-    if (I->getOpcode() != X86::MOV32rm)
+    if (I->isLoadFoldBarrier())
       return nullptr;
 
   return DefMI;
diff --git a/test/CodeGen/X86/movtopush.ll b/test/CodeGen/X86/movtopush.ll
index b02f9ec45e7..cd7ec1b165d 100644
--- a/test/CodeGen/X86/movtopush.ll
+++ b/test/CodeGen/X86/movtopush.ll
@@ -357,3 +357,26 @@ entry:
   call void @good(i32 9, i32 10, i32 11, i32 12)
   ret void
 }
+
+; Make sure the add does not prevent folding loads into pushes.
+; val1 and val2 will not be folded into pushes since they have
+; an additional use, but val3 should be.
+; NORMAL-LABEL: test13:
+; NORMAL: movl ([[P1:%e..]]), [[V1:%e..]]
+; NORMAL-NEXT: movl ([[P2:%e..]]), [[V2:%e..]]
+; NORMAL-NEXT: leal ([[V1]],[[V2]]), [[ADD:%e..]]
+; NORMAL-NEXT: pushl [[ADD]]
+; NORMAL-NEXT: pushl ([[P3:%e..]])
+; NORMAL-NEXT: pushl [[V2]]
+; NORMAL-NEXT: pushl [[V1]]
+; NORMAL-NEXT: calll _good
+; NORMAL: movl [[P3]], %eax
+define i32* @test13(i32* inreg %ptr1, i32* inreg %ptr2, i32* inreg %ptr3) optsize {
+entry:
+  %val1 = load i32, i32* %ptr1
+  %val2 = load i32, i32* %ptr2
+  %val3 = load i32, i32* %ptr3
+  %add = add i32 %val1, %val2
+  call void @good(i32 %val1, i32 %val2, i32 %val3, i32 %add)
+  ret i32* %ptr3
+}
-- 
2.34.1
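For context (not part of the patch): a minimal sketch of how other CodeGen
code could use the MachineInstr::isLoadFoldBarrier() query this patch
introduces, mirroring the rewritten loop in canFoldIntoRegPush. The helper
name canFoldLoadAcross is hypothetical, not an existing LLVM API, and the
iterators are assumed to point into the same basic block.

// Sketch only, assuming DefMI (a load) and UseMI live in one basic block.
// Any instruction between them that may store, is a call, or has unmodeled
// side effects is a barrier to folding the load into its user -- exactly
// the test isLoadFoldBarrier() encapsulates.
#include <iterator>
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

static bool canFoldLoadAcross(llvm::MachineBasicBlock::iterator DefMI,
                              llvm::MachineBasicBlock::iterator UseMI) {
  // Scan every instruction strictly between the load and its use.
  for (auto I = std::next(DefMI); I != UseMI; ++I)
    if (I->isLoadFoldBarrier())
      return false;
  return true;
}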