From e31d7f83c5edcc9f797bc7c9216a8c7a27249b8d Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Thu, 23 Jul 2015 12:23:45 +0000 Subject: [PATCH] [X86] Allow load folding into PUSH instructions Adds pushes to the folding tables. This also required a fix to the TD definition, since the memory forms of the push instructions did not have the right mayLoad/mayStore flags. Differential Revision: http://reviews.llvm.org/D11340 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@243010 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 5 +++++ lib/Target/X86/X86InstrInfo.cpp | 15 +++++++++---- lib/Target/X86/X86InstrInfo.td | 16 ++++++++----- test/CodeGen/X86/fold-push.ll | 40 +++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 9 deletions(-) create mode 100644 test/CodeGen/X86/fold-push.ll diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 852267400bb..7b0d4a1e8bb 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -181,6 +181,11 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw", def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; +// TODO: This feature ought to be renamed. +// What it really refers to are CPUs where instructions that cause MSROM +// lookups are expensive, so alternative sequences should be preferred. +// The best examples of this are the memory forms of CALL and PUSH +// instructions, which should be avoided in favor of a MOV + register CALL/PUSH. 
def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", "CallRegIndirect", "true", "Call register indirect">; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 3fdbac79814..65df840a935 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -332,6 +332,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD }, { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE }, { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE }, + { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD }, + { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD }, + { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD }, { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, @@ -4878,10 +4881,14 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( bool isCallRegIndirect = Subtarget.callRegIndirect(); bool isTwoAddrFold = false; - // For CPUs that favor the register form of a call, - // do not fold loads into calls. - if (isCallRegIndirect && - (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) + // For CPUs that favor the register form of a call or push, + // do not fold loads into calls or pushes, unless optimizing for size + // aggressively. 
+ if (isCallRegIndirect && + !MF.getFunction()->hasFnAttribute(Attribute::MinSize) && + (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r || + MI->getOpcode() == X86::PUSH16r || MI->getOpcode() == X86::PUSH32r || + MI->getOpcode() == X86::PUSH64r)) return nullptr; unsigned NumOps = MI->getDesc().getNumOperands(); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 3d65c464d28..3fb44172c64 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1022,12 +1022,8 @@ def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[], IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>; def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[], IIC_PUSH_REG>, OpSize16; -def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[], - IIC_PUSH_MEM>, OpSize16; def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[], IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>; -def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[], - IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>; def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm), "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16; @@ -1041,6 +1037,14 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[Not64BitMode]>; } // mayStore, SchedRW + +let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in { +def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[], + IIC_PUSH_MEM>, OpSize16; +def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[], + IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>; +} // mayLoad, mayStore, SchedRW + } let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0, @@ -1073,9 +1077,11 @@ def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; def PUSH64rmr: 
I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; +} // mayStore, SchedRW +let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in { def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], IIC_PUSH_MEM>, OpSize32, Requires<[In64BitMode]>; -} // mayStore, SchedRW +} // mayLoad, mayStore, SchedRW } let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1, diff --git a/test/CodeGen/X86/fold-push.ll b/test/CodeGen/X86/fold-push.ll new file mode 100644 index 00000000000..f5f33321c13 --- /dev/null +++ b/test/CodeGen/X86/fold-push.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL +; RUN: llc < %s -mtriple=i686-windows -mattr=call-reg-indirect | FileCheck %s -check-prefix=CHECK -check-prefix=SLM + +declare void @foo(i32 %r) + +define void @test(i32 %a, i32 %b) optsize { +; CHECK-LABEL: test: +; CHECK: movl [[EAX:%e..]], (%esp) +; CHECK-NEXT: pushl [[EAX]] +; CHECK-NEXT: calll +; CHECK-NEXT: addl $4, %esp +; CHECK: nop +; NORMAL: pushl (%esp) +; SLM: movl (%esp), [[RELOAD:%e..]] +; SLM-NEXT: pushl [[RELOAD]] +; CHECK: calll +; CHECK-NEXT: addl $4, %esp + %c = add i32 %a, %b + call void @foo(i32 %c) + call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"() + call void @foo(i32 %c) + ret void +} + +define void @test_min(i32 %a, i32 %b) minsize { +; CHECK-LABEL: test_min: +; CHECK: movl [[EAX:%e..]], (%esp) +; CHECK-NEXT: pushl [[EAX]] +; CHECK-NEXT: calll +; CHECK-NEXT: addl $4, %esp +; CHECK: nop +; CHECK: pushl (%esp) +; CHECK: calll +; CHECK-NEXT: addl $4, %esp + %c = add i32 %a, %b + call void @foo(i32 %c) + call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"() + call void @foo(i32 %c) + ret void +} -- 2.34.1