From 7a1b190bcd92cc0b0a1e089f981372cf236afe15 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Thu, 30 Apr 2015 22:11:59 +0000 Subject: [PATCH] [X86] Use 4 byte preferred aggregate alignment on Win32 This helps reduce the frequency of stack realignment prologues in 32-bit X86 Windows code. Before this change and the corresponding clang change, we would take the max of the type preferred alignment and the explicit alignment on the alloca. If you don't override aggregate alignment in datalayout, you get a default of 8. This dates back to 2007 / r34356, and changing it seems prohibitively difficult at this point. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236270 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86TargetMachine.cpp | 2 +- test/CodeGen/X86/inalloca-invoke.ll | 5 +++-- test/CodeGen/X86/inalloca-stdcall.ll | 5 ++--- test/CodeGen/X86/inalloca.ll | 15 ++++++--------- test/CodeGen/X86/win32_sret.ll | 24 ++++++++++++------------ 5 files changed, 24 insertions(+), 27 deletions(-) diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 43d389549f2..919072aa913 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -81,7 +81,7 @@ static std::string computeDataLayout(const Triple &TT) { // The stack is aligned to 32 bits on some ABIs and 128 bits on others. if (!TT.isArch64Bit() && TT.isOSWindows()) - Ret += "-S32"; + Ret += "-a:0:32-S32"; else Ret += "-S128"; diff --git a/test/CodeGen/X86/inalloca-invoke.ll b/test/CodeGen/X86/inalloca-invoke.ll index 93bd910ab51..d6fc76ee50b 100644 --- a/test/CodeGen/X86/inalloca-invoke.ll +++ b/test/CodeGen/X86/inalloca-invoke.ll @@ -21,8 +21,9 @@ blah: %end = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 1 ; CHECK: calll __chkstk -; CHECK: movl %[[beg:[^,]*]], %esp -; CHECK: leal 12(%[[beg]]), %[[end:[^ ]*]] +; CHECK: movl %esp, %[[beg:[^ ]*]] +; CHECK: movl %esp, %[[end:[^ ]*]] +; CHECK: addl $12, %[[end]] call void @begin(%Iter* sret %temp.lvalue) ; CHECK: calll _begin diff --git a/test/CodeGen/X86/inalloca-stdcall.ll b/test/CodeGen/X86/inalloca-stdcall.ll index fe99f47353a..e5f6ea70e9c 100644 --- a/test/CodeGen/X86/inalloca-stdcall.ll +++ b/test/CodeGen/X86/inalloca-stdcall.ll @@ -10,13 +10,12 @@ define void @g() { %b = alloca inalloca %Foo ; CHECK: movl $8, %eax ; CHECK: calll __chkstk -; CHECK: movl %[[REG:[^,]*]], %esp %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, (%[[REG]]) -; CHECK: movl $42, 4(%[[REG]]) +; CHECK: movl $13, (%esp) +; CHECK: movl $42, 4(%esp) call x86_stdcallcc void @f(%Foo* inalloca %b) ; CHECK: calll _f@8 ; CHECK-NOT: %esp diff --git a/test/CodeGen/X86/inalloca.ll b/test/CodeGen/X86/inalloca.ll index 49ee6a23f4d..904366219ab 100644 --- a/test/CodeGen/X86/inalloca.ll +++ b/test/CodeGen/X86/inalloca.ll @@ -10,13 +10,12 @@ entry: %b = alloca inalloca %Foo ; CHECK: movl $8, %eax ; CHECK: calll __chkstk -; CHECK: movl %[[REG:[^,]*]], %esp %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, (%[[REG]]) -; CHECK: movl $42, 4(%[[REG]]) +; CHECK: movl $13, (%esp) +; CHECK: movl $42, 4(%esp) call void @f(%Foo* inalloca %b) ; CHECK: calll _f ret void @@ -30,13 +29,12 @@ entry: %b = alloca inalloca %Foo ; CHECK: movl $8, %eax ; CHECK: calll __chkstk -; CHECK: movl %[[REG:[^,]*]], %esp %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK: movl $13, (%[[REG]]) -; CHECK: movl $42, 4(%[[REG]]) +; CHECK: movl $13, (%esp) +; CHECK: movl $42, 4(%esp) call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b) ; CHECK: movl $1, %eax ; CHECK: calll _inreg_with_inalloca @@ -51,13 +49,12 @@ entry: %b = alloca inalloca %Foo ; CHECK: movl $8, %eax ; CHECK: calll __chkstk -; CHECK: movl %[[REG:[^,]*]], %esp %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0 %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1 store i32 13, i32* %f1 store i32 42, i32* %f2 -; CHECK-DAG: movl $13, (%[[REG]]) -; CHECK-DAG: movl $42, 4(%[[REG]]) +; CHECK-DAG: movl $13, (%esp) +; CHECK-DAG: movl $42, 4(%esp) call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b) ; CHECK-DAG: xorl %ecx, %ecx ; CHECK: calll _thiscall_with_inalloca diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll index ca01d3b4331..b38273ad959 100644 --- a/test/CodeGen/X86/win32_sret.ll +++ b/test/CodeGen/X86/win32_sret.ll @@ -155,25 +155,25 @@ define void @test6_f(%struct.test6* %x) nounwind { ; LINUX-LABEL: test6_f: ; The %x argument is moved to %ecx. It will be the this pointer. -; WIN32: movl 8(%ebp), %ecx +; WIN32: movl 20(%esp), %ecx ; The %x argument is moved to (%esp). It will be the this pointer. With -O0 ; we copy esp to ecx and use (ecx) instead of (esp). -; MINGW_X86: movl 8(%ebp), %eax +; MINGW_X86: movl 20(%esp), %eax ; MINGW_X86: movl %eax, (%e{{([a-d]x)|(sp)}}) -; CYGWIN: movl 8(%ebp), %eax +; CYGWIN: movl 20(%esp), %eax ; CYGWIN: movl %eax, (%e{{([a-d]x)|(sp)}}) ; The sret pointer is (%esp) -; WIN32: leal 8(%esp), %[[REG:e[a-d]x]] +; WIN32: leal 4(%esp), %[[REG:e[a-d]x]] ; WIN32-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}}) ; The sret pointer is %ecx -; MINGW_X86-NEXT: leal 8(%esp), %ecx +; MINGW_X86-NEXT: leal 4(%esp), %ecx ; MINGW_X86-NEXT: calll _test6_g -; CYGWIN-NEXT: leal 8(%esp), %ecx +; CYGWIN-NEXT: leal 4(%esp), %ecx ; CYGWIN-NEXT: calll _test6_g %tmp = alloca %struct.test6, align 4 @@ -191,16 +191,16 @@ define void @test7_f(%struct.test7* %x) nounwind { ; LINUX-LABEL: test7_f: ; The %x argument is moved to %ecx on all OSs. It will be the this pointer. -; WIN32: movl 8(%ebp), %ecx -; MINGW_X86: movl 8(%ebp), %ecx -; CYGWIN: movl 8(%ebp), %ecx +; WIN32: movl 20(%esp), %ecx +; MINGW_X86: movl 20(%esp), %ecx +; CYGWIN: movl 20(%esp), %ecx ; The sret pointer is (%esp) -; WIN32: leal 8(%esp), %[[REG:e[a-d]x]] +; WIN32: leal 4(%esp), %[[REG:e[a-d]x]] ; WIN32-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}}) -; MINGW_X86: leal 8(%esp), %[[REG:e[a-d]x]] +; MINGW_X86: leal 4(%esp), %[[REG:e[a-d]x]] ; MINGW_X86-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}}) -; CYGWIN: leal 8(%esp), %[[REG:e[a-d]x]] +; CYGWIN: leal 4(%esp), %[[REG:e[a-d]x]] ; CYGWIN-NEXT: movl %[[REG]], (%e{{([a-d]x)|(sp)}}) %tmp = alloca %struct.test7, align 4 -- 2.34.1