[AVX512] Enabling bit logic lowering

[oota-llvm.git] / test / CodeGen / X86 / win32_sret.ll
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll

index f1dd99543bdc948b49aa6f0c3c17e6b665ce22b4..8728712cece49b7e785770b0b34f49f554bbfd24 100644 (file)
--- a/test/CodeGen/X86/win32_sret.ll
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -1,9 +1,13 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
-; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
-; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
-; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
-; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
-; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+; We specify -mcpu explicitly to avoid instruction reordering that happens on
+; some setups (e.g., Atom) from affecting the output.
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
+; RUN: llc < %s -mcpu=core2 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
+; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
+; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
  
  ; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer
  ; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer
@@ -11,16 +15,19 @@
  
  define void @sret1(i8* sret %x) nounwind {
  entry:
-; WIN32:      sret1
+; WIN32-LABEL:      _sret1:
  ; WIN32:      movb $42, (%eax)
  ; WIN32-NOT:  popl %eax
-; WIN32:    {{ret$}}
+; WIN32:    {{retl$}}
  
-; MINGW_X86:  sret1
-; MINGW_X86:  ret $4
+; MINGW_X86-LABEL:  _sret1:
+; MINGW_X86:  {{retl$}}
  
-; LINUX:      sret1
-; LINUX:      ret $4
+; CYGWIN-LABEL:     _sret1:
+; CYGWIN:     retl $4
+
+; LINUX-LABEL:      sret1:
+; LINUX:      retl $4
  
    store i8 42, i8* %x, align 4
    ret void
@@ -28,16 +35,19 @@ entry:
  
  define void @sret2(i8* sret %x, i8 %y) nounwind {
  entry:
-; WIN32:      sret2
+; WIN32-LABEL:      _sret2:
  ; WIN32:      movb {{.*}}, (%eax)
  ; WIN32-NOT:  popl %eax
-; WIN32:    {{ret$}}
+; WIN32:    {{retl$}}
+
+; MINGW_X86-LABEL:  _sret2:
+; MINGW_X86:  {{retl$}}
  
-; MINGW_X86:  sret2
-; MINGW_X86:  ret $4
+; CYGWIN-LABEL:     _sret2:
+; CYGWIN:     retl $4
  
-; LINUX:      sret2
-; LINUX:      ret $4
+; LINUX-LABEL:      sret2:
+; LINUX:      retl $4
  
    store i8 %y, i8* %x
    ret void
@@ -45,17 +55,20 @@ entry:
  
  define void @sret3(i8* sret %x, i8* %y) nounwind {
  entry:
-; WIN32:      sret3
+; WIN32-LABEL:      _sret3:
  ; WIN32:      movb $42, (%eax)
  ; WIN32-NOT:  movb $13, (%eax)
  ; WIN32-NOT:  popl %eax
-; WIN32:    {{ret$}}
+; WIN32:    {{retl$}}
  
-; MINGW_X86:  sret3
-; MINGW_X86:  ret $4
+; MINGW_X86-LABEL:  _sret3:
+; MINGW_X86:  {{retl$}}
  
-; LINUX:      sret3
-; LINUX:      ret $4
+; CYGWIN-LABEL:     _sret3:
+; CYGWIN:     retl $4
+
+; LINUX-LABEL:      sret3:
+; LINUX:      retl $4
  
    store i8 42, i8* %x
    store i8 13, i8* %y
@@ -67,16 +80,19 @@ entry:
  
  define void @sret4(%struct.S4* noalias sret %agg.result) {
  entry:
-; WIN32:     sret4
+; WIN32-LABEL:     _sret4:
  ; WIN32:     movl $42, (%eax)
  ; WIN32-NOT: popl %eax
-; WIN32:   {{ret$}}
+; WIN32:   {{retl$}}
+
+; MINGW_X86-LABEL: _sret4:
+; MINGW_X86: {{retl$}}
  
-; MINGW_X86: sret4
-; MINGW_X86: ret $4
+; CYGWIN-LABEL:    _sret4:
+; CYGWIN:    retl $4
  
-; LINUX:     sret4
-; LINUX:     ret $4
+; LINUX-LABEL:     sret4:
+; LINUX:     retl $4
  
    %x = getelementptr inbounds %struct.S4* %agg.result, i32 0, i32 0
    store i32 42, i32* %x, align 4
@@ -94,14 +110,17 @@ entry:
    %x = getelementptr inbounds %struct.S5* %agg.result, i32 0, i32 0
    store i32 42, i32* %x, align 4
    ret void
-; WIN32:     {{^}}"?foo@C5@@QAE?AUS5@@XZ":
+; WIN32-LABEL:     {{^}}"?foo@C5@@QAE?AUS5@@XZ":
+; MINGW_X86-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
+; CYGWIN-LABEL:    {{^}}"?foo@C5@@QAE?AUS5@@XZ":
+; LINUX-LABEL:     {{^}}"?foo@C5@@QAE?AUS5@@XZ":
  
  ; The address of the return structure is passed as an implicit parameter.
  ; In the -O0 build, %eax is spilled at the beginning of the function, hence we
  ; should match both 4(%esp) and 8(%esp).
  ; WIN32:     {{[48]}}(%esp), %eax
  ; WIN32:     movl $42, (%eax)
-; WIN32:     ret $4
+; WIN32:     retl $4
  }
  
  define void @call_foo5() {
@@ -109,16 +128,116 @@ entry:
    %c = alloca %class.C5, align 1
    %s = alloca %struct.S5, align 4
    call x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* sret %s, %class.C5* %c)
-; WIN32:      {{^}}_call_foo5:
+; WIN32-LABEL:      {{^}}_call_foo5:
+; MINGW_X86-LABEL:  {{^}}_call_foo5:
+; CYGWIN-LABEL:     {{^}}_call_foo5:
+; LINUX-LABEL:      {{^}}call_foo5:
+
  
  ; Load the address of the result and put it onto stack
  ; (through %ecx in the -O0 build).
-; WIN32:      leal {{[0-9]+}}(%esp), %eax
-; WIN32:      movl %eax, (%e{{[sc][px]}})
+; WIN32:      leal {{[0-9]+}}(%esp), %e{{[a-d]}}x
+; WIN32:      movl %e{{[a-d]}}x, (%e{{([a-d]x)|(sp)}})
  
  ; The this pointer goes to ECX.
  ; WIN32-NEXT: leal {{[0-9]+}}(%esp), %ecx
  ; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
-; WIN32:      ret
+; WIN32:      retl
    ret void
  }
+
+
+%struct.test6 = type { i32, i32, i32 }
+define void @test6_f(%struct.test6* %x) nounwind {
+; WIN32-LABEL: _test6_f:
+; MINGW_X86-LABEL: _test6_f:
+; CYGWIN-LABEL: _test6_f:
+; LINUX-LABEL: test6_f:
+
+; The %x argument is moved to %ecx. It will be the this pointer.
+; WIN32: movl    8(%ebp), %ecx
+
+; The %x argument is moved to (%esp). It will be the this pointer. With -O0
+; we copy esp to ecx and use (ecx) instead of (esp).
+; MINGW_X86: movl    8(%ebp), %eax
+; MINGW_X86: movl    %eax, (%e{{([a-d]x)|(sp)}})
+
+; CYGWIN: movl    8(%ebp), %eax
+; CYGWIN: movl    %eax, (%e{{([a-d]x)|(sp)}})
+
+; The sret pointer is (%esp)
+; WIN32:          leal    8(%esp), %[[REG:e[a-d]x]]
+; WIN32-NEXT:     movl    %[[REG]], (%e{{([a-d]x)|(sp)}})
+
+; The sret pointer is %ecx
+; MINGW_X86-NEXT: leal    8(%esp), %ecx
+; MINGW_X86-NEXT: calll   _test6_g
+
+; CYGWIN-NEXT: leal    8(%esp), %ecx
+; CYGWIN-NEXT: calll   _test6_g
+
+  %tmp = alloca %struct.test6, align 4
+  call x86_thiscallcc void @test6_g(%struct.test6* sret %tmp, %struct.test6* %x)
+  ret void
+}
+declare x86_thiscallcc void @test6_g(%struct.test6* sret, %struct.test6*)
+
+; Flipping the parameters at the IR level generates the same code.
+%struct.test7 = type { i32, i32, i32 }
+define void @test7_f(%struct.test7* %x) nounwind {
+; WIN32-LABEL: _test7_f:
+; MINGW_X86-LABEL: _test7_f:
+; CYGWIN-LABEL: _test7_f:
+; LINUX-LABEL: test7_f:
+
+; The %x argument is moved to %ecx on all OSs. It will be the this pointer.
+; WIN32:      movl    8(%ebp), %ecx
+; MINGW_X86:  movl    8(%ebp), %ecx
+; CYGWIN:     movl    8(%ebp), %ecx
+
+; The sret pointer is (%esp)
+; WIN32:          leal    8(%esp), %[[REG:e[a-d]x]]
+; WIN32-NEXT:     movl    %[[REG]], (%e{{([a-d]x)|(sp)}})
+; MINGW_X86:      leal    8(%esp), %[[REG:e[a-d]x]]
+; MINGW_X86-NEXT: movl    %[[REG]], (%e{{([a-d]x)|(sp)}})
+; CYGWIN:         leal    8(%esp), %[[REG:e[a-d]x]]
+; CYGWIN-NEXT:    movl    %[[REG]], (%e{{([a-d]x)|(sp)}})
+
+  %tmp = alloca %struct.test7, align 4
+  call x86_thiscallcc void @test7_g(%struct.test7* %x, %struct.test7* sret %tmp)
+  ret void
+}
+
+define x86_thiscallcc void @test7_g(%struct.test7* %in, %struct.test7* sret %out) {
+  %s = getelementptr %struct.test7* %in, i32 0, i32 0
+  %d = getelementptr %struct.test7* %out, i32 0, i32 0
+  %v = load i32* %s
+  store i32 %v, i32* %d
+  call void @clobber_eax()
+  ret void
+
+; Make sure we return the second parameter in %eax.
+; WIN32-LABEL: _test7_g:
+; WIN32: calll _clobber_eax
+; WIN32: movl {{.*}}, %eax
+; WIN32: retl
+}
+
+declare void @clobber_eax()
+
+; Test what happens if the first parameter has to be split by codegen.
+; Realistically, no frontend will generate code like this, but here it is for
+; completeness.
+define void @test8_f(i64 inreg %a, i64* sret %out) {
+  store i64 %a, i64* %out
+  call void @clobber_eax()
+  ret void
+
+; WIN32-LABEL: _test8_f:
+; WIN32: movl {{[0-9]+}}(%esp), %[[out:[a-z]+]]
+; WIN32-DAG: movl %edx, 4(%[[out]])
+; WIN32-DAG: movl %eax, (%[[out]])
+; WIN32: calll _clobber_eax
+; WIN32: movl {{.*}}, %eax
+; WIN32: retl
+}