From 37f1bba13a323828a90703cec569e8c9f10fb4ef Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Mon, 18 May 2015 23:35:09 +0000 Subject: [PATCH] Re-land r237175: [X86] Always return the sret parameter in eax/rax ... This reverts commit r237210. Also fix X86/complex-fca.ll to match the code that we used to generate on win32 and now generate everywhere to conform to SysV. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237639 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 36 +++++++++++-------------- test/CodeGen/X86/cmovcmov.ll | 18 ++++++------- test/CodeGen/X86/complex-fca.ll | 26 ++++++++++-------- test/CodeGen/X86/sret-implicit.ll | 42 +++++++++++++++++++++++------- 4 files changed, 71 insertions(+), 51 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 162b684e927..bfd99a55179 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2000,9 +2000,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - // The x86-64 ABIs require that for returning structs by value we copy + // All x86 ABIs require that for returning structs by value we copy // the sret argument into %rax/%eax (depending on ABI) for the return. - // Win32 requires us to put the sret argument to %eax as well. // We saved the argument into a virtual register in the entry block, // so now we copy the value out and into %rax/%eax. // @@ -2011,8 +2010,6 @@ X86TargetLowering::LowerReturn(SDValue Chain, // false, then an sret argument may be implicitly inserted in the SelDAG. In // either case FuncInfo->setSRetReturnReg() will have been called. 
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { - assert((Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) && - "No need for an sret register"); SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy()); unsigned RetValReg @@ -2442,24 +2439,21 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(ArgValue); } - if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) { - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - // The x86-64 ABIs require that for returning structs by value we copy - // the sret argument into %rax/%eax (depending on ABI) for the return. - // Win32 requires us to put the sret argument to %eax as well. - // Save the argument into a virtual register so that we can access it - // from the return points. - if (Ins[i].Flags.isSRet()) { - unsigned Reg = FuncInfo->getSRetReturnReg(); - if (!Reg) { - MVT PtrTy = getPointerTy(); - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); - FuncInfo->setSRetReturnReg(Reg); - } - SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]); - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); - break; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + // All x86 ABIs require that for returning structs by value we copy the + // sret argument into %rax/%eax (depending on ABI) for the return. Save + // the argument into a virtual register so that we can access it from the + // return points. 
+ if (Ins[i].Flags.isSRet()) { + unsigned Reg = FuncInfo->getSRetReturnReg(); + if (!Reg) { + MVT PtrTy = getPointerTy(); + Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); + FuncInfo->setSRetReturnReg(Reg); } + SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); + break; } } diff --git a/test/CodeGen/X86/cmovcmov.ll b/test/CodeGen/X86/cmovcmov.ll index 982bc82c4c7..d3d9748d653 100644 --- a/test/CodeGen/X86/cmovcmov.ll +++ b/test/CodeGen/X86/cmovcmov.ll @@ -143,19 +143,19 @@ entry: ; NOCMOV-NEXT: jp [[TBB]] ; NOCMOV-NEXT: leal 24(%esp), %eax ; NOCMOV-NEXT: [[TBB]]: -; NOCMOV-NEXT: movl (%eax), %eax -; NOCMOV-NEXT: leal 44(%esp), %ecx +; NOCMOV-NEXT: movl (%eax), %ecx +; NOCMOV-NEXT: leal 44(%esp), %edx ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] ; NOCMOV-NEXT: jp [[TBB]] -; NOCMOV-NEXT: leal 28(%esp), %ecx +; NOCMOV-NEXT: leal 28(%esp), %edx ; NOCMOV-NEXT: [[TBB]]: -; NOCMOV-NEXT: movl (%ecx), %ecx +; NOCMOV-NEXT: movl 12(%esp), %eax +; NOCMOV-NEXT: movl (%edx), %edx ; NOCMOV-NEXT: leal 48(%esp), %esi ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] ; NOCMOV-NEXT: jp [[TBB]] ; NOCMOV-NEXT: leal 32(%esp), %esi ; NOCMOV-NEXT: [[TBB]]: -; NOCMOV-NEXT: movl 12(%esp), %edx ; NOCMOV-NEXT: movl (%esi), %esi ; NOCMOV-NEXT: leal 52(%esp), %edi ; NOCMOV-NEXT: jne [[TBB:.LBB[0-9_]+]] @@ -163,10 +163,10 @@ entry: ; NOCMOV-NEXT: leal 36(%esp), %edi ; NOCMOV-NEXT: [[TBB]]: ; NOCMOV-NEXT: movl (%edi), %edi -; NOCMOV-NEXT: movl %edi, 12(%edx) -; NOCMOV-NEXT: movl %esi, 8(%edx) -; NOCMOV-NEXT: movl %ecx, 4(%edx) -; NOCMOV-NEXT: movl %eax, (%edx) +; NOCMOV-NEXT: movl %edi, 12(%eax) +; NOCMOV-NEXT: movl %esi, 8(%eax) +; NOCMOV-NEXT: movl %edx, 4(%eax) +; NOCMOV-NEXT: movl %ecx, (%eax) ; NOCMOV-NEXT: popl %esi ; NOCMOV-NEXT: popl %edi ; NOCMOV-NEXT: retl $4 diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll index 8ad38a4ee5c..78b27b7dc3f 100644 --- 
a/test/CodeGen/X86/complex-fca.ll +++ b/test/CodeGen/X86/complex-fca.ll @@ -1,17 +1,21 @@ -; RUN: llc < %s -march=x86 | grep mov | count 2 - -; Skip this on Windows as there is no ccosl and sret behaves differently. -; XFAIL: pc-win32 +; RUN: llc < %s -march=x86 | FileCheck %s define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind { entry: - %z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0 - %z9 = extractvalue { x86_fp80, x86_fp80 } %z, 1 - %0 = fsub x86_fp80 0xK80000000000000000000, %z9 - %insert = insertvalue { x86_fp80, x86_fp80 } undef, x86_fp80 %0, 0 - %insert7 = insertvalue { x86_fp80, x86_fp80 } %insert, x86_fp80 %z8, 1 - call void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %insert7) nounwind - ret void + %z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0 + %z9 = extractvalue { x86_fp80, x86_fp80 } %z, 1 + %0 = fsub x86_fp80 0xK80000000000000000000, %z9 + %insert = insertvalue { x86_fp80, x86_fp80 } undef, x86_fp80 %0, 0 + %insert7 = insertvalue { x86_fp80, x86_fp80 } %insert, x86_fp80 %z8, 1 + call void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %insert7) nounwind + ret void } +; CHECK-LABEL: ccosl: +; CHECK: movl {{[0-9]+}}(%esp), %[[sret_reg:[^ ]+]] +; CHECK: movl %[[sret_reg]], (%esp) +; CHECK: calll {{.*ccoshl.*}} +; CHECK: movl %[[sret_reg]], %eax +; CHECK: retl + declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret, { x86_fp80, x86_fp80 }) nounwind diff --git a/test/CodeGen/X86/sret-implicit.ll b/test/CodeGen/X86/sret-implicit.ll index 56809525407..2a998fc6b6c 100644 --- a/test/CodeGen/X86/sret-implicit.ll +++ b/test/CodeGen/X86/sret-implicit.ll @@ -1,12 +1,34 @@ -; RUN: llc -mtriple=x86_64-apple-darwin8 < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-apple-darwin8 -terminal-rule < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-pc-linux -terminal-rule < %s | FileCheck %s 
- -; CHECK-LABEL: return32 -; CHECK-DAG: movq $0, (%rdi) -; CHECK-DAG: movq %rdi, %rax -; CHECK: retq -define i256 @return32() { +; RUN: llc -mtriple=x86_64-apple-darwin8 < %s | FileCheck %s --check-prefix=X64 +; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=X64 +; RUN: llc -mtriple=i686-pc-linux < %s | FileCheck %s --check-prefix=X86 +; RUN: llc -mtriple=x86_64-apple-darwin8 -terminal-rule < %s | FileCheck %s --check-prefix=X64 +; RUN: llc -mtriple=x86_64-pc-linux -terminal-rule < %s | FileCheck %s --check-prefix=X64 + +define void @sret_void(i32* sret %p) { + store i32 0, i32* %p + ret void +} + +; X64-LABEL: sret_void +; X64-DAG: movl $0, (%rdi) +; X64-DAG: movq %rdi, %rax +; X64: retq + +; X86-LABEL: sret_void +; X86: movl 4(%esp), %eax +; X86: movl $0, (%eax) +; X86: retl + +define i256 @sret_demoted() { ret i256 0 } + +; X64-LABEL: sret_demoted +; X64-DAG: movq $0, (%rdi) +; X64-DAG: movq %rdi, %rax +; X64: retq + +; X86-LABEL: sret_demoted +; X86: movl 4(%esp), %eax +; X86: movl $0, (%eax) +; X86: retl -- 2.34.1