From 1d1d705a95e4a93137382e950ea1d34767d0b9f0 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Sat, 25 Oct 2014 03:46:16 +0000 Subject: [PATCH] [NVPTX] aligned byte-buffers for vector return types Summary: Fixes PR21100 which is caused by inconsistency between the declared return type and the expected return type at the call site. The new behavior is consistent with nvcc and the NVPTXTargetLowering::getPrototype function. Test Plan: test/CodeGen/NVPTX/vector-return.ll Reviewers: jholewinski Reviewed By: jholewinski Subscribers: llvm-commits, meheff, eliben, jholewinski Differential Revision: http://reviews.llvm.org/D5612 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220607 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.cpp | 7 ++++++- test/CodeGen/NVPTX/vector-return.ll | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/NVPTX/vector-return.ll diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6cccf93028d..866017e49db 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1355,7 +1355,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // .param .align 16 .b8 retval0[<size-in-bytes>], or // .param .b<size-in-bits> retval0 unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); - if (retTy->isSingleValueType()) { + // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for + // these three types to match the logic in + // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. + // Plus, this behavior is consistent with nvcc's. 
+ if (retTy->isFloatingPointTy() || retTy->isIntegerTy() || + retTy->isPointerTy()) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; diff --git a/test/CodeGen/NVPTX/vector-return.ll b/test/CodeGen/NVPTX/vector-return.ll new file mode 100644 index 00000000000..15e50f8e144 --- /dev/null +++ b/test/CodeGen/NVPTX/vector-return.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s + +declare <2 x float> @bar(<2 x float> %input) + +define void @foo(<2 x float> %input, <2 x float>* %output) { +; CHECK-LABEL: @foo +entry: + %call = tail call <2 x float> @bar(<2 x float> %input) +; CHECK: .param .align 8 .b8 retval0[8]; +; CHECK: ld.param.v2.f32 {[[ELEM1:%f[0-9]+]], [[ELEM2:%f[0-9]+]]}, [retval0+0]; + store <2 x float> %call, <2 x float>* %output, align 8 +; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[ELEM1]], [[ELEM2]]} + ret void +} -- 2.34.1