[NVPTX] aligned byte-buffers for vector return types

author Jingyue Wu <jingyue@google.com>

Sat, 25 Oct 2014 03:46:16 +0000 (03:46 +0000)

committer Jingyue Wu <jingyue@google.com>

Sat, 25 Oct 2014 03:46:16 +0000 (03:46 +0000)
author Jingyue Wu <jingyue@google.com>
Sat, 25 Oct 2014 03:46:16 +0000 (03:46 +0000)
committer Jingyue Wu <jingyue@google.com>
Sat, 25 Oct 2014 03:46:16 +0000 (03:46 +0000)
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp

index 6cccf93028d3ba41ae189d92ff4bca8375cc52d3..866017e49db5c6cebdf48c96d4f26f68cfd0b897 100644 (file)
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1355,7 +1355,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
      //  .param .align 16 .b8 retval0[<size-in-bytes>], or
      //  .param .b<size-in-bits> retval0
      unsigned resultsz = TD->getTypeAllocSizeInBits(retTy);
-    if (retTy->isSingleValueType()) {
+    // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
+    // these three types to match the logic in
+    // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
+    // Plus, this behavior is consistent with nvcc's.
+    if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
+        retTy->isPointerTy()) {
        // Scalar needs to be at least 32bit wide
        if (resultsz < 32)
          resultsz = 32;
diff --git a/test/CodeGen/NVPTX/vector-return.ll b/test/CodeGen/NVPTX/vector-return.ll

new file mode 100644 (file)

index 0000000..15e50f8
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-return.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+declare <2 x float> @bar(<2 x float> %input)
+
+define void @foo(<2 x float> %input, <2 x float>* %output) {
+; CHECK-LABEL: @foo
+entry:
+  %call = tail call <2 x float> @bar(<2 x float> %input)
+; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: ld.param.v2.f32 {[[ELEM1:%f[0-9]+]], [[ELEM2:%f[0-9]+]]}, [retval0+0];
+  store <2 x float> %call, <2 x float>* %output, align 8
+; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[ELEM1]], [[ELEM2]]}
+  ret void
+}
author	Jingyue Wu <jingyue@google.com>
	Sat, 25 Oct 2014 03:46:16 +0000 (03:46 +0000)
committer	Jingyue Wu <jingyue@google.com>
	Sat, 25 Oct 2014 03:46:16 +0000 (03:46 +0000)
lib/Target/NVPTX/NVPTXISelLowering.cpp		patch | blob | history
test/CodeGen/NVPTX/vector-return.ll	[new file with mode: 0644]	patch | blob