Pass AVX vectors which are arguments to varargs functions on the stack. <rdar://problem/10463281>
author Eli Friedman <eli.friedman@gmail.com>
Thu, 1 Dec 2011 04:49:21 +0000 (04:49 +0000)
committer Eli Friedman <eli.friedman@gmail.com>
Thu, 1 Dec 2011 04:49:21 +0000 (04:49 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145573 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86CallingConv.td
test/CodeGen/X86/avx-varargs-x86_64.ll [new file with mode: 0644]

index 77b99056ae005b1f5ed31a5df181613dcfecdcb7..aab2a0577053d6ce6bc9dee10b0250aecfe17557 100644 (file)
@@ -158,10 +158,15 @@ def CC_X86_64_C : CallingConv<[
             CCIfSubtarget<"hasXMM()",
             CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
 
-  // The first 8 256-bit vector arguments are passed in YMM registers.
-  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
-            CCIfSubtarget<"hasAVX()",
-            CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+  // The first 8 256-bit vector arguments are passed in YMM registers, unless
+  // this is a vararg function.
+  // FIXME: This isn't precisely correct; the x86-64 ABI document says that
+  // fixed arguments to vararg functions are supposed to be passed in
+  // registers.  Actually modeling that would be a lot of work, though.
+  CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+                          CCIfSubtarget<"hasAVX()",
+                          CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
+                                         YMM4, YMM5, YMM6, YMM7]>>>>,
 
   // Integer/FP values get stored in stack slots that are 8 bytes in size and
   // 8-byte aligned if there are no more registers to hold them.
diff --git a/test/CodeGen/X86/avx-varargs-x86_64.ll b/test/CodeGen/X86/avx-varargs-x86_64.ll
new file mode 100644 (file)
index 0000000..b0932bd
--- /dev/null
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; <rdar://problem/10463281>
+; Check that a <8 x float> variadic argument is passed on the stack, not in YMM.
+
+@x = common global <8 x float> zeroinitializer, align 32
+declare i32 @f(i32, ...)
+
+; CHECK: test1:
+; CHECK: vmovaps       %ymm0, (%rsp)
+define void @test1() nounwind uwtable ssp {
+entry:
+  %0 = load <8 x float>* @x, align 32
+  %call = call i32 (i32, ...)* @f(i32 1, <8 x float> %0)
+  ret void
+}