-; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 > %t
-; RUN: grep pmul %t | count 6
-; RUN: grep mov %t | count 8
+; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 -join-physregs > %t
+; RUN: grep pmul %t | count 12
+; RUN: grep mov %t | count 11
+
; Without -join-physregs, the f() arguments in %xmm0 and %xmm1 cause an extra
; movdqa, which the `grep mov` count above would also match.
define <4 x i32> @a(<4 x i32> %i) nounwind {
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >