Fix the x86-64 side of PR2108 by adding a v2f64 version of
author Chris Lattner <sabre@nondot.org>
Thu, 10 Apr 2008 05:13:43 +0000 (05:13 +0000)
committer Chris Lattner <sabre@nondot.org>
Thu, 10 Apr 2008 05:13:43 +0000 (05:13 +0000)
MOVZQI2PQIrr.  This would be better handled as a dag combine
(with the goal of eliminating the bitconvert) but I don't know
how to do that safely.  Thoughts welcome.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49463 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86InstrSSE.td
test/CodeGen/X86/vec_shuffle-17.ll [new file with mode: 0644]

index dbc04b01e20163ad0efe939d14ee6b416cbc9d02..442847cda8e8ce6a3e87f0651f21ea55f7d1219b 100644 (file)
@@ -2305,6 +2305,16 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                                  MOVL_shuffle_mask)))]>;
 }
 
+// PR2108: also match the v2f64 form of 'MOVZQI2PQIrr' (a GR64 bitconverted to
+// f64, shuffled with zeros).  FIXME: better written as a dag combine xform.
+let AddedComplexity = 15 in
+def : Pat<(v2f64 (vector_shuffle immAllZerosV_bc,
+                                  (v2f64 (scalar_to_vector 
+                                       (f64 (bitconvert GR64:$src)))),
+                                  MOVL_shuffle_mask)),
+          (MOVZQI2PQIrr GR64:$src)>, Requires<[HasSSE3]>;
+          
+
 let AddedComplexity = 20 in {
 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll
new file mode 100644 (file)
index 0000000..992d791
--- /dev/null
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {movd.*%rdi, %xmm0}
+; RUN: llvm-as < %s | llc -march=x86-64 | not grep xor
+; PR2108: the function below should compile to a movd %rdi -> %xmm0, no xor.
+
+define <2 x i64> @doload64(i64 %x) nounwind  {
+entry:
+       %tmp717 = bitcast i64 %x to double              ; <double> [#uses=1]
+       %tmp8 = insertelement <2 x double> undef, double %tmp717, i32 0         ; <<2 x double>> [#uses=1]
+       %tmp9 = insertelement <2 x double> %tmp8, double 0.000000e+00, i32 1            ; <<2 x double>> [#uses=1]
+       %tmp11 = bitcast <2 x double> %tmp9 to <2 x i64>                ; <<2 x i64>> [#uses=1]
+       ret <2 x i64> %tmp11
+}
+