Certain patterns involving the "movss" instruction were marked as requiring SSE2, although they only need SSE1.
author Anders Carlsson <andersca@mac.com>
Tue, 7 Oct 2008 16:14:11 +0000 (16:14 +0000)
committer Anders Carlsson <andersca@mac.com>
Tue, 7 Oct 2008 16:14:11 +0000 (16:14 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@57246 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86InstrSSE.td
test/CodeGen/X86/2008-10-07-SSEISelBug.ll [new file with mode: 0644]

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 93f722332bdd9f0d60b3e805aedf08d2b0f1f203..35348b69f93040eb36b762ad74b1e0f10452bcdc 100644 (file)
@@ -2891,11 +2891,11 @@ let AddedComplexity = 15 in {
 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
           (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
-          (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE2]>;
+          (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>;
 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE2]>;
+          (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
 }
 
 // Splat v2f64 / v2i64
diff --git a/test/CodeGen/X86/2008-10-07-SSEISelBug.ll b/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
new file mode 100644 (file)
index 0000000..4808986
--- /dev/null
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse,-sse2
+
+define <4 x float> @f(float %w) nounwind {    ; builds and returns the vector <%w, 0.0, 0.0, 0.0>
+entry:
+       %retval = alloca <4 x float>            ; <<4 x float>*> [#uses=2]  stack slot for the return value
+       %w.addr = alloca float          ; <float*> [#uses=2]  stack slot for the %w argument
+       %.compoundliteral = alloca <4 x float>          ; <<4 x float>*> [#uses=2]  stack slot for the assembled vector
+       store float %w, float* %w.addr    ; spill the argument, then reload it as %tmp
+       %tmp = load float* %w.addr              ; <float> [#uses=1]
+       %0 = insertelement <4 x float> undef, float %tmp, i32 0         ; <<4 x float>> [#uses=1]  element 0 = %w
+       %1 = insertelement <4 x float> %0, float 0.000000e+00, i32 1            ; <<4 x float>> [#uses=1]  element 1 = 0.0
+       %2 = insertelement <4 x float> %1, float 0.000000e+00, i32 2            ; <<4 x float>> [#uses=1]  element 2 = 0.0
+       %3 = insertelement <4 x float> %2, float 0.000000e+00, i32 3            ; <<4 x float>> [#uses=1]  element 3 = 0.0
+       store <4 x float> %3, <4 x float>* %.compoundliteral    ; round-trip the vector through memory
+       %tmp1 = load <4 x float>* %.compoundliteral             ; <<4 x float>> [#uses=1]
+       store <4 x float> %tmp1, <4 x float>* %retval    ; copy into the return slot
+       br label %return
+
+return:                ; preds = %entry
+       %4 = load <4 x float>* %retval          ; <<4 x float>> [#uses=1]  reload the result for the return
+       ret <4 x float> %4
+}