Fix PR10844: Add patterns to cover non-foldable versions of X86vzmovl.

author Bruno Cardoso Lopes <bruno.cardoso@gmail.com>

Thu, 8 Sep 2011 18:05:02 +0000 (18:05 +0000)

committer Bruno Cardoso Lopes <bruno.cardoso@gmail.com>

Thu, 8 Sep 2011 18:05:02 +0000 (18:05 +0000)
author Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Thu, 8 Sep 2011 18:05:02 +0000 (18:05 +0000)
committer Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Thu, 8 Sep 2011 18:05:02 +0000 (18:05 +0000)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index c210a987dc54154135843924a8ccf389be867407..9148c76ce0c38e49f58ab904be8df0f367de7140 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -536,15 +536,15 @@ let Predicates = [HasAVX] in {
    // Move scalar to XMM zero-extended, zeroing a VR128 then do a
    // MOVS{S,D} to the lower bits.
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
-            (VMOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
+            (VMOVSSrr (v4f32 (AVX_SET0PS)), FR32:$src)>;
    def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-            (VMOVSSrr (v4f32 (V_SET0PS)),
+            (VMOVSSrr (v4f32 (AVX_SET0PS)),
                        (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-            (VMOVSSrr (v4i32 (V_SET0PI)),
+            (VMOVSSrr (v4i32 (AVX_SET0PI)),
                        (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
-            (VMOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
+            (VMOVSDrr (v2f64 (AVX_SET0PS)), FR64:$src)>;
    }
  
    let AddedComplexity = 20 in {
@@ -579,6 +579,16 @@ let Predicates = [HasAVX] in {
                     (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>;
    }
+  def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
+                   (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))),
+            (SUBREG_TO_REG (i32 0),
+                           (v4f32 (VMOVSSrr (v4f32 (AVX_SET0PS)), FR32:$src)),
+                           sub_xmm)>;
+  def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
+                   (v2f64 (scalar_to_vector FR64:$src)), (i32 0)))),
+            (SUBREG_TO_REG (i64 0),
+                           (v2f64 (VMOVSDrr (v2f64 (AVX_SET0PS)), FR64:$src)),
+                           sub_xmm)>;
  
    // Extract and store.
    def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll

index f70291b96e163ec155181bd8e7d6fd0519e0bf5e..1fda9bc22923407804c5c1d3ff2a971a26a332e3 100644 (file)
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -1,4 +1,5 @@
  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc -O0 < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s -check-prefix=CHECK_O0
  
  ; CHECK: vmovaps
  ; CHECK: vmovaps
@@ -78,3 +79,27 @@ entry:
    ret void
  }
  
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+; PR10844: checked only by the -O0 RUN line (prefix CHECK_O0); every directive below must use that exact prefix.
+; CHECK_O0: _f_f
+; CHECK_O0: vmovss LCPI
+; CHECK_O0: vxorps  %xmm
+; CHECK_O0: vmovss %xmm
+define void @f_f() nounwind {
+allocas:
+  br i1 undef, label %cif_mask_all, label %cif_mask_mixed
+
+cif_mask_all:                                     ; preds = %allocas
+  unreachable
+
+cif_mask_mixed:                                   ; preds = %allocas
+  br i1 undef, label %cif_mixed_test_all, label %cif_mixed_test_any_check
+
+cif_mixed_test_all:                               ; preds = %cif_mask_mixed
+  call void @llvm.x86.avx.maskstore.ps.256(i8* undef, <8 x float> <float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, <8 x float> undef) nounwind
+  unreachable
+
+cif_mixed_test_any_check:                         ; preds = %cif_mask_mixed
+  unreachable
+}
+
author	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
	Thu, 8 Sep 2011 18:05:02 +0000 (18:05 +0000)
committer	Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
	Thu, 8 Sep 2011 18:05:02 +0000 (18:05 +0000)
lib/Target/X86/X86InstrSSE.td		patch \| blob \| history
test/CodeGen/X86/avx-load-store.ll		patch \| blob \| history