Add 256-bit variant vmovss and vmovsd patterns. rdar://10538417

author Evan Cheng <evan.cheng@apple.com>

Thu, 8 Dec 2011 22:30:45 +0000 (22:30 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Thu, 8 Dec 2011 22:30:45 +0000 (22:30 +0000)
author Evan Cheng <evan.cheng@apple.com>
Thu, 8 Dec 2011 22:30:45 +0000 (22:30 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Thu, 8 Dec 2011 22:30:45 +0000 (22:30 +0000)
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 7b19d22f8c419c6c4d074b402f2d2a10eea6c374..e806b8f7235aebee08958d963533d36a54bd41f4 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -645,6 +645,16 @@ let Predicates = [HasAVX] in {
              (VMOVSSrr (v4f32 VR128:$src1),
                        (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
  
+  // 256-bit variants: X86Movss on VR256, selected via the 128-bit VMOVSSrr
+  def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
+            (SUBREG_TO_REG (i32 0),
+                (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
+                          (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
+  def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
+            (SUBREG_TO_REG (i32 0),
+                (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
+                          (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
+
    // Shuffle with VMOVSD
    def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
              (VMOVSDrr VR128:$src1, FR64:$src2)>;
@@ -661,6 +671,17 @@ let Predicates = [HasAVX] in {
              (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
                                                     sub_sd))>;
  
+  // 256-bit variants: X86Movsd on VR256, selected via the 128-bit VMOVSDrr
+  def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
+            (SUBREG_TO_REG (i32 0),
+                (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
+                          (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
+  def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
+            (SUBREG_TO_REG (i32 0),
+                (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
+                          (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
+
+
    // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
    // is during lowering, where it's not possible to recognize the fold cause
    // it has two uses through a bitcast. One use disappears at isel time and the
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll

index 0db334dd9940ef88f258a19961cdec1a6efe2b43..e66a3d48d3828f9bd77827c2fd475d79cf016009 100644 (file)
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -8,3 +8,13 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
  ; CHECK: vshufps
  ; CHECK: vpshufd
  }
+
+; rdar://10538417
+define <3 x i64> @test2(<3 x i64> %v) nounwind readnone {
+; CHECK: test2:
+; CHECK: vxorps
+; CHECK: vmovsd
+  %1 = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+  %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2>
+  ret <3 x i64> %2
+}
author	Evan Cheng <evan.cheng@apple.com>
	Thu, 8 Dec 2011 22:30:45 +0000 (22:30 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Thu, 8 Dec 2011 22:30:45 +0000 (22:30 +0000)
lib/Target/X86/X86InstrSSE.td		patch \| blob \| history
test/CodeGen/X86/avx-shuffle.ll		patch \| blob \| history