From: Chad Rosier <mcrosier@apple.com>
Date: Tue, 20 Mar 2012 17:08:51 +0000 (+0000)
Subject: [avx] Adjust the VINSERTF128rm pattern to allow for unaligned loads.
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=33e528d44d8c9c9ad2ae49816a7ddb234446c08e;p=oota-llvm.git

[avx] Adjust the VINSERTF128rm pattern to allow for unaligned loads.

This results in things such as

	vmovups	16(%rdi), %xmm0
	vinsertf128	$1, %xmm0, %ymm0, %ymm0

to be combined to

    vinsertf128	$1, 16(%rdi), %ymm0, %ymm0

rdar://11076953


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153092 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 882d647327b..38744c3eeb3 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7759,15 +7759,15 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
           (VINSERTF128rr VR256:$src1, VR128:$src2,
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
 
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (alignedloadv4f32 addr:$src2),
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
                                    (i32 imm)),
           (VINSERTF128rm VR256:$src1, addr:$src2,
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (alignedloadv2f64 addr:$src2),
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
                                    (i32 imm)),
           (VINSERTF128rm VR256:$src1, addr:$src2,
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (alignedloadv2i64 addr:$src2),
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
                                    (i32 imm)),
           (VINSERTF128rm VR256:$src1, addr:$src2,
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index 9934a330413..3e86a2ae5ea 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -116,3 +116,16 @@ entry:
   %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
   ret <8 x float> %2
 }
+
+rdar://11076953
+; CHECK: vinsertf128_ucombine
+define <8 x float> @vinsertf128_ucombine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovups
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>* %0, align 8
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}