X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Favx-vinsertf128.ll;h=38389de7a8a10f5321814c970a05c669bcfa67e9;hb=e4ee51a0058c4d5aee56af2b4301d15678d4cc3e;hp=8813a7f6ab8215b7dd4852e53ebf9e87759c20bf;hpb=97136c922ec4b492584cb91c1fc1cdcb40983ecf;p=oota-llvm.git

Summary of this patch to test/CodeGen/X86/avx-vinsertf128.ll:
  * Replaces the two RUN lines (one of which used the CHECK-SSE prefix) with a
    single RUN line that no longer passes -mcpu=corei7-avx.
  * Adds a CHECK-LABEL directive in front of the checks for each function.
  * Renames @C to @DAGCombineA and appends the tests DAGCombineB,
    insert_undef_pd, insert_undef_ps, insert_undef_si, vinsertf128_combine
    and vinsertf128_ucombine.

NOTE(review): this copy was rebuilt from a whitespace-collapsed export.
  * The three shufflevector lines below end at the mask type (<4 x i32> or
    <8 x i32>) with no mask operand; the operand appears to have been lost in
    extraction and must be restored from the upstream file before the hunks
    can match.
  * Indentation inside hunk lines is assumed to be two spaces - confirm
    against upstream.

diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index 8813a7f6ab8..38389de7a8a 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck -check-prefix=CHECK-SSE %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
 
+; CHECK-LABEL: A:
 ; CHECK-NOT: vunpck
 ; CHECK: vinsertf128 $1
 define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
@@ -9,6 +9,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK-LABEL: B:
 ; CHECK-NOT: vunpck
 ; CHECK: vinsertf128 $1
 define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
@@ -22,7 +23,7 @@ declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind
 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
 
 ; Just check that no crash happens
-; CHECK-SSE: _insert_crash
+; CHECK-LABEL: _insert_crash:
 define void @insert_crash() nounwind {
 allocas:
   %v1.i.i451 = shufflevector <4 x double> zeroinitializer, <4 x double> undef, <4 x i32>
@@ -37,10 +38,74 @@ allocas:
   ret void
 }
 
-; CHECK: _C
+;; DAG Combine must remove useless vinsertf128 instructions
+
+; CHECK-LABEL: DAGCombineA:
 ; CHECK-NOT: vinsertf128 $1
-define <4 x i32> @C(<4 x i32> %v1) nounwind readonly {
+define <4 x i32> @DAGCombineA(<4 x i32> %v1) nounwind readonly {
   %1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <8 x i32>
   %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32>
   ret <4 x i32> %2
 }
+
+; CHECK-LABEL: DAGCombineB:
+; CHECK: vpaddd %xmm
+; CHECK-NOT: vinsertf128 $1
+; CHECK: vpaddd %xmm
+define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly {
+  %1 = add <8 x i32> %v1, %v2
+  %2 = add <8 x i32> %1, %v1
+  ret <8 x i32> %2
+}
+
+; CHECK-LABEL: insert_undef_pd:
+define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vmovaps %ymm1, %ymm0
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
+
+
+; CHECK-LABEL: insert_undef_ps:
+define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vmovaps %ymm1, %ymm0
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
+
+
+; CHECK-LABEL: insert_undef_si:
+define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vmovaps %ymm1, %ymm0
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
+
+; rdar://10643481
+; CHECK-LABEL: vinsertf128_combine:
+define <8 x float> @vinsertf128_combine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovaps
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float, float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>, <4 x float>* %0, align 16
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}
+
+; rdar://11076953
+; CHECK-LABEL: vinsertf128_ucombine:
+define <8 x float> @vinsertf128_ucombine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovups
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float, float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>, <4 x float>* %0, align 8
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}