X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Favx-vperm2x128.ll;h=0958008d9a3e1b0731f7d97835eaac34cffae75e;hp=ca0daf9aebc9ef679ecdf2f23b393dd4c24bc68b;hb=024ff64164bd3944f0ff54061cb3ff9675b9cc06;hpb=692f7382b5d50099a6112ac2d11f769d9589422a

diff --git a/test/CodeGen/X86/avx-vperm2x128.ll b/test/CodeGen/X86/avx-vperm2x128.ll
index ca0daf9aebc..0958008d9a3 100644
--- a/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/test/CodeGen/X86/avx-vperm2x128.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
 
@@ -147,8 +148,8 @@ define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone
 ; AVX1-LABEL: E5i:
 ; AVX1:       ## BB#0: ## %entry
 ; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
-; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vmovaps (%rsi), %ymm1
+; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -261,3 +262,107 @@ entry:
   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
   ret <8 x float> %shuffle
 }
+
+;; Test zero mask generation.
+;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
+;; Prefer xor+vblendpd over vperm2f128 because that has better performance.
+
+define <4 x double> @vperm2z_0x08(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x08:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
+; ALL-NEXT:    retq
+  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+  ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x18(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x18:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT:    retq
+  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x28(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x28:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
+; ALL-NEXT:    retq
+  %s = shufflevector <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x38(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x38:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT:    retq
+  %s = shufflevector <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x80(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x80:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT:    retq
+  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x81(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x81:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; ALL-NEXT:    retq
+  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x82(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x82:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT:    retq
+  %s = shufflevector <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+  ret <4 x double> %s
+}
+
+define <4 x double> @vperm2z_0x83(<4 x double> %a) {
+; ALL-LABEL: vperm2z_0x83:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; ALL-NEXT:    retq
+  %s = shufflevector <4 x double> <double 0.0, double 0.0, double 0.0, double 0.0>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x double> %s
+}
+
+;; With AVX2 select the integer version of the instruction. Use an add to force the domain selection.
+
+define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: vperm2z_int_0x83:
+; AVX1:       ## BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: vperm2z_int_0x83:
+; AVX2:       ## BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX2-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %s = shufflevector <4 x i64> <i64 0, i64 0, i64 0, i64 0>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  %c = add <4 x i64> %b, %s
+  ret <4 x i64> %c
+}
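
For reference, the vperm2z_* test names encode the VPERM2F128 immediate that would implement each shuffle: per the Intel SDM, each 4-bit half of the immediate controls one 128-bit result lane, with bits [1:0] selecting a source lane (0 = src1 low, 1 = src1 high, 2 = src2 low, 3 = src2 high) and bit 3 zeroing the lane outright. The zeroing bit is exactly what these tests exercise, and what PR22984 asks the backend to replace with the cheaper vxorpd+vblendpd pair where possible. Below is a minimal C sketch of that decoding, not part of the patch or of LLVM; the names ymm, select_lane, and vperm2f128_model are invented for illustration.

/* Illustrative model of the VPERM2F128 immediate -- not patch code. */
#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t e[4]; } ymm; /* a 256-bit register, modeled as 4 x u64 */

/* Write one 128-bit result lane (two 64-bit elements) chosen by a
 * 4-bit control: bits [1:0] pick a source lane, bit 3 zeroes the lane. */
static void select_lane(uint64_t *dst, const ymm *s1, const ymm *s2, unsigned ctl) {
    if (ctl & 8) {                                   /* bit 3: zero the lane */
        dst[0] = dst[1] = 0;
        return;
    }
    const uint64_t *src = (ctl & 2) ? s2->e : s1->e; /* bit 1: src2 vs. src1 */
    unsigned base = (ctl & 1) ? 2 : 0;               /* bit 0: high vs. low lane */
    dst[0] = src[base];
    dst[1] = src[base + 1];
}

static ymm vperm2f128_model(ymm s1, ymm s2, unsigned imm) {
    ymm r;
    select_lane(&r.e[0], &s1, &s2, imm & 0xF); /* low nibble -> result lane 0 */
    select_lane(&r.e[2], &s1, &s2, imm >> 4);  /* high nibble -> result lane 1 */
    return r;
}

int main(void) {
    ymm a = {{10, 11, 12, 13}}, zero = {{0, 0, 0, 0}};
    /* imm = 0x08: low lane zeroed, high lane = a[0,1] -- the
     * "ymm0 = zero,zero,ymm0[0,1]" pattern checked in @vperm2z_0x08. */
    ymm r = vperm2f128_model(a, zero, 0x08);
    printf("%llu %llu %llu %llu\n",
           (unsigned long long)r.e[0], (unsigned long long)r.e[1],
           (unsigned long long)r.e[2], (unsigned long long)r.e[3]); /* 0 0 10 11 */
    return 0;
}

Under this model, an immediate whose nibble avoids the zero bit maps directly onto a two-input shufflevector of the live operands, while a set bit 3 corresponds to the all-zero operand in the IR above; as the comment in the patch notes, emitting vxorpd+vblendpd for the blend-with-zero cases is preferred over vperm2f128 for performance.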