X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=test%2FCodeGen%2FX86%2Favx-vperm2x128.ll;h=74d20f348b5298c9c79cc33ab07ec4062db33b95;hb=13ce16385579925c3d806e3bb029e8c58d9c7a94;hp=43303ca57c4fc70afb5b06bb8786ca2adfc8abc7;hpb=bd357588a106dc7c828c57ad8048e82003d638de;p=oota-llvm.git diff --git a/test/CodeGen/X86/avx-vperm2x128.ll b/test/CodeGen/X86/avx-vperm2x128.ll index 43303ca57c4..74d20f348b5 100644 --- a/test/CodeGen/X86/avx-vperm2x128.ll +++ b/test/CodeGen/X86/avx-vperm2x128.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: A: @@ -147,8 +147,8 @@ define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ; AVX1-LABEL: E5i: ; AVX1: ## BB#0: ## %entry ; AVX1-NEXT: vmovdqa (%rdi), %ymm0 -; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vmovaps (%rsi), %ymm1 +; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; @@ -160,8 +160,8 @@ define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: retq entry: - %c = load <16 x i16>* %a - %d = load <16 x i16>* %b + %c = load <16 x i16>, <16 x i16>* %a + %d = load <16 x i16>, <16 x i16>* %b %c2 = add <16 x i16> %c, %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> ret <16 x i16> %shuffle @@ -172,13 +172,83 @@ entry: define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: F: ; ALL: ## BB#0: ## %entry -; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[0,1,0,1] +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; ALL-NEXT: retq entry: %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle } +define <8 x float> @F2(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +; ALL-LABEL: F2: +; ALL: ## BB#0: ## %entry +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] +; ALL-NEXT: retq +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %shuffle +} + +define <8 x float> @F3(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +; ALL-LABEL: F3: +; ALL: ## BB#0: ## %entry +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; ALL-NEXT: retq +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %shuffle +} + +define <8 x float> @F4(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +; ALL-LABEL: F4: +; ALL: ## BB#0: ## %entry +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; ALL-NEXT: retq +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %shuffle +} + +define <8 x float> @F5(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +; ALL-LABEL: F5: +; ALL: ## BB#0: ## %entry +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] +; ALL-NEXT: retq +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %shuffle +} + +define <8 x float> @F6(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +; ALL-LABEL: F6: +; ALL: ## BB#0: ## %entry +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; ALL-NEXT: retq +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %shuffle +} + +define <8 x float> @F7(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +; ALL-LABEL: F7: +; ALL: ## BB#0: ## %entry +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] +; ALL-NEXT: retq +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %shuffle +} + +define <8 x float> @F8(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { +; ALL-LABEL: F8: +; ALL: ## BB#0: ## %entry +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; ALL-NEXT: retq +entry: + %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %shuffle +} + ;;;; Cases we must not select vperm2f128 define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { @@ -191,3 +261,94 @@ entry: %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle } + +;; Test zero mask generation. +;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984 +;; Prefer xor+vblendpd over vperm2f128 because that has better performance. + +define <4 x double> @vperm2z_0x08(<4 x double> %a) { +; ALL-LABEL: vperm2z_0x08: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: retq + %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> + ret <4 x double> %s +} + +define <4 x double> @vperm2z_0x18(<4 x double> %a) { +; ALL-LABEL: vperm2z_0x18: +; ALL: # BB#0: +; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 +; ALL-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0 +; ALL-NEXT: retq + %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> + ret <4 x double> %s +} + +define <4 x double> @vperm2z_0x28(<4 x double> %a) { +; ALL-LABEL: vperm2z_0x28: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: retq + %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> + ret <4 x double> %s +} + +define <4 x double> @vperm2z_0x38(<4 x double> %a) { +; ALL-LABEL: vperm2z_0x38: +; ALL: # BB#0: +; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 +; ALL-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0 +; ALL-NEXT: retq + %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> + ret <4 x double> %s +} + +define <4 x double> @vperm2z_0x80(<4 x double> %a) { +; ALL-LABEL: vperm2z_0x80: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 $128, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: retq + %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> + ret <4 x double> %s +} + +define <4 x double> @vperm2z_0x81(<4 x double> %a) { +; ALL-LABEL: vperm2z_0x81: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: retq + %s = shufflevector <4 x double> %a, <4 x double> , <4 x i32> + ret <4 x double> %s +} + +define <4 x double> @vperm2z_0x82(<4 x double> %a) { +; ALL-LABEL: vperm2z_0x82: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 $128, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: retq + %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> + ret <4 x double> %s +} + +define <4 x double> @vperm2z_0x83(<4 x double> %a) { +; ALL-LABEL: vperm2z_0x83: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0 +; ALL-NEXT: retq + %s = shufflevector <4 x double> , <4 x double> %a, <4 x i32> + ret <4 x double> %s +} + +;; With AVX2 select the integer version of the instruction. Use an add to force the domain selection. + +define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) { +; ALL-LABEL: vperm2z_int_0x83: +; ALL: # BB#0: +; AVX1: vperm2f128 $129, %ymm0, %ymm0, %ymm0 +; AVX2: vperm2i128 $129, %ymm0, %ymm0, %ymm0 + %s = shufflevector <4 x i64> , <4 x i64> %a, <4 x i32> + %c = add <4 x i64> %b, %s + ret <4 x i64> %c +} +