; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2 define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: A: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] ; ALL-NEXT: retq entry: %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle } define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: B: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] ; ALL-NEXT: retq entry: %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle } define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: C: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; ALL-NEXT: retq entry: %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle } define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: D: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] ; ALL-NEXT: retq entry: %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle } define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: E: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] ; ALL-NEXT: retq entry: %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle } define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: E2: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] ; ALL-NEXT: retq entry: %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %shuffle } define <32 x i8> @Ei(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp { ; AVX1-LABEL: Ei: ; AVX1: ## BB#0: ## %entry ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: Ei: ; AVX2: ## BB#0: ## %entry ; AVX2-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3] ; AVX2-NEXT: retq entry: ; add forces execution domain %a2 = add <32 x i8> %a, %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> ret <32 x i8> %shuffle } define <4 x i64> @E2i(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp { ; AVX1-LABEL: E2i: ; AVX1: ## BB#0: ## %entry ; AVX1-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] ; AVX1-NEXT: retq ; ; AVX2-LABEL: E2i: ; AVX2: ## BB#0: ## %entry ; AVX2-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2 ; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1] ; AVX2-NEXT: retq entry: ; add forces execution domain %a2 = add <4 x i64> %a, %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> ret <4 x i64> %shuffle } define <8 x i32> @E3i(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp { ; AVX1-LABEL: E3i: ; AVX1: ## BB#0: ## %entry ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: E3i: ; AVX2: ## BB#0: ## %entry ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; AVX2-NEXT: retq entry: ; add forces execution domain %a2 = add <8 x i32> %a, %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle } define <16 x i16> @E4i(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp { ; AVX1-LABEL: E4i: ; AVX1: ## BB#0: ## %entry ; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: E4i: ; AVX2: ## BB#0: ## %entry ; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: retq entry: ; add forces execution domain %a2 = add <16 x i16> %a, %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> ret <16 x i16> %shuffle } define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp { ; AVX1-LABEL: E5i: ; AVX1: ## BB#0: ## %entry ; AVX1-NEXT: vmovdqa (%rdi), %ymm0 ; AVX1-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vmovaps (%rsi), %ymm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: E5i: ; AVX2: ## BB#0: ## %entry ; AVX2-NEXT: vmovdqa (%rdi), %ymm0 ; AVX2-NEXT: vmovdqa (%rsi), %ymm1 ; AVX2-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: retq entry: %c = load <16 x i16>* %a %d = load <16 x i16>* %b %c2 = add <16 x i16> %c, %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> ret <16 x i16> %shuffle } ;;;; Cases with undef indicies mixed in the mask define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: F: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[0,1,0,1] ; ALL-NEXT: retq entry: %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle } ;;;; Cases we must not select vperm2f128 define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp { ; ALL-LABEL: G: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1,0,1] ; ALL-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7] ; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3] ; ALL-NEXT: retq entry: %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ret <8 x float> %shuffle }