; Shuffle test: keep the low two doubles of %a and fill the upper two lanes
; with zero (mask indices 4 and 5 select the 0.0 elements of the constant
; second operand).
; NOTE(review): the "0x80" suffix presumably names the matching VPERM2F128
; immediate (bit 7 zeroing the upper half) -- confirm.
; NOTE(review): the '-'/'+' prefixed lines below look like patch markers. The
; expected output moves from a single vperm2f128 with a zeroed upper half to a
; vxorpd that zeroes ymm1 followed by a vblendpd taking elements 2,3 from it.
define <4 x double> @vperm2z_0x80(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x80:
; ALL: ## BB#0:
-; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
+; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
; Result lanes: %a[0], %a[1], 0.0, 0.0.
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x double> %s
; Shuffle test with the zero constant as the FIRST operand and %a as the
; second: mask indices 4,5 pick the low two doubles of %a and indices 0,1 pick
; the 0.0 elements of the constant, so the upper two lanes are zero.
; NOTE(review): the "0x82" suffix presumably names the matching VPERM2F128
; immediate (low half from the second source, bit 7 zeroing the upper half)
; -- confirm.
; NOTE(review): the '-'/'+' prefixed lines below look like patch markers. The
; expected output moves from a single vperm2f128 to a vxorpd that zeroes ymm1
; followed by a vblendpd taking elements 2,3 from it.
define <4 x double> @vperm2z_0x82(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x82:
; ALL: ## BB#0:
-; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
+; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
; Result lanes: %a[0], %a[1], 0.0, 0.0.
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x double> %s
;; With AVX2, select the integer version of the instruction (vperm2i128 rather
;; than vperm2f128). An add of the shuffle result is used to force the integer
;; domain selection.
;; NOTE(review): the '-'/'+' prefixed lines below look like patch markers. The
;; expectations move from a shared label with one loose check per target to
;; fully separate AVX1 and AVX2 sequences that also pin the add and the retq.
define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
-; ALL-LABEL: vperm2z_int_0x83:
-; ALL: ## BB#0:
-; AVX1: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
-; AVX2: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX1-LABEL: vperm2z_int_0x83:
+; AVX1: ## BB#0:
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: vperm2z_int_0x83:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX2-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: retq
; The zero constant is the first operand: mask indices 6,7 pick the upper two
; elements of %a and indices 0,1 pick zeros, giving %a[2], %a[3], 0, 0.
%s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
; Integer add consuming the shuffle result. The AVX1 expectations above show it
; as two 128-bit vpaddq on extracted halves; AVX2 shows one 256-bit vpaddq.
%c = add <4 x i64> %b, %s
ret <4 x i64> %c