; AVX1-LABEL: var_shift_v8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpextrd $1, %xmm2, %eax
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrd $1, %xmm3, %ecx
-; AVX1-NEXT: sarl %cl, %eax
-; AVX1-NEXT: vmovd %xmm2, %edx
-; AVX1-NEXT: vmovd %xmm3, %ecx
-; AVX1-NEXT: sarl %cl, %edx
-; AVX1-NEXT: vmovd %edx, %xmm4
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm4, %xmm4
-; AVX1-NEXT: vpextrd $2, %xmm2, %eax
-; AVX1-NEXT: vpextrd $2, %xmm3, %ecx
-; AVX1-NEXT: sarl %cl, %eax
-; AVX1-NEXT: vpinsrd $2, %eax, %xmm4, %xmm4
-; AVX1-NEXT: vpextrd $3, %xmm2, %eax
-; AVX1-NEXT: vpextrd $3, %xmm3, %ecx
-; AVX1-NEXT: sarl %cl, %eax
-; AVX1-NEXT: vpinsrd $3, %eax, %xmm4, %xmm2
-; AVX1-NEXT: vpextrd $1, %xmm0, %eax
-; AVX1-NEXT: vpextrd $1, %xmm1, %ecx
-; AVX1-NEXT: sarl %cl, %eax
-; AVX1-NEXT: vmovd %xmm0, %edx
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: sarl %cl, %edx
-; AVX1-NEXT: vmovd %edx, %xmm3
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX1-NEXT: vpextrd $2, %xmm0, %eax
-; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
-; AVX1-NEXT: sarl %cl, %eax
-; AVX1-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX1-NEXT: vpextrd $3, %xmm0, %eax
-; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
-; AVX1-NEXT: sarl %cl, %eax
-; AVX1-NEXT: vpinsrd $3, %eax, %xmm3, %xmm0
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm4 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsrad %xmm4, %xmm2, %xmm4
+; AVX1-NEXT: vpsrlq $32, %xmm3, %xmm5
+; AVX1-NEXT: vpsrad %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
+; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm6 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
+; AVX1-NEXT: vpsrad %xmm6, %xmm2, %xmm6
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
+; AVX1-NEXT: vpsrad %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
+; AVX1-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT: vpsrad %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm4
+; AVX1-NEXT: vpsrad %xmm4, %xmm0, %xmm4
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm4 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; AVX1-NEXT: vpsrad %xmm4, %xmm0, %xmm4
+; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
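The new AVX1 sequence above replaces sixteen scalar extract/sarl/insert round trips with four vpsrad ops per 128-bit half plus word blends. AVX1 has no per-lane variable shift (vpsravd is AVX2-only), but vpsrad shifts every lane by the count held in the low 64 bits of its second source, so the lowering shuffles each lane's shift amount into that slot in turn and blends the four results back together. A minimal C intrinsics sketch of the same trick follows; the helper name sra_var_epi32_avx1 is mine, not from the patch:

    #include <immintrin.h>

    /* Hypothetical helper mirroring the blend pattern checked above,
       assuming only SSE4.1/AVX1.  _mm_sra_epi32 shifts every lane by
       the count in the low 64 bits of its second operand, so each
       lane's count is moved there one at a time. */
    static __m128i sra_var_epi32_avx1(__m128i x, __m128i n) {
        __m128i r3 = _mm_sra_epi32(x, _mm_srli_si128(n, 12));  /* vpsrldq $12: lane 3's count */
        __m128i r1 = _mm_sra_epi32(x, _mm_srli_epi64(n, 32));  /* vpsrlq $32: lane 1's count */
        __m128i odd = _mm_blend_epi16(r1, r3, 0xF0);           /* lanes 1,3 now correct */
        __m128i r2 = _mm_sra_epi32(x, _mm_unpackhi_epi32(n, _mm_setzero_si128())); /* vpunpckhdq: lane 2 */
        __m128i r0 = _mm_sra_epi32(x, _mm_cvtepu32_epi64(n));  /* vpmovzxdq: lane 0 */
        __m128i even = _mm_blend_epi16(r0, r2, 0xF0);          /* lanes 0,2 now correct */
        return _mm_blend_epi16(even, odd, 0xCC);               /* interleave lanes 0..3 */
    }

On AVX2 targets the whole per-half dance collapses to a single vpsravd, which is why only the AVX1 check lines change here.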
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; AVX1-LABEL: constant_shift_v8i32:
; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrd $1, %xmm1, %eax
-; AVX1-NEXT: sarl $9, %eax
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: sarl $8, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $2, %xmm1, %eax
-; AVX1-NEXT: sarl $8, %eax
-; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $3, %xmm1, %eax
-; AVX1-NEXT: sarl $7, %eax
-; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
-; AVX1-NEXT: vpextrd $1, %xmm0, %eax
-; AVX1-NEXT: sarl $5, %eax
-; AVX1-NEXT: vmovd %xmm0, %ecx
-; AVX1-NEXT: sarl $4, %ecx
-; AVX1-NEXT: vmovd %ecx, %xmm2
-; AVX1-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $2, %xmm0, %eax
-; AVX1-NEXT: sarl $6, %eax
-; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX1-NEXT: vpextrd $3, %xmm0, %eax
-; AVX1-NEXT: sarl $7, %eax
-; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpsrad $7, %xmm0, %xmm1
+; AVX1-NEXT: vpsrad $5, %xmm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT: vpsrad $6, %xmm0, %xmm2
+; AVX1-NEXT: vpsrad $4, %xmm0, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrad $7, %xmm0, %xmm2
+; AVX1-NEXT: vpsrad $9, %xmm0, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT: vpsrad $8, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
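With constant shift amounts nothing needs to be shuffled into the low-64-bit slot: each distinct count becomes an immediate vpsrad and the per-lane results are blended. The low half's counts <4,5,6,7> cost four shifts and three blends, while the high half's <8,9,8,7> reuses the vpsrad $8 result for lanes 0 and 2 and so needs only three shifts. A sketch for the low half, with the same caveats as above (hypothetical helper, not the patch's code):

    /* Hypothetical helper for the <4,5,6,7> lane counts checked above. */
    static __m128i sra_const_4567(__m128i x) {
        __m128i odd  = _mm_blend_epi16(_mm_srai_epi32(x, 5), _mm_srai_epi32(x, 7), 0xF0);
        __m128i even = _mm_blend_epi16(_mm_srai_epi32(x, 4), _mm_srai_epi32(x, 6), 0xF0);
        return _mm_blend_epi16(even, odd, 0xCC);  /* lane i shifted right by 4+i */
    }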
; AVX2-LABEL: constant_shift_v8i32: