+; CHECK-LABEL: foo3_8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
+; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retl
+;
+; CHECK-WIDE-LABEL: foo3_8:
+; CHECK-WIDE: ## BB#0:
+; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[3,0,0,0]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
+; CHECK-WIDE-NEXT: shll $8, %eax
+; CHECK-WIDE-NEXT: vmovhlps {{.*#+}} xmm1 = xmm0[1,1]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %ecx
+; CHECK-WIDE-NEXT: movzbl %cl, %ecx
+; CHECK-WIDE-NEXT: orl %eax, %ecx
+; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,0,0,0]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
+; CHECK-WIDE-NEXT: shll $8, %eax
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %edx
+; CHECK-WIDE-NEXT: movzbl %dl, %edx
+; CHECK-WIDE-NEXT: orl %eax, %edx
+; CHECK-WIDE-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1
+; CHECK-WIDE-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,0,0,0]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: shll $8, %eax
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
+; CHECK-WIDE-NEXT: movzbl %cl, %ecx
+; CHECK-WIDE-NEXT: orl %eax, %ecx
+; CHECK-WIDE-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
+; CHECK-WIDE-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[3,0,0,0]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT: shll $8, %eax
+; CHECK-WIDE-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
+; CHECK-WIDE-NEXT: movzbl %cl, %ecx
+; CHECK-WIDE-NEXT: orl %eax, %ecx
+; CHECK-WIDE-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vzeroupper
+; CHECK-WIDE-NEXT: retl