; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: shll $12, %ecx
; CHECK-NEXT: movd %ecx, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
-; CHECK-NEXT: movlpd %xmm0, (%eax)
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; CHECK-NEXT: movq %xmm0, (%eax)
; CHECK-NEXT: retl
%tmp12 = shl i32 %a, 12
%tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movaps (%eax), %xmm1
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; CHECK-NEXT: retl
- %tmp1 = load <4 x float>* %P
+ %tmp1 = load <4 x float>, <4 x float>* %P
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
ret <4 x float> %tmp2
}
; CHECK-LABEL: t3:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movaps (%eax), %xmm0
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,0]
+; CHECK-NEXT: movapd (%eax), %xmm0
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT: retl
- %tmp1 = load <4 x float>* %P
+ %tmp1 = load <4 x float>, <4 x float>* %P
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
ret <4 x float> %tmp2
}
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movaps (%eax), %xmm0
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
-; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
; CHECK-NEXT: retl
- %tmp1 = load <4 x float>* %P
+ %tmp1 = load <4 x float>, <4 x float>* %P
%tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
ret <4 x float> %tmp2
}