ret void
; X64: t0:
-; X64: movddup (%rsi), %xmm0
-; X64: pshuflw $0, %xmm0, %xmm0
-; X64: xorl %eax, %eax
-; X64: pinsrw $0, %eax, %xmm0
+; X64: movdqa (%rsi), %xmm0
+; X64: pslldq $2, %xmm0
; X64: movdqa %xmm0, (%rdi)
; X64: ret
}
ret <8 x i16> %tmp3
; X64: t1:
-; X64: movl (%rsi), %eax
; X64: movdqa (%rdi), %xmm0
-; X64: pinsrw $0, %eax, %xmm0
+; X64: pinsrw $0, (%rsi), %xmm0
; X64: ret
}
ret void
; X64: t10:
; X64: pextrw $4, [[X0:%xmm[0-9]+]], %eax
-; X64: unpcklpd [[X1:%xmm[0-9]+]]
+; X64: movlhps [[X1:%xmm[0-9]+]]
; X64: pshuflw $8, [[X1]], [[X2:%xmm[0-9]+]]
; X64: pinsrw $2, %eax, [[X2]]
; X64: pextrw $6, [[X0]], %eax
%tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
ret <16 x i8> %tmp9
; X64: t16:
-; X64: pinsrw $0, %eax, [[X1:%xmm[0-9]+]]
-; X64: pextrw $8, [[X0:%xmm[0-9]+]], %eax
-; X64: pinsrw $1, %eax, [[X1]]
-; X64: pextrw $1, [[X1]], %ecx
-; X64: movd [[X1]], %edx
-; X64: pinsrw $0, %edx, %xmm
-; X64: pinsrw $1, %eax, %xmm
+; X64: movdqa %xmm1, %xmm0
+; X64: pslldq $2, %xmm0
+; X64: pextrw $1, %xmm0, %eax
+; X64: movd %xmm0, %ecx
+; X64: pinsrw $0, %ecx, %xmm0
+; X64: pextrw $8, %xmm1, %ecx
; X64: ret
}