1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; trunc8i64_8i32: truncate every lane of an <8 x i64> vector to i32.
; The assertions below pin the instruction sequence emitted for this trunc
; separately for the SSE2, SSSE3, SSE4.1, AVX1 and AVX2 configurations.
8 define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
9 ; SSE2-LABEL: trunc8i64_8i32:
10 ; SSE2: # BB#0: # %entry
11 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
12 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
13 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
14 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
15 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
16 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
19 ; SSSE3-LABEL: trunc8i64_8i32:
20 ; SSSE3: # BB#0: # %entry
21 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
22 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
23 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
24 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
25 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
26 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
29 ; SSE41-LABEL: trunc8i64_8i32:
30 ; SSE41: # BB#0: # %entry
31 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
32 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
33 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
34 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
35 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
36 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
39 ; AVX1-LABEL: trunc8i64_8i32:
40 ; AVX1: # BB#0: # %entry
41 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
42 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
43 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
44 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
45 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
46 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
47 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
48 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
49 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
52 ; AVX2-LABEL: trunc8i64_8i32:
53 ; AVX2: # BB#0: # %entry
54 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
55 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
56 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
57 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
60 %0 = trunc <8 x i64> %a to <8 x i32>
; trunc8i64_8i16: truncate every lane of an <8 x i64> vector to i16, so the
; eight results fit a single 128-bit <8 x i16> value.
; Expected code is pinned separately for the SSE2, SSSE3, SSE4.1, AVX1 and
; AVX2 configurations.
64 define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
65 ; SSE2-LABEL: trunc8i64_8i16:
66 ; SSE2: # BB#0: # %entry
67 ; SSE2-NEXT: pextrw $4, %xmm1, %eax
68 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
69 ; SSE2-NEXT: pextrw $4, %xmm0, %ecx
70 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
71 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
72 ; SSE2-NEXT: pextrw $4, %xmm3, %edx
73 ; SSE2-NEXT: movd %edx, %xmm1
74 ; SSE2-NEXT: movd %eax, %xmm3
75 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
76 ; SSE2-NEXT: pextrw $4, %xmm2, %eax
77 ; SSE2-NEXT: movd %eax, %xmm1
78 ; SSE2-NEXT: movd %ecx, %xmm2
79 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
80 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
81 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
84 ; SSSE3-LABEL: trunc8i64_8i16:
85 ; SSSE3: # BB#0: # %entry
86 ; SSSE3-NEXT: pextrw $4, %xmm1, %eax
87 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
88 ; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
89 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
90 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
91 ; SSSE3-NEXT: pextrw $4, %xmm3, %edx
92 ; SSSE3-NEXT: movd %edx, %xmm1
93 ; SSSE3-NEXT: movd %eax, %xmm3
94 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
95 ; SSSE3-NEXT: pextrw $4, %xmm2, %eax
96 ; SSSE3-NEXT: movd %eax, %xmm1
97 ; SSSE3-NEXT: movd %ecx, %xmm2
98 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
99 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
100 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
103 ; SSE41-LABEL: trunc8i64_8i16:
104 ; SSE41: # BB#0: # %entry
105 ; SSE41-NEXT: pextrw $4, %xmm0, %eax
106 ; SSE41-NEXT: pinsrw $1, %eax, %xmm0
107 ; SSE41-NEXT: movd %xmm1, %eax
108 ; SSE41-NEXT: pinsrw $2, %eax, %xmm0
109 ; SSE41-NEXT: pextrw $4, %xmm1, %eax
110 ; SSE41-NEXT: pinsrw $3, %eax, %xmm0
111 ; SSE41-NEXT: movd %xmm2, %eax
112 ; SSE41-NEXT: pinsrw $4, %eax, %xmm0
113 ; SSE41-NEXT: pextrw $4, %xmm2, %eax
114 ; SSE41-NEXT: pinsrw $5, %eax, %xmm0
115 ; SSE41-NEXT: movd %xmm3, %eax
116 ; SSE41-NEXT: pinsrw $6, %eax, %xmm0
117 ; SSE41-NEXT: pextrw $4, %xmm3, %eax
118 ; SSE41-NEXT: pinsrw $7, %eax, %xmm0
121 ; AVX1-LABEL: trunc8i64_8i16:
122 ; AVX1: # BB#0: # %entry
123 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
124 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
125 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
126 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
127 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
128 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
129 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
130 ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
131 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
132 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
133 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
134 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
135 ; AVX1-NEXT: vzeroupper
138 ; AVX2-LABEL: trunc8i64_8i16:
139 ; AVX2: # BB#0: # %entry
140 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
141 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
142 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
143 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
144 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
145 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
146 ; AVX2-NEXT: vzeroupper
149 %0 = trunc <8 x i64> %a to <8 x i16>
; trunc8i32_8i16: truncate every lane of an <8 x i32> vector to i16.
; Expected code is pinned separately for the SSE2, SSSE3, SSE4.1, AVX1 and
; AVX2 configurations; SSE2 has no pshufb, so its expected sequence uses
; pshuflw/pshufhw/pshufd instead.
153 define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
154 ; SSE2-LABEL: trunc8i32_8i16:
155 ; SSE2: # BB#0: # %entry
156 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
157 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
158 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
159 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
160 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
161 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
162 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
165 ; SSSE3-LABEL: trunc8i32_8i16:
166 ; SSSE3: # BB#0: # %entry
167 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
168 ; SSSE3-NEXT: pshufb %xmm2, %xmm1
169 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
170 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
173 ; SSE41-LABEL: trunc8i32_8i16:
174 ; SSE41: # BB#0: # %entry
175 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
176 ; SSE41-NEXT: pshufb %xmm2, %xmm1
177 ; SSE41-NEXT: pshufb %xmm2, %xmm0
178 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
181 ; AVX1-LABEL: trunc8i32_8i16:
182 ; AVX1: # BB#0: # %entry
183 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
184 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
185 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
186 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
187 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
188 ; AVX1-NEXT: vzeroupper
191 ; AVX2-LABEL: trunc8i32_8i16:
192 ; AVX2: # BB#0: # %entry
193 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
194 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
195 ; AVX2-NEXT: vzeroupper
198 %0 = trunc <8 x i32> %a to <8 x i16>
; trunc8i32_8i8: truncate every lane of an <8 x i32> vector to i8 and store
; the resulting 64-bit <8 x i8> value to memory.
; The store address is undef, which is why the expected code writes through
; an arbitrary register ((%rax)) with a single 64-bit movq/vmovq.
; Expected code is pinned separately for the SSE2, SSSE3, SSE4.1, AVX1 and
; AVX2 configurations.
202 define void @trunc8i32_8i8(<8 x i32> %a) {
203 ; SSE2-LABEL: trunc8i32_8i8:
204 ; SSE2: # BB#0: # %entry
205 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255]
206 ; SSE2-NEXT: pand %xmm2, %xmm1
207 ; SSE2-NEXT: pand %xmm2, %xmm0
208 ; SSE2-NEXT: packuswb %xmm1, %xmm0
209 ; SSE2-NEXT: packuswb %xmm0, %xmm0
210 ; SSE2-NEXT: movq %xmm0, (%rax)
213 ; SSSE3-LABEL: trunc8i32_8i8:
214 ; SSSE3: # BB#0: # %entry
215 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
216 ; SSSE3-NEXT: pshufb %xmm2, %xmm1
217 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
218 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
219 ; SSSE3-NEXT: movq %xmm0, (%rax)
222 ; SSE41-LABEL: trunc8i32_8i8:
223 ; SSE41: # BB#0: # %entry
224 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
225 ; SSE41-NEXT: pshufb %xmm2, %xmm1
226 ; SSE41-NEXT: pshufb %xmm2, %xmm0
227 ; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
228 ; SSE41-NEXT: movq %xmm0, (%rax)
231 ; AVX1-LABEL: trunc8i32_8i8:
232 ; AVX1: # BB#0: # %entry
233 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
234 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
235 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
236 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
237 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
238 ; AVX1-NEXT: vmovq %xmm0, (%rax)
239 ; AVX1-NEXT: vzeroupper
242 ; AVX2-LABEL: trunc8i32_8i8:
243 ; AVX2: # BB#0: # %entry
244 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
245 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
246 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
247 ; AVX2-NEXT: vmovq %xmm0, (%rax)
248 ; AVX2-NEXT: vzeroupper
251 %0 = trunc <8 x i32> %a to <8 x i8>
252 store <8 x i8> %0, <8 x i8>* undef, align 4
; trunc16i32_16i8: truncate every lane of a <16 x i32> vector to i8 and store
; the resulting 128-bit <16 x i8> value to memory.
; The store address is undef and the store is only 4-byte aligned (align 4),
; which matches the unaligned movdqu/vmovdqu through (%rax) in the expected
; output.
; Expected code is pinned separately for the SSE2, SSSE3, SSE4.1, AVX1 and
; AVX2 configurations. The SSE2 and SSSE3 sequences spill all four inputs
; below %rsp and rebuild the result from movd loads and punpcklbw.
256 define void @trunc16i32_16i8(<16 x i32> %a) {
257 ; SSE2-LABEL: trunc16i32_16i8:
258 ; SSE2: # BB#0: # %entry
259 ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp)
260 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
261 ; SSE2-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp)
262 ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
263 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
264 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
265 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
266 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
267 ; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
268 ; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
269 ; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
270 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
271 ; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
272 ; SSE2-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
273 ; SSE2-NEXT: movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
274 ; SSE2-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
275 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3],xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7]
276 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3],xmm7[4],xmm4[4],xmm7[5],xmm4[5],xmm7[6],xmm4[6],xmm7[7],xmm4[7]
277 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3],xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
278 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
279 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
280 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
281 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
282 ; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
283 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
284 ; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
285 ; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
286 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
287 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
288 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
289 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
290 ; SSE2-NEXT: movdqu %xmm0, (%rax)
293 ; SSSE3-LABEL: trunc16i32_16i8:
294 ; SSSE3: # BB#0: # %entry
295 ; SSSE3-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp)
296 ; SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
297 ; SSSE3-NEXT: movdqa %xmm2, -{{[0-9]+}}(%rsp)
298 ; SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
299 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
300 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
301 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
302 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
303 ; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
304 ; SSSE3-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
305 ; SSSE3-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
306 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
307 ; SSSE3-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
308 ; SSSE3-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
309 ; SSSE3-NEXT: movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
310 ; SSSE3-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
311 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3],xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7]
312 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3],xmm7[4],xmm4[4],xmm7[5],xmm4[5],xmm7[6],xmm4[6],xmm7[7],xmm4[7]
313 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3],xmm0[4],xmm7[4],xmm0[5],xmm7[5],xmm0[6],xmm7[6],xmm0[7],xmm7[7]
314 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
315 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
316 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7]
317 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
318 ; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
319 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
320 ; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
321 ; SSSE3-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
322 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
323 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
324 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3],xmm5[4],xmm3[4],xmm5[5],xmm3[5],xmm5[6],xmm3[6],xmm5[7],xmm3[7]
325 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
326 ; SSSE3-NEXT: movdqu %xmm0, (%rax)
329 ; SSE41-LABEL: trunc16i32_16i8:
330 ; SSE41: # BB#0: # %entry
331 ; SSE41-NEXT: pextrb $4, %xmm0, %eax
332 ; SSE41-NEXT: pextrb $8, %xmm0, %ecx
333 ; SSE41-NEXT: pextrb $12, %xmm0, %edx
334 ; SSE41-NEXT: pinsrb $1, %eax, %xmm0
335 ; SSE41-NEXT: pinsrb $2, %ecx, %xmm0
336 ; SSE41-NEXT: pinsrb $3, %edx, %xmm0
337 ; SSE41-NEXT: pextrb $0, %xmm1, %eax
338 ; SSE41-NEXT: pinsrb $4, %eax, %xmm0
339 ; SSE41-NEXT: pextrb $4, %xmm1, %eax
340 ; SSE41-NEXT: pinsrb $5, %eax, %xmm0
341 ; SSE41-NEXT: pextrb $8, %xmm1, %eax
342 ; SSE41-NEXT: pinsrb $6, %eax, %xmm0
343 ; SSE41-NEXT: pextrb $12, %xmm1, %eax
344 ; SSE41-NEXT: pinsrb $7, %eax, %xmm0
345 ; SSE41-NEXT: pextrb $0, %xmm2, %eax
346 ; SSE41-NEXT: pinsrb $8, %eax, %xmm0
347 ; SSE41-NEXT: pextrb $4, %xmm2, %eax
348 ; SSE41-NEXT: pinsrb $9, %eax, %xmm0
349 ; SSE41-NEXT: pextrb $8, %xmm2, %eax
350 ; SSE41-NEXT: pinsrb $10, %eax, %xmm0
351 ; SSE41-NEXT: pextrb $12, %xmm2, %eax
352 ; SSE41-NEXT: pinsrb $11, %eax, %xmm0
353 ; SSE41-NEXT: pextrb $0, %xmm3, %eax
354 ; SSE41-NEXT: pinsrb $12, %eax, %xmm0
355 ; SSE41-NEXT: pextrb $4, %xmm3, %eax
356 ; SSE41-NEXT: pinsrb $13, %eax, %xmm0
357 ; SSE41-NEXT: pextrb $8, %xmm3, %eax
358 ; SSE41-NEXT: pinsrb $14, %eax, %xmm0
359 ; SSE41-NEXT: pextrb $12, %xmm3, %eax
360 ; SSE41-NEXT: pinsrb $15, %eax, %xmm0
361 ; SSE41-NEXT: movdqu %xmm0, (%rax)
364 ; AVX1-LABEL: trunc16i32_16i8:
365 ; AVX1: # BB#0: # %entry
366 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
367 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
368 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
369 ; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
370 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
371 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
372 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
373 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
374 ; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
375 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
376 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
377 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
378 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
379 ; AVX1-NEXT: vmovdqu %xmm0, (%rax)
380 ; AVX1-NEXT: vzeroupper
383 ; AVX2-LABEL: trunc16i32_16i8:
384 ; AVX2: # BB#0: # %entry
385 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
386 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
387 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
388 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
389 ; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
390 ; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
391 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
392 ; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
393 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
394 ; AVX2-NEXT: vmovdqu %xmm0, (%rax)
395 ; AVX2-NEXT: vzeroupper
398 %0 = trunc <16 x i32> %a to <16 x i8>
399 store <16 x i8> %0, <16 x i8>* undef, align 4
; trunc2x4i64_8i32: truncate two <4 x i64> inputs to <4 x i32> each and
; concatenate them into one <8 x i32> value (the shufflevector mask 0..7
; selects all of %0 followed by all of %1).
; Expected code is pinned separately for the SSE2, SSSE3, SSE4.1, AVX1 and
; AVX2 configurations.
403 define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
404 ; SSE2-LABEL: trunc2x4i64_8i32:
405 ; SSE2: # BB#0: # %entry
406 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
407 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
408 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
409 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
410 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
411 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
414 ; SSSE3-LABEL: trunc2x4i64_8i32:
415 ; SSSE3: # BB#0: # %entry
416 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
417 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
418 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
419 ; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
420 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
421 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
424 ; SSE41-LABEL: trunc2x4i64_8i32:
425 ; SSE41: # BB#0: # %entry
426 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
427 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
428 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
429 ; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
430 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
431 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
434 ; AVX1-LABEL: trunc2x4i64_8i32:
435 ; AVX1: # BB#0: # %entry
436 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
437 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
438 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
439 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
440 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
441 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
442 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
443 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
444 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
447 ; AVX2-LABEL: trunc2x4i64_8i32:
448 ; AVX2: # BB#0: # %entry
449 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
450 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
451 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
452 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
455 %0 = trunc <4 x i64> %a to <4 x i32>
456 %1 = trunc <4 x i64> %b to <4 x i32>
457 %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; trunc2x4i64_8i16: truncate two <4 x i64> inputs to <4 x i16> each and
; concatenate them into one <8 x i16> value (the shufflevector mask 0..7
; selects all of %0 followed by all of %1).
; Expected code is pinned separately for the SSE2, SSSE3, SSE4.1, AVX1 and
; AVX2 configurations.
461 define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
462 ; SSE2-LABEL: trunc2x4i64_8i16:
463 ; SSE2: # BB#0: # %entry
464 ; SSE2-NEXT: pextrw $4, %xmm1, %eax
465 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
466 ; SSE2-NEXT: pextrw $4, %xmm0, %ecx
467 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
468 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
469 ; SSE2-NEXT: pextrw $4, %xmm3, %edx
470 ; SSE2-NEXT: movd %edx, %xmm1
471 ; SSE2-NEXT: movd %eax, %xmm3
472 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
473 ; SSE2-NEXT: pextrw $4, %xmm2, %eax
474 ; SSE2-NEXT: movd %eax, %xmm1
475 ; SSE2-NEXT: movd %ecx, %xmm2
476 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
477 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
478 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
481 ; SSSE3-LABEL: trunc2x4i64_8i16:
482 ; SSSE3: # BB#0: # %entry
483 ; SSSE3-NEXT: pextrw $4, %xmm1, %eax
484 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
485 ; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
486 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
487 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
488 ; SSSE3-NEXT: pextrw $4, %xmm3, %edx
489 ; SSSE3-NEXT: movd %edx, %xmm1
490 ; SSSE3-NEXT: movd %eax, %xmm3
491 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
492 ; SSSE3-NEXT: pextrw $4, %xmm2, %eax
493 ; SSSE3-NEXT: movd %eax, %xmm1
494 ; SSSE3-NEXT: movd %ecx, %xmm2
495 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
496 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
497 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
500 ; SSE41-LABEL: trunc2x4i64_8i16:
501 ; SSE41: # BB#0: # %entry
502 ; SSE41-NEXT: pextrw $4, %xmm0, %eax
503 ; SSE41-NEXT: pinsrw $1, %eax, %xmm0
504 ; SSE41-NEXT: movd %xmm1, %eax
505 ; SSE41-NEXT: pinsrw $2, %eax, %xmm0
506 ; SSE41-NEXT: pextrw $4, %xmm1, %eax
507 ; SSE41-NEXT: pinsrw $3, %eax, %xmm0
508 ; SSE41-NEXT: movd %xmm2, %eax
509 ; SSE41-NEXT: pinsrw $4, %eax, %xmm0
510 ; SSE41-NEXT: pextrw $4, %xmm2, %eax
511 ; SSE41-NEXT: pinsrw $5, %eax, %xmm0
512 ; SSE41-NEXT: movd %xmm3, %eax
513 ; SSE41-NEXT: pinsrw $6, %eax, %xmm0
514 ; SSE41-NEXT: pextrw $4, %xmm3, %eax
515 ; SSE41-NEXT: pinsrw $7, %eax, %xmm0
518 ; AVX1-LABEL: trunc2x4i64_8i16:
519 ; AVX1: # BB#0: # %entry
520 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
521 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
522 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
523 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
524 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
525 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
526 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
527 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
528 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
529 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
530 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
531 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
532 ; AVX1-NEXT: vzeroupper
535 ; AVX2-LABEL: trunc2x4i64_8i16:
536 ; AVX2: # BB#0: # %entry
537 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
538 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
539 ; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
540 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
541 ; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
542 ; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
543 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
544 ; AVX2-NEXT: vzeroupper
547 %0 = trunc <4 x i64> %a to <4 x i16>
548 %1 = trunc <4 x i64> %b to <4 x i16>
549 %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; trunc2x2i64_4i32: truncate two <2 x i64> inputs to <2 x i32> each and
; concatenate them into one <4 x i32> value (the shufflevector mask 0..3
; selects both lanes of %0 followed by both lanes of %1).
; Expected code is pinned separately for the SSE2, SSSE3, SSE4.1, AVX1 and
; AVX2 configurations; the AVX2 expectation differs from AVX1 only in using
; vpblendd rather than vpblendw for the final blend.
553 define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
554 ; SSE2-LABEL: trunc2x2i64_4i32:
555 ; SSE2: # BB#0: # %entry
556 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
557 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
558 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
561 ; SSSE3-LABEL: trunc2x2i64_4i32:
562 ; SSSE3: # BB#0: # %entry
563 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
564 ; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
565 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
568 ; SSE41-LABEL: trunc2x2i64_4i32:
569 ; SSE41: # BB#0: # %entry
570 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
571 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
572 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
575 ; AVX1-LABEL: trunc2x2i64_4i32:
576 ; AVX1: # BB#0: # %entry
577 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
578 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
579 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
582 ; AVX2-LABEL: trunc2x2i64_4i32:
583 ; AVX2: # BB#0: # %entry
584 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
585 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
586 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
589 %0 = trunc <2 x i64> %a to <2 x i32>
590 %1 = trunc <2 x i64> %b to <2 x i32>
591 %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; trunc2i64_i64: truncate a <2 x i64> vector to <2 x i32> and reinterpret the
; 64-bit result as a scalar i64 via bitcast.
; All SSE runs share one expected sequence and all AVX runs share another:
; a single pshufd/vpshufd followed by a move of the low 64 bits into %rax.
595 define i64 @trunc2i64_i64(<2 x i64> %inval) {
596 ; SSE-LABEL: trunc2i64_i64:
597 ; SSE: # BB#0: # %entry
598 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
599 ; SSE-NEXT: movd %xmm0, %rax
602 ; AVX-LABEL: trunc2i64_i64:
603 ; AVX: # BB#0: # %entry
604 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
605 ; AVX-NEXT: vmovq %xmm0, %rax
608 %0 = trunc <2 x i64> %inval to <2 x i32>
609 %1 = bitcast <2 x i32> %0 to i64
; trunc2x4i32_8i16: truncate two <4 x i32> inputs to <4 x i16> each and
; concatenate them into one <8 x i16> value (the shufflevector mask 0..7
; selects all of %0 followed by all of %1).
; Expected code is pinned separately for SSE2, SSSE3 and SSE4.1; the AVX1 and
; AVX2 runs share a single expected sequence.
613 define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
614 ; SSE2-LABEL: trunc2x4i32_8i16:
615 ; SSE2: # BB#0: # %entry
616 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
617 ; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
618 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
619 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
620 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
621 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
622 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
625 ; SSSE3-LABEL: trunc2x4i32_8i16:
626 ; SSSE3: # BB#0: # %entry
627 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
628 ; SSSE3-NEXT: pshufb %xmm2, %xmm1
629 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
630 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
633 ; SSE41-LABEL: trunc2x4i32_8i16:
634 ; SSE41: # BB#0: # %entry
635 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
636 ; SSE41-NEXT: pshufb %xmm2, %xmm1
637 ; SSE41-NEXT: pshufb %xmm2, %xmm0
638 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
641 ; AVX-LABEL: trunc2x4i32_8i16:
642 ; AVX: # BB#0: # %entry
643 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
644 ; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
645 ; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
646 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
649 %0 = trunc <4 x i32> %a to <4 x i16>
650 %1 = trunc <4 x i32> %b to <4 x i16>
651 %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
655 ; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
; trunc4i32_i64: truncate a <4 x i32> vector to <4 x i16> and reinterpret the
; 64-bit result as a scalar i64 via bitcast.
; Expected code is pinned separately for SSE2, SSSE3 and SSE4.1; the AVX1 and
; AVX2 runs share a single expected sequence.
656 define i64 @trunc4i32_i64(<4 x i32> %inval) {
657 ; SSE2-LABEL: trunc4i32_i64:
658 ; SSE2: # BB#0: # %entry
659 ; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
660 ; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
661 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
662 ; SSE2-NEXT: movd %xmm0, %rax
665 ; SSSE3-LABEL: trunc4i32_i64:
666 ; SSSE3: # BB#0: # %entry
667 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
668 ; SSSE3-NEXT: movd %xmm0, %rax
671 ; SSE41-LABEL: trunc4i32_i64:
672 ; SSE41: # BB#0: # %entry
673 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
674 ; SSE41-NEXT: movd %xmm0, %rax
677 ; AVX-LABEL: trunc4i32_i64:
678 ; AVX: # BB#0: # %entry
679 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
680 ; AVX-NEXT: vmovq %xmm0, %rax
683 %0 = trunc <4 x i32> %inval to <4 x i16>
684 %1 = bitcast <4 x i16> %0 to i64
; trunc2x8i16_16i8: truncate two <8 x i16> inputs to <8 x i8> each and
; concatenate them into one <16 x i8> value (the shufflevector mask 0..15
; selects all of %0 followed by all of %1).
; Expected code is pinned separately for SSE2, SSSE3 and SSE4.1; the AVX1 and
; AVX2 runs share a single expected sequence. The SSE2 expectation masks each
; lane with 255 and packs with packuswb instead of using pshufb.
688 define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
689 ; SSE2-LABEL: trunc2x8i16_16i8:
690 ; SSE2: # BB#0: # %entry
691 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
692 ; SSE2-NEXT: pand %xmm2, %xmm1
693 ; SSE2-NEXT: pand %xmm2, %xmm0
694 ; SSE2-NEXT: packuswb %xmm1, %xmm0
697 ; SSSE3-LABEL: trunc2x8i16_16i8:
698 ; SSSE3: # BB#0: # %entry
699 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
700 ; SSSE3-NEXT: pshufb %xmm2, %xmm1
701 ; SSSE3-NEXT: pshufb %xmm2, %xmm0
702 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
705 ; SSE41-LABEL: trunc2x8i16_16i8:
706 ; SSE41: # BB#0: # %entry
707 ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
708 ; SSE41-NEXT: pshufb %xmm2, %xmm1
709 ; SSE41-NEXT: pshufb %xmm2, %xmm0
710 ; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
713 ; AVX-LABEL: trunc2x8i16_16i8:
714 ; AVX: # BB#0: # %entry
715 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
716 ; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
717 ; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
718 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
721 %0 = trunc <8 x i16> %a to <8 x i8>
722 %1 = trunc <8 x i16> %b to <8 x i8>
723 %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
727 ; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
; trunc8i16_i64: truncate an <8 x i16> vector to <8 x i8> and reinterpret the
; 64-bit result as a scalar i64 via bitcast.
; Expected code is pinned separately for SSE2, SSSE3 and SSE4.1; the AVX1 and
; AVX2 runs share a single expected sequence. The SSE2 expectation uses a
; RIP-relative constant mask (pand) plus packuswb instead of pshufb.
728 define i64 @trunc8i16_i64(<8 x i16> %inval) {
729 ; SSE2-LABEL: trunc8i16_i64:
730 ; SSE2: # BB#0: # %entry
731 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
732 ; SSE2-NEXT: packuswb %xmm0, %xmm0
733 ; SSE2-NEXT: movd %xmm0, %rax
736 ; SSSE3-LABEL: trunc8i16_i64:
737 ; SSSE3: # BB#0: # %entry
738 ; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
739 ; SSSE3-NEXT: movd %xmm0, %rax
742 ; SSE41-LABEL: trunc8i16_i64:
743 ; SSE41: # BB#0: # %entry
744 ; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
745 ; SSE41-NEXT: movd %xmm0, %rax
748 ; AVX-LABEL: trunc8i16_i64:
749 ; AVX: # BB#0: # %entry
750 ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
751 ; AVX-NEXT: vmovq %xmm0, %rax
754 %0 = trunc <8 x i16> %inval to <8 x i8>
755 %1 = bitcast <8 x i8> %0 to i64
; trunc16i64_16i8_const: truncate a constant all-zero <16 x i64> vector to
; <16 x i8>, then permute the result with a shufflevector whose mask contains
; one undef lane.
; Every selected lane comes from a zero vector, so the expected code is just
; a register-zeroing xorps (SSE runs) or vxorps (AVX runs).
759 define <16 x i8> @trunc16i64_16i8_const() {
760 ; SSE-LABEL: trunc16i64_16i8_const:
761 ; SSE: # BB#0: # %entry
762 ; SSE-NEXT: xorps %xmm0, %xmm0
765 ; AVX-LABEL: trunc16i64_16i8_const:
766 ; AVX: # BB#0: # %entry
767 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
771 %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
772 %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>