; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
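
; Tests of the shuffle sequences selected for vector truncates, and for pairs
; of truncates that are concatenated by a shufflevector, at each of the
; subtarget feature levels exercised by the RUN lines above.
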
define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i64_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i64_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i32>
  ret <8 x i32> %0
}

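; <8 x i64> to <8 x i16>: the SSE lowerings below round-trip elements through
; GPRs (pextrw/movd/pinsrw), while the AVX lowerings stay in vector shuffles.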
define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
; SSE2-LABEL: trunc8i64_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pextrw $4, %xmm1, %eax
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT: pextrw $4, %xmm0, %ecx
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: pextrw $4, %xmm3, %edx
; SSE2-NEXT: movd %edx, %xmm1
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT: pextrw $4, %xmm2, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i64_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pextrw $4, %xmm1, %eax
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: pextrw $4, %xmm3, %edx
; SSSE3-NEXT: movd %edx, %xmm1
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT: pextrw $4, %xmm2, %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i64_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pextrw $4, %xmm0, %eax
; SSE41-NEXT: pinsrw $1, %eax, %xmm0
; SSE41-NEXT: movd %xmm1, %eax
; SSE41-NEXT: pinsrw $2, %eax, %xmm0
; SSE41-NEXT: pextrw $4, %xmm1, %eax
; SSE41-NEXT: pinsrw $3, %eax, %xmm0
; SSE41-NEXT: movd %xmm2, %eax
; SSE41-NEXT: pinsrw $4, %eax, %xmm0
; SSE41-NEXT: pextrw $4, %xmm2, %eax
; SSE41-NEXT: pinsrw $5, %eax, %xmm0
; SSE41-NEXT: movd %xmm3, %eax
; SSE41-NEXT: pinsrw $6, %eax, %xmm0
; SSE41-NEXT: pextrw $4, %xmm3, %eax
; SSE41-NEXT: pinsrw $7, %eax, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i64_8i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i64_8i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
entry:
  %0 = trunc <8 x i64> %a to <8 x i16>
  ret <8 x i16> %0
}

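; <8 x i32> to <8 x i16>: from SSSE3 onwards a single shared pshufb mask packs
; each 128-bit half, and AVX2 handles both halves with one ymm vpshufb.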
define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
; SSE2-LABEL: trunc8i32_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i32_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i32_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc8i32_8i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc8i32_8i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
entry:
  %0 = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %0
}

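; Two <4 x i64> to <4 x i32> truncates concatenated by a shufflevector; this is
; expected to lower just like the single <8 x i64> to <8 x i32> truncate above.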
define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i64_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i64_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x4i64_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i32>
  %1 = trunc <4 x i64> %b to <4 x i32>
  %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}

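; The <4 x i16> variant of the concatenation pattern; compare with the
; trunc8i64_8i16 lowerings above.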
define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: trunc2x4i64_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pextrw $4, %xmm1, %eax
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-NEXT: pextrw $4, %xmm0, %ecx
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT: pextrw $4, %xmm3, %edx
; SSE2-NEXT: movd %edx, %xmm1
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-NEXT: pextrw $4, %xmm2, %eax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movd %ecx, %xmm2
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i64_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pextrw $4, %xmm1, %eax
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSSE3-NEXT: pextrw $4, %xmm0, %ecx
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT: pextrw $4, %xmm3, %edx
; SSSE3-NEXT: movd %edx, %xmm1
; SSSE3-NEXT: movd %eax, %xmm3
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSSE3-NEXT: pextrw $4, %xmm2, %eax
; SSSE3-NEXT: movd %eax, %xmm1
; SSSE3-NEXT: movd %ecx, %xmm2
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i64_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pextrw $4, %xmm0, %eax
; SSE41-NEXT: pinsrw $1, %eax, %xmm0
; SSE41-NEXT: movd %xmm1, %eax
; SSE41-NEXT: pinsrw $2, %eax, %xmm0
; SSE41-NEXT: pextrw $4, %xmm1, %eax
; SSE41-NEXT: pinsrw $3, %eax, %xmm0
; SSE41-NEXT: movd %xmm2, %eax
; SSE41-NEXT: pinsrw $4, %eax, %xmm0
; SSE41-NEXT: pextrw $4, %xmm2, %eax
; SSE41-NEXT: pinsrw $5, %eax, %xmm0
; SSE41-NEXT: movd %xmm3, %eax
; SSE41-NEXT: pinsrw $6, %eax, %xmm0
; SSE41-NEXT: pextrw $4, %xmm3, %eax
; SSE41-NEXT: pinsrw $7, %eax, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x4i64_8i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x4i64_8i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,2,4,6,u,u,u,u>
; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
entry:
  %0 = trunc <4 x i64> %a to <4 x i16>
  %1 = trunc <4 x i64> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

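; A purely 128-bit case: two <2 x i64> to <2 x i32> truncates merged into a
; <4 x i32> result. Note that AVX2 prefers vpblendd over vpblendw here.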
define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
; SSE2-LABEL: trunc2x2i64_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x2i64_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x2i64_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: trunc2x2i64_4i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc2x2i64_4i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX2-NEXT: retq
entry:
  %0 = trunc <2 x i64> %a to <2 x i32>
  %1 = trunc <2 x i64> %b to <2 x i32>
  %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %2
}

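; Truncate <2 x i64> to <2 x i32> and bitcast the pair to i64: one pshufd plus
; a move to %rax.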
define i64 @trunc2i64_i64(<2 x i64> %inval) {
; SSE-LABEL: trunc2i64_i64:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: trunc2i64_i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
entry:
  %0 = trunc <2 x i64> %inval to <2 x i32>
  %1 = bitcast <2 x i32> %0 to i64
  ret i64 %1
}

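; Two <4 x i32> to <4 x i16> truncates merged into an <8 x i16> result.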
define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: trunc2x4i32_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x4i32_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x4i32_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc2x4i32_8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
entry:
  %0 = trunc <4 x i32> %a to <4 x i16>
  %1 = trunc <4 x i32> %b to <4 x i16>
  %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

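; Truncate <4 x i32> to <4 x i16>, returned as an i64 bitcast: SSSE3 and later
; need only one pshufb before the GPR move.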
; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc4i32_i64(<4 x i32> %inval) {
; SSE2-LABEL: trunc4i32_i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: movd %xmm0, %rax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc4i32_i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSSE3-NEXT: movd %xmm0, %rax
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc4i32_i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; SSE41-NEXT: movd %xmm0, %rax
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc4i32_i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
entry:
  %0 = trunc <4 x i32> %inval to <4 x i16>
  %1 = bitcast <4 x i16> %0 to i64
  ret i64 %1
}

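; Two <8 x i16> to <8 x i8> truncates merged into a <16 x i8> result: SSE2
; masks and packs with packuswb, later subtargets share a single pshufb mask.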
define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: trunc2x8i16_16i8:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: packuswb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc2x8i16_16i8:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm0
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc2x8i16_16i8:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm2, %xmm1
; SSE41-NEXT: pshufb %xmm2, %xmm0
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc2x8i16_16i8:
; AVX: # BB#0: # %entry
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
entry:
  %0 = trunc <8 x i16> %a to <8 x i8>
  %1 = trunc <8 x i16> %b to <8 x i8>
  %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %2
}

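; Truncate <8 x i16> to <8 x i8>, returned as an i64 bitcast: again
; pand+packuswb on SSE2 or a single pshufb elsewhere.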
; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
define i64 @trunc8i16_i64(<8 x i16> %inval) {
; SSE2-LABEL: trunc8i16_i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: movd %xmm0, %rax
; SSE2-NEXT: retq
;
; SSSE3-LABEL: trunc8i16_i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movd %xmm0, %rax
; SSSE3-NEXT: retq
;
; SSE41-LABEL: trunc8i16_i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: movd %xmm0, %rax
; SSE41-NEXT: retq
;
; AVX-LABEL: trunc8i16_i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: retq
entry:
  %0 = trunc <8 x i16> %inval to <8 x i8>
  %1 = bitcast <8 x i8> %0 to i64
  ret i64 %1
}

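; A truncate of zeroinitializer should fold to an all-zeros register.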
define <16 x i8> @trunc16i64_16i8_const() {
; SSE-LABEL: trunc16i64_16i8_const:
; SSE: # BB#0: # %entry
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: trunc16i64_16i8_const:
; AVX: # BB#0: # %entry
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %0 = trunc <16 x i64> zeroinitializer to <16 x i8>
  %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 undef, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26>
  ret <16 x i8> %1
}