1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -mattr=+popcnt | FileCheck --check-prefix=AVX2 --check-prefix=AVX2-POPCNT %s
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -mattr=-popcnt | FileCheck --check-prefix=AVX2 --check-prefix=AVX2-NOPOPCNT %s
4 ; When avx2 is enabled, we should ideally generate the same code regardless of
5 ; popcnt availability; so far only checks on the shared AVX2 prefix (e.g. testv8i32) do.
; Population count of every byte lane of a 256-bit vector (<32 x i8>).
; With +popcnt the checks expect per-element scalarization: each byte is
; extracted (vpextrb), counted with popcntw and re-inserted (vpinsrb), one
; 128-bit half at a time; the halves are recombined with vinserti128.
; With -popcnt the checks expect the same extract/insert scaffold, but each
; byte is counted with the scalar shift/mask/add sequence (masks 85, 51, 15).
; The block label check after each LABEL line is required so that the first
; NEXT check is anchored to the line directly following it.
7 define <32 x i8> @testv32i8(<32 x i8> %in) {
8 ; AVX2-POPCNT-LABEL: testv32i8:
9 ; AVX2-POPCNT: # BB#0:
10 ; AVX2-POPCNT-NEXT: vextracti128 $1, %ymm0, %xmm1
11 ; AVX2-POPCNT-NEXT: vpextrb $1, %xmm1, %eax
12 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
13 ; AVX2-POPCNT-NEXT: vpextrb $0, %xmm1, %ecx
14 ; AVX2-POPCNT-NEXT: popcntw %cx, %cx
15 ; AVX2-POPCNT-NEXT: vmovd %ecx, %xmm2
16 ; AVX2-POPCNT-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
17 ; AVX2-POPCNT-NEXT: vpextrb $2, %xmm1, %eax
18 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
19 ; AVX2-POPCNT-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
20 ; AVX2-POPCNT-NEXT: vpextrb $3, %xmm1, %eax
21 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
22 ; AVX2-POPCNT-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
23 ; AVX2-POPCNT-NEXT: vpextrb $4, %xmm1, %eax
24 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
25 ; AVX2-POPCNT-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
26 ; AVX2-POPCNT-NEXT: vpextrb $5, %xmm1, %eax
27 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
28 ; AVX2-POPCNT-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
29 ; AVX2-POPCNT-NEXT: vpextrb $6, %xmm1, %eax
30 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
31 ; AVX2-POPCNT-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
32 ; AVX2-POPCNT-NEXT: vpextrb $7, %xmm1, %eax
33 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
34 ; AVX2-POPCNT-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
35 ; AVX2-POPCNT-NEXT: vpextrb $8, %xmm1, %eax
36 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
37 ; AVX2-POPCNT-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
38 ; AVX2-POPCNT-NEXT: vpextrb $9, %xmm1, %eax
39 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
40 ; AVX2-POPCNT-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
41 ; AVX2-POPCNT-NEXT: vpextrb $10, %xmm1, %eax
42 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
43 ; AVX2-POPCNT-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
44 ; AVX2-POPCNT-NEXT: vpextrb $11, %xmm1, %eax
45 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
46 ; AVX2-POPCNT-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
47 ; AVX2-POPCNT-NEXT: vpextrb $12, %xmm1, %eax
48 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
49 ; AVX2-POPCNT-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
50 ; AVX2-POPCNT-NEXT: vpextrb $13, %xmm1, %eax
51 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
52 ; AVX2-POPCNT-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
53 ; AVX2-POPCNT-NEXT: vpextrb $14, %xmm1, %eax
54 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
55 ; AVX2-POPCNT-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
56 ; AVX2-POPCNT-NEXT: vpextrb $15, %xmm1, %eax
57 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
58 ; AVX2-POPCNT-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
59 ; AVX2-POPCNT-NEXT: vpextrb $1, %xmm0, %eax
60 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
61 ; AVX2-POPCNT-NEXT: vpextrb $0, %xmm0, %ecx
62 ; AVX2-POPCNT-NEXT: popcntw %cx, %cx
63 ; AVX2-POPCNT-NEXT: vmovd %ecx, %xmm2
64 ; AVX2-POPCNT-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
65 ; AVX2-POPCNT-NEXT: vpextrb $2, %xmm0, %eax
66 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
67 ; AVX2-POPCNT-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
68 ; AVX2-POPCNT-NEXT: vpextrb $3, %xmm0, %eax
69 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
70 ; AVX2-POPCNT-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
71 ; AVX2-POPCNT-NEXT: vpextrb $4, %xmm0, %eax
72 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
73 ; AVX2-POPCNT-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
74 ; AVX2-POPCNT-NEXT: vpextrb $5, %xmm0, %eax
75 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
76 ; AVX2-POPCNT-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
77 ; AVX2-POPCNT-NEXT: vpextrb $6, %xmm0, %eax
78 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
79 ; AVX2-POPCNT-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
80 ; AVX2-POPCNT-NEXT: vpextrb $7, %xmm0, %eax
81 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
82 ; AVX2-POPCNT-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
83 ; AVX2-POPCNT-NEXT: vpextrb $8, %xmm0, %eax
84 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
85 ; AVX2-POPCNT-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
86 ; AVX2-POPCNT-NEXT: vpextrb $9, %xmm0, %eax
87 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
88 ; AVX2-POPCNT-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
89 ; AVX2-POPCNT-NEXT: vpextrb $10, %xmm0, %eax
90 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
91 ; AVX2-POPCNT-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
92 ; AVX2-POPCNT-NEXT: vpextrb $11, %xmm0, %eax
93 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
94 ; AVX2-POPCNT-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
95 ; AVX2-POPCNT-NEXT: vpextrb $12, %xmm0, %eax
96 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
97 ; AVX2-POPCNT-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
98 ; AVX2-POPCNT-NEXT: vpextrb $13, %xmm0, %eax
99 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
100 ; AVX2-POPCNT-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
101 ; AVX2-POPCNT-NEXT: vpextrb $14, %xmm0, %eax
102 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
103 ; AVX2-POPCNT-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
104 ; AVX2-POPCNT-NEXT: vpextrb $15, %xmm0, %eax
105 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
106 ; AVX2-POPCNT-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
107 ; AVX2-POPCNT-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
108 ; AVX2-POPCNT-NEXT: retq
110 ; AVX2-NOPOPCNT-LABEL: testv32i8:
111 ; AVX2-NOPOPCNT: # BB#0:
112 ; AVX2-NOPOPCNT-NEXT: vextracti128 $1, %ymm0, %xmm1
113 ; AVX2-NOPOPCNT-NEXT: vpextrb $1, %xmm1, %eax
114 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
115 ; AVX2-NOPOPCNT-NEXT: shrb %cl
116 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
117 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
118 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
119 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
120 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
121 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
122 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
123 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
124 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
125 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
126 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
127 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
128 ; AVX2-NOPOPCNT-NEXT: vpextrb $0, %xmm1, %ecx
129 ; AVX2-NOPOPCNT-NEXT: movb %cl, %dl
130 ; AVX2-NOPOPCNT-NEXT: shrb %dl
131 ; AVX2-NOPOPCNT-NEXT: andb $85, %dl
132 ; AVX2-NOPOPCNT-NEXT: subb %dl, %cl
133 ; AVX2-NOPOPCNT-NEXT: movb %cl, %dl
134 ; AVX2-NOPOPCNT-NEXT: andb $51, %dl
135 ; AVX2-NOPOPCNT-NEXT: shrb $2, %cl
136 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
137 ; AVX2-NOPOPCNT-NEXT: addb %dl, %cl
138 ; AVX2-NOPOPCNT-NEXT: movb %cl, %dl
139 ; AVX2-NOPOPCNT-NEXT: shrb $4, %dl
140 ; AVX2-NOPOPCNT-NEXT: addb %cl, %dl
141 ; AVX2-NOPOPCNT-NEXT: andb $15, %dl
142 ; AVX2-NOPOPCNT-NEXT: movzbl %dl, %ecx
143 ; AVX2-NOPOPCNT-NEXT: vmovd %ecx, %xmm2
144 ; AVX2-NOPOPCNT-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
145 ; AVX2-NOPOPCNT-NEXT: vpextrb $2, %xmm1, %eax
146 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
147 ; AVX2-NOPOPCNT-NEXT: shrb %cl
148 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
149 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
150 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
151 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
152 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
153 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
154 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
155 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
156 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
157 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
158 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
159 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
160 ; AVX2-NOPOPCNT-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
161 ; AVX2-NOPOPCNT-NEXT: vpextrb $3, %xmm1, %eax
162 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
163 ; AVX2-NOPOPCNT-NEXT: shrb %cl
164 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
165 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
166 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
167 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
168 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
169 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
170 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
171 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
172 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
173 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
174 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
175 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
176 ; AVX2-NOPOPCNT-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
177 ; AVX2-NOPOPCNT-NEXT: vpextrb $4, %xmm1, %eax
178 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
179 ; AVX2-NOPOPCNT-NEXT: shrb %cl
180 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
181 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
182 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
183 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
184 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
185 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
186 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
187 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
188 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
189 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
190 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
191 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
192 ; AVX2-NOPOPCNT-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
193 ; AVX2-NOPOPCNT-NEXT: vpextrb $5, %xmm1, %eax
194 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
195 ; AVX2-NOPOPCNT-NEXT: shrb %cl
196 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
197 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
198 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
199 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
200 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
201 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
202 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
203 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
204 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
205 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
206 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
207 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
208 ; AVX2-NOPOPCNT-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
209 ; AVX2-NOPOPCNT-NEXT: vpextrb $6, %xmm1, %eax
210 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
211 ; AVX2-NOPOPCNT-NEXT: shrb %cl
212 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
213 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
214 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
215 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
216 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
217 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
218 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
219 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
220 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
221 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
222 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
223 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
224 ; AVX2-NOPOPCNT-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
225 ; AVX2-NOPOPCNT-NEXT: vpextrb $7, %xmm1, %eax
226 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
227 ; AVX2-NOPOPCNT-NEXT: shrb %cl
228 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
229 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
230 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
231 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
232 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
233 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
234 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
235 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
236 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
237 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
238 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
239 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
240 ; AVX2-NOPOPCNT-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
241 ; AVX2-NOPOPCNT-NEXT: vpextrb $8, %xmm1, %eax
242 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
243 ; AVX2-NOPOPCNT-NEXT: shrb %cl
244 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
245 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
246 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
247 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
248 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
249 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
250 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
251 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
252 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
253 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
254 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
255 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
256 ; AVX2-NOPOPCNT-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
257 ; AVX2-NOPOPCNT-NEXT: vpextrb $9, %xmm1, %eax
258 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
259 ; AVX2-NOPOPCNT-NEXT: shrb %cl
260 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
261 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
262 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
263 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
264 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
265 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
266 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
267 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
268 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
269 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
270 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
271 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
272 ; AVX2-NOPOPCNT-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
273 ; AVX2-NOPOPCNT-NEXT: vpextrb $10, %xmm1, %eax
274 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
275 ; AVX2-NOPOPCNT-NEXT: shrb %cl
276 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
277 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
278 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
279 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
280 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
281 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
282 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
283 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
284 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
285 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
286 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
287 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
288 ; AVX2-NOPOPCNT-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
289 ; AVX2-NOPOPCNT-NEXT: vpextrb $11, %xmm1, %eax
290 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
291 ; AVX2-NOPOPCNT-NEXT: shrb %cl
292 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
293 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
294 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
295 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
296 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
297 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
298 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
299 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
300 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
301 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
302 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
303 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
304 ; AVX2-NOPOPCNT-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
305 ; AVX2-NOPOPCNT-NEXT: vpextrb $12, %xmm1, %eax
306 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
307 ; AVX2-NOPOPCNT-NEXT: shrb %cl
308 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
309 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
310 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
311 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
312 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
313 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
314 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
315 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
316 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
317 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
318 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
319 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
320 ; AVX2-NOPOPCNT-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
321 ; AVX2-NOPOPCNT-NEXT: vpextrb $13, %xmm1, %eax
322 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
323 ; AVX2-NOPOPCNT-NEXT: shrb %cl
324 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
325 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
326 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
327 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
328 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
329 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
330 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
331 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
332 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
333 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
334 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
335 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
336 ; AVX2-NOPOPCNT-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
337 ; AVX2-NOPOPCNT-NEXT: vpextrb $14, %xmm1, %eax
338 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
339 ; AVX2-NOPOPCNT-NEXT: shrb %cl
340 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
341 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
342 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
343 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
344 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
345 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
346 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
347 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
348 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
349 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
350 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
351 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
352 ; AVX2-NOPOPCNT-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
353 ; AVX2-NOPOPCNT-NEXT: vpextrb $15, %xmm1, %eax
354 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
355 ; AVX2-NOPOPCNT-NEXT: shrb %cl
356 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
357 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
358 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
359 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
360 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
361 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
362 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
363 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
364 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
365 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
366 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
367 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
368 ; AVX2-NOPOPCNT-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
369 ; AVX2-NOPOPCNT-NEXT: vpextrb $1, %xmm0, %eax
370 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
371 ; AVX2-NOPOPCNT-NEXT: shrb %cl
372 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
373 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
374 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
375 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
376 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
377 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
378 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
379 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
380 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
381 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
382 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
383 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
384 ; AVX2-NOPOPCNT-NEXT: vpextrb $0, %xmm0, %ecx
385 ; AVX2-NOPOPCNT-NEXT: movb %cl, %dl
386 ; AVX2-NOPOPCNT-NEXT: shrb %dl
387 ; AVX2-NOPOPCNT-NEXT: andb $85, %dl
388 ; AVX2-NOPOPCNT-NEXT: subb %dl, %cl
389 ; AVX2-NOPOPCNT-NEXT: movb %cl, %dl
390 ; AVX2-NOPOPCNT-NEXT: andb $51, %dl
391 ; AVX2-NOPOPCNT-NEXT: shrb $2, %cl
392 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
393 ; AVX2-NOPOPCNT-NEXT: addb %dl, %cl
394 ; AVX2-NOPOPCNT-NEXT: movb %cl, %dl
395 ; AVX2-NOPOPCNT-NEXT: shrb $4, %dl
396 ; AVX2-NOPOPCNT-NEXT: addb %cl, %dl
397 ; AVX2-NOPOPCNT-NEXT: andb $15, %dl
398 ; AVX2-NOPOPCNT-NEXT: movzbl %dl, %ecx
399 ; AVX2-NOPOPCNT-NEXT: vmovd %ecx, %xmm2
400 ; AVX2-NOPOPCNT-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
401 ; AVX2-NOPOPCNT-NEXT: vpextrb $2, %xmm0, %eax
402 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
403 ; AVX2-NOPOPCNT-NEXT: shrb %cl
404 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
405 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
406 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
407 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
408 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
409 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
410 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
411 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
412 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
413 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
414 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
415 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
416 ; AVX2-NOPOPCNT-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
417 ; AVX2-NOPOPCNT-NEXT: vpextrb $3, %xmm0, %eax
418 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
419 ; AVX2-NOPOPCNT-NEXT: shrb %cl
420 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
421 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
422 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
423 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
424 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
425 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
426 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
427 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
428 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
429 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
430 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
431 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
432 ; AVX2-NOPOPCNT-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
433 ; AVX2-NOPOPCNT-NEXT: vpextrb $4, %xmm0, %eax
434 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
435 ; AVX2-NOPOPCNT-NEXT: shrb %cl
436 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
437 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
438 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
439 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
440 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
441 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
442 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
443 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
444 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
445 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
446 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
447 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
448 ; AVX2-NOPOPCNT-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
449 ; AVX2-NOPOPCNT-NEXT: vpextrb $5, %xmm0, %eax
450 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
451 ; AVX2-NOPOPCNT-NEXT: shrb %cl
452 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
453 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
454 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
455 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
456 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
457 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
458 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
459 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
460 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
461 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
462 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
463 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
464 ; AVX2-NOPOPCNT-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
465 ; AVX2-NOPOPCNT-NEXT: vpextrb $6, %xmm0, %eax
466 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
467 ; AVX2-NOPOPCNT-NEXT: shrb %cl
468 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
469 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
470 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
471 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
472 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
473 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
474 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
475 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
476 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
477 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
478 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
479 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
480 ; AVX2-NOPOPCNT-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
481 ; AVX2-NOPOPCNT-NEXT: vpextrb $7, %xmm0, %eax
482 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
483 ; AVX2-NOPOPCNT-NEXT: shrb %cl
484 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
485 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
486 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
487 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
488 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
489 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
490 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
491 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
492 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
493 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
494 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
495 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
496 ; AVX2-NOPOPCNT-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
497 ; AVX2-NOPOPCNT-NEXT: vpextrb $8, %xmm0, %eax
498 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
499 ; AVX2-NOPOPCNT-NEXT: shrb %cl
500 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
501 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
502 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
503 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
504 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
505 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
506 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
507 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
508 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
509 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
510 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
511 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
512 ; AVX2-NOPOPCNT-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
513 ; AVX2-NOPOPCNT-NEXT: vpextrb $9, %xmm0, %eax
514 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
515 ; AVX2-NOPOPCNT-NEXT: shrb %cl
516 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
517 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
518 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
519 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
520 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
521 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
522 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
523 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
524 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
525 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
526 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
527 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
528 ; AVX2-NOPOPCNT-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
529 ; AVX2-NOPOPCNT-NEXT: vpextrb $10, %xmm0, %eax
530 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
531 ; AVX2-NOPOPCNT-NEXT: shrb %cl
532 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
533 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
534 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
535 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
536 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
537 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
538 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
539 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
540 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
541 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
542 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
543 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
544 ; AVX2-NOPOPCNT-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
545 ; AVX2-NOPOPCNT-NEXT: vpextrb $11, %xmm0, %eax
546 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
547 ; AVX2-NOPOPCNT-NEXT: shrb %cl
548 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
549 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
550 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
551 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
552 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
553 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
554 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
555 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
556 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
557 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
558 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
559 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
560 ; AVX2-NOPOPCNT-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
561 ; AVX2-NOPOPCNT-NEXT: vpextrb $12, %xmm0, %eax
562 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
563 ; AVX2-NOPOPCNT-NEXT: shrb %cl
564 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
565 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
566 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
567 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
568 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
569 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
570 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
571 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
572 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
573 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
574 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
575 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
576 ; AVX2-NOPOPCNT-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
577 ; AVX2-NOPOPCNT-NEXT: vpextrb $13, %xmm0, %eax
578 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
579 ; AVX2-NOPOPCNT-NEXT: shrb %cl
580 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
581 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
582 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
583 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
584 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
585 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
586 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
587 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
588 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
589 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
590 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
591 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
592 ; AVX2-NOPOPCNT-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
593 ; AVX2-NOPOPCNT-NEXT: vpextrb $14, %xmm0, %eax
594 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
595 ; AVX2-NOPOPCNT-NEXT: shrb %cl
596 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
597 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
598 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
599 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
600 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
601 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
602 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
603 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
604 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
605 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
606 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
607 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
608 ; AVX2-NOPOPCNT-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
609 ; AVX2-NOPOPCNT-NEXT: vpextrb $15, %xmm0, %eax
610 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
611 ; AVX2-NOPOPCNT-NEXT: shrb %cl
612 ; AVX2-NOPOPCNT-NEXT: andb $85, %cl
613 ; AVX2-NOPOPCNT-NEXT: subb %cl, %al
614 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
615 ; AVX2-NOPOPCNT-NEXT: andb $51, %cl
616 ; AVX2-NOPOPCNT-NEXT: shrb $2, %al
617 ; AVX2-NOPOPCNT-NEXT: andb $51, %al
618 ; AVX2-NOPOPCNT-NEXT: addb %cl, %al
619 ; AVX2-NOPOPCNT-NEXT: movb %al, %cl
620 ; AVX2-NOPOPCNT-NEXT: shrb $4, %cl
621 ; AVX2-NOPOPCNT-NEXT: addb %al, %cl
622 ; AVX2-NOPOPCNT-NEXT: andb $15, %cl
623 ; AVX2-NOPOPCNT-NEXT: movzbl %cl, %eax
624 ; AVX2-NOPOPCNT-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
625 ; AVX2-NOPOPCNT-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
626 ; AVX2-NOPOPCNT-NEXT: retq
627 %out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %in)
628 ret <32 x i8> %out
629 }
; Population count of every 64-bit lane of a 256-bit vector (<4 x i64>).
; With +popcnt the checks expect each lane to be moved to a general-purpose
; register (vpextrq / vmovq), counted with popcntq, and repacked with
; vpunpcklqdq and vinserti128.
; With -popcnt the checks expect a fully vectorized shift/mask/add sequence on
; %ymm registers, using constants broadcast from memory (vpbroadcastq).
; The function body must end with a ret terminator and a closing brace.
631 define <4 x i64> @testv4i64(<4 x i64> %in) {
632 ; AVX2-POPCNT-LABEL: testv4i64:
633 ; AVX2-POPCNT: # BB#0:
634 ; AVX2-POPCNT-NEXT: vextracti128 $1, %ymm0, %xmm1
635 ; AVX2-POPCNT-NEXT: vpextrq $1, %xmm1, %rax
636 ; AVX2-POPCNT-NEXT: popcntq %rax, %rax
637 ; AVX2-POPCNT-NEXT: vmovq %rax, %xmm2
638 ; AVX2-POPCNT-NEXT: vmovq %xmm1, %rax
639 ; AVX2-POPCNT-NEXT: popcntq %rax, %rax
640 ; AVX2-POPCNT-NEXT: vmovq %rax, %xmm1
641 ; AVX2-POPCNT-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
642 ; AVX2-POPCNT-NEXT: vpextrq $1, %xmm0, %rax
643 ; AVX2-POPCNT-NEXT: popcntq %rax, %rax
644 ; AVX2-POPCNT-NEXT: vmovq %rax, %xmm2
645 ; AVX2-POPCNT-NEXT: vmovq %xmm0, %rax
646 ; AVX2-POPCNT-NEXT: popcntq %rax, %rax
647 ; AVX2-POPCNT-NEXT: vmovq %rax, %xmm0
648 ; AVX2-POPCNT-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
649 ; AVX2-POPCNT-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
650 ; AVX2-POPCNT-NEXT: retq
652 ; AVX2-NOPOPCNT-LABEL: testv4i64:
653 ; AVX2-NOPOPCNT: # BB#0:
654 ; AVX2-NOPOPCNT-NEXT: vpsrlq $1, %ymm0, %ymm1
655 ; AVX2-NOPOPCNT-NEXT: vpbroadcastq {{.*}}(%rip), %ymm2
656 ; AVX2-NOPOPCNT-NEXT: vpand %ymm2, %ymm1, %ymm1
657 ; AVX2-NOPOPCNT-NEXT: vpsubq %ymm1, %ymm0, %ymm0
658 ; AVX2-NOPOPCNT-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
659 ; AVX2-NOPOPCNT-NEXT: vpand %ymm1, %ymm0, %ymm2
660 ; AVX2-NOPOPCNT-NEXT: vpsrlq $2, %ymm0, %ymm0
661 ; AVX2-NOPOPCNT-NEXT: vpand %ymm1, %ymm0, %ymm0
662 ; AVX2-NOPOPCNT-NEXT: vpaddq %ymm0, %ymm2, %ymm0
663 ; AVX2-NOPOPCNT-NEXT: vpsrlq $4, %ymm0, %ymm1
664 ; AVX2-NOPOPCNT-NEXT: vpaddq %ymm1, %ymm0, %ymm0
665 ; AVX2-NOPOPCNT-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
666 ; AVX2-NOPOPCNT-NEXT: vpand %ymm1, %ymm0, %ymm0
667 ; AVX2-NOPOPCNT-NEXT: vpsrlq $8, %ymm0, %ymm1
668 ; AVX2-NOPOPCNT-NEXT: vpaddq %ymm1, %ymm0, %ymm0
669 ; AVX2-NOPOPCNT-NEXT: vpsrlq $16, %ymm0, %ymm1
670 ; AVX2-NOPOPCNT-NEXT: vpaddq %ymm1, %ymm0, %ymm0
671 ; AVX2-NOPOPCNT-NEXT: vpsrlq $32, %ymm0, %ymm1
672 ; AVX2-NOPOPCNT-NEXT: vpaddq %ymm1, %ymm0, %ymm0
673 ; AVX2-NOPOPCNT-NEXT: vpbroadcastq {{.*}}(%rip), %ymm1
674 ; AVX2-NOPOPCNT-NEXT: vpand %ymm1, %ymm0, %ymm0
675 ; AVX2-NOPOPCNT-NEXT: retq
676 %out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %in)
677 ret <4 x i64> %out
678 }
; Population count of every 32-bit lane of a 256-bit vector (<8 x i32>).
; Both run configurations share the common AVX2 check prefix here: the checks
; expect the same vectorized shift/mask/add sequence on %ymm registers, with
; constants broadcast from memory (vpbroadcastd), whether or not popcnt is
; available.
; The block label check anchors the first NEXT check to the line directly after
; it, and the final retq check pins the end of the sequence, matching the other
; tests in this file.
680 define <8 x i32> @testv8i32(<8 x i32> %in) {
681 ; AVX2-LABEL: testv8i32:
682 ; AVX2: # BB#0:
683 ; AVX2-NEXT: vpsrld $1, %ymm0, %ymm1
684 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
685 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
686 ; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
687 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
688 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm2
689 ; AVX2-NEXT: vpsrld $2, %ymm0, %ymm0
690 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
691 ; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
692 ; AVX2-NEXT: vpsrld $4, %ymm0, %ymm1
693 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
694 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
695 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
696 ; AVX2-NEXT: vpsrld $8, %ymm0, %ymm1
697 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
698 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm1
699 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
700 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
701 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
702 ; AVX2-NEXT: retq
703 %out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %in)
704 ret <8 x i32> %out
705 }
707 define <16 x i16> @testv16i16(<16 x i16> %in) {
708 ; AVX2-POPCNT-LABEL: testv16i16:
709 ; AVX2-POPCNT: # BB#0:
710 ; AVX2-POPCNT-NEXT: vextracti128 $1, %ymm0, %xmm1
711 ; AVX2-POPCNT-NEXT: vpextrw $1, %xmm1, %eax
712 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
713 ; AVX2-POPCNT-NEXT: vmovd %xmm1, %ecx
714 ; AVX2-POPCNT-NEXT: popcntw %cx, %cx
715 ; AVX2-POPCNT-NEXT: vmovd %ecx, %xmm2
716 ; AVX2-POPCNT-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
717 ; AVX2-POPCNT-NEXT: vpextrw $2, %xmm1, %eax
718 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
719 ; AVX2-POPCNT-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
720 ; AVX2-POPCNT-NEXT: vpextrw $3, %xmm1, %eax
721 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
722 ; AVX2-POPCNT-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
723 ; AVX2-POPCNT-NEXT: vpextrw $4, %xmm1, %eax
724 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
725 ; AVX2-POPCNT-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
726 ; AVX2-POPCNT-NEXT: vpextrw $5, %xmm1, %eax
727 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
728 ; AVX2-POPCNT-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
729 ; AVX2-POPCNT-NEXT: vpextrw $6, %xmm1, %eax
730 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
731 ; AVX2-POPCNT-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
732 ; AVX2-POPCNT-NEXT: vpextrw $7, %xmm1, %eax
733 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
734 ; AVX2-POPCNT-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
735 ; AVX2-POPCNT-NEXT: vpextrw $1, %xmm0, %eax
736 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
737 ; AVX2-POPCNT-NEXT: vmovd %xmm0, %ecx
738 ; AVX2-POPCNT-NEXT: popcntw %cx, %cx
739 ; AVX2-POPCNT-NEXT: vmovd %ecx, %xmm2
740 ; AVX2-POPCNT-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
741 ; AVX2-POPCNT-NEXT: vpextrw $2, %xmm0, %eax
742 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
743 ; AVX2-POPCNT-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
744 ; AVX2-POPCNT-NEXT: vpextrw $3, %xmm0, %eax
745 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
746 ; AVX2-POPCNT-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
747 ; AVX2-POPCNT-NEXT: vpextrw $4, %xmm0, %eax
748 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
749 ; AVX2-POPCNT-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
750 ; AVX2-POPCNT-NEXT: vpextrw $5, %xmm0, %eax
751 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
752 ; AVX2-POPCNT-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
753 ; AVX2-POPCNT-NEXT: vpextrw $6, %xmm0, %eax
754 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
755 ; AVX2-POPCNT-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
756 ; AVX2-POPCNT-NEXT: vpextrw $7, %xmm0, %eax
757 ; AVX2-POPCNT-NEXT: popcntw %ax, %ax
758 ; AVX2-POPCNT-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
759 ; AVX2-POPCNT-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
760 ; AVX2-POPCNT-NEXT: retq
762 ; AVX2-NOPOPCNT-LABEL: testv16i16:
763 ; AVX2-NOPOPCNT: # BB#0:
764 ; AVX2-NOPOPCNT-NEXT: vextracti128 $1, %ymm0, %xmm1
765 ; AVX2-NOPOPCNT-NEXT: vpextrw $1, %xmm1, %eax
766 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
767 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
768 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
769 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
770 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
771 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
772 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
773 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
774 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
775 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
776 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
777 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
778 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
779 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
780 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
781 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
782 ; AVX2-NOPOPCNT-NEXT: vmovd %xmm1, %ecx
783 ; AVX2-NOPOPCNT-NEXT: movl %ecx, %edx
784 ; AVX2-NOPOPCNT-NEXT: shrl %edx
785 ; AVX2-NOPOPCNT-NEXT: andl $21845, %edx # imm = 0x5555
786 ; AVX2-NOPOPCNT-NEXT: subl %edx, %ecx
787 ; AVX2-NOPOPCNT-NEXT: movl %ecx, %edx
788 ; AVX2-NOPOPCNT-NEXT: andl $13107, %edx # imm = 0x3333
789 ; AVX2-NOPOPCNT-NEXT: shrl $2, %ecx
790 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
791 ; AVX2-NOPOPCNT-NEXT: addl %edx, %ecx
792 ; AVX2-NOPOPCNT-NEXT: movl %ecx, %edx
793 ; AVX2-NOPOPCNT-NEXT: andl $65520, %edx # imm = 0xFFF0
794 ; AVX2-NOPOPCNT-NEXT: shrl $4, %edx
795 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %edx
796 ; AVX2-NOPOPCNT-NEXT: andl $3855, %edx # imm = 0xF0F
797 ; AVX2-NOPOPCNT-NEXT: imull $257, %edx, %ecx # imm = 0x101
798 ; AVX2-NOPOPCNT-NEXT: movzbl %ch, %ecx # NOREX
799 ; AVX2-NOPOPCNT-NEXT: vmovd %ecx, %xmm2
800 ; AVX2-NOPOPCNT-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
801 ; AVX2-NOPOPCNT-NEXT: vpextrw $2, %xmm1, %eax
802 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
803 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
804 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
805 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
806 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
807 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
808 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
809 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
810 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
811 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
812 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
813 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
814 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
815 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
816 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
817 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
818 ; AVX2-NOPOPCNT-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
819 ; AVX2-NOPOPCNT-NEXT: vpextrw $3, %xmm1, %eax
820 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
821 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
822 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
823 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
824 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
825 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
826 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
827 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
828 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
829 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
830 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
831 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
832 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
833 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
834 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
835 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
836 ; AVX2-NOPOPCNT-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
837 ; AVX2-NOPOPCNT-NEXT: vpextrw $4, %xmm1, %eax
838 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
839 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
840 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
841 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
842 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
843 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
844 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
845 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
846 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
847 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
848 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
849 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
850 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
851 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
852 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
853 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
854 ; AVX2-NOPOPCNT-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
855 ; AVX2-NOPOPCNT-NEXT: vpextrw $5, %xmm1, %eax
856 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
857 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
858 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
859 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
860 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
861 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
862 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
863 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
864 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
865 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
866 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
867 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
868 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
869 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
870 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
871 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
872 ; AVX2-NOPOPCNT-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
873 ; AVX2-NOPOPCNT-NEXT: vpextrw $6, %xmm1, %eax
874 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
875 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
876 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
877 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
878 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
879 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
880 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
881 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
882 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
883 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
884 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
885 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
886 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
887 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
888 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
889 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
890 ; AVX2-NOPOPCNT-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
891 ; AVX2-NOPOPCNT-NEXT: vpextrw $7, %xmm1, %eax
892 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
893 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
894 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
895 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
896 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
897 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
898 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
899 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
900 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
901 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
902 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
903 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
904 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
905 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
906 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
907 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
908 ; AVX2-NOPOPCNT-NEXT: vpinsrw $7, %eax, %xmm2, %xmm1
909 ; AVX2-NOPOPCNT-NEXT: vpextrw $1, %xmm0, %eax
910 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
911 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
912 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
913 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
914 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
915 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
916 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
917 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
918 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
919 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
920 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
921 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
922 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
923 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
924 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
925 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
926 ; AVX2-NOPOPCNT-NEXT: vmovd %xmm0, %ecx
927 ; AVX2-NOPOPCNT-NEXT: movl %ecx, %edx
928 ; AVX2-NOPOPCNT-NEXT: shrl %edx
929 ; AVX2-NOPOPCNT-NEXT: andl $21845, %edx # imm = 0x5555
930 ; AVX2-NOPOPCNT-NEXT: subl %edx, %ecx
931 ; AVX2-NOPOPCNT-NEXT: movl %ecx, %edx
932 ; AVX2-NOPOPCNT-NEXT: andl $13107, %edx # imm = 0x3333
933 ; AVX2-NOPOPCNT-NEXT: shrl $2, %ecx
934 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
935 ; AVX2-NOPOPCNT-NEXT: addl %edx, %ecx
936 ; AVX2-NOPOPCNT-NEXT: movl %ecx, %edx
937 ; AVX2-NOPOPCNT-NEXT: andl $65520, %edx # imm = 0xFFF0
938 ; AVX2-NOPOPCNT-NEXT: shrl $4, %edx
939 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %edx
940 ; AVX2-NOPOPCNT-NEXT: andl $3855, %edx # imm = 0xF0F
941 ; AVX2-NOPOPCNT-NEXT: imull $257, %edx, %ecx # imm = 0x101
942 ; AVX2-NOPOPCNT-NEXT: movzbl %ch, %ecx # NOREX
943 ; AVX2-NOPOPCNT-NEXT: vmovd %ecx, %xmm2
944 ; AVX2-NOPOPCNT-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
945 ; AVX2-NOPOPCNT-NEXT: vpextrw $2, %xmm0, %eax
946 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
947 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
948 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
949 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
950 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
951 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
952 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
953 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
954 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
955 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
956 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
957 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
958 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
959 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
960 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
961 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
962 ; AVX2-NOPOPCNT-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
963 ; AVX2-NOPOPCNT-NEXT: vpextrw $3, %xmm0, %eax
964 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
965 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
966 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
967 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
968 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
969 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
970 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
971 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
972 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
973 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
974 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
975 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
976 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
977 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
978 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
979 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
980 ; AVX2-NOPOPCNT-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
981 ; AVX2-NOPOPCNT-NEXT: vpextrw $4, %xmm0, %eax
982 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
983 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
984 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
985 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
986 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
987 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
988 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
989 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
990 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
991 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
992 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
993 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
994 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
995 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
996 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
997 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
998 ; AVX2-NOPOPCNT-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
999 ; AVX2-NOPOPCNT-NEXT: vpextrw $5, %xmm0, %eax
1000 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
1001 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
1002 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
1003 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
1004 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
1005 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
1006 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
1007 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
1008 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
1009 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
1010 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
1011 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
1012 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
1013 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
1014 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
1015 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
1016 ; AVX2-NOPOPCNT-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
1017 ; AVX2-NOPOPCNT-NEXT: vpextrw $6, %xmm0, %eax
1018 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
1019 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
1020 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
1021 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
1022 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
1023 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
1024 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
1025 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
1026 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
1027 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
1028 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
1029 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
1030 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
1031 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
1032 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
1033 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
1034 ; AVX2-NOPOPCNT-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
1035 ; AVX2-NOPOPCNT-NEXT: vpextrw $7, %xmm0, %eax
1036 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
1037 ; AVX2-NOPOPCNT-NEXT: shrl %ecx
1038 ; AVX2-NOPOPCNT-NEXT: andl $21845, %ecx # imm = 0x5555
1039 ; AVX2-NOPOPCNT-NEXT: subl %ecx, %eax
1040 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
1041 ; AVX2-NOPOPCNT-NEXT: andl $13107, %ecx # imm = 0x3333
1042 ; AVX2-NOPOPCNT-NEXT: shrl $2, %eax
1043 ; AVX2-NOPOPCNT-NEXT: andl $13107, %eax # imm = 0x3333
1044 ; AVX2-NOPOPCNT-NEXT: addl %ecx, %eax
1045 ; AVX2-NOPOPCNT-NEXT: movl %eax, %ecx
1046 ; AVX2-NOPOPCNT-NEXT: andl $65520, %ecx # imm = 0xFFF0
1047 ; AVX2-NOPOPCNT-NEXT: shrl $4, %ecx
1048 ; AVX2-NOPOPCNT-NEXT: addl %eax, %ecx
1049 ; AVX2-NOPOPCNT-NEXT: andl $3855, %ecx # imm = 0xF0F
1050 ; AVX2-NOPOPCNT-NEXT: imull $257, %ecx, %eax # imm = 0x101
1051 ; AVX2-NOPOPCNT-NEXT: movzbl %ah, %eax # NOREX
1052 ; AVX2-NOPOPCNT-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
1053 ; AVX2-NOPOPCNT-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
1054 ; AVX2-NOPOPCNT-NEXT: retq
1055 %out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %in)
; Declarations of the llvm.ctpop (population count) intrinsic, one overload per
; 256-bit vector type: 32 x i8, 4 x i64, 8 x i32 and 16 x i16.
1059 declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
1060 declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
1061 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
1062 declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)