1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
5 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
6 ; AVX512BW-LABEL: test_pcmpeq_b:
8 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
9 ; AVX512BW-NEXT: kmovq %k0, %rax
11 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
15 define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
16 ; AVX512BW-LABEL: test_mask_pcmpeq_b:
18 ; AVX512BW-NEXT: kmovq %rdi, %k1
19 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
20 ; AVX512BW-NEXT: kmovq %k0, %rax
22 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
26 declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
28 define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
29 ; AVX512BW-LABEL: test_pcmpeq_w:
31 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
32 ; AVX512BW-NEXT: kmovd %k0, %eax
34 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
38 define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
39 ; AVX512BW-LABEL: test_mask_pcmpeq_w:
41 ; AVX512BW-NEXT: kmovd %edi, %k1
42 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
43 ; AVX512BW-NEXT: kmovd %k0, %eax
45 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
49 declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
51 define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
52 ; AVX512BW-LABEL: test_pcmpgt_b:
54 ; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
55 ; AVX512BW-NEXT: kmovq %k0, %rax
57 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
61 define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
62 ; AVX512BW-LABEL: test_mask_pcmpgt_b:
64 ; AVX512BW-NEXT: kmovq %rdi, %k1
65 ; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
66 ; AVX512BW-NEXT: kmovq %k0, %rax
68 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
72 declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
74 define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
75 ; AVX512BW-LABEL: test_pcmpgt_w:
77 ; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
78 ; AVX512BW-NEXT: kmovd %k0, %eax
80 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
84 define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
85 ; AVX512BW-LABEL: test_mask_pcmpgt_w:
87 ; AVX512BW-NEXT: kmovd %edi, %k1
88 ; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
89 ; AVX512BW-NEXT: kmovd %k0, %eax
91 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
95 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
97 define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
98 ; AVX512BW-LABEL: test_cmp_b_512:
100 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
101 ; AVX512BW-NEXT: kmovq %k0, %rax
102 ; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0
103 ; AVX512BW-NEXT: kmovq %k0, %rcx
104 ; AVX512BW-NEXT: addq %rax, %rcx
105 ; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0
106 ; AVX512BW-NEXT: kmovq %k0, %rax
107 ; AVX512BW-NEXT: addq %rcx, %rax
108 ; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
109 ; AVX512BW-NEXT: kmovq %k0, %rcx
110 ; AVX512BW-NEXT: addq %rax, %rcx
111 ; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
112 ; AVX512BW-NEXT: kmovq %k0, %rax
113 ; AVX512BW-NEXT: addq %rcx, %rax
114 ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
115 ; AVX512BW-NEXT: kmovq %k0, %rcx
116 ; AVX512BW-NEXT: addq %rax, %rcx
117 ; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
118 ; AVX512BW-NEXT: kmovq %k0, %rdx
119 ; AVX512BW-NEXT: addq %rcx, %rdx
120 ; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0
121 ; AVX512BW-NEXT: kmovq %k0, %rax
122 ; AVX512BW-NEXT: addq %rdx, %rax
123 ; AVX512BW-NEXT: retq
124 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
125 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
126 %ret1 = add i64 %res0, %res1
127 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
128 %ret2 = add i64 %ret1, %res2
129 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
130 %ret3 = add i64 %ret2, %res3
131 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
132 %ret4 = add i64 %ret3, %res4
133 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
134 %ret5 = add i64 %ret4, %res5
135 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
136 %ret6 = add i64 %ret5, %res6
137 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
138 %ret7 = add i64 %ret6, %res7
142 define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
143 ; AVX512BW-LABEL: test_mask_cmp_b_512:
145 ; AVX512BW-NEXT: kmovq %rdi, %k1
146 ; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
147 ; AVX512BW-NEXT: kmovq %k0, %rax
148 ; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
149 ; AVX512BW-NEXT: kmovq %k0, %rcx
150 ; AVX512BW-NEXT: addq %rax, %rcx
151 ; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
152 ; AVX512BW-NEXT: kmovq %k0, %rax
153 ; AVX512BW-NEXT: addq %rcx, %rax
154 ; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
155 ; AVX512BW-NEXT: kmovq %k0, %rcx
156 ; AVX512BW-NEXT: addq %rax, %rcx
157 ; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
158 ; AVX512BW-NEXT: kmovq %k0, %rax
159 ; AVX512BW-NEXT: addq %rcx, %rax
160 ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
161 ; AVX512BW-NEXT: kmovq %k0, %rcx
162 ; AVX512BW-NEXT: addq %rax, %rcx
163 ; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
164 ; AVX512BW-NEXT: kmovq %k0, %rdx
165 ; AVX512BW-NEXT: addq %rcx, %rdx
166 ; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
167 ; AVX512BW-NEXT: kmovq %k0, %rax
168 ; AVX512BW-NEXT: addq %rdx, %rax
169 ; AVX512BW-NEXT: retq
170 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
171 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
172 %ret1 = add i64 %res0, %res1
173 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
174 %ret2 = add i64 %ret1, %res2
175 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
176 %ret3 = add i64 %ret2, %res3
177 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
178 %ret4 = add i64 %ret3, %res4
179 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
180 %ret5 = add i64 %ret4, %res5
181 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
182 %ret6 = add i64 %ret5, %res6
183 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
184 %ret7 = add i64 %ret6, %res7
188 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
190 define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
191 ; AVX512BW-LABEL: test_ucmp_b_512:
193 ; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0
194 ; AVX512BW-NEXT: kmovq %k0, %rax
195 ; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0
196 ; AVX512BW-NEXT: kmovq %k0, %rcx
197 ; AVX512BW-NEXT: addq %rax, %rcx
198 ; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0
199 ; AVX512BW-NEXT: kmovq %k0, %rax
200 ; AVX512BW-NEXT: addq %rcx, %rax
201 ; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
202 ; AVX512BW-NEXT: kmovq %k0, %rcx
203 ; AVX512BW-NEXT: addq %rax, %rcx
204 ; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
205 ; AVX512BW-NEXT: kmovq %k0, %rax
206 ; AVX512BW-NEXT: addq %rcx, %rax
207 ; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
208 ; AVX512BW-NEXT: kmovq %k0, %rcx
209 ; AVX512BW-NEXT: addq %rax, %rcx
210 ; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
211 ; AVX512BW-NEXT: kmovq %k0, %rdx
212 ; AVX512BW-NEXT: addq %rcx, %rdx
213 ; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0
214 ; AVX512BW-NEXT: kmovq %k0, %rax
215 ; AVX512BW-NEXT: addq %rdx, %rax
216 ; AVX512BW-NEXT: retq
217 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
218 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
219 %ret1 = add i64 %res0, %res1
220 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
221 %ret2 = add i64 %ret1, %res2
222 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
223 %ret3 = add i64 %ret2, %res3
224 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
225 %ret4 = add i64 %ret3, %res4
226 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
227 %ret5 = add i64 %ret4, %res5
228 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
229 %ret6 = add i64 %ret5, %res6
230 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
231 %ret7 = add i64 %ret6, %res7
235 define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
236 ; AVX512BW-LABEL: test_mask_x86_avx512_ucmp_b_512:
238 ; AVX512BW-NEXT: kmovq %rdi, %k1
239 ; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
240 ; AVX512BW-NEXT: kmovq %k0, %rax
241 ; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
242 ; AVX512BW-NEXT: kmovq %k0, %rcx
243 ; AVX512BW-NEXT: addq %rax, %rcx
244 ; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
245 ; AVX512BW-NEXT: kmovq %k0, %rax
246 ; AVX512BW-NEXT: addq %rcx, %rax
247 ; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
248 ; AVX512BW-NEXT: kmovq %k0, %rcx
249 ; AVX512BW-NEXT: addq %rax, %rcx
250 ; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
251 ; AVX512BW-NEXT: kmovq %k0, %rax
252 ; AVX512BW-NEXT: addq %rcx, %rax
253 ; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
254 ; AVX512BW-NEXT: kmovq %k0, %rcx
255 ; AVX512BW-NEXT: addq %rax, %rcx
256 ; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
257 ; AVX512BW-NEXT: kmovq %k0, %rdx
258 ; AVX512BW-NEXT: addq %rcx, %rdx
259 ; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
260 ; AVX512BW-NEXT: kmovq %k0, %rax
261 ; AVX512BW-NEXT: addq %rdx, %rax
262 ; AVX512BW-NEXT: retq
263 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
264 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
265 %ret1 = add i64 %res0, %res1
266 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
267 %ret2 = add i64 %ret1, %res2
268 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
269 %ret3 = add i64 %ret2, %res3
270 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
271 %ret4 = add i64 %ret3, %res4
272 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
273 %ret5 = add i64 %ret4, %res5
274 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
275 %ret6 = add i64 %ret5, %res6
276 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
277 %ret7 = add i64 %ret6, %res7
281 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
283 define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
284 ; AVX512BW-LABEL: test_cmp_w_512:
286 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
287 ; AVX512BW-NEXT: kmovd %k0, %eax
288 ; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0
289 ; AVX512BW-NEXT: kmovd %k0, %ecx
290 ; AVX512BW-NEXT: addl %eax, %ecx
291 ; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0
292 ; AVX512BW-NEXT: kmovd %k0, %eax
293 ; AVX512BW-NEXT: addl %ecx, %eax
294 ; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
295 ; AVX512BW-NEXT: kmovd %k0, %ecx
296 ; AVX512BW-NEXT: addl %eax, %ecx
297 ; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
298 ; AVX512BW-NEXT: kmovd %k0, %eax
299 ; AVX512BW-NEXT: addl %ecx, %eax
300 ; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
301 ; AVX512BW-NEXT: kmovd %k0, %ecx
302 ; AVX512BW-NEXT: addl %eax, %ecx
303 ; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
304 ; AVX512BW-NEXT: kmovd %k0, %edx
305 ; AVX512BW-NEXT: addl %ecx, %edx
306 ; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0
307 ; AVX512BW-NEXT: kmovd %k0, %eax
308 ; AVX512BW-NEXT: addl %edx, %eax
309 ; AVX512BW-NEXT: retq
310 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
311 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
312 %ret1 = add i32 %res0, %res1
313 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
314 %ret2 = add i32 %ret1, %res2
315 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
316 %ret3 = add i32 %ret2, %res3
317 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
318 %ret4 = add i32 %ret3, %res4
319 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
320 %ret5 = add i32 %ret4, %res5
321 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
322 %ret6 = add i32 %ret5, %res6
323 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
324 %ret7 = add i32 %ret6, %res7
328 define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
329 ; AVX512BW-LABEL: test_mask_cmp_w_512:
331 ; AVX512BW-NEXT: kmovd %edi, %k1
332 ; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
333 ; AVX512BW-NEXT: kmovd %k0, %eax
334 ; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
335 ; AVX512BW-NEXT: kmovd %k0, %ecx
336 ; AVX512BW-NEXT: addl %eax, %ecx
337 ; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
338 ; AVX512BW-NEXT: kmovd %k0, %eax
339 ; AVX512BW-NEXT: addl %ecx, %eax
340 ; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
341 ; AVX512BW-NEXT: kmovd %k0, %ecx
342 ; AVX512BW-NEXT: addl %eax, %ecx
343 ; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
344 ; AVX512BW-NEXT: kmovd %k0, %eax
345 ; AVX512BW-NEXT: addl %ecx, %eax
346 ; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
347 ; AVX512BW-NEXT: kmovd %k0, %ecx
348 ; AVX512BW-NEXT: addl %eax, %ecx
349 ; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
350 ; AVX512BW-NEXT: kmovd %k0, %edx
351 ; AVX512BW-NEXT: addl %ecx, %edx
352 ; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
353 ; AVX512BW-NEXT: kmovd %k0, %eax
354 ; AVX512BW-NEXT: addl %edx, %eax
355 ; AVX512BW-NEXT: retq
356 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
357 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
358 %ret1 = add i32 %res0, %res1
359 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
360 %ret2 = add i32 %ret1, %res2
361 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
362 %ret3 = add i32 %ret2, %res3
363 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
364 %ret4 = add i32 %ret3, %res4
365 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
366 %ret5 = add i32 %ret4, %res5
367 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
368 %ret6 = add i32 %ret5, %res6
369 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
370 %ret7 = add i32 %ret6, %res7
374 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
376 define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
377 ; AVX512BW-LABEL: test_ucmp_w_512:
379 ; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0
380 ; AVX512BW-NEXT: kmovd %k0, %eax
381 ; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
382 ; AVX512BW-NEXT: kmovd %k0, %ecx
383 ; AVX512BW-NEXT: addl %eax, %ecx
384 ; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
385 ; AVX512BW-NEXT: kmovd %k0, %eax
386 ; AVX512BW-NEXT: addl %ecx, %eax
387 ; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
388 ; AVX512BW-NEXT: kmovd %k0, %ecx
389 ; AVX512BW-NEXT: addl %eax, %ecx
390 ; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
391 ; AVX512BW-NEXT: kmovd %k0, %eax
392 ; AVX512BW-NEXT: addl %ecx, %eax
393 ; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
394 ; AVX512BW-NEXT: kmovd %k0, %ecx
395 ; AVX512BW-NEXT: addl %eax, %ecx
396 ; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
397 ; AVX512BW-NEXT: kmovd %k0, %edx
398 ; AVX512BW-NEXT: addl %ecx, %edx
399 ; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0
400 ; AVX512BW-NEXT: kmovd %k0, %eax
401 ; AVX512BW-NEXT: addl %edx, %eax
402 ; AVX512BW-NEXT: retq
403 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
404 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
405 %ret1 = add i32 %res0, %res1
406 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
407 %ret2 = add i32 %ret1, %res2
408 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
409 %ret3 = add i32 %ret2, %res3
410 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
411 %ret4 = add i32 %ret3, %res4
412 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
413 %ret5 = add i32 %ret4, %res5
414 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
415 %ret6 = add i32 %ret5, %res6
416 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
417 %ret7 = add i32 %ret6, %res7
421 define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
422 ; AVX512BW-LABEL: test_mask_ucmp_w_512:
424 ; AVX512BW-NEXT: kmovd %edi, %k1
425 ; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
426 ; AVX512BW-NEXT: kmovd %k0, %eax
427 ; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
428 ; AVX512BW-NEXT: kmovd %k0, %ecx
429 ; AVX512BW-NEXT: addl %eax, %ecx
430 ; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
431 ; AVX512BW-NEXT: kmovd %k0, %eax
432 ; AVX512BW-NEXT: addl %ecx, %eax
433 ; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
434 ; AVX512BW-NEXT: kmovd %k0, %ecx
435 ; AVX512BW-NEXT: addl %eax, %ecx
436 ; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
437 ; AVX512BW-NEXT: kmovd %k0, %eax
438 ; AVX512BW-NEXT: addl %ecx, %eax
439 ; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
440 ; AVX512BW-NEXT: kmovd %k0, %ecx
441 ; AVX512BW-NEXT: addl %eax, %ecx
442 ; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
443 ; AVX512BW-NEXT: kmovd %k0, %edx
444 ; AVX512BW-NEXT: addl %ecx, %edx
445 ; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
446 ; AVX512BW-NEXT: kmovd %k0, %eax
447 ; AVX512BW-NEXT: addl %edx, %eax
448 ; AVX512BW-NEXT: retq
449 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
450 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
451 %ret1 = add i32 %res0, %res1
452 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
453 %ret2 = add i32 %ret1, %res2
454 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
455 %ret3 = add i32 %ret2, %res3
456 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
457 %ret4 = add i32 %ret3, %res4
458 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
459 %ret5 = add i32 %ret4, %res5
460 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
461 %ret6 = add i32 %ret5, %res6
462 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
463 %ret7 = add i32 %ret6, %res7
467 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
469 declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly
471 define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
472 ; AVX512BW-LABEL: test_x86_mask_blend_w_512:
474 ; AVX512BW-NEXT: kmovd %edi, %k1
475 ; AVX512BW-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1}
476 ; AVX512BW-NEXT: retq
477 %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
480 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly
482 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
483 ; AVX512BW-LABEL: test_x86_mask_blend_b_512:
485 ; AVX512BW-NEXT: kmovq %rdi, %k1
486 ; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1}
487 ; AVX512BW-NEXT: retq
488 %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
492 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
493 ; AVX512BW-LABEL: test_mask_packs_epi32_rr_512:
495 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0
496 ; AVX512BW-NEXT: retq
497 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
501 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
502 ; AVX512BW-LABEL: test_mask_packs_epi32_rrk_512:
504 ; AVX512BW-NEXT: kmovd %edi, %k1
505 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1}
506 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
507 ; AVX512BW-NEXT: retq
508 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
512 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
513 ; AVX512BW-LABEL: test_mask_packs_epi32_rrkz_512:
515 ; AVX512BW-NEXT: kmovd %edi, %k1
516 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z}
517 ; AVX512BW-NEXT: retq
518 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
522 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
523 ; AVX512BW-LABEL: test_mask_packs_epi32_rm_512:
525 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0
526 ; AVX512BW-NEXT: retq
527 %b = load <16 x i32>, <16 x i32>* %ptr_b
528 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
532 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
533 ; AVX512BW-LABEL: test_mask_packs_epi32_rmk_512:
535 ; AVX512BW-NEXT: kmovd %esi, %k1
536 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1}
537 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
538 ; AVX512BW-NEXT: retq
539 %b = load <16 x i32>, <16 x i32>* %ptr_b
540 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
544 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
545 ; AVX512BW-LABEL: test_mask_packs_epi32_rmkz_512:
547 ; AVX512BW-NEXT: kmovd %esi, %k1
548 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z}
549 ; AVX512BW-NEXT: retq
550 %b = load <16 x i32>, <16 x i32>* %ptr_b
551 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
555 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
556 ; AVX512BW-LABEL: test_mask_packs_epi32_rmb_512:
558 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0
559 ; AVX512BW-NEXT: retq
560 %q = load i32, i32* %ptr_b
561 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
562 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
563 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
567 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
568 ; AVX512BW-LABEL: test_mask_packs_epi32_rmbk_512:
570 ; AVX512BW-NEXT: kmovd %esi, %k1
571 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
572 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
573 ; AVX512BW-NEXT: retq
574 %q = load i32, i32* %ptr_b
575 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
576 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
577 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
581 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
582 ; AVX512BW-LABEL: test_mask_packs_epi32_rmbkz_512:
584 ; AVX512BW-NEXT: kmovd %esi, %k1
585 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
586 ; AVX512BW-NEXT: retq
587 %q = load i32, i32* %ptr_b
588 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
589 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
590 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
594 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
596 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
597 ; AVX512BW-LABEL: test_mask_packs_epi16_rr_512:
599 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0
600 ; AVX512BW-NEXT: retq
601 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
605 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
606 ; AVX512BW-LABEL: test_mask_packs_epi16_rrk_512:
608 ; AVX512BW-NEXT: kmovq %rdi, %k1
609 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1}
610 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
611 ; AVX512BW-NEXT: retq
612 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
616 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
617 ; AVX512BW-LABEL: test_mask_packs_epi16_rrkz_512:
619 ; AVX512BW-NEXT: kmovq %rdi, %k1
620 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z}
621 ; AVX512BW-NEXT: retq
622 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
626 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
627 ; AVX512BW-LABEL: test_mask_packs_epi16_rm_512:
629 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0
630 ; AVX512BW-NEXT: retq
631 %b = load <32 x i16>, <32 x i16>* %ptr_b
632 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
636 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
637 ; AVX512BW-LABEL: test_mask_packs_epi16_rmk_512:
639 ; AVX512BW-NEXT: kmovq %rsi, %k1
640 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1}
641 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
642 ; AVX512BW-NEXT: retq
643 %b = load <32 x i16>, <32 x i16>* %ptr_b
644 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
648 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
649 ; AVX512BW-LABEL: test_mask_packs_epi16_rmkz_512:
651 ; AVX512BW-NEXT: kmovq %rsi, %k1
652 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z}
653 ; AVX512BW-NEXT: retq
654 %b = load <32 x i16>, <32 x i16>* %ptr_b
655 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
659 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
662 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
663 ; AVX512BW-LABEL: test_mask_packus_epi32_rr_512:
665 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0
666 ; AVX512BW-NEXT: retq
667 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
671 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
672 ; AVX512BW-LABEL: test_mask_packus_epi32_rrk_512:
674 ; AVX512BW-NEXT: kmovd %edi, %k1
675 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
676 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
677 ; AVX512BW-NEXT: retq
678 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
682 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
683 ; AVX512BW-LABEL: test_mask_packus_epi32_rrkz_512:
685 ; AVX512BW-NEXT: kmovd %edi, %k1
686 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
687 ; AVX512BW-NEXT: retq
688 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
692 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
693 ; AVX512BW-LABEL: test_mask_packus_epi32_rm_512:
695 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0
696 ; AVX512BW-NEXT: retq
697 %b = load <16 x i32>, <16 x i32>* %ptr_b
698 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
702 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
703 ; AVX512BW-LABEL: test_mask_packus_epi32_rmk_512:
705 ; AVX512BW-NEXT: kmovd %esi, %k1
706 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
707 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
708 ; AVX512BW-NEXT: retq
709 %b = load <16 x i32>, <16 x i32>* %ptr_b
710 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
714 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
715 ; AVX512BW-LABEL: test_mask_packus_epi32_rmkz_512:
717 ; AVX512BW-NEXT: kmovd %esi, %k1
718 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
719 ; AVX512BW-NEXT: retq
720 %b = load <16 x i32>, <16 x i32>* %ptr_b
721 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
725 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
726 ; AVX512BW-LABEL: test_mask_packus_epi32_rmb_512:
728 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
729 ; AVX512BW-NEXT: retq
730 %q = load i32, i32* %ptr_b
731 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
732 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
733 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
737 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
738 ; AVX512BW-LABEL: test_mask_packus_epi32_rmbk_512:
740 ; AVX512BW-NEXT: kmovd %esi, %k1
741 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
742 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
743 ; AVX512BW-NEXT: retq
744 %q = load i32, i32* %ptr_b
745 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
746 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
747 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
751 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
752 ; AVX512BW-LABEL: test_mask_packus_epi32_rmbkz_512:
754 ; AVX512BW-NEXT: kmovd %esi, %k1
755 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
756 ; AVX512BW-NEXT: retq
757 %q = load i32, i32* %ptr_b
758 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
759 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
760 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
764 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
766 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
767 ; AVX512BW-LABEL: test_mask_packus_epi16_rr_512:
769 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
770 ; AVX512BW-NEXT: retq
771 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
775 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
776 ; AVX512BW-LABEL: test_mask_packus_epi16_rrk_512:
778 ; AVX512BW-NEXT: kmovq %rdi, %k1
779 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
780 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
781 ; AVX512BW-NEXT: retq
782 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
786 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
787 ; AVX512BW-LABEL: test_mask_packus_epi16_rrkz_512:
789 ; AVX512BW-NEXT: kmovq %rdi, %k1
790 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
791 ; AVX512BW-NEXT: retq
792 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
796 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
797 ; AVX512BW-LABEL: test_mask_packus_epi16_rm_512:
799 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0
800 ; AVX512BW-NEXT: retq
801 %b = load <32 x i16>, <32 x i16>* %ptr_b
802 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
806 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
807 ; AVX512BW-LABEL: test_mask_packus_epi16_rmk_512:
809 ; AVX512BW-NEXT: kmovq %rsi, %k1
810 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
811 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
812 ; AVX512BW-NEXT: retq
813 %b = load <32 x i16>, <32 x i16>* %ptr_b
814 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
818 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
819 ; AVX512BW-LABEL: test_mask_packus_epi16_rmkz_512:
821 ; AVX512BW-NEXT: kmovq %rsi, %k1
822 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
823 ; AVX512BW-NEXT: retq
824 %b = load <32 x i16>, <32 x i16>* %ptr_b
825 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
829 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
831 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
832 ; AVX512BW-LABEL: test_mask_adds_epi16_rr_512:
834 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0
835 ; AVX512BW-NEXT: retq
836 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
840 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
841 ; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512:
843 ; AVX512BW-NEXT: kmovd %edi, %k1
844 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
845 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
846 ; AVX512BW-NEXT: retq
847 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
851 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
852 ; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512:
854 ; AVX512BW-NEXT: kmovd %edi, %k1
855 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
856 ; AVX512BW-NEXT: retq
857 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
861 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
862 ; AVX512BW-LABEL: test_mask_adds_epi16_rm_512:
864 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0
865 ; AVX512BW-NEXT: retq
866 %b = load <32 x i16>, <32 x i16>* %ptr_b
867 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
871 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
872 ; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512:
874 ; AVX512BW-NEXT: kmovd %esi, %k1
875 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
876 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
877 ; AVX512BW-NEXT: retq
878 %b = load <32 x i16>, <32 x i16>* %ptr_b
879 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
883 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
884 ; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512:
886 ; AVX512BW-NEXT: kmovd %esi, %k1
887 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
888 ; AVX512BW-NEXT: retq
889 %b = load <32 x i16>, <32 x i16>* %ptr_b
890 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
894 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
896 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
897 ; AVX512BW-LABEL: test_mask_subs_epi16_rr_512:
899 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0
900 ; AVX512BW-NEXT: retq
901 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
905 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
906 ; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512:
908 ; AVX512BW-NEXT: kmovd %edi, %k1
909 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
910 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
911 ; AVX512BW-NEXT: retq
912 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
916 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
917 ; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
919 ; AVX512BW-NEXT: kmovd %edi, %k1
920 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
921 ; AVX512BW-NEXT: retq
922 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
926 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
927 ; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
929 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0
930 ; AVX512BW-NEXT: retq
931 %b = load <32 x i16>, <32 x i16>* %ptr_b
932 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
936 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
937 ; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
939 ; AVX512BW-NEXT: kmovd %esi, %k1
940 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
941 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
942 ; AVX512BW-NEXT: retq
943 %b = load <32 x i16>, <32 x i16>* %ptr_b
944 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
948 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
949 ; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
951 ; AVX512BW-NEXT: kmovd %esi, %k1
952 ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
953 ; AVX512BW-NEXT: retq
954 %b = load <32 x i16>, <32 x i16>* %ptr_b
955 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
959 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
961 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
962 ; AVX512BW-LABEL: test_mask_adds_epu16_rr_512:
964 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
965 ; AVX512BW-NEXT: retq
966 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
970 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
971 ; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
973 ; AVX512BW-NEXT: kmovd %edi, %k1
974 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
975 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
976 ; AVX512BW-NEXT: retq
977 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
981 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
982 ; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
984 ; AVX512BW-NEXT: kmovd %edi, %k1
985 ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
986 ; AVX512BW-NEXT: retq
987 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
991 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
992 ; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
994 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0
995 ; AVX512BW-NEXT: retq
996 %b = load <32 x i16>, <32 x i16>* %ptr_b
997 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1001 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1002 ; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
1003 ; AVX512BW: ## BB#0:
1004 ; AVX512BW-NEXT: kmovd %esi, %k1
1005 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
1006 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1007 ; AVX512BW-NEXT: retq
1008 %b = load <32 x i16>, <32 x i16>* %ptr_b
1009 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1013 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1014 ; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
1015 ; AVX512BW: ## BB#0:
1016 ; AVX512BW-NEXT: kmovd %esi, %k1
1017 ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
1018 ; AVX512BW-NEXT: retq
1019 %b = load <32 x i16>, <32 x i16>* %ptr_b
1020 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1024 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1026 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1027 ; AVX512BW-LABEL: test_mask_subs_epu16_rr_512:
1028 ; AVX512BW: ## BB#0:
1029 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
1030 ; AVX512BW-NEXT: retq
1031 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1035 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
1036 ; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
1037 ; AVX512BW: ## BB#0:
1038 ; AVX512BW-NEXT: kmovd %edi, %k1
1039 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
1040 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
1041 ; AVX512BW-NEXT: retq
1042 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1046 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
1047 ; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
1048 ; AVX512BW: ## BB#0:
1049 ; AVX512BW-NEXT: kmovd %edi, %k1
1050 ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
1051 ; AVX512BW-NEXT: retq
1052 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1056 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1057 ; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
1058 ; AVX512BW: ## BB#0:
1059 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0
1060 ; AVX512BW-NEXT: retq
1061 %b = load <32 x i16>, <32 x i16>* %ptr_b
1062 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
1066 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1067 ; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
1068 ; AVX512BW: ## BB#0:
1069 ; AVX512BW-NEXT: kmovd %esi, %k1
1070 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
1071 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
1072 ; AVX512BW-NEXT: retq
1073 %b = load <32 x i16>, <32 x i16>* %ptr_b
1074 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
1078 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
1079 ; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
1080 ; AVX512BW: ## BB#0:
1081 ; AVX512BW-NEXT: kmovd %esi, %k1
1082 ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
1083 ; AVX512BW-NEXT: retq
1084 %b = load <32 x i16>, <32 x i16>* %ptr_b
1085 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
1089 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1091 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1093 define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1094 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
1095 ; AVX512BW: ## BB#0:
1096 ; AVX512BW-NEXT: kmovq %rdi, %k1
1097 ; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
1098 ; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
1099 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1100 ; AVX512BW-NEXT: retq
1101 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1102 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1103 %res2 = add <64 x i8> %res, %res1
1107 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1109 define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1110 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
1111 ; AVX512BW: ## BB#0:
1112 ; AVX512BW-NEXT: kmovd %edi, %k1
1113 ; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
1114 ; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
1115 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1116 ; AVX512BW-NEXT: retq
1117 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1118 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1119 %res2 = add <32 x i16> %res, %res1
1120 ret <32 x i16> %res2
1123 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1125 define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1126 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
1127 ; AVX512BW: ## BB#0:
1128 ; AVX512BW-NEXT: kmovq %rdi, %k1
1129 ; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
1130 ; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
1131 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1132 ; AVX512BW-NEXT: retq
1133 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1134 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1135 %res2 = add <64 x i8> %res, %res1
1139 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1141 define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1142 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
1143 ; AVX512BW: ## BB#0:
1144 ; AVX512BW-NEXT: kmovd %edi, %k1
1145 ; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
1146 ; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
1147 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1148 ; AVX512BW-NEXT: retq
1149 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1150 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1151 %res2 = add <32 x i16> %res, %res1
1152 ret <32 x i16> %res2
1155 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1157 define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1158 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_b_512:
1159 ; AVX512BW: ## BB#0:
1160 ; AVX512BW-NEXT: kmovq %rdi, %k1
1161 ; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1}
1162 ; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0
1163 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1164 ; AVX512BW-NEXT: retq
1165 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1166 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1167 %res2 = add <64 x i8> %res, %res1
1171 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1173 define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1174 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_w_512:
1175 ; AVX512BW: ## BB#0:
1176 ; AVX512BW-NEXT: kmovd %edi, %k1
1177 ; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1}
1178 ; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0
1179 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1180 ; AVX512BW-NEXT: retq
1181 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1182 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1183 %res2 = add <32 x i16> %res, %res1
1184 ret <32 x i16> %res2
1187 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1189 define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1190 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_b_512:
1191 ; AVX512BW: ## BB#0:
1192 ; AVX512BW-NEXT: kmovq %rdi, %k1
1193 ; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1}
1194 ; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0
1195 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1196 ; AVX512BW-NEXT: retq
1197 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1198 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1199 %res2 = add <64 x i8> %res, %res1
1203 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1205 define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1206 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_w_512:
1207 ; AVX512BW: ## BB#0:
1208 ; AVX512BW-NEXT: kmovd %edi, %k1
1209 ; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1}
1210 ; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0
1211 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1212 ; AVX512BW-NEXT: retq
1213 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1214 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1215 %res2 = add <32 x i16> %res, %res1
1216 ret <32 x i16> %res2
1219 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1221 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1222 ; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
1223 ; AVX512BW: ## BB#0:
1224 ; AVX512BW-NEXT: kmovd %edi, %k1
1225 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
1226 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
1227 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1228 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1229 ; AVX512BW-NEXT: retq
1230 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1231 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1232 %res2 = add <32 x i16> %res, %res1
1233 ret <32 x i16> %res2
1236 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1238 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1239 ; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
1240 ; AVX512BW: ## BB#0:
1241 ; AVX512BW-NEXT: kmovd %edi, %k1
1242 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
1243 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
1244 ; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
1245 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1246 ; AVX512BW-NEXT: retq
1247 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1248 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1249 %res2 = add <32 x i16> %res, %res1
1250 ret <32 x i16> %res2
1253 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1255 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1256 ; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
1257 ; AVX512BW: ## BB#0:
1258 ; AVX512BW-NEXT: kmovd %edi, %k1
1259 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
1260 ; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
1261 ; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
1262 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1263 ; AVX512BW-NEXT: retq
1264 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1265 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1266 %res2 = add <32 x i16> %res, %res1
1267 ret <32 x i16> %res2
1270 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1272 define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1273 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_b_512:
1274 ; AVX512BW: ## BB#0:
1275 ; AVX512BW-NEXT: kmovq %rdi, %k1
1276 ; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
1277 ; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm0
1278 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1279 ; AVX512BW-NEXT: retq
1280 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1281 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1282 %res2 = add <64 x i8> %res, %res1
1286 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1288 define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1289 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_w_512:
1290 ; AVX512BW: ## BB#0:
1291 ; AVX512BW-NEXT: kmovd %edi, %k1
1292 ; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
1293 ; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm0
1294 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1295 ; AVX512BW-NEXT: retq
1296 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1297 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1298 %res2 = add <32 x i16> %res, %res1
1299 ret <32 x i16> %res2
1302 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1304 define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1305 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
1306 ; AVX512BW: ## BB#0:
1307 ; AVX512BW-NEXT: kmovq %rdi, %k1
1308 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
1309 ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
1310 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
1311 ; AVX512BW-NEXT: retq
1312 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1313 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1314 %res2 = add <64 x i8> %res, %res1
1318 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
1320 define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
1321 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_w_512:
1322 ; AVX512BW: ## BB#0:
1323 ; AVX512BW-NEXT: kmovd %edi, %k1
1324 ; AVX512BW-NEXT: vpabsw %zmm0, %zmm1 {%k1}
1325 ; AVX512BW-NEXT: vpabsw %zmm0, %zmm0
1326 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
1327 ; AVX512BW-NEXT: retq
1328 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
1329 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
1330 %res2 = add <32 x i16> %res, %res1
1331 ret <32 x i16> %res2
1334 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
1336 define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
1337 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_b_512:
1338 ; AVX512BW: ## BB#0:
1339 ; AVX512BW-NEXT: kmovq %rdi, %k1
1340 ; AVX512BW-NEXT: vpabsb %zmm0, %zmm1 {%k1}
1341 ; AVX512BW-NEXT: vpabsb %zmm0, %zmm0
1342 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
1343 ; AVX512BW-NEXT: retq
1344 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
1345 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
1346 %res2 = add <64 x i8> %res, %res1
1350 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1352 define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1353 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
1354 ; AVX512BW: ## BB#0:
1355 ; AVX512BW-NEXT: kmovd %edi, %k1
1356 ; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
1357 ; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
1358 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1359 ; AVX512BW-NEXT: retq
1360 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1361 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1362 %res2 = add <32 x i16> %res, %res1
1363 ret <32 x i16> %res2
1366 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1368 define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1369 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
1370 ; AVX512BW: ## BB#0:
1371 ; AVX512BW-NEXT: kmovd %edi, %k1
1372 ; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
1373 ; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
1374 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1375 ; AVX512BW-NEXT: retq
1376 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1377 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1378 %res2 = add <32 x i16> %res, %res1
1379 ret <32 x i16> %res2
1382 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1384 define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1385 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
1386 ; AVX512BW: ## BB#0:
1387 ; AVX512BW-NEXT: kmovd %edi, %k1
1388 ; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
1389 ; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
1390 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
1391 ; AVX512BW-NEXT: retq
1392 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
1393 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
1394 %res2 = add <32 x i16> %res, %res1
1395 ret <32 x i16> %res2
1398 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
1400 define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1401 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
1402 ; AVX512BW: ## BB#0:
1403 ; AVX512BW-NEXT: kmovd %edi, %k1
1404 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1 {%k1}
1405 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
1406 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
1407 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1408 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
1409 ; AVX512BW-NEXT: retq
1410 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1411 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1412 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1413 %res3 = add <32 x i8> %res0, %res1
1414 %res4 = add <32 x i8> %res3, %res2
1418 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1420 define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1421 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
1422 ; AVX512BW: ## BB#0:
1423 ; AVX512BW-NEXT: kmovd %esi, %k1
1424 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi)
1425 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1}
1426 ; AVX512BW-NEXT: retq
1427 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1428 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
1432 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
; Signed-saturating truncate word->byte: unmasked, merge-masked and
; zero-masked results are summed to cover all three intrinsic forms.
define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmovswb %zmm0, %ymm1 {%k1}
; AVX512BW-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
; AVX512BW-NEXT: vpmovswb %zmm0, %ymm0
; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: retq
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ; Restored missing terminator: function returns the combined result.
  ret <32 x i8> %res4
}
1452 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
; Signed-saturating truncating store word->byte: unmasked then masked store.
define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi)
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: retq
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ; Restored missing terminator for this void function.
  ret void
}
1466 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
; Unsigned-saturating truncate word->byte: unmasked, merge-masked and
; zero-masked results are summed to cover all three intrinsic forms.
define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm1 {%k1}
; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: retq
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ; Restored missing terminator: function returns the combined result.
  ret <32 x i8> %res4
}
1486 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
; Unsigned-saturating truncating store word->byte: unmasked then masked store.
define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi)
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: retq
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ; Restored missing terminator for this void function.
  ret void
}
1500 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
; vpmaddubsw: masked (merge into %x2) and unmasked results are summed.
define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
1516 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
; vpmaddwd: masked (merge into %x2, 16-bit mask) and unmasked results summed.
define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovw %edi, %k1
; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
1532 declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; vpunpckhbw: masked (merge into %x2, 64-bit mask) and unmasked results summed.
; NOTE(review): the shuffle comment on the masked form prints "k1[...]" for the
; second operand — preserved verbatim, as it matches the autogenerated output.
define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63]
; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
  %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %res2 = add <64 x i8> %res, %res1
  ; Restored missing terminator: function returns the combined result.
  ret <64 x i8> %res2
}
1548 declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; vpunpcklbw: masked (merge into %x2, 64-bit mask) and unmasked results summed.
; NOTE(review): the shuffle comment on the masked form prints "k1[...]" for the
; second operand — preserved verbatim, as it matches the autogenerated output.
define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55]
; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
  %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %res2 = add <64 x i8> %res, %res1
  ; Restored missing terminator: function returns the combined result.
  ret <64 x i8> %res2
}
1564 declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; vpunpckhwd: masked (merge into %x2, 32-bit mask) and unmasked results summed.
define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31]
; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
1580 declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; vpunpcklwd: masked (merge into %x2, 32-bit mask) and unmasked results summed.
define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27]
; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
1596 declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
; vpalignr with immediate 2: merge-masked, zero-masked and unmasked results
; are summed so all three forms are exercised in one function.
define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_palignr_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
  %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
  %res3 = add <64 x i8> %res, %res1
  %res4 = add <64 x i8> %res3, %res2
  ; Restored missing terminator: function returns the combined result.
  ret <64 x i8> %res4
}
1616 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
; vdbpsadbw with immediate 2: merge-masked, zero-masked and unmasked results
; are summed so all three forms are exercised in one function.
define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}
1636 declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
; vpslldq (byte shift) with two different immediates; results are summed.
define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_dq_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpslldq $8, %zmm0, %zmm1
; AVX512BW-NEXT: vpslldq $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ; Restored missing terminator: function returns the combined result.
  ret <8 x i64> %res2
}
1651 declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
; vpsrldq (byte shift) with two different immediates; results are summed.
define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_psrl_dq_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpsrldq $8, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrldq $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
  %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
  %res2 = add <8 x i64> %res, %res1
  ; Restored missing terminator: function returns the combined result.
  ret <8 x i64> %res2
}
1665 declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
1667 define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
1668 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512:
1669 ; AVX512BW: ## BB#0:
1670 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm1
1671 ; AVX512BW-NEXT: vpsadbw %zmm2, %zmm0, %zmm0
1672 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0
1673 ; AVX512BW-NEXT: retq
1674 %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
1675 %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
1676 %res2 = add <8 x i64> %res, %res1