1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
; Declarations of the 512-bit floating-point gather/scatter intrinsics under
; test: {d,q} = dword/qword index vector, {ps,pd} = float/double element type.
4 declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
5 declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
6 declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32)
7 declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)
9 declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32)
10 declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
11 declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
12 declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)
; Masked gather of 16 floats via dword indices, followed by a masked scatter of
; the gathered values to %stbuf using perturbed indices (scale 4 in both).
14 define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
15 ; CHECK-LABEL: gather_mask_dps:
17 ; CHECK-NEXT: kmovw %edi, %k1
18 ; CHECK-NEXT: kmovq %k1, %k2
19 ; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
20 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
21 ; CHECK-NEXT: vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
23 %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
24 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
25 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
; Masked gather/scatter of 8 doubles via dword (ymm) indices; the i8 mask is
; moved into a mask register and duplicated so both operations are predicated.
29 define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
30 ; CHECK-LABEL: gather_mask_dpd:
32 ; CHECK-NEXT: kmovb %edi, %k1
33 ; CHECK-NEXT: kmovq %k1, %k2
34 ; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
35 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
36 ; CHECK-NEXT: vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
38 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
39 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
40 call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
; Masked gather/scatter of 8 floats via qword (zmm) indices; the data operand
; is a ymm register since only 8 single-precision lanes are produced.
44 define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
45 ; CHECK-LABEL: gather_mask_qps:
47 ; CHECK-NEXT: kmovb %edi, %k1
48 ; CHECK-NEXT: kmovq %k1, %k2
49 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
50 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
51 ; CHECK-NEXT: vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
53 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
54 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
55 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
; Masked gather/scatter of 8 doubles via qword (zmm) indices.
59 define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
60 ; CHECK-LABEL: gather_mask_qpd:
62 ; CHECK-NEXT: kmovb %edi, %k1
63 ; CHECK-NEXT: kmovq %k1, %k2
64 ; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
65 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
66 ; CHECK-NEXT: vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
68 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
69 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
70 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
;; Integer Gather/Scatter
; Declarations of the 512-bit integer gather/scatter intrinsics: {pi,pq} =
; 32-bit/64-bit integer element type, {d,q} = dword/qword index vector.
76 declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, <16 x i32>, i16, i32)
77 declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
78 declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64>, i8*, <8 x i32>, i8, i32)
79 declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)
81 declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32>, i8*, <8 x i64>, i8, i32)
82 declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
83 declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
84 declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)
; Integer counterparts of the masked tests above: each gathers with %mask,
; perturbs the index vector, and scatters the gathered data with the same mask.
; Masked gather/scatter of 16 x i32 via dword indices.
86 define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
87 ; CHECK-LABEL: gather_mask_dd:
89 ; CHECK-NEXT: kmovw %edi, %k1
90 ; CHECK-NEXT: kmovq %k1, %k2
91 ; CHECK-NEXT: vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
92 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
93 ; CHECK-NEXT: vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
95 %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
96 %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
97 call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
; Masked gather/scatter of 8 x i32 via qword indices.
101 define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
102 ; CHECK-LABEL: gather_mask_qd:
104 ; CHECK-NEXT: kmovb %edi, %k1
105 ; CHECK-NEXT: kmovq %k1, %k2
106 ; CHECK-NEXT: vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
107 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
108 ; CHECK-NEXT: vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
110 %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
111 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
112 call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
; Masked gather/scatter of 8 x i64 via qword indices.
116 define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
117 ; CHECK-LABEL: gather_mask_qq:
119 ; CHECK-NEXT: kmovb %edi, %k1
120 ; CHECK-NEXT: kmovq %k1, %k2
121 ; CHECK-NEXT: vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
122 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
123 ; CHECK-NEXT: vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
125 %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
126 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
127 call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
; Masked gather/scatter of 8 x i64 via dword (ymm) indices.
131 define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
132 ; CHECK-LABEL: gather_mask_dq:
134 ; CHECK-NEXT: kmovb %edi, %k1
135 ; CHECK-NEXT: kmovq %k1, %k2
136 ; CHECK-NEXT: vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
137 ; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
138 ; CHECK-NEXT: vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
140 %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
141 %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
142 call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
; Execution-domain tests: verify the gathered FP data is moved/stored with FP
; instructions (vmovapd/vmovaps) rather than their integer-domain equivalents.
146 define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
147 ; CHECK-LABEL: gather_mask_dpd_execdomain:
149 ; CHECK-NEXT: kmovb %edi, %k1
150 ; CHECK-NEXT: vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
151 ; CHECK-NEXT: vmovapd %zmm1, (%rdx)
153 %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
154 store <8 x double> %x, <8 x double>* %stbuf
; Same check with qword indices for double data.
158 define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
159 ; CHECK-LABEL: gather_mask_qpd_execdomain:
161 ; CHECK-NEXT: kmovb %edi, %k1
162 ; CHECK-NEXT: vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
163 ; CHECK-NEXT: vmovapd %zmm1, (%rdx)
165 %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
166 store <8 x double> %x, <8 x double>* %stbuf
; Float variant: result is returned, so the domain shows in the reg-reg move.
170 define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
171 ; CHECK-LABEL: gather_mask_dps_execdomain:
173 ; CHECK-NEXT: kmovw %edi, %k1
174 ; CHECK-NEXT: vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
175 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
177 %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
178 ret <16 x float> %res;
; Same check with qword indices for float data.
181 define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
182 ; CHECK-LABEL: gather_mask_qps_execdomain:
184 ; CHECK-NEXT: kmovb %edi, %k1
185 ; CHECK-NEXT: vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
186 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
188 %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
189 ret <8 x float> %res;
; Scatter-side execution-domain tests: the value loaded for scattering must use
; an FP-domain load (vmovapd/vmovaps), matching the FP scatter that consumes it.
192 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
193 ; CHECK-LABEL: scatter_mask_dpd_execdomain:
195 ; CHECK-NEXT: kmovb %esi, %k1
196 ; CHECK-NEXT: vmovapd (%rdi), %zmm1
197 ; CHECK-NEXT: vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
199 %x = load <8 x double>, <8 x double>* %src, align 64
200 call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
; Double data, qword indices.
204 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
205 ; CHECK-LABEL: scatter_mask_qpd_execdomain:
207 ; CHECK-NEXT: kmovb %esi, %k1
208 ; CHECK-NEXT: vmovapd (%rdi), %zmm1
209 ; CHECK-NEXT: vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
211 %x = load <8 x double>, <8 x double>* %src, align 64
212 call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
; Float data, dword indices.
216 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
217 ; CHECK-LABEL: scatter_mask_dps_execdomain:
219 ; CHECK-NEXT: kmovw %esi, %k1
220 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
221 ; CHECK-NEXT: vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
223 %x = load <16 x float>, <16 x float>* %src, align 64
224 call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
; Float data, qword indices (8-lane, ymm-sized load).
228 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
229 ; CHECK-LABEL: scatter_mask_qps_execdomain:
231 ; CHECK-NEXT: kmovb %esi, %k1
232 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
233 ; CHECK-NEXT: vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
235 %x = load <8 x float>, <8 x float>* %src, align 32
236 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
; All-ones (i8 -1) mask: the mask registers are materialized with kxnorw
; instead of being loaded from a GPR.
240 define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
241 ; CHECK-LABEL: gather_qps:
243 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
244 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
245 ; CHECK-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
246 ; CHECK-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
247 ; CHECK-NEXT: vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
249 %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
250 %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
251 call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
; Gather/scatter prefetch intrinsics: the final i32 selects the hint level
; (0 -> pf0/T0, 1 -> pf1/T1); gathers use scale 4, scatters use scale 2.
255 declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
256 declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
257 define void @prefetch(<8 x i64> %ind, i8* %base) {
258 ; CHECK-LABEL: prefetch:
260 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
261 ; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
262 ; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
263 ; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
264 ; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
266 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
267 call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
268 call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
269 call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
; 128-bit (VL) gathers, qword indices. Naming: gather3div<N>.<ty> = N-element
; result, "div" = qword ("double-int") index vector.
274 declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)
; Two gathers of <2 x double>: one with the variable mask, one all-ones with a
; different scale; results are summed so both gathers survive DCE.
276 define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
277 ; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
279 ; CHECK-NEXT: kmovb %esi, %k1
280 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
281 ; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
282 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
283 ; CHECK-NEXT: vgatherqpd (%rdi,%xmm1,2), %xmm0 {%k1}
284 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
286 %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
287 %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
288 %res2 = fadd <2 x double> %res, %res1
289 ret <2 x double> %res2
; Identical calls (same mask and scale), so the second gather folds into the
; first and only one vpgatherqq is emitted.
292 declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)
294 define <4 x i32>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
295 ; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
297 ; CHECK-NEXT: kmovb %esi, %k1
298 ; CHECK-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
299 ; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
301 %res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
302 %res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
303 %res2 = add <4 x i32> %res, %res1
; 256-bit (VL) gather of <4 x double> with qword indices; masked + all-ones.
307 declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)
309 define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
310 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
312 ; CHECK-NEXT: kmovb %esi, %k1
313 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
314 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
315 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
316 ; CHECK-NEXT: vgatherqpd (%rdi,%ymm1,2), %ymm0 {%k1}
317 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
319 %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
320 %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
321 %res2 = fadd <4 x double> %res, %res1
322 ret <4 x double> %res2
; 256-bit gather of qword data; both calls use scale 8 but different masks, so
; two vpgatherqq instructions remain.
325 declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)
327 define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
328 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
330 ; CHECK-NEXT: kmovb %esi, %k1
331 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
332 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
333 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
334 ; CHECK-NEXT: vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
335 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
337 %res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
338 %res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
339 %res2 = add <8 x i32> %res, %res1
; 128-bit gather of <4 x float> with 2 qword indices; masked + all-ones.
343 declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)
345 define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
346 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
348 ; CHECK-NEXT: kmovb %esi, %k1
349 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
350 ; CHECK-NEXT: vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
351 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
352 ; CHECK-NEXT: vgatherqps (%rdi,%xmm1,2), %xmm0 {%k1}
353 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
355 %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
356 %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
357 %res2 = fadd <4 x float> %res, %res1
358 ret <4 x float> %res2
; Same shape for i32 data; here the all-ones call comes first.
361 declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)
363 define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
364 ; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
366 ; CHECK-NEXT: kmovb %esi, %k1
367 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
368 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
369 ; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
370 ; CHECK-NEXT: vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
371 ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
373 %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
374 %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
375 %res2 = add <4 x i32> %res, %res1
; 4 floats gathered through 4 qword (ymm) indices; result stays in xmm.
379 declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)
381 define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
382 ; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
384 ; CHECK-NEXT: kmovb %esi, %k1
385 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
386 ; CHECK-NEXT: vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
387 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
388 ; CHECK-NEXT: vgatherqps (%rdi,%ymm1,2), %xmm0 {%k1}
389 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
391 %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
392 %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
393 %res2 = fadd <4 x float> %res, %res1
394 ret <4 x float> %res2
; i32 variant: both calls use the same variable mask (different scales), so
; the mask register is duplicated with kmovq rather than rebuilt.
397 declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)
399 define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
400 ; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
402 ; CHECK-NEXT: kmovb %esi, %k1
403 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
404 ; CHECK-NEXT: kmovq %k1, %k2
405 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
406 ; CHECK-NEXT: vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
407 ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
409 %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
410 %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
411 %res2 = add <4 x i32> %res, %res1
; "siv" tests: same pattern as "div" above but with dword (signed-int) index
; vectors. <2 x double> gathered through dword indices, masked + all-ones.
415 declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)
417 define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
418 ; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
420 ; CHECK-NEXT: kmovb %esi, %k1
421 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
422 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
423 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
424 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %xmm0 {%k1}
425 ; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
427 %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
428 %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
429 %res2 = fadd <2 x double> %res, %res1
430 ret <2 x double> %res2
; Identical calls fold into a single vpgatherdq.
433 declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)
435 define <4 x i32>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
436 ; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
438 ; CHECK-NEXT: kmovb %esi, %k1
439 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
440 ; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm0
442 %res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
443 %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
444 %res2 = add <4 x i32> %res, %res1
; <4 x double> gathered through dword (xmm) indices, masked + all-ones.
448 declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)
450 define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
451 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
453 ; CHECK-NEXT: kmovb %esi, %k1
454 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
455 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
456 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
457 ; CHECK-NEXT: vgatherdpd (%rdi,%xmm1,2), %ymm0 {%k1}
458 ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
460 %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
461 %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
462 %res2 = fadd <4 x double> %res, %res1
463 ret <4 x double> %res2
; Identical calls fold into a single 256-bit vpgatherdq.
466 declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)
468 define <8 x i32>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
469 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
471 ; CHECK-NEXT: kmovb %esi, %k1
472 ; CHECK-NEXT: vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
473 ; CHECK-NEXT: vpaddd %ymm0, %ymm0, %ymm0
475 %res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
476 %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
477 %res2 = add <8 x i32> %res, %res1
; <4 x float> gathered through dword indices, masked + all-ones.
481 declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)
483 define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
484 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
486 ; CHECK-NEXT: kmovb %esi, %k1
487 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
488 ; CHECK-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
489 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
490 ; CHECK-NEXT: vgatherdps (%rdi,%xmm1,2), %xmm0 {%k1}
491 ; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
493 %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
494 %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
495 %res2 = fadd <4 x float> %res, %res1
496 ret <4 x float> %res2
; i32 variant with the all-ones call first.
499 declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)
501 define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
502 ; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
504 ; CHECK-NEXT: kmovb %esi, %k1
505 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
506 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
507 ; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
508 ; CHECK-NEXT: vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
509 ; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
511 %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
512 %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 2)
513 %res2 = add <4 x i32> %res, %res1
; 256-bit gather of <8 x float> with dword (ymm) indices, masked + all-ones.
517 declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)
519 define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
520 ; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
522 ; CHECK-NEXT: kmovb %esi, %k1
523 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
524 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
525 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
526 ; CHECK-NEXT: vgatherdps (%rdi,%ymm1,2), %ymm0 {%k1}
527 ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
529 %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
530 %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 2)
531 %res2 = fadd <8 x float> %res, %res1
532 ret <8 x float> %res2
; i32 variant: same variable mask in both calls, so it is copied with kmovq.
535 declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)
537 define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
538 ; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
540 ; CHECK-NEXT: kmovb %esi, %k1
541 ; CHECK-NEXT: vmovaps %zmm0, %zmm2
542 ; CHECK-NEXT: kmovq %k1, %k2
543 ; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
544 ; CHECK-NEXT: vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1}
545 ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
547 %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
548 %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 2)
549 %res2 = add <8 x i32> %res, %res1
; VL scatters with qword indices ("div"). Each test scatters twice: once with
; the variable i8 mask, once all-ones, at different scales.
553 declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)
555 define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
556 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
558 ; CHECK-NEXT: kmovb %esi, %k1
559 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
560 ; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
561 ; CHECK-NEXT: vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
563 call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 2)
564 call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
; <2 x i64> scatter; variable mask first, all-ones second.
568 declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)
570 define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
571 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
573 ; CHECK-NEXT: kmovb %esi, %k1
574 ; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
575 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
576 ; CHECK-NEXT: vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
578 call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
579 call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
; 256-bit scatter of <4 x double> via qword indices; masked then all-ones.
583 declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)
585 define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
586 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
588 ; CHECK-NEXT: kmovb %esi, %k1
589 ; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
590 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
591 ; CHECK-NEXT: vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
593 call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
594 call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
; Same for <4 x i64> integer data.
598 declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)
600 define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
601 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
603 ; CHECK-NEXT: kmovb %esi, %k1
604 ; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
605 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
606 ; CHECK-NEXT: vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
608 call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
609 call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
; 128-bit scatter of 4 floats via 2 qword indices; masked then all-ones.
613 declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)
615 define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
616 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
618 ; CHECK-NEXT: kmovb %esi, %k1
619 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
620 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
621 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
623 call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
624 call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
; i32 variant with the all-ones call first (mask built in k2 up front).
628 declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)
630 define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
631 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
633 ; CHECK-NEXT: kmovb %esi, %k1
634 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
635 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
636 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
638 call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 2)
639 call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
; 4 floats scattered through 4 qword (ymm) indices; data stays in xmm.
643 declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)
645 define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
646 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
648 ; CHECK-NEXT: kmovb %esi, %k1
649 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
650 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
651 ; CHECK-NEXT: vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
653 call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
654 call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
; Same shape for i32 data.
658 declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)
660 define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
661 ; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
663 ; CHECK-NEXT: kmovb %esi, %k1
664 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
665 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
666 ; CHECK-NEXT: vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
668 call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
669 call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
; VL scatters with dword indices ("siv"). <2 x double>: all-ones then masked.
673 declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)
675 define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
676 ; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
678 ; CHECK-NEXT: kmovb %esi, %k1
679 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
680 ; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
681 ; CHECK-NEXT: vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
683 call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 2)
684 call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
; <2 x i64> data, same all-ones-then-masked order.
688 declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)
690 define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
691 ; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
693 ; CHECK-NEXT: kmovb %esi, %k1
694 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
695 ; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
696 ; CHECK-NEXT: vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
698 call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 2)
699 call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
; 256-bit scatter of <4 x double> via dword (xmm) indices; masked, all-ones.
703 declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)
705 define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
706 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
708 ; CHECK-NEXT: kmovb %esi, %k1
709 ; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
710 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
711 ; CHECK-NEXT: vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
713 call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
714 call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
; <4 x i64> data, all-ones call first.
718 declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)
720 define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
721 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
723 ; CHECK-NEXT: kmovb %esi, %k1
724 ; CHECK-NEXT: kxnorw %k0, %k0, %k2
725 ; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
726 ; CHECK-NEXT: vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
728 call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 2)
729 call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
; 128-bit scatter of <4 x float> via dword indices; masked then all-ones.
733 declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)
735 define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
736 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
738 ; CHECK-NEXT: kmovb %esi, %k1
739 ; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
740 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
741 ; CHECK-NEXT: vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
743 call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
744 call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
; <4 x i32> data, same order.
748 declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)
750 define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
751 ; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
753 ; CHECK-NEXT: kmovb %esi, %k1
754 ; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
755 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
756 ; CHECK-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
758 call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
759 call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
; 256-bit scatter of <8 x float> via dword (ymm) indices; masked, all-ones.
763 declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)
765 define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
766 ; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
768 ; CHECK-NEXT: kmovb %esi, %k1
769 ; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
770 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
771 ; CHECK-NEXT: vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
773 call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
774 call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
; <8 x i32> data, same order.
778 declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)
780 define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
781 ; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
783 ; CHECK-NEXT: kmovb %esi, %k1
784 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
785 ; CHECK-NEXT: kxnorw %k0, %k0, %k1
786 ; CHECK-NEXT: vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
788 call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
789 call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)