1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; Vector add on <16 x i8>: loads %a and %b, computes `add %1, %2`, and stores
; the sum to %c. The patterns below expect ld.b through $5 and $6, addv.b on
; the two loaded registers, and st.b of the result through $4.
3 define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: add_v16i8:
6 %1 = load <16 x i8>* %a
7 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
8 %2 = load <16 x i8>* %b
9 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
10 %3 = add <16 x i8> %1, %2
11 ; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
12 store <16 x i8> %3, <16 x i8>* %c
13 ; CHECK-DAG: st.b [[R3]], 0($4)
16 ; CHECK: .size add_v16i8
; Vector add on <8 x i16>: loads %a and %b, computes `add %1, %2`, and stores
; the sum to %c. The patterns below expect ld.h through $5 and $6, addv.h on
; the two loaded registers, and st.h of the result through $4.
19 define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: add_v8i16:
22 %1 = load <8 x i16>* %a
23 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
24 %2 = load <8 x i16>* %b
25 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
26 %3 = add <8 x i16> %1, %2
27 ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
28 store <8 x i16> %3, <8 x i16>* %c
29 ; CHECK-DAG: st.h [[R3]], 0($4)
32 ; CHECK: .size add_v8i16
; Vector add on <4 x i32>: loads %a and %b, computes `add %1, %2`, and stores
; the sum to %c. The patterns below expect ld.w through $5 and $6, addv.w on
; the two loaded registers, and st.w of the result through $4.
35 define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: add_v4i32:
38 %1 = load <4 x i32>* %a
39 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
40 %2 = load <4 x i32>* %b
41 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
42 %3 = add <4 x i32> %1, %2
43 ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
44 store <4 x i32> %3, <4 x i32>* %c
45 ; CHECK-DAG: st.w [[R3]], 0($4)
48 ; CHECK: .size add_v4i32
; Vector add on <2 x i64>: loads %a and %b, computes `add %1, %2`, and stores
; the sum to %c. The patterns below expect ld.d through $5 and $6, addv.d on
; the two loaded registers, and st.d of the result through $4.
51 define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: add_v2i64:
54 %1 = load <2 x i64>* %a
55 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
56 %2 = load <2 x i64>* %b
57 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
58 %3 = add <2 x i64> %1, %2
59 ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
60 store <2 x i64> %3, <2 x i64>* %c
61 ; CHECK-DAG: st.d [[R3]], 0($4)
64 ; CHECK: .size add_v2i64
; Vector add of a constant on <16 x i8>: loads %a, adds a vector whose sixteen
; elements are all 1, and stores the result to %c. The patterns below expect
; ld.b through $5, the immediate form addvi.b with immediate 1, and st.b of
; the result through $4.
67 define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: add_v16i8_i:
70 %1 = load <16 x i8>* %a
71 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
72 %2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
73 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
74 ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1
75 store <16 x i8> %2, <16 x i8>* %c
76 ; CHECK-DAG: st.b [[R3]], 0($4)
79 ; CHECK: .size add_v16i8_i
; Vector add of a constant on <8 x i16>: loads %a, adds a vector whose eight
; elements are all 1, and stores the result to %c. The patterns below expect
; ld.h through $5, the immediate form addvi.h with immediate 1, and st.h of
; the result through $4.
82 define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: add_v8i16_i:
85 %1 = load <8 x i16>* %a
86 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
87 %2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
88 i16 1, i16 1, i16 1, i16 1>
89 ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1
90 store <8 x i16> %2, <8 x i16>* %c
91 ; CHECK-DAG: st.h [[R3]], 0($4)
94 ; CHECK: .size add_v8i16_i
; Vector add of a constant on <4 x i32>: loads %a, adds a vector whose four
; elements are all 1, and stores the result to %c. The patterns below expect
; ld.w through $5, the immediate form addvi.w with immediate 1, and st.w of
; the result through $4.
97 define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: add_v4i32_i:
100 %1 = load <4 x i32>* %a
101 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
102 %2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
103 ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
104 store <4 x i32> %2, <4 x i32>* %c
105 ; CHECK-DAG: st.w [[R3]], 0($4)
108 ; CHECK: .size add_v4i32_i
; Vector add of a constant on <2 x i64>: loads %a, adds a vector whose two
; elements are both 1, and stores the result to %c. The patterns below expect
; ld.d through $5, the immediate form addvi.d with immediate 1, and st.d of
; the result through $4.
111 define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
112 ; CHECK: add_v2i64_i:
114 %1 = load <2 x i64>* %a
115 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
116 %2 = add <2 x i64> %1, <i64 1, i64 1>
117 ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
118 store <2 x i64> %2, <2 x i64>* %c
119 ; CHECK-DAG: st.d [[R3]], 0($4)
122 ; CHECK: .size add_v2i64_i
; Vector subtract on <16 x i8>: loads %a and %b, computes `sub %1, %2`, and
; stores the difference to %c. The patterns below expect ld.b through $5 and
; $6, subv.b on the two loaded registers, and st.b of the result through $4.
125 define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: sub_v16i8:
128 %1 = load <16 x i8>* %a
129 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
130 %2 = load <16 x i8>* %b
131 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
132 %3 = sub <16 x i8> %1, %2
133 ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
134 store <16 x i8> %3, <16 x i8>* %c
135 ; CHECK-DAG: st.b [[R3]], 0($4)
138 ; CHECK: .size sub_v16i8
; Vector subtract on <8 x i16>: loads %a and %b, computes `sub %1, %2`, and
; stores the difference to %c. The patterns below expect ld.h through $5 and
; $6, subv.h on the two loaded registers, and st.h of the result through $4.
141 define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: sub_v8i16:
144 %1 = load <8 x i16>* %a
145 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
146 %2 = load <8 x i16>* %b
147 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
148 %3 = sub <8 x i16> %1, %2
149 ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
150 store <8 x i16> %3, <8 x i16>* %c
151 ; CHECK-DAG: st.h [[R3]], 0($4)
154 ; CHECK: .size sub_v8i16
; Vector subtract on <4 x i32>: loads %a and %b, computes `sub %1, %2`, and
; stores the difference to %c. The patterns below expect ld.w through $5 and
; $6, subv.w on the two loaded registers, and st.w of the result through $4.
157 define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: sub_v4i32:
160 %1 = load <4 x i32>* %a
161 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
162 %2 = load <4 x i32>* %b
163 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
164 %3 = sub <4 x i32> %1, %2
165 ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
166 store <4 x i32> %3, <4 x i32>* %c
167 ; CHECK-DAG: st.w [[R3]], 0($4)
170 ; CHECK: .size sub_v4i32
; Vector subtract on <2 x i64>: loads %a and %b, computes `sub %1, %2`, and
; stores the difference to %c. The patterns below expect ld.d through $5 and
; $6, subv.d on the two loaded registers, and st.d of the result through $4.
173 define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: sub_v2i64:
176 %1 = load <2 x i64>* %a
177 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
178 %2 = load <2 x i64>* %b
179 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
180 %3 = sub <2 x i64> %1, %2
181 ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
182 store <2 x i64> %3, <2 x i64>* %c
183 ; CHECK-DAG: st.d [[R3]], 0($4)
186 ; CHECK: .size sub_v2i64
; Vector subtract of a constant on <16 x i8>: loads %a, subtracts a vector
; whose sixteen elements are all 1, and stores the result to %c. The patterns
; below expect ld.b through $5, the immediate form subvi.b with immediate 1,
; and st.b of the result through $4.
189 define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
190 ; CHECK: sub_v16i8_i:
192 %1 = load <16 x i8>* %a
193 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
194 %2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
195 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
196 ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1
197 store <16 x i8> %2, <16 x i8>* %c
198 ; CHECK-DAG: st.b [[R3]], 0($4)
201 ; CHECK: .size sub_v16i8_i
; Vector subtract of a constant on <8 x i16>: loads %a, subtracts a vector
; whose eight elements are all 1, and stores the result to %c. The patterns
; below expect ld.h through $5, the immediate form subvi.h with immediate 1,
; and st.h of the result through $4.
204 define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
205 ; CHECK: sub_v8i16_i:
207 %1 = load <8 x i16>* %a
208 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
209 %2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
210 i16 1, i16 1, i16 1, i16 1>
211 ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1
212 store <8 x i16> %2, <8 x i16>* %c
213 ; CHECK-DAG: st.h [[R3]], 0($4)
216 ; CHECK: .size sub_v8i16_i
; Vector subtract of a constant on <4 x i32>: loads %a, subtracts a vector
; whose four elements are all 1, and stores the result to %c. The patterns
; below expect ld.w through $5, the immediate form subvi.w with immediate 1,
; and st.w of the result through $4.
219 define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
220 ; CHECK: sub_v4i32_i:
222 %1 = load <4 x i32>* %a
223 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
224 %2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
225 ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
226 store <4 x i32> %2, <4 x i32>* %c
227 ; CHECK-DAG: st.w [[R3]], 0($4)
230 ; CHECK: .size sub_v4i32_i
; Vector subtract of a constant on <2 x i64>: loads %a, subtracts a vector
; whose two elements are both 1, and stores the result to %c. The patterns
; below expect ld.d through $5, the immediate form subvi.d with immediate 1,
; and st.d of the result through $4.
233 define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
234 ; CHECK: sub_v2i64_i:
236 %1 = load <2 x i64>* %a
237 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
238 %2 = sub <2 x i64> %1, <i64 1, i64 1>
239 ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
240 store <2 x i64> %2, <2 x i64>* %c
241 ; CHECK-DAG: st.d [[R3]], 0($4)
244 ; CHECK: .size sub_v2i64_i
; Vector multiply on <16 x i8>: loads %a and %b, computes `mul %1, %2`, and
; stores the product to %c. The patterns below expect ld.b through $5 and $6,
; mulv.b on the two loaded registers, and st.b of the result through $4.
247 define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: mul_v16i8:
250 %1 = load <16 x i8>* %a
251 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
252 %2 = load <16 x i8>* %b
253 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
254 %3 = mul <16 x i8> %1, %2
255 ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
256 store <16 x i8> %3, <16 x i8>* %c
257 ; CHECK-DAG: st.b [[R3]], 0($4)
260 ; CHECK: .size mul_v16i8
; Vector multiply on <8 x i16>: loads %a and %b, computes `mul %1, %2`, and
; stores the product to %c. The patterns below expect ld.h through $5 and $6,
; mulv.h on the two loaded registers, and st.h of the result through $4.
263 define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: mul_v8i16:
266 %1 = load <8 x i16>* %a
267 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
268 %2 = load <8 x i16>* %b
269 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
270 %3 = mul <8 x i16> %1, %2
271 ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
272 store <8 x i16> %3, <8 x i16>* %c
273 ; CHECK-DAG: st.h [[R3]], 0($4)
276 ; CHECK: .size mul_v8i16
; Vector multiply on <4 x i32>: loads %a and %b, computes `mul %1, %2`, and
; stores the product to %c. The patterns below expect ld.w through $5 and $6,
; mulv.w on the two loaded registers, and st.w of the result through $4.
279 define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: mul_v4i32:
282 %1 = load <4 x i32>* %a
283 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
284 %2 = load <4 x i32>* %b
285 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
286 %3 = mul <4 x i32> %1, %2
287 ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
288 store <4 x i32> %3, <4 x i32>* %c
289 ; CHECK-DAG: st.w [[R3]], 0($4)
292 ; CHECK: .size mul_v4i32
; Vector multiply on <2 x i64>: loads %a and %b, computes `mul %1, %2`, and
; stores the product to %c. The patterns below expect ld.d through $5 and $6,
; mulv.d on the two loaded registers, and st.d of the result through $4.
295 define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
; Anchor matching at this function's label, as the later tests in this file do.
; CHECK: mul_v2i64:
298 %1 = load <2 x i64>* %a
299 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
300 %2 = load <2 x i64>* %b
301 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
302 %3 = mul <2 x i64> %1, %2
303 ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
304 store <2 x i64> %3, <2 x i64>* %c
305 ; CHECK-DAG: st.d [[R3]], 0($4)
308 ; CHECK: .size mul_v2i64
; Multiply-add on <16 x i8>: loads %a, %b and %c, computes
; `add (mul %2, %3), %1`, and stores the result to %d. The patterns below
; expect ld.b through $5, $6 and $7, then maddv.b whose first operand is the
; register loaded through $5; that same register is the one stored through $4.
311 define void @maddv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
312 <16 x i8>* %c) nounwind {
313 ; CHECK: maddv_v16i8:
315 %1 = load <16 x i8>* %a
316 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
317 %2 = load <16 x i8>* %b
318 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
319 %3 = load <16 x i8>* %c
320 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
321 %4 = mul <16 x i8> %2, %3
322 %5 = add <16 x i8> %4, %1
323 ; CHECK-DAG: maddv.b [[R1]], [[R2]], [[R3]]
324 store <16 x i8> %5, <16 x i8>* %d
325 ; CHECK-DAG: st.b [[R1]], 0($4)
328 ; CHECK: .size maddv_v16i8
; Multiply-add on <8 x i16>: loads %a, %b and %c, computes
; `add (mul %2, %3), %1`, and stores the result to %d. The patterns below
; expect ld.h through $5, $6 and $7, then maddv.h whose first operand is the
; register loaded through $5; that same register is the one stored through $4.
331 define void @maddv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
332 <8 x i16>* %c) nounwind {
333 ; CHECK: maddv_v8i16:
335 %1 = load <8 x i16>* %a
336 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
337 %2 = load <8 x i16>* %b
338 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
339 %3 = load <8 x i16>* %c
340 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
341 %4 = mul <8 x i16> %2, %3
342 %5 = add <8 x i16> %4, %1
343 ; CHECK-DAG: maddv.h [[R1]], [[R2]], [[R3]]
344 store <8 x i16> %5, <8 x i16>* %d
345 ; CHECK-DAG: st.h [[R1]], 0($4)
348 ; CHECK: .size maddv_v8i16
; Multiply-add on <4 x i32>: loads %a, %b and %c, computes
; `add (mul %2, %3), %1`, and stores the result to %d. The patterns below
; expect ld.w through $5, $6 and $7, then maddv.w whose first operand is the
; register loaded through $5; that same register is the one stored through $4.
351 define void @maddv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
352 <4 x i32>* %c) nounwind {
353 ; CHECK: maddv_v4i32:
355 %1 = load <4 x i32>* %a
356 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
357 %2 = load <4 x i32>* %b
358 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
359 %3 = load <4 x i32>* %c
360 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
361 %4 = mul <4 x i32> %2, %3
362 %5 = add <4 x i32> %4, %1
363 ; CHECK-DAG: maddv.w [[R1]], [[R2]], [[R3]]
364 store <4 x i32> %5, <4 x i32>* %d
365 ; CHECK-DAG: st.w [[R1]], 0($4)
368 ; CHECK: .size maddv_v4i32
; Multiply-add on <2 x i64>: loads %a, %b and %c, computes
; `add (mul %2, %3), %1`, and stores the result to %d. The patterns below
; expect ld.d through $5, $6 and $7, then maddv.d whose first operand is the
; register loaded through $5; that same register is the one stored through $4.
371 define void @maddv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
372 <2 x i64>* %c) nounwind {
373 ; CHECK: maddv_v2i64:
375 %1 = load <2 x i64>* %a
376 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
377 %2 = load <2 x i64>* %b
378 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
379 %3 = load <2 x i64>* %c
380 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
381 %4 = mul <2 x i64> %2, %3
382 %5 = add <2 x i64> %4, %1
383 ; CHECK-DAG: maddv.d [[R1]], [[R2]], [[R3]]
384 store <2 x i64> %5, <2 x i64>* %d
385 ; CHECK-DAG: st.d [[R1]], 0($4)
388 ; CHECK: .size maddv_v2i64
; Multiply-subtract on <16 x i8>: loads %a, %b and %c, computes
; `sub %1, (mul %2, %3)`, and stores the result to %d. The patterns below
; expect ld.b through $5, $6 and $7, then msubv.b whose first operand is the
; register loaded through $5; that same register is the one stored through $4.
391 define void @msubv_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
392 <16 x i8>* %c) nounwind {
393 ; CHECK: msubv_v16i8:
395 %1 = load <16 x i8>* %a
396 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
397 %2 = load <16 x i8>* %b
398 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
399 %3 = load <16 x i8>* %c
400 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
401 %4 = mul <16 x i8> %2, %3
402 %5 = sub <16 x i8> %1, %4
403 ; CHECK-DAG: msubv.b [[R1]], [[R2]], [[R3]]
404 store <16 x i8> %5, <16 x i8>* %d
405 ; CHECK-DAG: st.b [[R1]], 0($4)
408 ; CHECK: .size msubv_v16i8
; Multiply-subtract on <8 x i16>: loads %a, %b and %c, computes
; `sub %1, (mul %2, %3)`, and stores the result to %d. The patterns below
; expect ld.h through $5, $6 and $7, then msubv.h whose first operand is the
; register loaded through $5; that same register is the one stored through $4.
411 define void @msubv_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b,
412 <8 x i16>* %c) nounwind {
413 ; CHECK: msubv_v8i16:
415 %1 = load <8 x i16>* %a
416 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
417 %2 = load <8 x i16>* %b
418 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
419 %3 = load <8 x i16>* %c
420 ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
421 %4 = mul <8 x i16> %2, %3
422 %5 = sub <8 x i16> %1, %4
423 ; CHECK-DAG: msubv.h [[R1]], [[R2]], [[R3]]
424 store <8 x i16> %5, <8 x i16>* %d
425 ; CHECK-DAG: st.h [[R1]], 0($4)
428 ; CHECK: .size msubv_v8i16
; Multiply-subtract on <4 x i32>: loads %a, %b and %c, computes
; `sub %1, (mul %2, %3)`, and stores the result to %d. The patterns below
; expect ld.w through $5, $6 and $7, then msubv.w whose first operand is the
; register loaded through $5; that same register is the one stored through $4.
431 define void @msubv_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b,
432 <4 x i32>* %c) nounwind {
433 ; CHECK: msubv_v4i32:
435 %1 = load <4 x i32>* %a
436 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
437 %2 = load <4 x i32>* %b
438 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
439 %3 = load <4 x i32>* %c
440 ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
441 %4 = mul <4 x i32> %2, %3
442 %5 = sub <4 x i32> %1, %4
443 ; CHECK-DAG: msubv.w [[R1]], [[R2]], [[R3]]
444 store <4 x i32> %5, <4 x i32>* %d
445 ; CHECK-DAG: st.w [[R1]], 0($4)
448 ; CHECK: .size msubv_v4i32
; Multiply-subtract on <2 x i64>: loads %a, %b and %c, computes
; `sub %1, (mul %2, %3)`, and stores the result to %d. The patterns below
; expect ld.d through $5, $6 and $7, then msubv.d whose first operand is the
; register loaded through $5; that same register is the one stored through $4.
451 define void @msubv_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b,
452 <2 x i64>* %c) nounwind {
453 ; CHECK: msubv_v2i64:
455 %1 = load <2 x i64>* %a
456 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
457 %2 = load <2 x i64>* %b
458 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
459 %3 = load <2 x i64>* %c
460 ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
461 %4 = mul <2 x i64> %2, %3
462 %5 = sub <2 x i64> %1, %4
463 ; CHECK-DAG: msubv.d [[R1]], [[R2]], [[R3]]
464 store <2 x i64> %5, <2 x i64>* %d
465 ; CHECK-DAG: st.d [[R1]], 0($4)
468 ; CHECK: .size msubv_v2i64
; Signed vector divide on <16 x i8>: loads %a and %b, computes `sdiv %1, %2`,
; and stores the quotient to %c. The patterns below expect ld.b through $5 and
; $6, div_s.b on the two loaded registers, and st.b of the result through $4.
471 define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
472 ; CHECK: div_s_v16i8:
474 %1 = load <16 x i8>* %a
475 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
476 %2 = load <16 x i8>* %b
477 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
478 %3 = sdiv <16 x i8> %1, %2
479 ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
480 store <16 x i8> %3, <16 x i8>* %c
481 ; CHECK-DAG: st.b [[R3]], 0($4)
484 ; CHECK: .size div_s_v16i8
; Signed vector divide on <8 x i16>: loads %a and %b, computes `sdiv %1, %2`,
; and stores the quotient to %c. The patterns below expect ld.h through $5 and
; $6, div_s.h on the two loaded registers, and st.h of the result through $4.
487 define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
488 ; CHECK: div_s_v8i16:
490 %1 = load <8 x i16>* %a
491 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
492 %2 = load <8 x i16>* %b
493 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
494 %3 = sdiv <8 x i16> %1, %2
495 ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
496 store <8 x i16> %3, <8 x i16>* %c
497 ; CHECK-DAG: st.h [[R3]], 0($4)
500 ; CHECK: .size div_s_v8i16
; Signed vector divide on <4 x i32>: loads %a and %b, computes `sdiv %1, %2`,
; and stores the quotient to %c. The patterns below expect ld.w through $5 and
; $6, div_s.w on the two loaded registers, and st.w of the result through $4.
503 define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
504 ; CHECK: div_s_v4i32:
506 %1 = load <4 x i32>* %a
507 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
508 %2 = load <4 x i32>* %b
509 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
510 %3 = sdiv <4 x i32> %1, %2
511 ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
512 store <4 x i32> %3, <4 x i32>* %c
513 ; CHECK-DAG: st.w [[R3]], 0($4)
516 ; CHECK: .size div_s_v4i32
; Signed vector divide on <2 x i64>: loads %a and %b, computes `sdiv %1, %2`,
; and stores the quotient to %c. The patterns below expect ld.d through $5 and
; $6, div_s.d on the two loaded registers, and st.d of the result through $4.
519 define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
520 ; CHECK: div_s_v2i64:
522 %1 = load <2 x i64>* %a
523 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
524 %2 = load <2 x i64>* %b
525 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
526 %3 = sdiv <2 x i64> %1, %2
527 ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
528 store <2 x i64> %3, <2 x i64>* %c
529 ; CHECK-DAG: st.d [[R3]], 0($4)
532 ; CHECK: .size div_s_v2i64
; Unsigned vector divide on <16 x i8>: loads %a and %b, computes
; `udiv %1, %2`, and stores the quotient to %c. The patterns below expect ld.b
; through $5 and $6, div_u.b on the two loaded registers, and st.b of the
; result through $4.
535 define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
536 ; CHECK: div_u_v16i8:
538 %1 = load <16 x i8>* %a
539 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
540 %2 = load <16 x i8>* %b
541 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
542 %3 = udiv <16 x i8> %1, %2
543 ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
544 store <16 x i8> %3, <16 x i8>* %c
545 ; CHECK-DAG: st.b [[R3]], 0($4)
548 ; CHECK: .size div_u_v16i8
; Unsigned vector divide on <8 x i16>: loads %a and %b, computes
; `udiv %1, %2`, and stores the quotient to %c. The patterns below expect ld.h
; through $5 and $6, div_u.h on the two loaded registers, and st.h of the
; result through $4.
551 define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
552 ; CHECK: div_u_v8i16:
554 %1 = load <8 x i16>* %a
555 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
556 %2 = load <8 x i16>* %b
557 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
558 %3 = udiv <8 x i16> %1, %2
559 ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
560 store <8 x i16> %3, <8 x i16>* %c
561 ; CHECK-DAG: st.h [[R3]], 0($4)
564 ; CHECK: .size div_u_v8i16
; Unsigned vector divide on <4 x i32>: loads %a and %b, computes
; `udiv %1, %2`, and stores the quotient to %c. The patterns below expect ld.w
; through $5 and $6, div_u.w on the two loaded registers, and st.w of the
; result through $4.
567 define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
568 ; CHECK: div_u_v4i32:
570 %1 = load <4 x i32>* %a
571 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
572 %2 = load <4 x i32>* %b
573 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
574 %3 = udiv <4 x i32> %1, %2
575 ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
576 store <4 x i32> %3, <4 x i32>* %c
577 ; CHECK-DAG: st.w [[R3]], 0($4)
580 ; CHECK: .size div_u_v4i32
; Unsigned vector divide on <2 x i64>: loads %a and %b, computes
; `udiv %1, %2`, and stores the quotient to %c. The patterns below expect ld.d
; through $5 and $6, div_u.d on the two loaded registers, and st.d of the
; result through $4.
583 define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
584 ; CHECK: div_u_v2i64:
586 %1 = load <2 x i64>* %a
587 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
588 %2 = load <2 x i64>* %b
589 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
590 %3 = udiv <2 x i64> %1, %2
591 ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
592 store <2 x i64> %3, <2 x i64>* %c
593 ; CHECK-DAG: st.d [[R3]], 0($4)
596 ; CHECK: .size div_u_v2i64
; Signed vector remainder on <16 x i8>: loads %a and %b, computes
; `srem %1, %2`, and stores the remainder to %c. The patterns below expect
; ld.b through $5 and $6, mod_s.b on the two loaded registers, and st.b of the
; result through $4.
599 define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
600 ; CHECK: mod_s_v16i8:
602 %1 = load <16 x i8>* %a
603 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
604 %2 = load <16 x i8>* %b
605 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
606 %3 = srem <16 x i8> %1, %2
607 ; CHECK-DAG: mod_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
608 store <16 x i8> %3, <16 x i8>* %c
609 ; CHECK-DAG: st.b [[R3]], 0($4)
612 ; CHECK: .size mod_s_v16i8
; Signed vector remainder on <8 x i16>: loads %a and %b, computes
; `srem %1, %2`, and stores the remainder to %c. The patterns below expect
; ld.h through $5 and $6, mod_s.h on the two loaded registers, and st.h of the
; result through $4.
615 define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
616 ; CHECK: mod_s_v8i16:
618 %1 = load <8 x i16>* %a
619 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
620 %2 = load <8 x i16>* %b
621 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
622 %3 = srem <8 x i16> %1, %2
623 ; CHECK-DAG: mod_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
624 store <8 x i16> %3, <8 x i16>* %c
625 ; CHECK-DAG: st.h [[R3]], 0($4)
628 ; CHECK: .size mod_s_v8i16
; Signed vector remainder on <4 x i32>: loads %a and %b, computes
; `srem %1, %2`, and stores the remainder to %c. The patterns below expect
; ld.w through $5 and $6, mod_s.w on the two loaded registers, and st.w of the
; result through $4.
631 define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
632 ; CHECK: mod_s_v4i32:
634 %1 = load <4 x i32>* %a
635 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
636 %2 = load <4 x i32>* %b
637 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
638 %3 = srem <4 x i32> %1, %2
639 ; CHECK-DAG: mod_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
640 store <4 x i32> %3, <4 x i32>* %c
641 ; CHECK-DAG: st.w [[R3]], 0($4)
644 ; CHECK: .size mod_s_v4i32
; Signed vector remainder on <2 x i64>: loads %a and %b, computes
; `srem %1, %2`, and stores the remainder to %c. The patterns below expect
; ld.d through $5 and $6, mod_s.d on the two loaded registers, and st.d of the
; result through $4.
647 define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
648 ; CHECK: mod_s_v2i64:
650 %1 = load <2 x i64>* %a
651 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
652 %2 = load <2 x i64>* %b
653 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
654 %3 = srem <2 x i64> %1, %2
655 ; CHECK-DAG: mod_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
656 store <2 x i64> %3, <2 x i64>* %c
657 ; CHECK-DAG: st.d [[R3]], 0($4)
660 ; CHECK: .size mod_s_v2i64
; Unsigned vector remainder on <16 x i8>: loads %a and %b, computes
; `urem %1, %2`, and stores the remainder to %c. The patterns below expect
; ld.b through $5 and $6, mod_u.b on the two loaded registers, and st.b of the
; result through $4.
663 define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
664 ; CHECK: mod_u_v16i8:
666 %1 = load <16 x i8>* %a
667 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
668 %2 = load <16 x i8>* %b
669 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
670 %3 = urem <16 x i8> %1, %2
671 ; CHECK-DAG: mod_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
672 store <16 x i8> %3, <16 x i8>* %c
673 ; CHECK-DAG: st.b [[R3]], 0($4)
676 ; CHECK: .size mod_u_v16i8
; Unsigned vector remainder on <8 x i16>: loads %a and %b, computes
; `urem %1, %2`, and stores the remainder to %c. The patterns below expect
; ld.h through $5 and $6, mod_u.h on the two loaded registers, and st.h of the
; result through $4.
679 define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
680 ; CHECK: mod_u_v8i16:
682 %1 = load <8 x i16>* %a
683 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
684 %2 = load <8 x i16>* %b
685 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
686 %3 = urem <8 x i16> %1, %2
687 ; CHECK-DAG: mod_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
688 store <8 x i16> %3, <8 x i16>* %c
689 ; CHECK-DAG: st.h [[R3]], 0($4)
692 ; CHECK: .size mod_u_v8i16
; Unsigned vector remainder on <4 x i32>: loads %a and %b, computes
; `urem %1, %2`, and stores the remainder to %c. The patterns below expect
; ld.w through $5 and $6, mod_u.w on the two loaded registers, and st.w of the
; result through $4.
695 define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
696 ; CHECK: mod_u_v4i32:
698 %1 = load <4 x i32>* %a
699 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
700 %2 = load <4 x i32>* %b
701 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
702 %3 = urem <4 x i32> %1, %2
703 ; CHECK-DAG: mod_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
704 store <4 x i32> %3, <4 x i32>* %c
705 ; CHECK-DAG: st.w [[R3]], 0($4)
708 ; CHECK: .size mod_u_v4i32
; Unsigned vector remainder on <2 x i64>: loads %a and %b, computes
; `urem %1, %2`, and stores the remainder to %c. The patterns below expect
; ld.d through $5 and $6, mod_u.d on the two loaded registers, and st.d of the
; result through $4.
711 define void @mod_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
712 ; CHECK: mod_u_v2i64:
714 %1 = load <2 x i64>* %a
715 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
716 %2 = load <2 x i64>* %b
717 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
718 %3 = urem <2 x i64> %1, %2
719 ; CHECK-DAG: mod_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
720 store <2 x i64> %3, <2 x i64>* %c
721 ; CHECK-DAG: st.d [[R3]], 0($4)
724 ; CHECK: .size mod_u_v2i64