1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; Test `add` of two <16 x i8> vectors: loads %a and %b, adds them, stores the result to %c.
; The FileCheck directives expect ld.b from 0($5) and 0($6), addv.b on the two
; loaded registers, st.b of the result to 0($4), and finally the function's .size line.
3 define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
6 %1 = load <16 x i8>* %a
7 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
8 %2 = load <16 x i8>* %b
9 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
10 %3 = add <16 x i8> %1, %2
11 ; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
12 store <16 x i8> %3, <16 x i8>* %c
13 ; CHECK-DAG: st.b [[R3]], 0($4)
16 ; CHECK: .size add_v16i8
; Test `add` of two <8 x i16> vectors: loads %a and %b, adds them, stores the result to %c.
; The FileCheck directives expect ld.h from 0($5) and 0($6), addv.h on the two
; loaded registers, st.h of the result to 0($4), and finally the function's .size line.
19 define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
22 %1 = load <8 x i16>* %a
23 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
24 %2 = load <8 x i16>* %b
25 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
26 %3 = add <8 x i16> %1, %2
27 ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
28 store <8 x i16> %3, <8 x i16>* %c
29 ; CHECK-DAG: st.h [[R3]], 0($4)
32 ; CHECK: .size add_v8i16
; Test `add` of two <4 x i32> vectors: loads %a and %b, adds them, stores the result to %c.
; The FileCheck directives expect ld.w from 0($5) and 0($6), addv.w on the two
; loaded registers, st.w of the result to 0($4), and finally the function's .size line.
35 define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
38 %1 = load <4 x i32>* %a
39 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
40 %2 = load <4 x i32>* %b
41 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
42 %3 = add <4 x i32> %1, %2
43 ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
44 store <4 x i32> %3, <4 x i32>* %c
45 ; CHECK-DAG: st.w [[R3]], 0($4)
48 ; CHECK: .size add_v4i32
; Test `add` of two <2 x i64> vectors: loads %a and %b, adds them, stores the result to %c.
; The FileCheck directives expect ld.d from 0($5) and 0($6), addv.d on the two
; loaded registers, st.d of the result to 0($4), and finally the function's .size line.
51 define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
54 %1 = load <2 x i64>* %a
55 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
56 %2 = load <2 x i64>* %b
57 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
58 %3 = add <2 x i64> %1, %2
59 ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
60 store <2 x i64> %3, <2 x i64>* %c
61 ; CHECK-DAG: st.d [[R3]], 0($4)
64 ; CHECK: .size add_v2i64
; Test `add` of a <16 x i8> vector loaded from %a with a constant vector whose
; elements are all 1; the result is stored to %c.
; The FileCheck directives expect ld.b from 0($5), addvi.b with immediate 1,
; st.b of the result to 0($4), and finally the function's .size line.
67 define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
70 %1 = load <16 x i8>* %a
71 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
72 %2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
73 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
74 ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1
75 store <16 x i8> %2, <16 x i8>* %c
76 ; CHECK-DAG: st.b [[R3]], 0($4)
79 ; CHECK: .size add_v16i8_i
; Test `add` of a <8 x i16> vector loaded from %a with a constant vector whose
; elements are all 1; the result is stored to %c.
; The FileCheck directives expect ld.h from 0($5), addvi.h with immediate 1,
; st.h of the result to 0($4), and finally the function's .size line.
82 define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
85 %1 = load <8 x i16>* %a
86 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
87 %2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
88 i16 1, i16 1, i16 1, i16 1>
89 ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1
90 store <8 x i16> %2, <8 x i16>* %c
91 ; CHECK-DAG: st.h [[R3]], 0($4)
94 ; CHECK: .size add_v8i16_i
; Test `add` of a <4 x i32> vector loaded from %a with a constant vector whose
; elements are all 1; the result is stored to %c.
; The FileCheck directives expect ld.w from 0($5), addvi.w with immediate 1,
; st.w of the result to 0($4), and finally the function's .size line.
97 define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
100 %1 = load <4 x i32>* %a
101 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
102 %2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
103 ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
104 store <4 x i32> %2, <4 x i32>* %c
105 ; CHECK-DAG: st.w [[R3]], 0($4)
108 ; CHECK: .size add_v4i32_i
; Test `add` of a <2 x i64> vector loaded from %a with a constant vector whose
; elements are all 1; the result is stored to %c.
; The FileCheck directives expect ld.d from 0($5), addvi.d with immediate 1,
; st.d of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
111 define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
112 ; CHECK-LABEL: add_v2i64_i:
114 %1 = load <2 x i64>* %a
115 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
116 %2 = add <2 x i64> %1, <i64 1, i64 1>
117 ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
118 store <2 x i64> %2, <2 x i64>* %c
119 ; CHECK-DAG: st.d [[R3]], 0($4)
122 ; CHECK: .size add_v2i64_i
; Test `sub` of two <16 x i8> vectors: loads %a and %b, computes %a - %b, stores the result to %c.
; The FileCheck directives expect ld.b from 0($5) and 0($6), subv.b on the two
; loaded registers, st.b of the result to 0($4), and finally the function's .size line.
125 define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
128 %1 = load <16 x i8>* %a
129 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
130 %2 = load <16 x i8>* %b
131 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
132 %3 = sub <16 x i8> %1, %2
133 ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
134 store <16 x i8> %3, <16 x i8>* %c
135 ; CHECK-DAG: st.b [[R3]], 0($4)
138 ; CHECK: .size sub_v16i8
; Test `sub` of two <8 x i16> vectors: loads %a and %b, computes %a - %b, stores the result to %c.
; The FileCheck directives expect ld.h from 0($5) and 0($6), subv.h on the two
; loaded registers, st.h of the result to 0($4), and finally the function's .size line.
141 define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
144 %1 = load <8 x i16>* %a
145 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
146 %2 = load <8 x i16>* %b
147 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
148 %3 = sub <8 x i16> %1, %2
149 ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
150 store <8 x i16> %3, <8 x i16>* %c
151 ; CHECK-DAG: st.h [[R3]], 0($4)
154 ; CHECK: .size sub_v8i16
; Test `sub` of two <4 x i32> vectors: loads %a and %b, computes %a - %b, stores the result to %c.
; The FileCheck directives expect ld.w from 0($5) and 0($6), subv.w on the two
; loaded registers, st.w of the result to 0($4), and finally the function's .size line.
157 define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
160 %1 = load <4 x i32>* %a
161 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
162 %2 = load <4 x i32>* %b
163 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
164 %3 = sub <4 x i32> %1, %2
165 ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
166 store <4 x i32> %3, <4 x i32>* %c
167 ; CHECK-DAG: st.w [[R3]], 0($4)
170 ; CHECK: .size sub_v4i32
; Test `sub` of two <2 x i64> vectors: loads %a and %b, computes %a - %b, stores the result to %c.
; The FileCheck directives expect ld.d from 0($5) and 0($6), subv.d on the two
; loaded registers, st.d of the result to 0($4), and finally the function's .size line.
173 define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
176 %1 = load <2 x i64>* %a
177 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
178 %2 = load <2 x i64>* %b
179 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
180 %3 = sub <2 x i64> %1, %2
181 ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
182 store <2 x i64> %3, <2 x i64>* %c
183 ; CHECK-DAG: st.d [[R3]], 0($4)
186 ; CHECK: .size sub_v2i64
; Test `sub` of a constant vector whose elements are all 1 from a <16 x i8>
; vector loaded from %a; the result is stored to %c.
; The FileCheck directives expect ld.b from 0($5), subvi.b with immediate 1,
; st.b of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
189 define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
190 ; CHECK-LABEL: sub_v16i8_i:
192 %1 = load <16 x i8>* %a
193 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
194 %2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
195 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
196 ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1
197 store <16 x i8> %2, <16 x i8>* %c
198 ; CHECK-DAG: st.b [[R3]], 0($4)
201 ; CHECK: .size sub_v16i8_i
; Test `sub` of a constant vector whose elements are all 1 from a <8 x i16>
; vector loaded from %a; the result is stored to %c.
; The FileCheck directives expect ld.h from 0($5), subvi.h with immediate 1,
; st.h of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
204 define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
205 ; CHECK-LABEL: sub_v8i16_i:
207 %1 = load <8 x i16>* %a
208 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
209 %2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
210 i16 1, i16 1, i16 1, i16 1>
211 ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1
212 store <8 x i16> %2, <8 x i16>* %c
213 ; CHECK-DAG: st.h [[R3]], 0($4)
216 ; CHECK: .size sub_v8i16_i
; Test `sub` of a constant vector whose elements are all 1 from a <4 x i32>
; vector loaded from %a; the result is stored to %c.
; The FileCheck directives expect ld.w from 0($5), subvi.w with immediate 1,
; st.w of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
219 define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
220 ; CHECK-LABEL: sub_v4i32_i:
222 %1 = load <4 x i32>* %a
223 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
224 %2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
225 ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
226 store <4 x i32> %2, <4 x i32>* %c
227 ; CHECK-DAG: st.w [[R3]], 0($4)
230 ; CHECK: .size sub_v4i32_i
; Test `sub` of a constant vector whose elements are all 1 from a <2 x i64>
; vector loaded from %a; the result is stored to %c.
; The FileCheck directives expect ld.d from 0($5), subvi.d with immediate 1,
; st.d of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
233 define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
234 ; CHECK-LABEL: sub_v2i64_i:
236 %1 = load <2 x i64>* %a
237 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
238 %2 = sub <2 x i64> %1, <i64 1, i64 1>
239 ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
240 store <2 x i64> %2, <2 x i64>* %c
241 ; CHECK-DAG: st.d [[R3]], 0($4)
244 ; CHECK: .size sub_v2i64_i
; Test `mul` of two <16 x i8> vectors: loads %a and %b, multiplies them, stores the result to %c.
; The FileCheck directives expect ld.b from 0($5) and 0($6), mulv.b on the two
; loaded registers, st.b of the result to 0($4), and finally the function's .size line.
247 define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
250 %1 = load <16 x i8>* %a
251 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
252 %2 = load <16 x i8>* %b
253 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
254 %3 = mul <16 x i8> %1, %2
255 ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
256 store <16 x i8> %3, <16 x i8>* %c
257 ; CHECK-DAG: st.b [[R3]], 0($4)
260 ; CHECK: .size mul_v16i8
; Test `mul` of two <8 x i16> vectors: loads %a and %b, multiplies them, stores the result to %c.
; The FileCheck directives expect ld.h from 0($5) and 0($6), mulv.h on the two
; loaded registers, st.h of the result to 0($4), and finally the function's .size line.
263 define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
266 %1 = load <8 x i16>* %a
267 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
268 %2 = load <8 x i16>* %b
269 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
270 %3 = mul <8 x i16> %1, %2
271 ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
272 store <8 x i16> %3, <8 x i16>* %c
273 ; CHECK-DAG: st.h [[R3]], 0($4)
276 ; CHECK: .size mul_v8i16
; Test `mul` of two <4 x i32> vectors: loads %a and %b, multiplies them, stores the result to %c.
; The FileCheck directives expect ld.w from 0($5) and 0($6), mulv.w on the two
; loaded registers, st.w of the result to 0($4), and finally the function's .size line.
279 define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
282 %1 = load <4 x i32>* %a
283 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
284 %2 = load <4 x i32>* %b
285 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
286 %3 = mul <4 x i32> %1, %2
287 ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
288 store <4 x i32> %3, <4 x i32>* %c
289 ; CHECK-DAG: st.w [[R3]], 0($4)
292 ; CHECK: .size mul_v4i32
; Test `mul` of two <2 x i64> vectors: loads %a and %b, multiplies them, stores the result to %c.
; The FileCheck directives expect ld.d from 0($5) and 0($6), mulv.d on the two
; loaded registers, st.d of the result to 0($4), and finally the function's .size line.
295 define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
298 %1 = load <2 x i64>* %a
299 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
300 %2 = load <2 x i64>* %b
301 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
302 %3 = mul <2 x i64> %1, %2
303 ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
304 store <2 x i64> %3, <2 x i64>* %c
305 ; CHECK-DAG: st.d [[R3]], 0($4)
308 ; CHECK: .size mul_v2i64
; Test `sdiv` of two <16 x i8> vectors: loads %a and %b, computes %a / %b, stores the result to %c.
; The FileCheck directives expect ld.b from 0($5) and 0($6), div_s.b on the two
; loaded registers, st.b of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
311 define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
312 ; CHECK-LABEL: div_s_v16i8:
314 %1 = load <16 x i8>* %a
315 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
316 %2 = load <16 x i8>* %b
317 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
318 %3 = sdiv <16 x i8> %1, %2
319 ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
320 store <16 x i8> %3, <16 x i8>* %c
321 ; CHECK-DAG: st.b [[R3]], 0($4)
324 ; CHECK: .size div_s_v16i8
; Test `sdiv` of two <8 x i16> vectors: loads %a and %b, computes %a / %b, stores the result to %c.
; The FileCheck directives expect ld.h from 0($5) and 0($6), div_s.h on the two
; loaded registers, st.h of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
327 define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
328 ; CHECK-LABEL: div_s_v8i16:
330 %1 = load <8 x i16>* %a
331 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
332 %2 = load <8 x i16>* %b
333 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
334 %3 = sdiv <8 x i16> %1, %2
335 ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
336 store <8 x i16> %3, <8 x i16>* %c
337 ; CHECK-DAG: st.h [[R3]], 0($4)
340 ; CHECK: .size div_s_v8i16
; Test `sdiv` of two <4 x i32> vectors: loads %a and %b, computes %a / %b, stores the result to %c.
; The FileCheck directives expect ld.w from 0($5) and 0($6), div_s.w on the two
; loaded registers, st.w of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
343 define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
344 ; CHECK-LABEL: div_s_v4i32:
346 %1 = load <4 x i32>* %a
347 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
348 %2 = load <4 x i32>* %b
349 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
350 %3 = sdiv <4 x i32> %1, %2
351 ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
352 store <4 x i32> %3, <4 x i32>* %c
353 ; CHECK-DAG: st.w [[R3]], 0($4)
356 ; CHECK: .size div_s_v4i32
; Test `sdiv` of two <2 x i64> vectors: loads %a and %b, computes %a / %b, stores the result to %c.
; The FileCheck directives expect ld.d from 0($5) and 0($6), div_s.d on the two
; loaded registers, st.d of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
359 define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
360 ; CHECK-LABEL: div_s_v2i64:
362 %1 = load <2 x i64>* %a
363 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
364 %2 = load <2 x i64>* %b
365 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
366 %3 = sdiv <2 x i64> %1, %2
367 ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
368 store <2 x i64> %3, <2 x i64>* %c
369 ; CHECK-DAG: st.d [[R3]], 0($4)
372 ; CHECK: .size div_s_v2i64
; Test `udiv` of two <16 x i8> vectors: loads %a and %b, computes %a / %b, stores the result to %c.
; The FileCheck directives expect ld.b from 0($5) and 0($6), div_u.b on the two
; loaded registers, st.b of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
375 define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
376 ; CHECK-LABEL: div_u_v16i8:
378 %1 = load <16 x i8>* %a
379 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
380 %2 = load <16 x i8>* %b
381 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
382 %3 = udiv <16 x i8> %1, %2
383 ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
384 store <16 x i8> %3, <16 x i8>* %c
385 ; CHECK-DAG: st.b [[R3]], 0($4)
388 ; CHECK: .size div_u_v16i8
; Test `udiv` of two <8 x i16> vectors: loads %a and %b, computes %a / %b, stores the result to %c.
; The FileCheck directives expect ld.h from 0($5) and 0($6), div_u.h on the two
; loaded registers, st.h of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
391 define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
392 ; CHECK-LABEL: div_u_v8i16:
394 %1 = load <8 x i16>* %a
395 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
396 %2 = load <8 x i16>* %b
397 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
398 %3 = udiv <8 x i16> %1, %2
399 ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
400 store <8 x i16> %3, <8 x i16>* %c
401 ; CHECK-DAG: st.h [[R3]], 0($4)
404 ; CHECK: .size div_u_v8i16
; Test `udiv` of two <4 x i32> vectors: loads %a and %b, computes %a / %b, stores the result to %c.
; The FileCheck directives expect ld.w from 0($5) and 0($6), div_u.w on the two
; loaded registers, st.w of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
407 define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
408 ; CHECK-LABEL: div_u_v4i32:
410 %1 = load <4 x i32>* %a
411 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
412 %2 = load <4 x i32>* %b
413 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
414 %3 = udiv <4 x i32> %1, %2
415 ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
416 store <4 x i32> %3, <4 x i32>* %c
417 ; CHECK-DAG: st.w [[R3]], 0($4)
420 ; CHECK: .size div_u_v4i32
; Test `udiv` of two <2 x i64> vectors: loads %a and %b, computes %a / %b, stores the result to %c.
; The FileCheck directives expect ld.d from 0($5) and 0($6), div_u.d on the two
; loaded registers, st.d of the result to 0($4), and finally the function's .size line.
; The function label is matched with the label form of the directive, which
; makes FileCheck treat it as a block boundary so that directives belonging to
; an earlier function cannot match into this function's output.
423 define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
424 ; CHECK-LABEL: div_u_v2i64:
426 %1 = load <2 x i64>* %a
427 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
428 %2 = load <2 x i64>* %b
429 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
430 %3 = udiv <2 x i64> %1, %2
431 ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
432 store <2 x i64> %3, <2 x i64>* %c
433 ; CHECK-DAG: st.d [[R3]], 0($4)
436 ; CHECK: .size div_u_v2i64