; RUN: llc -march=mips -mattr=+msa < %s | FileCheck %s
; Bitwise AND of two <16 x i8> vectors: loads %a and %b, ANDs them and stores
; the result to %c. The checks expect byte loads via $5/$6, a single and.v,
; and a byte store via $4.
define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: and_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = and <16 x i8> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v16i8
}
; Bitwise AND of two <8 x i16> vectors: loads %a and %b, ANDs them and stores
; the result to %c. The checks expect halfword loads/stores but the same
; width-agnostic and.v instruction.
define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: and_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = and <8 x i16> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v8i16
}
; Bitwise AND of two <4 x i32> vectors: loads %a and %b, ANDs them and stores
; the result to %c. The checks expect word loads/stores around a single and.v.
define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: and_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = and <4 x i32> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v4i32
}
; Bitwise AND of two <2 x i64> vectors: loads %a and %b, ANDs them and stores
; the result to %c. The checks expect doubleword loads/stores around a single
; and.v.
define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: and_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = and <2 x i64> %1, %2
  ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size and_v2i64
}
; Bitwise OR of two <16 x i8> vectors: loads %a and %b, ORs them and stores
; the result to %c. The checks expect byte loads via $5/$6, a single or.v,
; and a byte store via $4.
define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: or_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = or <16 x i8> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v16i8
}
; Bitwise OR of two <8 x i16> vectors: loads %a and %b, ORs them and stores
; the result to %c. The checks expect halfword loads/stores around a single
; or.v.
define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: or_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = or <8 x i16> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v8i16
}
; Bitwise OR of two <4 x i32> vectors: loads %a and %b, ORs them and stores
; the result to %c. The checks expect word loads/stores around a single or.v.
define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: or_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = or <4 x i32> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v4i32
}
; Bitwise OR of two <2 x i64> vectors: loads %a and %b, ORs them and stores
; the result to %c. The checks expect doubleword loads/stores around a single
; or.v.
define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: or_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = or <2 x i64> %1, %2
  ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size or_v2i64
}
; Bitwise NOR of two <16 x i8> vectors, written as an OR followed by an XOR
; with an all-ones splat. The checks expect the pair to be selected as a
; single nor.v on the two loaded operands.
define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: nor_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = or <16 x i8> %1, %2
  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v16i8
}
; Bitwise NOR of two <8 x i16> vectors, written as an OR followed by an XOR
; with an all-ones splat. The checks expect the pair to be selected as a
; single nor.v on the two loaded operands.
define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: nor_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = or <8 x i16> %1, %2
  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v8i16
}
; Bitwise NOR of two <4 x i32> vectors, written as an OR followed by an XOR
; with an all-ones splat. The checks expect the pair to be selected as a
; single nor.v on the two loaded operands.
define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: nor_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = or <4 x i32> %1, %2
  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v4i32
}
; Bitwise NOR of two <2 x i64> vectors, written as an OR followed by an XOR
; with an all-ones splat. The checks expect the pair to be selected as a
; single nor.v on the two loaded operands.
define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: nor_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = or <2 x i64> %1, %2
  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
  ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size nor_v2i64
}
; Bitwise XOR of two <16 x i8> vectors: loads %a and %b, XORs them and stores
; the result to %c. The checks expect byte loads via $5/$6, a single xor.v,
; and a byte store via $4.
define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: xor_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <16 x i8> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v16i8
}
; Bitwise XOR of two <8 x i16> vectors: loads %a and %b, XORs them and stores
; the result to %c. The checks expect halfword loads/stores around a single
; xor.v.
define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: xor_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <8 x i16> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v8i16
}
; Bitwise XOR of two <4 x i32> vectors: loads %a and %b, XORs them and stores
; the result to %c. The checks expect word loads/stores around a single xor.v.
define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: xor_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <4 x i32> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v4i32
}
; Bitwise XOR of two <2 x i64> vectors: loads %a and %b, XORs them and stores
; the result to %c. The checks expect doubleword loads/stores around a single
; xor.v.
define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: xor_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = xor <2 x i64> %1, %2
  ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size xor_v2i64
}
; Element-wise left shift of the <16 x i8> vector at %a by the per-element
; amounts loaded from %b; the result is stored to %c. The checks expect the
; register-register form sll.b.
define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: sll_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <16 x i8> %1, %2
  ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v16i8
}
; Element-wise left shift of the <8 x i16> vector at %a by the per-element
; amounts loaded from %b; the result is stored to %c. The checks expect the
; register-register form sll.h.
define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: sll_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <8 x i16> %1, %2
  ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v8i16
}
; Element-wise left shift of the <4 x i32> vector at %a by the per-element
; amounts loaded from %b; the result is stored to %c. The checks expect the
; register-register form sll.w.
define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: sll_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <4 x i32> %1, %2
  ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v4i32
}
; Element-wise left shift of the <2 x i64> vector at %a by the per-element
; amounts loaded from %b; the result is stored to %c. The checks expect the
; register-register form sll.d.
define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: sll_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <2 x i64> %1, %2
  ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sll_v2i64
}
; Left shift of the <16 x i8> vector at %a by a constant splat of 1, stored
; to %c. The checks expect the immediate form slli.b with shift amount 1
; rather than a materialised shift vector.
define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: sll_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v16i8_i
}
; Left shift of the <8 x i16> vector at %a by a constant splat of 1, stored
; to %c. The checks expect the immediate form slli.h with shift amount 1.
define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: sll_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v8i16_i
}
; Left shift of the <4 x i32> vector at %a by a constant splat of 1, stored
; to %c. The checks expect the immediate form slli.w with shift amount 1.
define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: sll_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v4i32_i
}
; Left shift of the <2 x i64> vector at %a by a constant splat of 1, stored
; to %c. The checks expect the immediate form slli.d with shift amount 1.
define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: sll_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = shl <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size sll_v2i64_i
}
; Element-wise arithmetic right shift of the <16 x i8> vector at %a by the
; per-element amounts loaded from %b; the result is stored to %c. The checks
; expect the register-register form sra.b.
define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: sra_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <16 x i8> %1, %2
  ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v16i8
}
; Element-wise arithmetic right shift of the <8 x i16> vector at %a by the
; per-element amounts loaded from %b; the result is stored to %c. The checks
; expect the register-register form sra.h.
define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: sra_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <8 x i16> %1, %2
  ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v8i16
}
; Element-wise arithmetic right shift of the <4 x i32> vector at %a by the
; per-element amounts loaded from %b; the result is stored to %c. The checks
; expect the register-register form sra.w.
define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: sra_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <4 x i32> %1, %2
  ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v4i32
}
; Element-wise arithmetic right shift of the <2 x i64> vector at %a by the
; per-element amounts loaded from %b; the result is stored to %c. The checks
; expect the register-register form sra.d.
define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: sra_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = ashr <2 x i64> %1, %2
  ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sra_v2i64
}
; Arithmetic right shift of the <16 x i8> vector at %a by a constant splat of
; 1, stored to %c. The checks expect the immediate form srai.b with shift
; amount 1.
define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: sra_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v16i8_i
}
; Arithmetic right shift of the <8 x i16> vector at %a by a constant splat of
; 1, stored to %c. The checks expect the immediate form srai.h with shift
; amount 1.
define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: sra_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v8i16_i
}
; Arithmetic right shift of the <4 x i32> vector at %a by a constant splat of
; 1, stored to %c. The checks expect the immediate form srai.w with shift
; amount 1.
define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: sra_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v4i32_i
}
; Arithmetic right shift of the <2 x i64> vector at %a by a constant splat of
; 1, stored to %c. The checks expect the immediate form srai.d with shift
; amount 1.
define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: sra_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = ashr <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size sra_v2i64_i
}
; Element-wise logical right shift of the <16 x i8> vector at %a by the
; per-element amounts loaded from %b; the result is stored to %c. The checks
; expect the register-register form srl.b.
define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: srl_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <16 x i8> %1, %2
  ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v16i8
}
; Element-wise logical right shift of the <8 x i16> vector at %a by the
; per-element amounts loaded from %b; the result is stored to %c. The checks
; expect the register-register form srl.h.
define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: srl_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <8 x i16> %1, %2
  ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v8i16
}
; Element-wise logical right shift of the <4 x i32> vector at %a by the
; per-element amounts loaded from %b; the result is stored to %c. The checks
; expect the register-register form srl.w.
define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: srl_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <4 x i32> %1, %2
  ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v4i32
}
; Element-wise logical right shift of the <2 x i64> vector at %a by the
; per-element amounts loaded from %b; the result is stored to %c. The checks
; expect the register-register form srl.d.
define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: srl_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = lshr <2 x i64> %1, %2
  ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size srl_v2i64
}
; Logical right shift of the <16 x i8> vector at %a by a constant splat of 1,
; stored to %c. The checks expect the immediate form srli.b with shift
; amount 1.
define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: srl_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v16i8_i
}
; Logical right shift of the <8 x i16> vector at %a by a constant splat of 1,
; stored to %c. The checks expect the immediate form srli.h with shift
; amount 1.
define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: srl_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v8i16_i
}
; Logical right shift of the <4 x i32> vector at %a by a constant splat of 1,
; stored to %c. The checks expect the immediate form srli.w with shift
; amount 1.
define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: srl_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
  ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v4i32_i
}
; Logical right shift of the <2 x i64> vector at %a by a constant splat of 1,
; stored to %c. The checks expect the immediate form srli.d with shift
; amount 1.
define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: srl_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = lshr <2 x i64> %1, <i64 1, i64 1>
  ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size srl_v2i64_i
}
; Per-element population count of the <16 x i8> vector at %a via the
; llvm.ctpop.v16i8 intrinsic, stored to %c. The checks expect a single pcnt.b.
define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: ctpop_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
  ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v16i8
}
; Per-element population count of the <8 x i16> vector at %a via the
; llvm.ctpop.v8i16 intrinsic, stored to %c. The checks expect a single pcnt.h.
define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: ctpop_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
  ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v8i16
}
; Per-element population count of the <4 x i32> vector at %a via the
; llvm.ctpop.v4i32 intrinsic, stored to %c. The checks expect a single pcnt.w.
define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ctpop_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
  ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v4i32
}
; Per-element population count of the <2 x i64> vector at %a via the
; llvm.ctpop.v2i64 intrinsic, stored to %c. The checks expect a single pcnt.d.
define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ctpop_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
  ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ctpop_v2i64
}
; Per-element leading-zero count of the <16 x i8> vector at %a via the
; llvm.ctlz.v16i8 intrinsic, stored to %c. The checks expect a single nlzc.b.
define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: ctlz_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
  ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v16i8
}
; Per-element leading-zero count of the <8 x i16> vector at %a via the
; llvm.ctlz.v8i16 intrinsic, stored to %c. The checks expect a single nlzc.h.
define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: ctlz_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
  ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v8i16
}
; Per-element leading-zero count of the <4 x i32> vector at %a via the
; llvm.ctlz.v4i32 intrinsic, stored to %c. The checks expect a single nlzc.w.
define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ctlz_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
  ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v4i32
}
; Per-element leading-zero count of the <2 x i64> vector at %a via the
; llvm.ctlz.v2i64 intrinsic, stored to %c. The checks expect a single nlzc.d.
define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ctlz_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
  ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ctlz_v2i64
}
; Declarations of the vector population-count and leading-zero-count
; intrinsics called by the ctpop_* and ctlz_* tests in this file, one per
; element width (i8, i16, i32, i64).
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val)
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val)