1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; and_v16i8: loads <16 x i8> vectors from %a and %b, ANDs them and stores the result to %c.
; Expected assembly: ld.b from 0($5) and 0($6), a single and.v, then st.b to 0($4).
3 define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
6 %1 = load <16 x i8>* %a
7 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
8 %2 = load <16 x i8>* %b
9 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
10 %3 = and <16 x i8> %1, %2
11 ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
12 store <16 x i8> %3, <16 x i8>* %c
13 ; CHECK-DAG: st.b [[R3]], 0($4)
16 ; CHECK: .size and_v16i8
; and_v8i16: loads <8 x i16> vectors from %a and %b, ANDs them and stores the result to %c.
; Expected assembly: ld.h from 0($5) and 0($6), a single and.v, then st.h to 0($4).
19 define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
22 %1 = load <8 x i16>* %a
23 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
24 %2 = load <8 x i16>* %b
25 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
26 %3 = and <8 x i16> %1, %2
27 ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
28 store <8 x i16> %3, <8 x i16>* %c
29 ; CHECK-DAG: st.h [[R3]], 0($4)
32 ; CHECK: .size and_v8i16
; and_v4i32: loads <4 x i32> vectors from %a and %b, ANDs them and stores the result to %c.
; Expected assembly: ld.w from 0($5) and 0($6), a single and.v, then st.w to 0($4).
35 define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
38 %1 = load <4 x i32>* %a
39 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
40 %2 = load <4 x i32>* %b
41 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
42 %3 = and <4 x i32> %1, %2
43 ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
44 store <4 x i32> %3, <4 x i32>* %c
45 ; CHECK-DAG: st.w [[R3]], 0($4)
48 ; CHECK: .size and_v4i32
; and_v2i64: loads <2 x i64> vectors from %a and %b, ANDs them and stores the result to %c.
; Expected assembly: ld.d from 0($5) and 0($6), a single and.v, then st.d to 0($4).
51 define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
54 %1 = load <2 x i64>* %a
55 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
56 %2 = load <2 x i64>* %b
57 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
58 %3 = and <2 x i64> %1, %2
59 ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
60 store <2 x i64> %3, <2 x i64>* %c
61 ; CHECK-DAG: st.d [[R3]], 0($4)
64 ; CHECK: .size and_v2i64
; and_v16i8_i: ANDs a <16 x i8> vector loaded from %a with a constant splat of 1 and stores to %c.
; Expected assembly: the immediate form andi.b applied directly to the loaded register.
67 define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
70 %1 = load <16 x i8>* %a
71 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
72 %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
73 ; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1
74 store <16 x i8> %2, <16 x i8>* %c
75 ; CHECK-DAG: st.b [[R4]], 0($4)
78 ; CHECK: .size and_v16i8_i
; and_v8i16_i: ANDs an <8 x i16> vector loaded from %a with a constant splat of 1 and stores to %c.
; Expected assembly: the splat is materialised with ldi.h and combined with the load via and.v.
81 define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
84 %1 = load <8 x i16>* %a
85 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
86 %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
87 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
88 ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
89 store <8 x i16> %2, <8 x i16>* %c
90 ; CHECK-DAG: st.h [[R4]], 0($4)
93 ; CHECK: .size and_v8i16_i
; and_v4i32_i: ANDs a <4 x i32> vector loaded from %a with a constant splat of 1 and stores to %c.
; Expected assembly: the splat is materialised with ldi.w and combined with the load via and.v.
96 define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
99 %1 = load <4 x i32>* %a
100 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
101 %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
102 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
103 ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
104 store <4 x i32> %2, <4 x i32>* %c
105 ; CHECK-DAG: st.w [[R4]], 0($4)
108 ; CHECK: .size and_v4i32_i
; and_v2i64_i: ANDs a <2 x i64> vector loaded from %a with a constant splat of 1 and stores to %c.
; Expected assembly: the splat is materialised with ldi.d and combined with the load via and.v.
111 define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
112 ; CHECK: and_v2i64_i:
114 %1 = load <2 x i64>* %a
115 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
116 %2 = and <2 x i64> %1, <i64 1, i64 1>
117 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
118 ; CHECK-DAG: and.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
119 store <2 x i64> %2, <2 x i64>* %c
120 ; CHECK-DAG: st.d [[R4]], 0($4)
123 ; CHECK: .size and_v2i64_i
; or_v16i8: loads <16 x i8> vectors from %a and %b, ORs them and stores the result to %c.
; Expected assembly: ld.b from 0($5) and 0($6), a single or.v, then st.b to 0($4).
126 define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
129 %1 = load <16 x i8>* %a
130 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
131 %2 = load <16 x i8>* %b
132 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
133 %3 = or <16 x i8> %1, %2
134 ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
135 store <16 x i8> %3, <16 x i8>* %c
136 ; CHECK-DAG: st.b [[R3]], 0($4)
139 ; CHECK: .size or_v16i8
; or_v8i16: loads <8 x i16> vectors from %a and %b, ORs them and stores the result to %c.
; Expected assembly: ld.h from 0($5) and 0($6), a single or.v, then st.h to 0($4).
142 define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
145 %1 = load <8 x i16>* %a
146 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
147 %2 = load <8 x i16>* %b
148 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
149 %3 = or <8 x i16> %1, %2
150 ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
151 store <8 x i16> %3, <8 x i16>* %c
152 ; CHECK-DAG: st.h [[R3]], 0($4)
155 ; CHECK: .size or_v8i16
; or_v4i32: loads <4 x i32> vectors from %a and %b, ORs them and stores the result to %c.
; Expected assembly: ld.w from 0($5) and 0($6), a single or.v, then st.w to 0($4).
158 define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
161 %1 = load <4 x i32>* %a
162 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
163 %2 = load <4 x i32>* %b
164 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
165 %3 = or <4 x i32> %1, %2
166 ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
167 store <4 x i32> %3, <4 x i32>* %c
168 ; CHECK-DAG: st.w [[R3]], 0($4)
171 ; CHECK: .size or_v4i32
; or_v2i64: loads <2 x i64> vectors from %a and %b, ORs them and stores the result to %c.
; Expected assembly: ld.d from 0($5) and 0($6), a single or.v, then st.d to 0($4).
174 define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
177 %1 = load <2 x i64>* %a
178 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
179 %2 = load <2 x i64>* %b
180 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
181 %3 = or <2 x i64> %1, %2
182 ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
183 store <2 x i64> %3, <2 x i64>* %c
184 ; CHECK-DAG: st.d [[R3]], 0($4)
187 ; CHECK: .size or_v2i64
; or_v16i8_i: ORs a <16 x i8> vector loaded from %a with a constant splat of 3 and stores to %c.
; Expected assembly: the immediate form ori.b applied directly to the loaded register.
190 define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
193 %1 = load <16 x i8>* %a
194 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
195 %2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
196 ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 3
197 store <16 x i8> %2, <16 x i8>* %c
198 ; CHECK-DAG: st.b [[R4]], 0($4)
201 ; CHECK: .size or_v16i8_i
; or_v8i16_i: ORs an <8 x i16> vector loaded from %a with a constant splat of 3 and stores to %c.
; Expected assembly: the splat is materialised with ldi.h and combined with the load via or.v.
204 define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
207 %1 = load <8 x i16>* %a
208 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
209 %2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
210 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
211 ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
212 store <8 x i16> %2, <8 x i16>* %c
213 ; CHECK-DAG: st.h [[R4]], 0($4)
216 ; CHECK: .size or_v8i16_i
; or_v4i32_i: ORs a <4 x i32> vector loaded from %a with a constant splat of 3 and stores to %c.
; Expected assembly: the splat is materialised with ldi.w and combined with the load via or.v.
219 define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
222 %1 = load <4 x i32>* %a
223 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
224 %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
225 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
226 ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
227 store <4 x i32> %2, <4 x i32>* %c
228 ; CHECK-DAG: st.w [[R4]], 0($4)
231 ; CHECK: .size or_v4i32_i
; or_v2i64_i: ORs a <2 x i64> vector loaded from %a with a constant splat of 3 and stores to %c.
; Expected assembly: the splat is materialised with ldi.d and combined with the load via or.v.
234 define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
237 %1 = load <2 x i64>* %a
238 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
239 %2 = or <2 x i64> %1, <i64 3, i64 3>
240 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
241 ; CHECK-DAG: or.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
242 store <2 x i64> %2, <2 x i64>* %c
243 ; CHECK-DAG: st.d [[R4]], 0($4)
246 ; CHECK: .size or_v2i64_i
; nor_v16i8: ORs <16 x i8> vectors loaded from %a and %b, inverts the result (xor with all-ones)
; and stores it to %c. Expected assembly: the or/xor pair is selected as a single nor.v.
249 define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
252 %1 = load <16 x i8>* %a
253 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
254 %2 = load <16 x i8>* %b
255 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
256 %3 = or <16 x i8> %1, %2
257 %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
258 ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
259 store <16 x i8> %4, <16 x i8>* %c
260 ; CHECK-DAG: st.b [[R3]], 0($4)
263 ; CHECK: .size nor_v16i8
; nor_v8i16: ORs <8 x i16> vectors loaded from %a and %b, inverts the result (xor with all-ones)
; and stores it to %c. Expected assembly: the or/xor pair is selected as a single nor.v.
266 define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
269 %1 = load <8 x i16>* %a
270 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
271 %2 = load <8 x i16>* %b
272 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
273 %3 = or <8 x i16> %1, %2
274 %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
275 ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
276 store <8 x i16> %4, <8 x i16>* %c
277 ; CHECK-DAG: st.h [[R3]], 0($4)
280 ; CHECK: .size nor_v8i16
; nor_v4i32: ORs <4 x i32> vectors loaded from %a and %b, inverts the result (xor with all-ones)
; and stores it to %c. Expected assembly: the or/xor pair is selected as a single nor.v.
283 define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
286 %1 = load <4 x i32>* %a
287 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
288 %2 = load <4 x i32>* %b
289 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
290 %3 = or <4 x i32> %1, %2
291 %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
292 ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
293 store <4 x i32> %4, <4 x i32>* %c
294 ; CHECK-DAG: st.w [[R3]], 0($4)
297 ; CHECK: .size nor_v4i32
; nor_v2i64: ORs <2 x i64> vectors loaded from %a and %b, inverts the result (xor with all-ones)
; and stores it to %c. Expected assembly: the or/xor pair is selected as a single nor.v.
300 define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
303 %1 = load <2 x i64>* %a
304 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
305 %2 = load <2 x i64>* %b
306 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
307 %3 = or <2 x i64> %1, %2
308 %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
309 ; CHECK-DAG: nor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
310 store <2 x i64> %4, <2 x i64>* %c
311 ; CHECK-DAG: st.d [[R3]], 0($4)
314 ; CHECK: .size nor_v2i64
; nor_v16i8_i: ORs a <16 x i8> vector loaded from %a with a constant splat of 1, inverts the
; result (xor with all-ones) and stores it to %c.
; The expectation names nori.b in full: FileCheck matches substrings, so a bare "ori.b" pattern
; would accept both ori.b and nori.b and would not pin the NOR-immediate selection.
317 define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
318 ; CHECK: nor_v16i8_i:
320 %1 = load <16 x i8>* %a
321 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
322 %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
323 %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
324 ; CHECK-DAG: nori.b [[R4:\$w[0-9]+]], [[R1]], 1
325 store <16 x i8> %3, <16 x i8>* %c
326 ; CHECK-DAG: st.b [[R4]], 0($4)
329 ; CHECK: .size nor_v16i8_i
; nor_v8i16_i: ORs an <8 x i16> vector loaded from %a with a constant splat of 1, inverts the
; result (xor with all-ones) and stores it to %c.
; Expected assembly: the splat is materialised with ldi.h and combined with the load via nor.v.
332 define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
333 ; CHECK: nor_v8i16_i:
335 %1 = load <8 x i16>* %a
336 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
337 %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
338 %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
339 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
340 ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
341 store <8 x i16> %3, <8 x i16>* %c
342 ; CHECK-DAG: st.h [[R4]], 0($4)
345 ; CHECK: .size nor_v8i16_i
; nor_v4i32_i: ORs a <4 x i32> vector loaded from %a with a constant splat of 1, inverts the
; result (xor with all-ones) and stores it to %c.
; Expected assembly: the splat is materialised with ldi.w and combined with the load via nor.v.
348 define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
349 ; CHECK: nor_v4i32_i:
351 %1 = load <4 x i32>* %a
352 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
353 %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
354 %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
355 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
356 ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
357 store <4 x i32> %3, <4 x i32>* %c
358 ; CHECK-DAG: st.w [[R4]], 0($4)
361 ; CHECK: .size nor_v4i32_i
; nor_v2i64_i: ORs a <2 x i64> vector loaded from %a with a constant splat of 1, inverts the
; result (xor with all-ones) and stores it to %c.
; Expected assembly: the splat is materialised with ldi.d and combined with the load via nor.v.
364 define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
365 ; CHECK: nor_v2i64_i:
367 %1 = load <2 x i64>* %a
368 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
369 %2 = or <2 x i64> %1, <i64 1, i64 1>
370 %3 = xor <2 x i64> %2, <i64 -1, i64 -1>
371 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
372 ; CHECK-DAG: nor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
373 store <2 x i64> %3, <2 x i64>* %c
374 ; CHECK-DAG: st.d [[R4]], 0($4)
377 ; CHECK: .size nor_v2i64_i
; xor_v16i8: loads <16 x i8> vectors from %a and %b, XORs them and stores the result to %c.
; Expected assembly: ld.b from 0($5) and 0($6), a single xor.v, then st.b to 0($4).
380 define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
383 %1 = load <16 x i8>* %a
384 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
385 %2 = load <16 x i8>* %b
386 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
387 %3 = xor <16 x i8> %1, %2
388 ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
389 store <16 x i8> %3, <16 x i8>* %c
390 ; CHECK-DAG: st.b [[R3]], 0($4)
393 ; CHECK: .size xor_v16i8
; xor_v8i16: loads <8 x i16> vectors from %a and %b, XORs them and stores the result to %c.
; Expected assembly: ld.h from 0($5) and 0($6), a single xor.v, then st.h to 0($4).
396 define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
399 %1 = load <8 x i16>* %a
400 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
401 %2 = load <8 x i16>* %b
402 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
403 %3 = xor <8 x i16> %1, %2
404 ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
405 store <8 x i16> %3, <8 x i16>* %c
406 ; CHECK-DAG: st.h [[R3]], 0($4)
409 ; CHECK: .size xor_v8i16
; xor_v4i32: loads <4 x i32> vectors from %a and %b, XORs them and stores the result to %c.
; Expected assembly: ld.w from 0($5) and 0($6), a single xor.v, then st.w to 0($4).
412 define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
415 %1 = load <4 x i32>* %a
416 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
417 %2 = load <4 x i32>* %b
418 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
419 %3 = xor <4 x i32> %1, %2
420 ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
421 store <4 x i32> %3, <4 x i32>* %c
422 ; CHECK-DAG: st.w [[R3]], 0($4)
425 ; CHECK: .size xor_v4i32
; xor_v2i64: loads <2 x i64> vectors from %a and %b, XORs them and stores the result to %c.
; Expected assembly: ld.d from 0($5) and 0($6), a single xor.v, then st.d to 0($4).
428 define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
431 %1 = load <2 x i64>* %a
432 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
433 %2 = load <2 x i64>* %b
434 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
435 %3 = xor <2 x i64> %1, %2
436 ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
437 store <2 x i64> %3, <2 x i64>* %c
438 ; CHECK-DAG: st.d [[R3]], 0($4)
441 ; CHECK: .size xor_v2i64
; xor_v16i8_i: XORs a <16 x i8> vector loaded from %a with a constant splat of 3 and stores to %c.
; Expected assembly: the immediate form xori.b applied directly to the loaded register.
444 define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
445 ; CHECK: xor_v16i8_i:
447 %1 = load <16 x i8>* %a
448 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
449 %2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
450 ; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 3
451 store <16 x i8> %2, <16 x i8>* %c
452 ; CHECK-DAG: st.b [[R4]], 0($4)
455 ; CHECK: .size xor_v16i8_i
; xor_v8i16_i: XORs an <8 x i16> vector loaded from %a with a constant splat of 3 and stores to %c.
; Expected assembly: the splat is materialised with ldi.h and combined with the load via xor.v.
458 define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
459 ; CHECK: xor_v8i16_i:
461 %1 = load <8 x i16>* %a
462 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
463 %2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
464 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
465 ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
466 store <8 x i16> %2, <8 x i16>* %c
467 ; CHECK-DAG: st.h [[R4]], 0($4)
470 ; CHECK: .size xor_v8i16_i
; xor_v4i32_i: XORs a <4 x i32> vector loaded from %a with a constant splat of 3 and stores to %c.
; Expected assembly: the splat is materialised with ldi.w and combined with the load via xor.v.
473 define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
474 ; CHECK: xor_v4i32_i:
476 %1 = load <4 x i32>* %a
477 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
478 %2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
479 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
480 ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
481 store <4 x i32> %2, <4 x i32>* %c
482 ; CHECK-DAG: st.w [[R4]], 0($4)
485 ; CHECK: .size xor_v4i32_i
; xor_v2i64_i: XORs a <2 x i64> vector loaded from %a with a constant splat of 3 and stores to %c.
; Expected assembly: the splat is materialised with ldi.d and combined with the load via xor.v.
488 define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
489 ; CHECK: xor_v2i64_i:
491 %1 = load <2 x i64>* %a
492 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
493 %2 = xor <2 x i64> %1, <i64 3, i64 3>
494 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
495 ; CHECK-DAG: xor.v [[R4:\$w[0-9]+]], [[R1]], [[R3]]
496 store <2 x i64> %2, <2 x i64>* %c
497 ; CHECK-DAG: st.d [[R4]], 0($4)
500 ; CHECK: .size xor_v2i64_i
; sll_v16i8: shifts the <16 x i8> vector loaded from %a left by the per-element amounts loaded
; from %b (IR shl) and stores the result to %c. Expected assembly: a single sll.b.
503 define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
506 %1 = load <16 x i8>* %a
507 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
508 %2 = load <16 x i8>* %b
509 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
510 %3 = shl <16 x i8> %1, %2
511 ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
512 store <16 x i8> %3, <16 x i8>* %c
513 ; CHECK-DAG: st.b [[R3]], 0($4)
516 ; CHECK: .size sll_v16i8
; sll_v8i16: shifts the <8 x i16> vector loaded from %a left by the per-element amounts loaded
; from %b (IR shl) and stores the result to %c. Expected assembly: a single sll.h.
519 define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
522 %1 = load <8 x i16>* %a
523 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
524 %2 = load <8 x i16>* %b
525 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
526 %3 = shl <8 x i16> %1, %2
527 ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
528 store <8 x i16> %3, <8 x i16>* %c
529 ; CHECK-DAG: st.h [[R3]], 0($4)
532 ; CHECK: .size sll_v8i16
; sll_v4i32: shifts the <4 x i32> vector loaded from %a left by the per-element amounts loaded
; from %b (IR shl) and stores the result to %c. Expected assembly: a single sll.w.
535 define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
538 %1 = load <4 x i32>* %a
539 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
540 %2 = load <4 x i32>* %b
541 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
542 %3 = shl <4 x i32> %1, %2
543 ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
544 store <4 x i32> %3, <4 x i32>* %c
545 ; CHECK-DAG: st.w [[R3]], 0($4)
548 ; CHECK: .size sll_v4i32
; sll_v2i64: shifts the <2 x i64> vector loaded from %a left by the per-element amounts loaded
; from %b (IR shl) and stores the result to %c. Expected assembly: a single sll.d.
551 define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
554 %1 = load <2 x i64>* %a
555 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
556 %2 = load <2 x i64>* %b
557 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
558 %3 = shl <2 x i64> %1, %2
559 ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
560 store <2 x i64> %3, <2 x i64>* %c
561 ; CHECK-DAG: st.d [[R3]], 0($4)
564 ; CHECK: .size sll_v2i64
; sll_v16i8_i: shifts a <16 x i8> vector loaded from %a left by a constant splat of 1 and
; stores to %c. Expected assembly: the immediate form slli.b with shift amount 1.
567 define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
568 ; CHECK: sll_v16i8_i:
570 %1 = load <16 x i8>* %a
571 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
572 %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
573 ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
574 store <16 x i8> %2, <16 x i8>* %c
575 ; CHECK-DAG: st.b [[R4]], 0($4)
578 ; CHECK: .size sll_v16i8_i
; sll_v8i16_i: shifts an <8 x i16> vector loaded from %a left by a constant splat of 1 and
; stores to %c. Expected assembly: the immediate form slli.h with shift amount 1.
581 define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
582 ; CHECK: sll_v8i16_i:
584 %1 = load <8 x i16>* %a
585 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
586 %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
587 ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
588 store <8 x i16> %2, <8 x i16>* %c
589 ; CHECK-DAG: st.h [[R4]], 0($4)
592 ; CHECK: .size sll_v8i16_i
; sll_v4i32_i: shifts a <4 x i32> vector loaded from %a left by a constant splat of 1 and
; stores to %c. Expected assembly: the immediate form slli.w with shift amount 1.
595 define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
596 ; CHECK: sll_v4i32_i:
598 %1 = load <4 x i32>* %a
599 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
600 %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
601 ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
602 store <4 x i32> %2, <4 x i32>* %c
603 ; CHECK-DAG: st.w [[R4]], 0($4)
606 ; CHECK: .size sll_v4i32_i
; sll_v2i64_i: shifts a <2 x i64> vector loaded from %a left by a constant splat of 1 and
; stores to %c. Expected assembly: the immediate form slli.d with shift amount 1.
609 define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
610 ; CHECK: sll_v2i64_i:
612 %1 = load <2 x i64>* %a
613 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
614 %2 = shl <2 x i64> %1, <i64 1, i64 1>
615 ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
616 store <2 x i64> %2, <2 x i64>* %c
617 ; CHECK-DAG: st.d [[R4]], 0($4)
620 ; CHECK: .size sll_v2i64_i
; sra_v16i8: arithmetic right shift (IR ashr) of the <16 x i8> vector loaded from %a by the
; per-element amounts loaded from %b, stored to %c. Expected assembly: a single sra.b.
623 define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
626 %1 = load <16 x i8>* %a
627 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
628 %2 = load <16 x i8>* %b
629 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
630 %3 = ashr <16 x i8> %1, %2
631 ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
632 store <16 x i8> %3, <16 x i8>* %c
633 ; CHECK-DAG: st.b [[R3]], 0($4)
636 ; CHECK: .size sra_v16i8
; sra_v8i16: arithmetic right shift (IR ashr) of the <8 x i16> vector loaded from %a by the
; per-element amounts loaded from %b, stored to %c. Expected assembly: a single sra.h.
639 define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
642 %1 = load <8 x i16>* %a
643 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
644 %2 = load <8 x i16>* %b
645 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
646 %3 = ashr <8 x i16> %1, %2
647 ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
648 store <8 x i16> %3, <8 x i16>* %c
649 ; CHECK-DAG: st.h [[R3]], 0($4)
652 ; CHECK: .size sra_v8i16
; sra_v4i32: arithmetic right shift (IR ashr) of the <4 x i32> vector loaded from %a by the
; per-element amounts loaded from %b, stored to %c. Expected assembly: a single sra.w.
655 define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
658 %1 = load <4 x i32>* %a
659 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
660 %2 = load <4 x i32>* %b
661 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
662 %3 = ashr <4 x i32> %1, %2
663 ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
664 store <4 x i32> %3, <4 x i32>* %c
665 ; CHECK-DAG: st.w [[R3]], 0($4)
668 ; CHECK: .size sra_v4i32
; sra_v2i64: arithmetic right shift (IR ashr) of the <2 x i64> vector loaded from %a by the
; per-element amounts loaded from %b, stored to %c. Expected assembly: a single sra.d.
671 define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
674 %1 = load <2 x i64>* %a
675 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
676 %2 = load <2 x i64>* %b
677 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
678 %3 = ashr <2 x i64> %1, %2
679 ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
680 store <2 x i64> %3, <2 x i64>* %c
681 ; CHECK-DAG: st.d [[R3]], 0($4)
684 ; CHECK: .size sra_v2i64
; sra_v16i8_i: arithmetic right shift (IR ashr) of a <16 x i8> vector loaded from %a by a
; constant splat of 1, stored to %c. Expected assembly: the immediate form srai.b.
687 define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
688 ; CHECK: sra_v16i8_i:
690 %1 = load <16 x i8>* %a
691 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
692 %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
693 ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
694 store <16 x i8> %2, <16 x i8>* %c
695 ; CHECK-DAG: st.b [[R4]], 0($4)
698 ; CHECK: .size sra_v16i8_i
; sra_v8i16_i: arithmetic right shift (IR ashr) of an <8 x i16> vector loaded from %a by a
; constant splat of 1, stored to %c. Expected assembly: the immediate form srai.h.
701 define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
702 ; CHECK: sra_v8i16_i:
704 %1 = load <8 x i16>* %a
705 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
706 %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
707 ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
708 store <8 x i16> %2, <8 x i16>* %c
709 ; CHECK-DAG: st.h [[R4]], 0($4)
712 ; CHECK: .size sra_v8i16_i
; sra_v4i32_i: arithmetic right shift (IR ashr) of a <4 x i32> vector loaded from %a by a
; constant splat of 1, stored to %c. Expected assembly: the immediate form srai.w.
715 define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
716 ; CHECK: sra_v4i32_i:
718 %1 = load <4 x i32>* %a
719 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
720 %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
721 ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
722 store <4 x i32> %2, <4 x i32>* %c
723 ; CHECK-DAG: st.w [[R4]], 0($4)
726 ; CHECK: .size sra_v4i32_i
; sra_v2i64_i: arithmetic right shift (IR ashr) of a <2 x i64> vector loaded from %a by a
; constant splat of 1, stored to %c. Expected assembly: the immediate form srai.d.
729 define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
730 ; CHECK: sra_v2i64_i:
732 %1 = load <2 x i64>* %a
733 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
734 %2 = ashr <2 x i64> %1, <i64 1, i64 1>
735 ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
736 store <2 x i64> %2, <2 x i64>* %c
737 ; CHECK-DAG: st.d [[R4]], 0($4)
740 ; CHECK: .size sra_v2i64_i
; srl_v16i8: logical right shift (IR lshr) of the <16 x i8> vector loaded from %a by the
; per-element amounts loaded from %b, stored to %c. Expected assembly: a single srl.b.
743 define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
746 %1 = load <16 x i8>* %a
747 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
748 %2 = load <16 x i8>* %b
749 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
750 %3 = lshr <16 x i8> %1, %2
751 ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
752 store <16 x i8> %3, <16 x i8>* %c
753 ; CHECK-DAG: st.b [[R3]], 0($4)
756 ; CHECK: .size srl_v16i8
; srl_v8i16: logical right shift (IR lshr) of the <8 x i16> vector loaded from %a by the
; per-element amounts loaded from %b, stored to %c. Expected assembly: a single srl.h.
759 define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
762 %1 = load <8 x i16>* %a
763 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
764 %2 = load <8 x i16>* %b
765 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
766 %3 = lshr <8 x i16> %1, %2
767 ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
768 store <8 x i16> %3, <8 x i16>* %c
769 ; CHECK-DAG: st.h [[R3]], 0($4)
772 ; CHECK: .size srl_v8i16
; srl_v4i32: logical right shift (IR lshr) of the <4 x i32> vector loaded from %a by the
; per-element amounts loaded from %b, stored to %c. Expected assembly: a single srl.w.
775 define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
778 %1 = load <4 x i32>* %a
779 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
780 %2 = load <4 x i32>* %b
781 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
782 %3 = lshr <4 x i32> %1, %2
783 ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
784 store <4 x i32> %3, <4 x i32>* %c
785 ; CHECK-DAG: st.w [[R3]], 0($4)
788 ; CHECK: .size srl_v4i32
; srl_v2i64: logical right shift (IR lshr) of the <2 x i64> vector loaded from %a by the
; per-element amounts loaded from %b, stored to %c. Expected assembly: a single srl.d.
791 define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
794 %1 = load <2 x i64>* %a
795 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
796 %2 = load <2 x i64>* %b
797 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
798 %3 = lshr <2 x i64> %1, %2
799 ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
800 store <2 x i64> %3, <2 x i64>* %c
801 ; CHECK-DAG: st.d [[R3]], 0($4)
804 ; CHECK: .size srl_v2i64
; srl_v16i8_i: logical right shift (IR lshr) of a <16 x i8> vector loaded from %a by a
; constant splat of 1, stored to %c. Expected assembly: the immediate form srli.b.
807 define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
808 ; CHECK: srl_v16i8_i:
810 %1 = load <16 x i8>* %a
811 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
812 %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
813 ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
814 store <16 x i8> %2, <16 x i8>* %c
815 ; CHECK-DAG: st.b [[R4]], 0($4)
818 ; CHECK: .size srl_v16i8_i
; srl_v8i16_i: logical right shift (IR lshr) of an <8 x i16> vector loaded from %a by a
; constant splat of 1, stored to %c. Expected assembly: the immediate form srli.h.
821 define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
822 ; CHECK: srl_v8i16_i:
824 %1 = load <8 x i16>* %a
825 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
826 %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
827 ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
828 store <8 x i16> %2, <8 x i16>* %c
829 ; CHECK-DAG: st.h [[R4]], 0($4)
832 ; CHECK: .size srl_v8i16_i
; srl_v4i32_i: logical right shift (IR lshr) of a <4 x i32> vector loaded from %a by a
; constant splat of 1, stored to %c. Expected assembly: the immediate form srli.w.
835 define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
836 ; CHECK: srl_v4i32_i:
838 %1 = load <4 x i32>* %a
839 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
840 %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
841 ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
842 store <4 x i32> %2, <4 x i32>* %c
843 ; CHECK-DAG: st.w [[R4]], 0($4)
846 ; CHECK: .size srl_v4i32_i
; srl_v2i64_i: logical right shift (IR lshr) of a <2 x i64> vector loaded from %a by a
; constant splat of 1, stored to %c. Expected assembly: the immediate form srli.d.
849 define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
850 ; CHECK: srl_v2i64_i:
852 %1 = load <2 x i64>* %a
853 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
854 %2 = lshr <2 x i64> %1, <i64 1, i64 1>
855 ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
856 store <2 x i64> %2, <2 x i64>* %c
857 ; CHECK-DAG: st.d [[R4]], 0($4)
860 ; CHECK: .size srl_v2i64_i
; ctpop_v16i8: applies the @llvm.ctpop.v16i8 intrinsic to the <16 x i8> vector loaded from %a
; and stores the result to %c. Expected assembly: a single pcnt.b on the loaded register.
863 define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
864 ; CHECK: ctpop_v16i8:
866 %1 = load <16 x i8>* %a
867 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
868 %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
869 ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
870 store <16 x i8> %2, <16 x i8>* %c
871 ; CHECK-DAG: st.b [[R3]], 0($4)
874 ; CHECK: .size ctpop_v16i8
; ctpop_v8i16: applies the @llvm.ctpop.v8i16 intrinsic to the <8 x i16> vector loaded from %a
; and stores the result to %c. Expected assembly: a single pcnt.h on the loaded register.
877 define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
878 ; CHECK: ctpop_v8i16:
880 %1 = load <8 x i16>* %a
881 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
882 %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
883 ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
884 store <8 x i16> %2, <8 x i16>* %c
885 ; CHECK-DAG: st.h [[R3]], 0($4)
888 ; CHECK: .size ctpop_v8i16
; ctpop_v4i32: applies the @llvm.ctpop.v4i32 intrinsic to the <4 x i32> vector loaded from %a
; and stores the result to %c. Expected assembly: a single pcnt.w on the loaded register.
891 define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
892 ; CHECK: ctpop_v4i32:
894 %1 = load <4 x i32>* %a
895 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
896 %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
897 ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
898 store <4 x i32> %2, <4 x i32>* %c
899 ; CHECK-DAG: st.w [[R3]], 0($4)
902 ; CHECK: .size ctpop_v4i32
; ctpop_v2i64: applies the @llvm.ctpop.v2i64 intrinsic to the <2 x i64> vector loaded from %a
; and stores the result to %c. Expected assembly: a single pcnt.d on the loaded register.
905 define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
906 ; CHECK: ctpop_v2i64:
908 %1 = load <2 x i64>* %a
909 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
910 %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
911 ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
912 store <2 x i64> %2, <2 x i64>* %c
913 ; CHECK-DAG: st.d [[R3]], 0($4)
916 ; CHECK: .size ctpop_v2i64
; ctlz_v16i8: applies the @llvm.ctlz.v16i8 intrinsic to the <16 x i8> vector loaded from %a
; and stores the result to %c. Expected assembly: a single nlzc.b on the loaded register.
919 define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
922 %1 = load <16 x i8>* %a
923 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
924 %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
925 ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
926 store <16 x i8> %2, <16 x i8>* %c
927 ; CHECK-DAG: st.b [[R3]], 0($4)
930 ; CHECK: .size ctlz_v16i8
; ctlz_v8i16: applies the @llvm.ctlz.v8i16 intrinsic to the <8 x i16> vector loaded from %a
; and stores the result to %c. Expected assembly: a single nlzc.h on the loaded register.
933 define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
936 %1 = load <8 x i16>* %a
937 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
938 %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
939 ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
940 store <8 x i16> %2, <8 x i16>* %c
941 ; CHECK-DAG: st.h [[R3]], 0($4)
944 ; CHECK: .size ctlz_v8i16
; ctlz_v4i32: applies the @llvm.ctlz.v4i32 intrinsic to the <4 x i32> vector loaded from %a
; and stores the result to %c. Expected assembly: a single nlzc.w on the loaded register.
947 define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
950 %1 = load <4 x i32>* %a
951 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
952 %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
953 ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
954 store <4 x i32> %2, <4 x i32>* %c
955 ; CHECK-DAG: st.w [[R3]], 0($4)
958 ; CHECK: .size ctlz_v4i32
; ctlz_v2i64: applies the @llvm.ctlz.v2i64 intrinsic to the <2 x i64> vector loaded from %a
; and stores the result to %c. Expected assembly: a single nlzc.d on the loaded register.
961 define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
964 %1 = load <2 x i64>* %a
965 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
966 %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
967 ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
968 store <2 x i64> %2, <2 x i64>* %c
969 ; CHECK-DAG: st.d [[R3]], 0($4)
972 ; CHECK: .size ctlz_v2i64
; bsel_v16i8: bit-select with a variable mask. The IR computes (%a & %m) | (%b & ~%m), where
; ~%m is formed by xor-ing the loaded mask with all-ones, and stores the result to %c.
; Expected assembly: a single bmnz.v whose destination is the register loaded from 0($5);
; that same register is then stored with st.b.
975 define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind {
978 %1 = load <16 x i8>* %a
979 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
980 %2 = load <16 x i8>* %b
981 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
982 %3 = load <16 x i8>* %m
983 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
984 %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
985 i8 -1, i8 -1, i8 -1, i8 -1,
986 i8 -1, i8 -1, i8 -1, i8 -1,
987 i8 -1, i8 -1, i8 -1, i8 -1>
988 %5 = and <16 x i8> %1, %3
989 %6 = and <16 x i8> %2, %4
990 %7 = or <16 x i8> %5, %6
991 ; bmnz is the same operation
992 ; CHECK-DAG: bmnz.v [[R1]], [[R2]], [[R3]]
993 store <16 x i8> %7, <16 x i8>* %c
994 ; CHECK-DAG: st.b [[R1]], 0($4)
997 ; CHECK: .size bsel_v16i8
; bsel_v16i8_i: bit-select between a loaded vector and a constant. The IR computes
; (%a & ~%m) | (splat(6) & %m), where ~%m is formed by xor-ing the loaded mask with all-ones.
; Expected assembly: bseli.b with immediate 6 whose destination is the mask register loaded
; from 0($6); that same register is then stored with st.b.
1000 define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind {
1001 ; CHECK: bsel_v16i8_i:
1003 %1 = load <16 x i8>* %a
1004 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1005 %2 = load <16 x i8>* %m
1006 ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6)
1007 %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1,
1008 i8 -1, i8 -1, i8 -1, i8 -1,
1009 i8 -1, i8 -1, i8 -1, i8 -1,
1010 i8 -1, i8 -1, i8 -1, i8 -1>
1011 %4 = and <16 x i8> %1, %3
1012 %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6,
1013 i8 6, i8 6, i8 6, i8 6,
1014 i8 6, i8 6, i8 6, i8 6,
1015 i8 6, i8 6, i8 6, i8 6>, %2
1016 %6 = or <16 x i8> %4, %5
1017 ; CHECK-DAG: bseli.b [[R3]], [[R1]], 6
1018 store <16 x i8> %6, <16 x i8>* %c
1019 ; CHECK-DAG: st.b [[R3]], 0($4)
1022 ; CHECK: .size bsel_v16i8_i
; bsel_v8i16: bit-select with a constant mask. The IR computes (%a & splat(6)) | (%b & splat(65529));
; 65529 is 0xFFF9, the 16-bit complement of 6, so the two masks are disjoint and exhaustive.
; Expected assembly: the mask 6 is materialised with ldi.h into the register that bsel.v then
; uses as its destination, and that register is stored with st.h.
1025 define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1026 ; CHECK: bsel_v8i16:
1028 %1 = load <8 x i16>* %a
1029 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1030 %2 = load <8 x i16>* %b
1031 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1032 %3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6,
1033 i16 6, i16 6, i16 6, i16 6>
1034 %4 = and <8 x i16> %2, <i16 65529, i16 65529, i16 65529, i16 65529,
1035 i16 65529, i16 65529, i16 65529, i16 65529>
1036 %5 = or <8 x i16> %3, %4
1037 ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 6
1038 ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
1039 store <8 x i16> %5, <8 x i16>* %c
1040 ; CHECK-DAG: st.h [[R3]], 0($4)
1043 ; CHECK: .size bsel_v8i16
; bsel_v4i32: bit-select with a constant mask. The IR computes
; (%a & splat(6)) | (%b & splat(4294967289)); 4294967289 is 0xFFFFFFF9, the 32-bit complement of 6.
; Expected assembly: the mask 6 is materialised with ldi.w into the register that bsel.v then
; uses as its destination, and that register is stored with st.w.
1046 define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
1047 ; CHECK: bsel_v4i32:
1049 %1 = load <4 x i32>* %a
1050 ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
1051 %2 = load <4 x i32>* %b
1052 ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
1053 %3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6>
1054 %4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289>
1055 %5 = or <4 x i32> %3, %4
1056 ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 6
1057 ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
1058 store <4 x i32> %5, <4 x i32>* %c
1059 ; CHECK-DAG: st.w [[R3]], 0($4)
1062 ; CHECK: .size bsel_v4i32
; bsel_v2i64: bit-select with a constant mask. The IR computes
; (%a & splat(6)) | (%b & splat(18446744073709551609)); the second constant is the 64-bit
; complement of 6. Expected assembly: the mask 6 is materialised with ldi.d into the register
; that bsel.v then uses as its destination, and that register is stored with st.d.
1065 define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
1066 ; CHECK: bsel_v2i64:
1068 %1 = load <2 x i64>* %a
1069 ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
1070 %2 = load <2 x i64>* %b
1071 ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
1072 %3 = and <2 x i64> %1, <i64 6, i64 6>
1073 %4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609>
1074 %5 = or <2 x i64> %3, %4
1075 ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 6
1076 ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
1077 store <2 x i64> %5, <2 x i64>* %c
1078 ; CHECK-DAG: st.d [[R3]], 0($4)
1081 ; CHECK: .size bsel_v2i64
; binsl_v16i8_i: inserts the high bits of %a into %b. The IR computes
; (%a & splat(192)) | (%b & splat(63)); 192 is 0xC0 (top two bits) and 63 is 0x3F (the rest).
; Expected assembly: binsli.b with immediate 2 whose destination is the register loaded from
; 0($6); that same register is then stored with st.b.
1084 define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
1085 ; CHECK: binsl_v16i8_i:
1087 %1 = load <16 x i8>* %a
1088 ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
1089 %2 = load <16 x i8>* %b
1090 ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
1091 %3 = and <16 x i8> %1, <i8 192, i8 192, i8 192, i8 192,
1092 i8 192, i8 192, i8 192, i8 192,
1093 i8 192, i8 192, i8 192, i8 192,
1094 i8 192, i8 192, i8 192, i8 192>
1095 %4 = and <16 x i8> %2, <i8 63, i8 63, i8 63, i8 63,
1096 i8 63, i8 63, i8 63, i8 63,
1097 i8 63, i8 63, i8 63, i8 63,
1098 i8 63, i8 63, i8 63, i8 63>
1099 %5 = or <16 x i8> %3, %4
1100 ; CHECK-DAG: binsli.b [[R2]], [[R1]], 2
1101 store <16 x i8> %5, <16 x i8>* %c
1102 ; CHECK-DAG: st.b [[R2]], 0($4)
1105 ; CHECK: .size binsl_v16i8_i
; binsl_v8i16_i: inserts the high bits of %a into %b. The IR computes
; (%a & splat(49152)) | (%b & splat(16383)); 49152 is 0xC000 (top two bits) and 16383 is 0x3FFF.
; Expected assembly: binsli.h with immediate 2 whose destination is the register loaded from
; 0($6); that same register is then stored with st.h.
1108 define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
1109 ; CHECK: binsl_v8i16_i:
1111 %1 = load <8 x i16>* %a
1112 ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
1113 %2 = load <8 x i16>* %b
1114 ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
1115 %3 = and <8 x i16> %1, <i16 49152, i16 49152, i16 49152, i16 49152,
1116 i16 49152, i16 49152, i16 49152, i16 49152>
1117 %4 = and <8 x i16> %2, <i16 16383, i16 16383, i16 16383, i16 16383,
1118 i16 16383, i16 16383, i16 16383, i16 16383>
1119 %5 = or <8 x i16> %3, %4
1120 ; CHECK-DAG: binsli.h [[R2]], [[R1]], 2
1121 store <8 x i16> %5, <8 x i16>* %c
1122 ; CHECK-DAG: st.h [[R2]], 0($4)
1125 ; CHECK: .size binsl_v8i16_i
; binsl_v4i32_i: combine the two most-significant bits of each word of %a
; (mask 3221225472) with the thirty least-significant bits of the matching
; word of %b (mask 1073741823). The and/and/or sequence is expected to be
; selected as one binsli.w with immediate 2, and the result is stored to %c.
define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: binsl_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = and <4 x i32> %1, <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472>
  %4 = and <4 x i32> %2, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
  %5 = or <4 x i32> %3, %4
  ; CHECK-DAG: binsli.w [[R2]], [[R1]], 2
  store <4 x i32> %5, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R2]], 0($4)

  ret void
  ; CHECK: .size binsl_v4i32_i
}
; binsl_v2i64_i: combine the sixty-one most-significant bits of each
; doubleword of %a (mask 18446744073709551608) with the three
; least-significant bits of the matching doubleword of %b (mask 7). The
; and/and/or sequence is expected to be selected as one binsli.d with
; immediate 61, and the result is stored to %c.
define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: binsl_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = and <2 x i64> %1, <i64 18446744073709551608, i64 18446744073709551608>
  %4 = and <2 x i64> %2, <i64 7, i64 7>
  %5 = or <2 x i64> %3, %4
  ; TODO: We use a particularly wide mask here to work around a legalization
  ;       issue. If the mask doesn't fit within a 10-bit immediate, it gets
  ;       legalized into a constant pool. We should add a test to cover the
  ;       other cases once they correctly select binsli.d.
  ; CHECK-DAG: binsli.d [[R2]], [[R1]], 61
  store <2 x i64> %5, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R2]], 0($4)

  ret void
  ; CHECK: .size binsl_v2i64_i
}
; binsr_v16i8_i: combine the two least-significant bits of each byte of %a
; (mask 3) with the six most-significant bits of the matching byte of %b
; (mask 252). The and/and/or sequence is expected to be selected as one
; binsri.b with immediate 2, and the result is stored to %c.
define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: binsr_v16i8_i:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                          i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %4 = and <16 x i8> %2, <i8 252, i8 252, i8 252, i8 252,
                          i8 252, i8 252, i8 252, i8 252,
                          i8 252, i8 252, i8 252, i8 252,
                          i8 252, i8 252, i8 252, i8 252>
  %5 = or <16 x i8> %3, %4
  ; CHECK-DAG: binsri.b [[R2]], [[R1]], 2
  store <16 x i8> %5, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R2]], 0($4)

  ret void
  ; CHECK: .size binsr_v16i8_i
}
; binsr_v8i16_i: combine the two least-significant bits of each halfword of
; %a (mask 3) with the fourteen most-significant bits of the matching halfword
; of %b (mask 65532). The and/and/or sequence is expected to be selected as
; one binsri.h with immediate 2, and the result is stored to %c.
define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: binsr_v8i16_i:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3,
                          i16 3, i16 3, i16 3, i16 3>
  %4 = and <8 x i16> %2, <i16 65532, i16 65532, i16 65532, i16 65532,
                          i16 65532, i16 65532, i16 65532, i16 65532>
  %5 = or <8 x i16> %3, %4
  ; CHECK-DAG: binsri.h [[R2]], [[R1]], 2
  store <8 x i16> %5, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R2]], 0($4)

  ret void
  ; CHECK: .size binsr_v8i16_i
}
; binsr_v4i32_i: combine the two least-significant bits of each word of %a
; (mask 3) with the thirty most-significant bits of the matching word of %b
; (mask 4294967292). The and/and/or sequence is expected to be selected as
; one binsri.w with immediate 2, and the result is stored to %c.
define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: binsr_v4i32_i:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
  %4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292>
  %5 = or <4 x i32> %3, %4
  ; CHECK-DAG: binsri.w [[R2]], [[R1]], 2
  store <4 x i32> %5, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R2]], 0($4)

  ret void
  ; CHECK: .size binsr_v4i32_i
}
; binsr_v2i64_i: combine the two least-significant bits of each doubleword of
; %a (mask 3) with the sixty-two most-significant bits of the matching
; doubleword of %b (mask 18446744073709551612). The and/and/or sequence is
; expected to be selected as one binsri.d with immediate 2, and the result is
; stored to %c.
define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: binsr_v2i64_i:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = and <2 x i64> %1, <i64 3, i64 3>
  %4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612>
  %5 = or <2 x i64> %3, %4
  ; CHECK-DAG: binsri.d [[R2]], [[R1]], 2
  store <2 x i64> %5, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R2]], 0($4)

  ret void
  ; CHECK: .size binsr_v2i64_i
}
; bclr_v16i8: for every byte lane, build a one-bit mask by shifting 1 left by
; the lane of %b, invert it, and AND it into the lane of %a, which clears that
; single bit. The shl/xor/and sequence is expected to be selected as one
; bclr.b, and the result is stored to %c.
define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: bclr_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %5 = and <16 x i8> %1, %4
  ; CHECK-DAG: bclr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %5, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bclr_v16i8
}
; bclr_v8i16: for every halfword lane, build a one-bit mask by shifting 1 left
; by the lane of %b, invert it, and AND it into the lane of %a, which clears
; that single bit. The shl/xor/and sequence is expected to be selected as one
; bclr.h, and the result is stored to %c.
define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: bclr_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %5 = and <8 x i16> %1, %4
  ; CHECK-DAG: bclr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %5, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bclr_v8i16
}
; bclr_v4i32: for every word lane, build a one-bit mask by shifting 1 left by
; the lane of %b, invert it, and AND it into the lane of %a, which clears that
; single bit. The shl/xor/and sequence is expected to be selected as one
; bclr.w, and the result is stored to %c.
define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: bclr_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
  %5 = and <4 x i32> %1, %4
  ; CHECK-DAG: bclr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %5, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bclr_v4i32
}
; bclr_v2i64: for every doubleword lane, build a one-bit mask by shifting 1
; left by the lane of %b, invert it, and AND it into the lane of %a, which
; clears that single bit. The shl/xor/and sequence is expected to be selected
; as one bclr.d, and the result is stored to %c.
define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: bclr_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <2 x i64> <i64 1, i64 1>, %2
  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
  %5 = and <2 x i64> %1, %4
  ; CHECK-DAG: bclr.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %5, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bclr_v2i64
}
; bset_v16i8: for every byte lane, shift 1 left by the lane of %b and OR the
; resulting one-bit mask into the lane of %a, which sets that single bit. The
; shl/or sequence is expected to be selected as one bset.b, and the result is
; stored to %c.
define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: bset_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
  %4 = or <16 x i8> %1, %3
  ; CHECK-DAG: bset.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bset_v16i8
}
; bset_v8i16: for every halfword lane, shift 1 left by the lane of %b and OR
; the resulting one-bit mask into the lane of %a, which sets that single bit.
; The shl/or sequence is expected to be selected as one bset.h, and the result
; is stored to %c.
define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: bset_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
  %4 = or <8 x i16> %1, %3
  ; CHECK-DAG: bset.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bset_v8i16
}
; bset_v4i32: for every word lane, shift 1 left by the lane of %b and OR the
; resulting one-bit mask into the lane of %a, which sets that single bit. The
; shl/or sequence is expected to be selected as one bset.w, and the result is
; stored to %c.
define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: bset_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
  %4 = or <4 x i32> %1, %3
  ; CHECK-DAG: bset.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bset_v4i32
}
; bset_v2i64: for every doubleword lane, shift 1 left by the lane of %b and OR
; the resulting one-bit mask into the lane of %a, which sets that single bit.
; The shl/or sequence is expected to be selected as one bset.d, and the result
; is stored to %c.
define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: bset_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <2 x i64> <i64 1, i64 1>, %2
  %4 = or <2 x i64> %1, %3
  ; CHECK-DAG: bset.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bset_v2i64
}
; bneg_v16i8: for every byte lane, shift 1 left by the lane of %b and XOR the
; resulting one-bit mask into the lane of %a, which toggles that single bit.
; The shl/xor sequence is expected to be selected as one bneg.b, and the
; result is stored to %c.
define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
  ; CHECK: bneg_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = load <16 x i8>* %b
  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
  %4 = xor <16 x i8> %1, %3
  ; CHECK-DAG: bneg.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <16 x i8> %4, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bneg_v16i8
}
; bneg_v8i16: for every halfword lane, shift 1 left by the lane of %b and XOR
; the resulting one-bit mask into the lane of %a, which toggles that single
; bit. The shl/xor sequence is expected to be selected as one bneg.h, and the
; result is stored to %c.
define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
  ; CHECK: bneg_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = load <8 x i16>* %b
  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
  %4 = xor <8 x i16> %1, %3
  ; CHECK-DAG: bneg.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <8 x i16> %4, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bneg_v8i16
}
; bneg_v4i32: for every word lane, shift 1 left by the lane of %b and XOR the
; resulting one-bit mask into the lane of %a, which toggles that single bit.
; The shl/xor sequence is expected to be selected as one bneg.w, and the
; result is stored to %c.
define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
  ; CHECK: bneg_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x i32>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
  %4 = xor <4 x i32> %1, %3
  ; CHECK-DAG: bneg.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x i32> %4, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bneg_v4i32
}
; bneg_v2i64: for every doubleword lane, shift 1 left by the lane of %b and
; XOR the resulting one-bit mask into the lane of %a, which toggles that
; single bit. The shl/xor sequence is expected to be selected as one bneg.d,
; and the result is stored to %c.
define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
  ; CHECK: bneg_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x i64>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = shl <2 x i64> <i64 1, i64 1>, %2
  %4 = xor <2 x i64> %1, %3
  ; CHECK-DAG: bneg.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x i64> %4, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bneg_v2i64
}
; bclri_v16i8: clear bit 3 of every byte of %a by ANDing with the inverse of a
; splat of 8 (8 xor -1, i.e. 247 as an unsigned byte) and store the result to
; %c. For byte lanes the expected instruction is andi.b with immediate 247
; rather than bclri.b.
define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: bclri_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <16 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>,
                     <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %3 = and <16 x i8> %1, %2
  ; bclri.b and andi.b are exactly equivalent.
  ; CHECK-DAG: andi.b [[R3:\$w[0-9]+]], [[R1]], 247
  store <16 x i8> %3, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bclri_v16i8
}
; bclri_v8i16: clear bit 3 of every halfword of %a by ANDing with the inverse
; of a splat of 8 (8 xor -1) and store the result to %c. The sequence is
; expected to be selected as one bclri.h with immediate 3.
define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: bclri_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>,
                     <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %3 = and <8 x i16> %1, %2
  ; CHECK-DAG: bclri.h [[R3:\$w[0-9]+]], [[R1]], 3
  store <8 x i16> %3, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bclri_v8i16
}
; bclri_v4i32: clear bit 3 of every word of %a by ANDing with the inverse of a
; splat of 8 (8 xor -1) and store the result to %c. The sequence is expected
; to be selected as one bclri.w with immediate 3.
define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: bclri_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <4 x i32> <i32 8, i32 8, i32 8, i32 8>,
                     <i32 -1, i32 -1, i32 -1, i32 -1>
  %3 = and <4 x i32> %1, %2
  ; CHECK-DAG: bclri.w [[R3:\$w[0-9]+]], [[R1]], 3
  store <4 x i32> %3, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bclri_v4i32
}
; bclri_v2i64: clear bit 3 of every doubleword of %a by ANDing with the
; inverse of a splat of 8 (8 xor -1) and store the result to %c. The sequence
; is expected to be selected as one bclri.d with immediate 3.
define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: bclri_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  ; The all-ones second operand turns the xor into a bitwise NOT of the splat,
  ; mirroring the narrower bclri_* variants.
  %2 = xor <2 x i64> <i64 8, i64 8>,
                     <i64 -1, i64 -1>
  %3 = and <2 x i64> %1, %2
  ; CHECK-DAG: bclri.d [[R3:\$w[0-9]+]], [[R1]], 3
  store <2 x i64> %3, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bclri_v2i64
}
; bseti_v16i8: set bit 3 of every byte of %a by ORing with a splat of 8 and
; store the result to %c. The or is expected to be selected as one bseti.b
; with immediate 3.
define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: bseti_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = or <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
  ; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bseti_v16i8
}
; bseti_v8i16: set bit 3 of every halfword of %a by ORing with a splat of 8
; and store the result to %c. The or is expected to be selected as one bseti.h
; with immediate 3.
define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: bseti_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = or <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bseti_v8i16
}
; bseti_v4i32: set bit 3 of every word of %a by ORing with a splat of 8 and
; store the result to %c. The or is expected to be selected as one bseti.w
; with immediate 3.
define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: bseti_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = or <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
  ; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bseti_v4i32
}
; bseti_v2i64: set bit 3 of every doubleword of %a by ORing with a splat of 8
; and store the result to %c. The or is expected to be selected as one bseti.d
; with immediate 3.
define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: bseti_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = or <2 x i64> %1, <i64 8, i64 8>
  ; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bseti_v2i64
}
; bnegi_v16i8: toggle bit 3 of every byte of %a by XORing with a splat of 8
; and store the result to %c. The xor is expected to be selected as one
; bnegi.b with immediate 3.
define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
  ; CHECK: bnegi_v16i8:

  %1 = load <16 x i8>* %a
  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
  ; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3
  store <16 x i8> %2, <16 x i8>* %c
  ; CHECK-DAG: st.b [[R3]], 0($4)

  ret void
  ; CHECK: .size bnegi_v16i8
}
; bnegi_v8i16: toggle bit 3 of every halfword of %a by XORing with a splat of
; 8 and store the result to %c. The xor is expected to be selected as one
; bnegi.h with immediate 3.
define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
  ; CHECK: bnegi_v8i16:

  %1 = load <8 x i16>* %a
  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3
  store <8 x i16> %2, <8 x i16>* %c
  ; CHECK-DAG: st.h [[R3]], 0($4)

  ret void
  ; CHECK: .size bnegi_v8i16
}
; bnegi_v4i32: toggle bit 3 of every word of %a by XORing with a splat of 8
; and store the result to %c. The xor is expected to be selected as one
; bnegi.w with immediate 3.
define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: bnegi_v4i32:

  %1 = load <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
  ; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size bnegi_v4i32
}
; bnegi_v2i64: toggle bit 3 of every doubleword of %a by XORing with a splat
; of 8 and store the result to %c. The xor is expected to be selected as one
; bnegi.d with immediate 3.
define void @bnegi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: bnegi_v2i64:

  %1 = load <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = xor <2 x i64> %1, <i64 8, i64 8>
  ; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size bnegi_v2i64
}
; Population-count intrinsics, one per 128-bit vector type.
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %val)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val)

; Count-leading-zeros intrinsics, one per 128-bit vector type.
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %val)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %val)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val)
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val)