1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
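; The shufflevector masks in the first group reverse lanes within 16-, 32- and 64-bit containers and should select rev16, rev32 and rev64.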
3 define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 {
4 ; CHECK: rev16 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
5 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
6 ret <8 x i8> %shuffle.i
9 define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 {
10 ; CHECK: rev16 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
11 %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
12 ret <16 x i8> %shuffle.i
15 define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 {
16 ; CHECK: rev32 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
17 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
18 ret <8 x i8> %shuffle.i
21 define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 {
22 ; CHECK: rev32 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
23 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
24 ret <4 x i16> %shuffle.i
27 define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 {
28 ; CHECK: rev32 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
29 %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
30 ret <16 x i8> %shuffle.i
33 define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 {
34 ; CHECK: rev32 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
35 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
36 ret <8 x i16> %shuffle.i
39 define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 {
40 ; CHECK: rev64 v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
41 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
42 ret <8 x i8> %shuffle.i
45 define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 {
46 ; CHECK: rev64 v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
47 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
48 ret <4 x i16> %shuffle.i
51 define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 {
52 ; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
53 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
54 ret <2 x i32> %shuffle.i
57 define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 {
58 ; CHECK: rev64 v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
59 %shuffle.i = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 0>
60 ret <2 x float> %shuffle.i
63 define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 {
64 ; CHECK: rev64 v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
65 %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
66 ret <16 x i8> %shuffle.i
69 define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 {
70 ; CHECK: rev64 v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
71 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
72 ret <8 x i16> %shuffle.i
75 define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 {
76 ; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
77 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
78 ret <4 x i32> %shuffle.i
81 define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 {
82 ; CHECK: rev64 v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
83 %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
84 ret <4 x float> %shuffle.i
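; The vpaddl-style intrinsics (llvm.arm64.neon.saddlp/uaddlp) should select the pairwise add-long instructions saddlp and uaddlp.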
87 define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 {
88 ; CHECK: saddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
89 %vpaddl.i = tail call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8> %a) #4
90 ret <4 x i16> %vpaddl.i
93 define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 {
94 ; CHECK: saddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
95 %vpaddl1.i = tail call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16> %a) #4
96 ret <2 x i32> %vpaddl1.i
99 define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 {
100 ; CHECK: saddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
101 %vpaddl1.i = tail call <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32> %a) #4
102 ret <1 x i64> %vpaddl1.i
105 define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 {
106 ; CHECK: uaddlp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
107 %vpaddl.i = tail call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8> %a) #4
108 ret <4 x i16> %vpaddl.i
111 define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 {
112 ; CHECK: uaddlp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
113 %vpaddl1.i = tail call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16> %a) #4
114 ret <2 x i32> %vpaddl1.i
117 define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 {
118 ; CHECK: uaddlp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
119 %vpaddl1.i = tail call <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32> %a) #4
120 ret <1 x i64> %vpaddl1.i
123 define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 {
124 ; CHECK: saddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
125 %vpaddl.i = tail call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8> %a) #4
126 ret <8 x i16> %vpaddl.i
129 define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 {
130 ; CHECK: saddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
131 %vpaddl1.i = tail call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16> %a) #4
132 ret <4 x i32> %vpaddl1.i
135 define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 {
136 ; CHECK: saddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
137 %vpaddl1.i = tail call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32> %a) #4
138 ret <2 x i64> %vpaddl1.i
141 define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 {
142 ; CHECK: uaddlp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
143 %vpaddl.i = tail call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8> %a) #4
144 ret <8 x i16> %vpaddl.i
147 define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 {
148 ; CHECK: uaddlp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
149 %vpaddl1.i = tail call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16> %a) #4
150 ret <4 x i32> %vpaddl1.i
153 define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 {
154 ; CHECK: uaddlp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
155 %vpaddl1.i = tail call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32> %a) #4
156 ret <2 x i64> %vpaddl1.i
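; When a saddlp/uaddlp result is added back into an accumulator, the accumulating forms sadalp and uadalp should be selected instead.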
159 define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 {
160 ; CHECK: sadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
161 %vpadal1.i = tail call <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8> %b) #4
162 %sum = add <4 x i16> %a, %vpadal1.i
163 ret <4 x i16> %sum
166 define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 {
167 ; CHECK: sadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
168 %vpadal2.i = tail call <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16> %b) #4
169 %sum = add <2 x i32> %a, %vpadal2.i
170 ret <2 x i32> %sum
173 define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 {
174 ; CHECK: sadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
175 %vpadal2.i = tail call <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32> %b) #4
176 %sum = add <1 x i64> %a, %vpadal2.i
177 ret <1 x i64> %sum
180 define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 {
181 ; CHECK: uadalp v{{[0-9]+}}.4h, v{{[0-9]+}}.8b
182 %vpadal1.i = tail call <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8> %b) #4
183 %sum = add <4 x i16> %a, %vpadal1.i
184 ret <4 x i16> %sum
187 define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 {
188 ; CHECK: uadalp v{{[0-9]+}}.2s, v{{[0-9]+}}.4h
189 %vpadal2.i = tail call <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16> %b) #4
190 %sum = add <2 x i32> %a, %vpadal2.i
191 ret <2 x i32> %sum
194 define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 {
195 ; CHECK: uadalp v{{[0-9]+}}.1d, v{{[0-9]+}}.2s
196 %vpadal2.i = tail call <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32> %b) #4
197 %sum = add <1 x i64> %a, %vpadal2.i
198 ret <1 x i64> %sum
201 define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 {
202 ; CHECK: sadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
203 %vpadal1.i = tail call <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8> %b) #4
204 %sum = add <8 x i16> %a, %vpadal1.i
205 ret <8 x i16> %sum
208 define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 {
209 ; CHECK: sadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
210 %vpadal2.i = tail call <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16> %b) #4
211 %sum = add <4 x i32> %a, %vpadal2.i
212 ret <4 x i32> %sum
215 define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 {
216 ; CHECK: sadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
217 %vpadal2.i = tail call <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32> %b) #4
218 %sum = add <2 x i64> %a, %vpadal2.i
219 ret <2 x i64> %sum
222 define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 {
223 ; CHECK: uadalp v{{[0-9]+}}.8h, v{{[0-9]+}}.16b
224 %vpadal1.i = tail call <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8> %b) #4
225 %sum = add <8 x i16> %a, %vpadal1.i
226 ret <8 x i16> %sum
229 define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 {
230 ; CHECK: uadalp v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
231 %vpadal2.i = tail call <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16> %b) #4
232 %sum = add <4 x i32> %a, %vpadal2.i
233 ret <4 x i32> %sum
236 define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 {
237 ; CHECK: uadalp v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
238 %vpadal2.i = tail call <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32> %b) #4
239 %sum = add <2 x i64> %a, %vpadal2.i
240 ret <2 x i64> %sum
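; Saturating absolute value and negation intrinsics should select sqabs and sqneg.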
243 define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 {
244 ; CHECK: sqabs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
245 %vqabs.i = tail call <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8> %a) #4
246 ret <8 x i8> %vqabs.i
249 define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 {
250 ; CHECK: sqabs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
251 %vqabs.i = tail call <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8> %a) #4
252 ret <16 x i8> %vqabs.i
255 define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 {
256 ; CHECK: sqabs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
257 %vqabs1.i = tail call <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16> %a) #4
258 ret <4 x i16> %vqabs1.i
261 define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 {
262 ; CHECK: sqabs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
263 %vqabs1.i = tail call <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16> %a) #4
264 ret <8 x i16> %vqabs1.i
267 define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 {
268 ; CHECK: sqabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
269 %vqabs1.i = tail call <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32> %a) #4
270 ret <2 x i32> %vqabs1.i
273 define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 {
274 ; CHECK: sqabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
275 %vqabs1.i = tail call <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32> %a) #4
276 ret <4 x i32> %vqabs1.i
279 define <2 x i64> @test_vqabsq_s64(<2 x i64> %a) #0 {
280 ; CHECK: sqabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
281 %vqabs1.i = tail call <2 x i64> @llvm.arm64.neon.sqabs.v2i64(<2 x i64> %a) #4
282 ret <2 x i64> %vqabs1.i
285 define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 {
286 ; CHECK: sqneg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
287 %vqneg.i = tail call <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8> %a) #4
288 ret <8 x i8> %vqneg.i
291 define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 {
292 ; CHECK: sqneg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
293 %vqneg.i = tail call <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8> %a) #4
294 ret <16 x i8> %vqneg.i
297 define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 {
298 ; CHECK: sqneg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
299 %vqneg1.i = tail call <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16> %a) #4
300 ret <4 x i16> %vqneg1.i
303 define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 {
304 ; CHECK: sqneg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
305 %vqneg1.i = tail call <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16> %a) #4
306 ret <8 x i16> %vqneg1.i
309 define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 {
310 ; CHECK: sqneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
311 %vqneg1.i = tail call <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32> %a) #4
312 ret <2 x i32> %vqneg1.i
315 define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 {
316 ; CHECK: sqneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
317 %vqneg1.i = tail call <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32> %a) #4
318 ret <4 x i32> %vqneg1.i
321 define <2 x i64> @test_vqnegq_s64(<2 x i64> %a) #0 {
322 ; CHECK: sqneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
323 %vqneg1.i = tail call <2 x i64> @llvm.arm64.neon.sqneg.v2i64(<2 x i64> %a) #4
324 ret <2 x i64> %vqneg1.i
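; Subtraction from zero (and fsub from -0.0 for floating point) should select neg and fneg.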
327 define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 {
328 ; CHECK: neg v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
329 %sub.i = sub <8 x i8> zeroinitializer, %a
330 ret <8 x i8> %sub.i
333 define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 {
334 ; CHECK: neg v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
335 %sub.i = sub <16 x i8> zeroinitializer, %a
336 ret <16 x i8> %sub.i
339 define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 {
340 ; CHECK: neg v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
341 %sub.i = sub <4 x i16> zeroinitializer, %a
342 ret <4 x i16> %sub.i
345 define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 {
346 ; CHECK: neg v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
347 %sub.i = sub <8 x i16> zeroinitializer, %a
348 ret <8 x i16> %sub.i
351 define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 {
352 ; CHECK: neg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
353 %sub.i = sub <2 x i32> zeroinitializer, %a
354 ret <2 x i32> %sub.i
357 define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 {
358 ; CHECK: neg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
359 %sub.i = sub <4 x i32> zeroinitializer, %a
360 ret <4 x i32> %sub.i
363 define <2 x i64> @test_vnegq_s64(<2 x i64> %a) #0 {
364 ; CHECK: neg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
365 %sub.i = sub <2 x i64> zeroinitializer, %a
366 ret <2 x i64> %sub.i
369 define <2 x float> @test_vneg_f32(<2 x float> %a) #0 {
370 ; CHECK: fneg v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
371 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
372 ret <2 x float> %sub.i
375 define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 {
376 ; CHECK: fneg v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
377 %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
378 ret <4 x float> %sub.i
381 define <2 x double> @test_vnegq_f64(<2 x double> %a) #0 {
382 ; CHECK: fneg v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
383 %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
384 ret <2 x double> %sub.i
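; The abs intrinsic and llvm.fabs should select abs and fabs.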
387 define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 {
388 ; CHECK: abs v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
389 %vabs.i = tail call <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8> %a) #4
390 ret <8 x i8> %vabs.i
393 define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 {
394 ; CHECK: abs v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
395 %vabs.i = tail call <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8> %a) #4
396 ret <16 x i8> %vabs.i
399 define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 {
400 ; CHECK: abs v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
401 %vabs1.i = tail call <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16> %a) #4
402 ret <4 x i16> %vabs1.i
405 define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 {
406 ; CHECK: abs v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
407 %vabs1.i = tail call <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16> %a) #4
408 ret <8 x i16> %vabs1.i
411 define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 {
412 ; CHECK: abs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
413 %vabs1.i = tail call <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32> %a) #4
414 ret <2 x i32> %vabs1.i
417 define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 {
418 ; CHECK: abs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
419 %vabs1.i = tail call <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32> %a) #4
420 ret <4 x i32> %vabs1.i
423 define <2 x i64> @test_vabsq_s64(<2 x i64> %a) #0 {
424 ; CHECK: abs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
425 %vabs1.i = tail call <2 x i64> @llvm.arm64.neon.abs.v2i64(<2 x i64> %a) #4
426 ret <2 x i64> %vabs1.i
429 define <2 x float> @test_vabs_f32(<2 x float> %a) #1 {
430 ; CHECK: fabs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
431 %vabs1.i = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4
432 ret <2 x float> %vabs1.i
435 define <4 x float> @test_vabsq_f32(<4 x float> %a) #1 {
436 ; CHECK: fabs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
437 %vabs1.i = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4
438 ret <4 x float> %vabs1.i
441 define <2 x double> @test_vabsq_f64(<2 x double> %a) #1 {
442 ; CHECK: fabs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
443 %vabs1.i = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %a) #4
444 ret <2 x double> %vabs1.i
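; The signed saturating accumulate-of-unsigned intrinsic should select suqadd.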
447 define <8 x i8> @test_vuqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 {
448 ; CHECK: suqadd v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
449 %vuqadd.i = tail call <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4
450 ret <8 x i8> %vuqadd.i
453 define <16 x i8> @test_vuqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
454 ; CHECK: suqadd v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
455 %vuqadd.i = tail call <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b) #4
456 ret <16 x i8> %vuqadd.i
459 define <4 x i16> @test_vuqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 {
460 ; CHECK: suqadd v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
461 %vuqadd2.i = tail call <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4
462 ret <4 x i16> %vuqadd2.i
465 define <8 x i16> @test_vuqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
466 ; CHECK: suqadd v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
467 %vuqadd2.i = tail call <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b) #4
468 ret <8 x i16> %vuqadd2.i
471 define <2 x i32> @test_vuqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 {
472 ; CHECK: suqadd v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
473 %vuqadd2.i = tail call <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4
474 ret <2 x i32> %vuqadd2.i
477 define <4 x i32> @test_vuqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
478 ; CHECK: suqadd v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
479 %vuqadd2.i = tail call <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b) #4
480 ret <4 x i32> %vuqadd2.i
483 define <2 x i64> @test_vuqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
484 ; CHECK: suqadd v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
485 %vuqadd2.i = tail call <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b) #4
486 ret <2 x i64> %vuqadd2.i
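; Bit-counting operations: the cls intrinsic selects cls, and the generic llvm.ctlz/llvm.ctpop intrinsics select clz and cnt.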
489 define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 {
490 ; CHECK: cls v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
491 %vcls.i = tail call <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8> %a) #4
492 ret <8 x i8> %vcls.i
495 define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 {
496 ; CHECK: cls v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
497 %vcls.i = tail call <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8> %a) #4
498 ret <16 x i8> %vcls.i
501 define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 {
502 ; CHECK: cls v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
503 %vcls1.i = tail call <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16> %a) #4
504 ret <4 x i16> %vcls1.i
507 define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 {
508 ; CHECK: cls v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
509 %vcls1.i = tail call <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16> %a) #4
510 ret <8 x i16> %vcls1.i
513 define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 {
514 ; CHECK: cls v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
515 %vcls1.i = tail call <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32> %a) #4
516 ret <2 x i32> %vcls1.i
519 define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 {
520 ; CHECK: cls v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
521 %vcls1.i = tail call <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x i32> %a) #4
522 ret <4 x i32> %vcls1.i
525 define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 {
526 ; CHECK: clz v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
527 %vclz.i = tail call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4
528 ret <8 x i8> %vclz.i
531 define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 {
532 ; CHECK: clz v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
533 %vclz.i = tail call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4
534 ret <16 x i8> %vclz.i
537 define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 {
538 ; CHECK: clz v{{[0-9]+}}.4h, v{{[0-9]+}}.4h
539 %vclz1.i = tail call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4
540 ret <4 x i16> %vclz1.i
543 define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 {
544 ; CHECK: clz v{{[0-9]+}}.8h, v{{[0-9]+}}.8h
545 %vclz1.i = tail call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4
546 ret <8 x i16> %vclz1.i
549 define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 {
550 ; CHECK: clz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
551 %vclz1.i = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4
552 ret <2 x i32> %vclz1.i
555 define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 {
556 ; CHECK: clz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
557 %vclz1.i = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4
558 ret <4 x i32> %vclz1.i
561 define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 {
562 ; CHECK: cnt v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
563 %vctpop.i = tail call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4
564 ret <8 x i8> %vctpop.i
567 define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 {
568 ; CHECK: cnt v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
569 %vctpop.i = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4
570 ret <16 x i8> %vctpop.i
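; A bitwise NOT expressed as xor with all-ones selects not; the rbit intrinsic selects rbit.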
573 define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 {
574 ; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
575 %neg.i = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
576 ret <8 x i8> %neg.i
579 define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 {
580 ; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
581 %neg.i = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
582 ret <16 x i8> %neg.i
585 define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 {
586 ; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
587 %neg.i = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
588 ret <4 x i16> %neg.i
591 define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 {
592 ; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
593 %neg.i = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
594 ret <8 x i16> %neg.i
597 define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 {
598 ; CHECK: not v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
599 %neg.i = xor <2 x i32> %a, <i32 -1, i32 -1>
600 ret <2 x i32> %neg.i
603 define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 {
604 ; CHECK: not v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
605 %neg.i = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
606 ret <4 x i32> %neg.i
609 define <8 x i8> @test_vrbit_s8(<8 x i8> %a) #0 {
610 ; CHECK: rbit v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
611 %vrbit.i = tail call <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8> %a) #4
612 ret <8 x i8> %vrbit.i
615 define <16 x i8> @test_vrbitq_s8(<16 x i8> %a) #0 {
616 ; CHECK: rbit v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
617 %vrbit.i = tail call <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8> %a) #4
618 ret <16 x i8> %vrbit.i
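; Narrowing: plain truncation selects xtn/xtn2, while the saturating narrow intrinsics select sqxtun, sqxtn and uqxtn (and their '2' high-half forms).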
621 define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 {
622 ; CHECK: xtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
623 %vmovn.i = trunc <8 x i16> %a to <8 x i8>
624 ret <8 x i8> %vmovn.i
627 define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 {
628 ; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
629 %vmovn.i = trunc <4 x i32> %a to <4 x i16>
630 ret <4 x i16> %vmovn.i
633 define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 {
634 ; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
635 %vmovn.i = trunc <2 x i64> %a to <2 x i32>
636 ret <2 x i32> %vmovn.i
639 define <16 x i8> @test_vmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
640 ; CHECK: xtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
641 %vmovn.i.i = trunc <8 x i16> %b to <8 x i8>
642 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vmovn.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
643 ret <16 x i8> %shuffle.i
646 define <8 x i16> @test_vmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
647 ; CHECK: xtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
648 %vmovn.i.i = trunc <4 x i32> %b to <4 x i16>
649 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vmovn.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
650 ret <8 x i16> %shuffle.i
653 define <4 x i32> @test_vmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
654 ; CHECK: xtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
655 %vmovn.i.i = trunc <2 x i64> %b to <2 x i32>
656 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vmovn.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
657 ret <4 x i32> %shuffle.i
660 define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 {
661 ; CHECK: sqxtun v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
662 %vqdmull1.i = tail call <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16> %a) #4
663 ret <8 x i8> %vqdmull1.i
666 define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 {
667 ; CHECK: sqxtun v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
668 %vqdmull1.i = tail call <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32> %a) #4
669 ret <4 x i16> %vqdmull1.i
672 define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 {
673 ; CHECK: sqxtun v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
674 %vqdmull1.i = tail call <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64> %a) #4
675 ret <2 x i32> %vqdmull1.i
678 define <16 x i8> @test_vqmovun_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
679 ; CHECK: sqxtun2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
680 %vqdmull1.i.i = tail call <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16> %b) #4
681 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqdmull1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
682 ret <16 x i8> %shuffle.i
685 define <8 x i16> @test_vqmovun_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
686 ; CHECK: sqxtun2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
687 %vqdmull1.i.i = tail call <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32> %b) #4
688 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqdmull1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
689 ret <8 x i16> %shuffle.i
692 define <4 x i32> @test_vqmovun_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
693 ; CHECK: sqxtun2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
694 %vqdmull1.i.i = tail call <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64> %b) #4
695 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqdmull1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
696 ret <4 x i32> %shuffle.i
699 define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 {
700 ; CHECK: sqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
701 %vqmovn1.i = tail call <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16> %a) #4
702 ret <8 x i8> %vqmovn1.i
705 define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 {
706 ; CHECK: sqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
707 %vqmovn1.i = tail call <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32> %a) #4
708 ret <4 x i16> %vqmovn1.i
711 define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 {
712 ; CHECK: sqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
713 %vqmovn1.i = tail call <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64> %a) #4
714 ret <2 x i32> %vqmovn1.i
717 define <16 x i8> @test_vqmovn_high_s16(<8 x i8> %a, <8 x i16> %b) #0 {
718 ; CHECK: sqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
719 %vqmovn1.i.i = tail call <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16> %b) #4
720 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
721 ret <16 x i8> %shuffle.i
724 define <8 x i16> @test_vqmovn_high_s32(<4 x i16> %a, <4 x i32> %b) #0 {
725 ; CHECK-LABEL: test_vqmovn_high_s32
726 %vqmovn1.i.i = tail call <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32> %b) #4
727 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
728 ret <8 x i16> %shuffle.i
731 define <4 x i32> @test_vqmovn_high_s64(<2 x i32> %a, <2 x i64> %b) #0 {
732 ; CHECK-LABEL: test_vqmovn_high_s64
733 %vqmovn1.i.i = tail call <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64> %b) #4
734 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
735 ret <4 x i32> %shuffle.i
738 define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 {
739 ; CHECK: uqxtn v{{[0-9]+}}.8b, v{{[0-9]+}}.8h
740 %vqmovn1.i = tail call <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16> %a) #4
741 ret <8 x i8> %vqmovn1.i
744 define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 {
745 ; CHECK: uqxtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
746 %vqmovn1.i = tail call <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32> %a) #4
747 ret <4 x i16> %vqmovn1.i
750 define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 {
751 ; CHECK: uqxtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
752 %vqmovn1.i = tail call <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64> %a) #4
753 ret <2 x i32> %vqmovn1.i
756 define <16 x i8> @test_vqmovn_high_u16(<8 x i8> %a, <8 x i16> %b) #0 {
757 ; CHECK: uqxtn2 v{{[0-9]+}}.16b, v{{[0-9]+}}.8h
758 %vqmovn1.i.i = tail call <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16> %b) #4
759 %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> %vqmovn1.i.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
760 ret <16 x i8> %shuffle.i
763 define <8 x i16> @test_vqmovn_high_u32(<4 x i16> %a, <4 x i32> %b) #0 {
764 ; CHECK: uqxtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
765 %vqmovn1.i.i = tail call <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32> %b) #4
766 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vqmovn1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
767 ret <8 x i16> %shuffle.i
770 define <4 x i32> @test_vqmovn_high_u64(<2 x i32> %a, <2 x i64> %b) #0 {
771 ; CHECK: uqxtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
772 %vqmovn1.i.i = tail call <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64> %b) #4
773 %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> %vqmovn1.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
774 ret <4 x i32> %shuffle.i
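; A sign or zero extension followed by a shift left by the source element width should select shll/shll2.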
777 define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 {
778 ; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8
779 %1 = sext <8 x i8> %a to <8 x i16>
780 %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
781 ret <8 x i16> %vshll_n
784 define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 {
785 ; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16
786 %1 = sext <4 x i16> %a to <4 x i32>
787 %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
788 ret <4 x i32> %vshll_n
791 define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 {
792 ; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32
793 %1 = sext <2 x i32> %a to <2 x i64>
794 %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
795 ret <2 x i64> %vshll_n
798 define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 {
799 ; CHECK: shll {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, #8
800 %1 = zext <8 x i8> %a to <8 x i16>
801 %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
802 ret <8 x i16> %vshll_n
805 define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 {
806 ; CHECK: shll {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, #16
807 %1 = zext <4 x i16> %a to <4 x i32>
808 %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
809 ret <4 x i32> %vshll_n
812 define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 {
813 ; CHECK: shll {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, #32
814 %1 = zext <2 x i32> %a to <2 x i64>
815 %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
816 ret <2 x i64> %vshll_n
819 define <8 x i16> @test_vshll_high_n_s8(<16 x i8> %a) #0 {
820 ; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8
821 %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
822 %1 = sext <8 x i8> %shuffle.i to <8 x i16>
823 %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
824 ret <8 x i16> %vshll_n
827 define <4 x i32> @test_vshll_high_n_s16(<8 x i16> %a) #0 {
828 ; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16
829 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
830 %1 = sext <4 x i16> %shuffle.i to <4 x i32>
831 %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
832 ret <4 x i32> %vshll_n
835 define <2 x i64> @test_vshll_high_n_s32(<4 x i32> %a) #0 {
836 ; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32
837 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
838 %1 = sext <2 x i32> %shuffle.i to <2 x i64>
839 %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
840 ret <2 x i64> %vshll_n
843 define <8 x i16> @test_vshll_high_n_u8(<16 x i8> %a) #0 {
844 ; CHECK: shll2 {{v[0-9]+}}.8h, {{v[0-9]+}}.16b, #8
845 %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
846 %1 = zext <8 x i8> %shuffle.i to <8 x i16>
847 %vshll_n = shl <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
848 ret <8 x i16> %vshll_n
851 define <4 x i32> @test_vshll_high_n_u16(<8 x i16> %a) #0 {
852 ; CHECK: shll2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, #16
853 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
854 %1 = zext <4 x i16> %shuffle.i to <4 x i32>
855 %vshll_n = shl <4 x i32> %1, <i32 16, i32 16, i32 16, i32 16>
856 ret <4 x i32> %vshll_n
859 define <2 x i64> @test_vshll_high_n_u32(<4 x i32> %a) #0 {
860 ; CHECK: shll2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, #32
861 %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
862 %1 = zext <2 x i32> %shuffle.i to <2 x i64>
863 %vshll_n = shl <2 x i64> %1, <i64 32, i64 32>
864 ret <2 x i64> %vshll_n
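; Floating-point width conversions: the half/single and single/double cases should select fcvtn/fcvtn2, fcvtl/fcvtl2 and fcvtxn/fcvtxn2.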
867 define <4 x i16> @test_vcvt_f16_f32(<4 x float> %a) #0 {
868 ; CHECK: fcvtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
869 %vcvt1.i = tail call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float> %a) #4
870 ret <4 x i16> %vcvt1.i
873 define <8 x i16> @test_vcvt_high_f16_f32(<4 x i16> %a, <4 x float> %b) #0 {
874 ; CHECK: fcvtn2 v{{[0-9]+}}.8h, v{{[0-9]+}}.4s
875 %vcvt1.i.i = tail call <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float> %b) #4
876 %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> %vcvt1.i.i, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
877 ret <8 x i16> %shuffle.i
880 define <4 x float> @test_vcvt_f32_f16(<4 x i16> %a) #0 {
881 ; CHECK: fcvtl v{{[0-9]+}}.4s, v{{[0-9]+}}.4h
882 %vcvt1.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16> %a) #4
883 ret <4 x float> %vcvt1.i
886 define <4 x float> @test_vcvt_high_f32_f16(<8 x i16> %a) #0 {
887 ; CHECK: fcvtl2 v{{[0-9]+}}.4s, v{{[0-9]+}}.8h
888 %shuffle.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
889 %vcvt1.i.i = tail call <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16> %shuffle.i.i) #4
890 ret <4 x float> %vcvt1.i.i
893 define <2 x float> @test_vcvt_f32_f64(<2 x double> %a) #0 {
894 ; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
895 %vcvt.i = fptrunc <2 x double> %a to <2 x float>
896 ret <2 x float> %vcvt.i
899 define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
900 ; CHECK: fcvtn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
901 %vcvt.i.i = fptrunc <2 x double> %b to <2 x float>
902 %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvt.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
903 ret <4 x float> %shuffle.i
906 define <2 x float> @test_vcvtx_f32_f64(<2 x double> %a) #0 {
907 ; CHECK: fcvtxn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
908 %vcvtx_f32_f641.i = call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %a) #4
909 ret <2 x float> %vcvtx_f32_f641.i
912 define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %a, <2 x double> %b) #0 {
913 ; CHECK: fcvtxn2 v{{[0-9]+}}.4s, v{{[0-9]+}}.2d
914 %vcvtx_f32_f641.i.i = tail call <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double> %b) #4
915 %shuffle.i = shufflevector <2 x float> %a, <2 x float> %vcvtx_f32_f641.i.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
916 ret <4 x float> %shuffle.i
919 define <2 x double> @test_vcvt_f64_f32(<2 x float> %a) #0 {
920 ; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
921 %vcvt.i = fpext <2 x float> %a to <2 x double>
922 ret <2 x double> %vcvt.i
925 define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %a) #0 {
926 ; CHECK: fcvtl2 v{{[0-9]+}}.2d, v{{[0-9]+}}.4s
927 %shuffle.i.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
928 %vcvt.i.i = fpext <2 x float> %shuffle.i.i to <2 x double>
929 ret <2 x double> %vcvt.i.i
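; Rounding intrinsics map onto the frint family: frintn, frinta, frintp, frintm, frintx, frintz and frinti.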
932 define <2 x float> @test_vrndn_f32(<2 x float> %a) #0 {
933 ; CHECK: frintn v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
934 %vrndn1.i = tail call <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float> %a) #4
935 ret <2 x float> %vrndn1.i
938 define <4 x float> @test_vrndnq_f32(<4 x float> %a) #0 {
939 ; CHECK: frintn v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
940 %vrndn1.i = tail call <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float> %a) #4
941 ret <4 x float> %vrndn1.i
944 define <2 x double> @test_vrndnq_f64(<2 x double> %a) #0 {
945 ; CHECK: frintn v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
946 %vrndn1.i = tail call <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double> %a) #4
947 ret <2 x double> %vrndn1.i
950 define <2 x float> @test_vrnda_f32(<2 x float> %a) #0 {
951 ; CHECK: frinta v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
952 %vrnda1.i = tail call <2 x float> @llvm.round.v2f32(<2 x float> %a) #4
953 ret <2 x float> %vrnda1.i
956 define <4 x float> @test_vrndaq_f32(<4 x float> %a) #0 {
957 ; CHECK: frinta v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
958 %vrnda1.i = tail call <4 x float> @llvm.round.v4f32(<4 x float> %a) #4
959 ret <4 x float> %vrnda1.i
962 define <2 x double> @test_vrndaq_f64(<2 x double> %a) #0 {
963 ; CHECK: frinta v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
964 %vrnda1.i = tail call <2 x double> @llvm.round.v2f64(<2 x double> %a) #4
965 ret <2 x double> %vrnda1.i
968 define <2 x float> @test_vrndp_f32(<2 x float> %a) #0 {
969 ; CHECK: frintp v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
970 %vrndp1.i = tail call <2 x float> @llvm.ceil.v2f32(<2 x float> %a) #4
971 ret <2 x float> %vrndp1.i
974 define <4 x float> @test_vrndpq_f32(<4 x float> %a) #0 {
975 ; CHECK: frintp v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
976 %vrndp1.i = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) #4
977 ret <4 x float> %vrndp1.i
980 define <2 x double> @test_vrndpq_f64(<2 x double> %a) #0 {
981 ; CHECK: frintp v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
982 %vrndp1.i = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) #4
983 ret <2 x double> %vrndp1.i
986 define <2 x float> @test_vrndm_f32(<2 x float> %a) #0 {
987 ; CHECK: frintm v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
988 %vrndm1.i = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %a) #4
989 ret <2 x float> %vrndm1.i
992 define <4 x float> @test_vrndmq_f32(<4 x float> %a) #0 {
993 ; CHECK: frintm v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
994 %vrndm1.i = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %a) #4
995 ret <4 x float> %vrndm1.i
998 define <2 x double> @test_vrndmq_f64(<2 x double> %a) #0 {
999 ; CHECK: frintm v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1000 %vrndm1.i = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %a) #4
1001 ret <2 x double> %vrndm1.i
1004 define <2 x float> @test_vrndx_f32(<2 x float> %a) #0 {
1005 ; CHECK: frintx v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1006 %vrndx1.i = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %a) #4
1007 ret <2 x float> %vrndx1.i
1010 define <4 x float> @test_vrndxq_f32(<4 x float> %a) #0 {
1011 ; CHECK: frintx v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1012 %vrndx1.i = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %a) #4
1013 ret <4 x float> %vrndx1.i
1016 define <2 x double> @test_vrndxq_f64(<2 x double> %a) #0 {
1017 ; CHECK: frintx v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1018 %vrndx1.i = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %a) #4
1019 ret <2 x double> %vrndx1.i
1022 define <2 x float> @test_vrnd_f32(<2 x float> %a) #0 {
1023 ; CHECK: frintz v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1024 %vrnd1.i = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> %a) #4
1025 ret <2 x float> %vrnd1.i
1028 define <4 x float> @test_vrndq_f32(<4 x float> %a) #0 {
1029 ; CHECK: frintz v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1030 %vrnd1.i = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) #4
1031 ret <4 x float> %vrnd1.i
1034 define <2 x double> @test_vrndq_f64(<2 x double> %a) #0 {
1035 ; CHECK: frintz v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1036 %vrnd1.i = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) #4
1037 ret <2 x double> %vrnd1.i
1040 define <2 x float> @test_vrndi_f32(<2 x float> %a) #0 {
1041 ; CHECK: frinti v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1042 %vrndi1.i = tail call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %a) #4
1043 ret <2 x float> %vrndi1.i
1046 define <4 x float> @test_vrndiq_f32(<4 x float> %a) #0 {
1047 ; CHECK: frinti v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1048 %vrndi1.i = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) #4
1049 ret <4 x float> %vrndi1.i
1052 define <2 x double> @test_vrndiq_f64(<2 x double> %a) #0 {
1053 ; CHECK: frinti v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1054 %vrndi1.i = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) #4
1055 ret <2 x double> %vrndi1.i
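; fptosi/fptoui select fcvtzs/fcvtzu; when the integer width differs from the source lanes, an fcvtl or xtn is expected as well.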
1058 define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 {
1059 ; CHECK: fcvtzs v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1060 %vcvt.i = fptosi <2 x float> %a to <2 x i32>
1061 ret <2 x i32> %vcvt.i
1064 define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 {
1065 ; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1066 %vcvt.i = fptosi <4 x float> %a to <4 x i32>
1067 ret <4 x i32> %vcvt.i
1070 define <2 x i64> @test_vcvtq_s64_f64(<2 x double> %a) #0 {
1071 ; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1072 %vcvt.i = fptosi <2 x double> %a to <2 x i64>
1073 ret <2 x i64> %vcvt.i
1076 define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 {
1077 ; CHECK: fcvtzu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1078 %vcvt.i = fptoui <2 x float> %a to <2 x i32>
1079 ret <2 x i32> %vcvt.i
1082 define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 {
1083 ; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1084 %vcvt.i = fptoui <4 x float> %a to <4 x i32>
1085 ret <4 x i32> %vcvt.i
1088 define <2 x i64> @test_vcvtq_u64_f64(<2 x double> %a) #0 {
1089 ; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1090 %vcvt.i = fptoui <2 x double> %a to <2 x i64>
1091 ret <2 x i64> %vcvt.i
1094 define <2 x i64> @test_vcvt_s64_f32(<2 x float> %a) #0 {
1095 ; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
1096 ; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1097 %vcvt.i = fptosi <2 x float> %a to <2 x i64>
1098 ret <2 x i64> %vcvt.i
1101 define <2 x i64> @test_vcvt_u64_f32(<2 x float> %a) #0 {
1102 ; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
1103 ; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1104 %vcvt.i = fptoui <2 x float> %a to <2 x i64>
1105 ret <2 x i64> %vcvt.i
1108 define <4 x i16> @test_vcvt_s16_f32(<4 x float> %a) #0 {
1109 ; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1110 ; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
1111 %vcvt.i = fptosi <4 x float> %a to <4 x i16>
1112 ret <4 x i16> %vcvt.i
1115 define <4 x i16> @test_vcvt_u16_f32(<4 x float> %a) #0 {
1116 ; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1117 ; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
1118 %vcvt.i = fptoui <4 x float> %a to <4 x i16>
1119 ret <4 x i16> %vcvt.i
1122 define <2 x i32> @test_vcvt_s32_f64(<2 x double> %a) #0 {
1123 ; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1124 ; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
1125 %vcvt.i = fptosi <2 x double> %a to <2 x i32>
1126 ret <2 x i32> %vcvt.i
1129 define <2 x i32> @test_vcvt_u32_f64(<2 x double> %a) #0 {
1130 ; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1131 ; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
1132 %vcvt.i = fptoui <2 x double> %a to <2 x i32>
1133 ret <2 x i32> %vcvt.i
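; The rounding-mode conversion intrinsics should select fcvtns/fcvtnu, fcvtps/fcvtpu, fcvtms/fcvtmu and fcvtas/fcvtau.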
1136 define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) {
1137 ; CHECK-LABEL: test_vcvtn_s32_f32
1138 ; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1139 %vcvtns_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float> %a)
1140 ret <2 x i32> %vcvtns_f321.i
1143 define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) {
1144 ; CHECK-LABEL: test_vcvtnq_s32_f32
1145 ; CHECK: fcvtns v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1146 %vcvtns_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float> %a)
1147 ret <4 x i32> %vcvtns_f321.i
1150 define <2 x i64> @test_vcvtnq_s64_f64(<2 x double> %a) {
1151 ; CHECK-LABEL: test_vcvtnq_s64_f64
1152 ; CHECK: fcvtns v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1153 %vcvtns_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double> %a)
1154 ret <2 x i64> %vcvtns_f641.i
1157 define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) {
1158 ; CHECK-LABEL: test_vcvtn_u32_f32
1159 ; CHECK: fcvtnu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1160 %vcvtnu_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float> %a)
1161 ret <2 x i32> %vcvtnu_f321.i
1164 define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) {
1165 ; CHECK-LABEL: test_vcvtnq_u32_f32
1166 ; CHECK: fcvtnu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1167 %vcvtnu_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float> %a)
1168 ret <4 x i32> %vcvtnu_f321.i
1171 define <2 x i64> @test_vcvtnq_u64_f64(<2 x double> %a) {
1172 ; CHECK-LABEL: test_vcvtnq_u64_f64
1173 ; CHECK: fcvtnu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1174 %vcvtnu_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double> %a)
1175 ret <2 x i64> %vcvtnu_f641.i
1178 define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) {
1179 ; CHECK-LABEL: test_vcvtp_s32_f32
1180 ; CHECK: fcvtps v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1181 %vcvtps_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float> %a)
1182 ret <2 x i32> %vcvtps_f321.i
1185 define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) {
1186 ; CHECK-LABEL: test_vcvtpq_s32_f32
1187 ; CHECK: fcvtps v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1188 %vcvtps_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x float> %a)
1189 ret <4 x i32> %vcvtps_f321.i
1192 define <2 x i64> @test_vcvtpq_s64_f64(<2 x double> %a) {
1193 ; CHECK-LABEL: test_vcvtpq_s64_f64
1194 ; CHECK: fcvtps v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1195 %vcvtps_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double> %a)
1196 ret <2 x i64> %vcvtps_f641.i
1199 define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) {
1200 ; CHECK-LABEL: test_vcvtp_u32_f32
1201 ; CHECK: fcvtpu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1202 %vcvtpu_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float> %a)
1203 ret <2 x i32> %vcvtpu_f321.i
1206 define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) {
1207 ; CHECK-LABEL: test_vcvtpq_u32_f32
1208 ; CHECK: fcvtpu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1209 %vcvtpu_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float> %a)
1210 ret <4 x i32> %vcvtpu_f321.i
1213 define <2 x i64> @test_vcvtpq_u64_f64(<2 x double> %a) {
1214 ; CHECK-LABEL: test_vcvtpq_u64_f64
1215 ; CHECK: fcvtpu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1216 %vcvtpu_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double> %a)
1217 ret <2 x i64> %vcvtpu_f641.i
1220 define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) {
1221 ; CHECK-LABEL: test_vcvtm_s32_f32
1222 ; CHECK: fcvtms v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1223 %vcvtms_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float> %a)
1224 ret <2 x i32> %vcvtms_f321.i
1227 define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) {
1228 ; CHECK-LABEL: test_vcvtmq_s32_f32
1229 ; CHECK: fcvtms v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1230 %vcvtms_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float> %a)
1231 ret <4 x i32> %vcvtms_f321.i
1234 define <2 x i64> @test_vcvtmq_s64_f64(<2 x double> %a) {
1235 ; CHECK-LABEL: test_vcvtmq_s64_f64
1236 ; CHECK: fcvtms v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1237 %vcvtms_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double> %a)
1238 ret <2 x i64> %vcvtms_f641.i
1241 define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) {
1242 ; CHECK-LABEL: test_vcvtm_u32_f32
1243 ; CHECK: fcvtmu v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1244 %vcvtmu_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float> %a)
1245 ret <2 x i32> %vcvtmu_f321.i
1248 define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) {
1249 ; CHECK-LABEL: test_vcvtmq_u32_f32
1250 ; CHECK: fcvtmu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1251 %vcvtmu_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float> %a)
1252 ret <4 x i32> %vcvtmu_f321.i
1255 define <2 x i64> @test_vcvtmq_u64_f64(<2 x double> %a) {
1256 ; CHECK-LABEL: test_vcvtmq_u64_f64
1257 ; CHECK: fcvtmu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1258 %vcvtmu_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double> %a)
1259 ret <2 x i64> %vcvtmu_f641.i
1262 define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) {
1263 ; CHECK-LABEL: test_vcvta_s32_f32
1264 ; CHECK: fcvtas v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1265 %vcvtas_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x float> %a)
1266 ret <2 x i32> %vcvtas_f321.i
1269 define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) {
1270 ; CHECK-LABEL: test_vcvtaq_s32_f32
1271 ; CHECK: fcvtas v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1272 %vcvtas_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float> %a)
1273 ret <4 x i32> %vcvtas_f321.i
1276 define <2 x i64> @test_vcvtaq_s64_f64(<2 x double> %a) {
1277 ; CHECK-LABEL: test_vcvtaq_s64_f64
1278 ; CHECK: fcvtas v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1279 %vcvtas_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double> %a)
1280 ret <2 x i64> %vcvtas_f641.i
1283 define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) {
1284 ; CHECK-LABEL: test_vcvta_u32_f32
1285 ; CHECK: fcvtau v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1286 %vcvtau_f321.i = call <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float> %a)
1287 ret <2 x i32> %vcvtau_f321.i
1290 define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) {
1291 ; CHECK-LABEL: test_vcvtaq_u32_f32
1292 ; CHECK: fcvtau v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1293 %vcvtau_f321.i = call <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float> %a)
1294 ret <4 x i32> %vcvtau_f321.i
1297 define <2 x i64> @test_vcvtaq_u64_f64(<2 x double> %a) {
1298 ; CHECK-LABEL: test_vcvtaq_u64_f64
1299 ; CHECK: fcvtau v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1300 %vcvtau_f641.i = call <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double> %a)
1301 ret <2 x i64> %vcvtau_f641.i
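; Estimate and square-root operations: frsqrte, frecpe, urecpe, and llvm.sqrt selecting fsqrt.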
1304 define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 {
1305 ; CHECK: frsqrte v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1306 %vrsqrte1.i = tail call <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float> %a) #4
1307 ret <2 x float> %vrsqrte1.i
1310 define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 {
1311 ; CHECK: frsqrte v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1312 %vrsqrte1.i = tail call <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float> %a) #4
1313 ret <4 x float> %vrsqrte1.i
1316 define <2 x double> @test_vrsqrteq_f64(<2 x double> %a) #0 {
1317 ; CHECK: frsqrte v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1318 %vrsqrte1.i = tail call <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double> %a) #4
1319 ret <2 x double> %vrsqrte1.i
1322 declare <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float>)
1323 declare <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float>)
1324 declare <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double>)
1326 define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 {
1327 ; CHECK: frecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1328 %vrecpe1.i = tail call <2 x float> @llvm.arm64.neon.frecpe.v2f32(<2 x float> %a) #4
1329 ret <2 x float> %vrecpe1.i
1332 define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 {
1333 ; CHECK: frecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1334 %vrecpe1.i = tail call <4 x float> @llvm.arm64.neon.frecpe.v4f32(<4 x float> %a) #4
1335 ret <4 x float> %vrecpe1.i
1338 define <2 x double> @test_vrecpeq_f64(<2 x double> %a) #0 {
1339 ; CHECK: frecpe v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1340 %vrecpe1.i = tail call <2 x double> @llvm.arm64.neon.frecpe.v2f64(<2 x double> %a) #4
1341 ret <2 x double> %vrecpe1.i
1344 define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 {
1345 ; CHECK: urecpe v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1346 %vrecpe1.i = tail call <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32> %a) #4
1347 ret <2 x i32> %vrecpe1.i
1350 define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 {
1351 ; CHECK: urecpe v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1352 %vrecpe1.i = tail call <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32> %a) #4
1353 ret <4 x i32> %vrecpe1.i
1356 define <2 x float> @test_vsqrt_f32(<2 x float> %a) #0 {
1357 ; CHECK: fsqrt v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1358 %vsqrt1.i = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> %a) #4
1359 ret <2 x float> %vsqrt1.i
1362 define <4 x float> @test_vsqrtq_f32(<4 x float> %a) #0 {
1363 ; CHECK: fsqrt v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1364 %vsqrt1.i = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) #4
1365 ret <4 x float> %vsqrt1.i
1368 define <2 x double> @test_vsqrtq_f64(<2 x double> %a) #0 {
1369 ; CHECK: fsqrt v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1370 %vsqrt1.i = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) #4
1371 ret <2 x double> %vsqrt1.i
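; sitofp/uitofp select scvtf/ucvtf, with sshll/ushll #0 widening narrow integer sources first.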
1374 define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 {
1375 ; CHECK: scvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1376 %vcvt.i = sitofp <2 x i32> %a to <2 x float>
1377 ret <2 x float> %vcvt.i
1380 define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 {
1381 ; CHECK: ucvtf v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
1382 %vcvt.i = uitofp <2 x i32> %a to <2 x float>
1383 ret <2 x float> %vcvt.i
1386 define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 {
1387 ; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1388 %vcvt.i = sitofp <4 x i32> %a to <4 x float>
1389 ret <4 x float> %vcvt.i
1392 define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 {
1393 ; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1394 %vcvt.i = uitofp <4 x i32> %a to <4 x float>
1395 ret <4 x float> %vcvt.i
1398 define <2 x double> @test_vcvtq_f64_s64(<2 x i64> %a) #0 {
1399 ; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1400 %vcvt.i = sitofp <2 x i64> %a to <2 x double>
1401 ret <2 x double> %vcvt.i
1404 define <2 x double> @test_vcvtq_f64_u64(<2 x i64> %a) #0 {
1405 ; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1406 %vcvt.i = uitofp <2 x i64> %a to <2 x double>
1407 ret <2 x double> %vcvt.i
1410 define <4 x float> @test_vcvt_f32_s16(<4 x i16> %a) #0 {
1411 ; CHECK: sshll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0
1412 ; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1413 %vcvt.i = sitofp <4 x i16> %a to <4 x float>
1414 ret <4 x float> %vcvt.i
1417 define <4 x float> @test_vcvt_f32_u16(<4 x i16> %a) #0 {
1418 ; CHECK: ushll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0
1419 ; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
1420 %vcvt.i = uitofp <4 x i16> %a to <4 x float>
1421 ret <4 x float> %vcvt.i
1424 define <2 x double> @test_vcvt_f64_s32(<2 x i32> %a) #0 {
1425 ; CHECK: sshll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0
1426 ; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1427 %vcvt.i = sitofp <2 x i32> %a to <2 x double>
1428 ret <2 x double> %vcvt.i
1431 define <2 x double> @test_vcvt_f64_u32(<2 x i32> %a) #0 {
1432 ; CHECK: ushll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0
1433 ; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
1434 %vcvt.i = uitofp <2 x i32> %a to <2 x double>
1435 ret <2 x double> %vcvt.i
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #2
declare <4 x i32> @llvm.arm64.neon.urecpe.v4i32(<4 x i32>) #2
declare <2 x i32> @llvm.arm64.neon.urecpe.v2i32(<2 x i32>) #2
declare <2 x double> @llvm.arm64.neon.urecpe.v2f64(<2 x double>) #2
declare <4 x float> @llvm.arm64.neon.urecpe.v4f32(<4 x float>) #2
declare <2 x float> @llvm.arm64.neon.urecpe.v2f32(<2 x float>) #2
declare <2 x double> @llvm.arm64.neon.frsqrte.v2f64(<2 x double>) #2
declare <4 x float> @llvm.arm64.neon.frsqrte.v4f32(<4 x float>) #2
declare <2 x float> @llvm.arm64.neon.frsqrte.v2f32(<2 x float>) #2
declare <2 x i64> @llvm.arm64.neon.fcvtau.v2i64.v2f64(<2 x double>)
declare <4 x i32> @llvm.arm64.neon.fcvtau.v4i32.v4f32(<4 x float>)
declare <2 x i32> @llvm.arm64.neon.fcvtau.v2i32.v2f32(<2 x float>)
declare <2 x i64> @llvm.arm64.neon.fcvtas.v2i64.v2f64(<2 x double>)
declare <4 x i32> @llvm.arm64.neon.fcvtas.v4i32.v4f32(<4 x float>)
declare <2 x i32> @llvm.arm64.neon.fcvtas.v2i32.v2f32(<2 x float>)
declare <2 x i64> @llvm.arm64.neon.fcvtmu.v2i64.v2f64(<2 x double>)
declare <4 x i32> @llvm.arm64.neon.fcvtmu.v4i32.v4f32(<4 x float>)
declare <2 x i32> @llvm.arm64.neon.fcvtmu.v2i32.v2f32(<2 x float>)
declare <2 x i64> @llvm.arm64.neon.fcvtms.v2i64.v2f64(<2 x double>)
declare <4 x i32> @llvm.arm64.neon.fcvtms.v4i32.v4f32(<4 x float>)
declare <2 x i32> @llvm.arm64.neon.fcvtms.v2i32.v2f32(<2 x float>)
declare <2 x i64> @llvm.arm64.neon.fcvtpu.v2i64.v2f64(<2 x double>)
declare <4 x i32> @llvm.arm64.neon.fcvtpu.v4i32.v4f32(<4 x float>)
declare <2 x i32> @llvm.arm64.neon.fcvtpu.v2i32.v2f32(<2 x float>)
declare <2 x i64> @llvm.arm64.neon.fcvtps.v2i64.v2f64(<2 x double>)
declare <4 x i32> @llvm.arm64.neon.fcvtps.v4i32.v4f32(<4 x float>)
declare <2 x i32> @llvm.arm64.neon.fcvtps.v2i32.v2f32(<2 x float>)
declare <2 x i64> @llvm.arm64.neon.fcvtnu.v2i64.v2f64(<2 x double>)
declare <4 x i32> @llvm.arm64.neon.fcvtnu.v4i32.v4f32(<4 x float>)
declare <2 x i32> @llvm.arm64.neon.fcvtnu.v2i32.v2f32(<2 x float>)
declare <2 x i64> @llvm.arm64.neon.fcvtns.v2i64.v2f64(<2 x double>)
declare <4 x i32> @llvm.arm64.neon.fcvtns.v4i32.v4f32(<4 x float>)
declare <2 x i32> @llvm.arm64.neon.fcvtns.v2i32.v2f32(<2 x float>)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #3
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #3
declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #3
declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #3
declare <4 x float> @llvm.trunc.v4f32(<4 x float>) #3
declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #3
declare <2 x double> @llvm.rint.v2f64(<2 x double>) #3
declare <4 x float> @llvm.rint.v4f32(<4 x float>) #3
declare <2 x float> @llvm.rint.v2f32(<2 x float>) #3
declare <2 x double> @llvm.floor.v2f64(<2 x double>) #3
declare <4 x float> @llvm.floor.v4f32(<4 x float>) #3
declare <2 x float> @llvm.floor.v2f32(<2 x float>) #3
declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #3
declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #3
declare <2 x float> @llvm.ceil.v2f32(<2 x float>) #3
declare <2 x double> @llvm.round.v2f64(<2 x double>) #3
declare <4 x float> @llvm.round.v4f32(<4 x float>) #3
declare <2 x float> @llvm.round.v2f32(<2 x float>) #3
declare <2 x double> @llvm.arm64.neon.frintn.v2f64(<2 x double>) #2
declare <4 x float> @llvm.arm64.neon.frintn.v4f32(<4 x float>) #2
declare <2 x float> @llvm.arm64.neon.frintn.v2f32(<2 x float>) #2
declare <2 x float> @llvm.arm64.neon.fcvtxn.v2f32.v2f64(<2 x double>) #2
declare <2 x float> @llvm.arm64.neon.fcvtn.v2f32.v2f64(<2 x double>) #2
declare <2 x i32> @llvm.arm64.neon.uqxtn.v2i32(<2 x i64>) #2
declare <4 x i16> @llvm.arm64.neon.uqxtn.v4i16(<4 x i32>) #2
declare <8 x i8> @llvm.arm64.neon.uqxtn.v8i8(<8 x i16>) #2
declare <2 x i32> @llvm.arm64.neon.sqxtn.v2i32(<2 x i64>) #2
declare <4 x i16> @llvm.arm64.neon.sqxtn.v4i16(<4 x i32>) #2
declare <8 x i8> @llvm.arm64.neon.sqxtn.v8i8(<8 x i16>) #2
declare <2 x i32> @llvm.arm64.neon.sqxtun.v2i32(<2 x i64>) #2
declare <4 x i16> @llvm.arm64.neon.sqxtun.v4i16(<4 x i32>) #2
declare <8 x i8> @llvm.arm64.neon.sqxtun.v8i8(<8 x i16>) #2
declare <16 x i8> @llvm.arm64.neon.rbit.v16i8(<16 x i8>) #2
declare <8 x i8> @llvm.arm64.neon.rbit.v8i8(<8 x i8>) #2
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) #2
declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) #2
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #2
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #2
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) #2
declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) #2
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) #2
declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) #2
declare <4 x i32> @llvm.arm64.neon.cls.v4i32(<4 x i32>) #2
declare <2 x i32> @llvm.arm64.neon.cls.v2i32(<2 x i32>) #2
declare <8 x i16> @llvm.arm64.neon.cls.v8i16(<8 x i16>) #2
declare <4 x i16> @llvm.arm64.neon.cls.v4i16(<4 x i16>) #2
declare <16 x i8> @llvm.arm64.neon.cls.v16i8(<16 x i8>) #2
declare <8 x i8> @llvm.arm64.neon.cls.v8i8(<8 x i8>) #2
declare <2 x i64> @llvm.arm64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) #2
declare <4 x i32> @llvm.arm64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) #2
declare <2 x i32> @llvm.arm64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) #2
declare <8 x i16> @llvm.arm64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) #2
declare <4 x i16> @llvm.arm64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) #2
declare <16 x i8> @llvm.arm64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) #2
declare <8 x i8> @llvm.arm64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) #2
declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #3
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #3
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #3
declare <2 x i64> @llvm.arm64.neon.abs.v2i64(<2 x i64>) #2
declare <4 x i32> @llvm.arm64.neon.abs.v4i32(<4 x i32>) #2
declare <2 x i32> @llvm.arm64.neon.abs.v2i32(<2 x i32>) #2
declare <8 x i16> @llvm.arm64.neon.abs.v8i16(<8 x i16>) #2
declare <4 x i16> @llvm.arm64.neon.abs.v4i16(<4 x i16>) #2
declare <16 x i8> @llvm.arm64.neon.abs.v16i8(<16 x i8>) #2
declare <8 x i8> @llvm.arm64.neon.abs.v8i8(<8 x i8>) #2
declare <2 x i64> @llvm.arm64.neon.sqneg.v2i64(<2 x i64>) #2
declare <4 x i32> @llvm.arm64.neon.sqneg.v4i32(<4 x i32>) #2
declare <2 x i32> @llvm.arm64.neon.sqneg.v2i32(<2 x i32>) #2
declare <8 x i16> @llvm.arm64.neon.sqneg.v8i16(<8 x i16>) #2
declare <4 x i16> @llvm.arm64.neon.sqneg.v4i16(<4 x i16>) #2
declare <16 x i8> @llvm.arm64.neon.sqneg.v16i8(<16 x i8>) #2
declare <8 x i8> @llvm.arm64.neon.sqneg.v8i8(<8 x i8>) #2
declare <2 x i64> @llvm.arm64.neon.sqabs.v2i64(<2 x i64>) #2
declare <4 x i32> @llvm.arm64.neon.sqabs.v4i32(<4 x i32>) #2
declare <2 x i32> @llvm.arm64.neon.sqabs.v2i32(<2 x i32>) #2
declare <8 x i16> @llvm.arm64.neon.sqabs.v8i16(<8 x i16>) #2
declare <4 x i16> @llvm.arm64.neon.sqabs.v4i16(<4 x i16>) #2
declare <16 x i8> @llvm.arm64.neon.sqabs.v16i8(<16 x i8>) #2
declare <8 x i8> @llvm.arm64.neon.sqabs.v8i8(<8 x i8>) #2
declare <2 x i64> @llvm.arm64.neon.uaddlp.v2i64.v4i32(<4 x i32>) #2
declare <4 x i32> @llvm.arm64.neon.uaddlp.v4i32.v8i16(<8 x i16>) #2
declare <8 x i16> @llvm.arm64.neon.uaddlp.v8i16.v16i8(<16 x i8>) #2
declare <2 x i64> @llvm.arm64.neon.saddlp.v2i64.v4i32(<4 x i32>) #2
declare <4 x i32> @llvm.arm64.neon.saddlp.v4i32.v8i16(<8 x i16>) #2
declare <8 x i16> @llvm.arm64.neon.saddlp.v8i16.v16i8(<16 x i8>) #2
declare <1 x i64> @llvm.arm64.neon.uaddlp.v1i64.v2i32(<2 x i32>) #2
declare <2 x i32> @llvm.arm64.neon.uaddlp.v2i32.v4i16(<4 x i16>) #2
declare <4 x i16> @llvm.arm64.neon.uaddlp.v4i16.v8i8(<8 x i8>) #2
declare <1 x i64> @llvm.arm64.neon.saddlp.v1i64.v2i32(<2 x i32>) #2
declare <2 x i32> @llvm.arm64.neon.saddlp.v2i32.v4i16(<4 x i16>) #2
declare <4 x i16> @llvm.arm64.neon.saddlp.v4i16.v8i8(<8 x i8>) #2
declare <4 x float> @llvm.arm64.neon.vcvthf2fp(<4 x i16>) #2
declare <4 x i16> @llvm.arm64.neon.vcvtfp2hf(<4 x float>) #2
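
; The remaining tests operate on <1 x i64> / <1 x double>, which should be
; lowered to the scalar (d-register) forms of the conversion instructions.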
define <1 x i64> @test_vcvt_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_s64_f64
; CHECK: fcvtzs {{[xd][0-9]+}}, d{{[0-9]+}}
  %1 = fptosi <1 x double> %a to <1 x i64>
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvt_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_u64_f64
; CHECK: fcvtzu {{[xd][0-9]+}}, d{{[0-9]+}}
  %1 = fptoui <1 x double> %a to <1 x i64>
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvtn_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvtn_s64_f64
; CHECK: fcvtns d{{[0-9]+}}, d{{[0-9]+}}
  %1 = call <1 x i64> @llvm.arm64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvtn_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvtn_u64_f64
; CHECK: fcvtnu d{{[0-9]+}}, d{{[0-9]+}}
  %1 = call <1 x i64> @llvm.arm64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvtp_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvtp_s64_f64
; CHECK: fcvtps d{{[0-9]+}}, d{{[0-9]+}}
  %1 = call <1 x i64> @llvm.arm64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvtp_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvtp_u64_f64
; CHECK: fcvtpu d{{[0-9]+}}, d{{[0-9]+}}
  %1 = call <1 x i64> @llvm.arm64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvtm_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvtm_s64_f64
; CHECK: fcvtms d{{[0-9]+}}, d{{[0-9]+}}
  %1 = call <1 x i64> @llvm.arm64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvtm_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvtm_u64_f64
; CHECK: fcvtmu d{{[0-9]+}}, d{{[0-9]+}}
  %1 = call <1 x i64> @llvm.arm64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvta_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvta_s64_f64
; CHECK: fcvtas d{{[0-9]+}}, d{{[0-9]+}}
  %1 = call <1 x i64> @llvm.arm64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvta_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvta_u64_f64
; CHECK: fcvtau d{{[0-9]+}}, d{{[0-9]+}}
  %1 = call <1 x i64> @llvm.arm64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
  ret <1 x i64> %1
}

define <1 x double> @test_vcvt_f64_s64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_f64_s64
; CHECK: scvtf d{{[0-9]+}}, {{[xd][0-9]+}}
  %1 = sitofp <1 x i64> %a to <1 x double>
  ret <1 x double> %1
}

define <1 x double> @test_vcvt_f64_u64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_f64_u64
; CHECK: ucvtf d{{[0-9]+}}, {{[xd][0-9]+}}
  %1 = uitofp <1 x i64> %a to <1 x double>
  ret <1 x double> %1
}

declare <1 x i64> @llvm.arm64.neon.fcvtau.v1i64.v1f64(<1 x double>)
declare <1 x i64> @llvm.arm64.neon.fcvtas.v1i64.v1f64(<1 x double>)
declare <1 x i64> @llvm.arm64.neon.fcvtmu.v1i64.v1f64(<1 x double>)
declare <1 x i64> @llvm.arm64.neon.fcvtms.v1i64.v1f64(<1 x double>)
declare <1 x i64> @llvm.arm64.neon.fcvtpu.v1i64.v1f64(<1 x double>)
declare <1 x i64> @llvm.arm64.neon.fcvtps.v1i64.v1f64(<1 x double>)
declare <1 x i64> @llvm.arm64.neon.fcvtnu.v1i64.v1f64(<1 x double>)
declare <1 x i64> @llvm.arm64.neon.fcvtns.v1i64.v1f64(<1 x double>)
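
; Rounding of a <1 x double> value is expected to select the scalar frint*
; instruction for the corresponding rounding mode.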
define <1 x double> @test_vrndn_f64(<1 x double> %a) {
; CHECK-LABEL: test_vrndn_f64
; CHECK: frintn d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.arm64.neon.frintn.v1f64(<1 x double> %a)
  ret <1 x double> %1
}

define <1 x double> @test_vrnda_f64(<1 x double> %a) {
; CHECK-LABEL: test_vrnda_f64
; CHECK: frinta d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.round.v1f64(<1 x double> %a)
  ret <1 x double> %1
}

define <1 x double> @test_vrndp_f64(<1 x double> %a) {
; CHECK-LABEL: test_vrndp_f64
; CHECK: frintp d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
  ret <1 x double> %1
}

define <1 x double> @test_vrndm_f64(<1 x double> %a) {
; CHECK-LABEL: test_vrndm_f64
; CHECK: frintm d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
  ret <1 x double> %1
}

define <1 x double> @test_vrndx_f64(<1 x double> %a) {
; CHECK-LABEL: test_vrndx_f64
; CHECK: frintx d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
  ret <1 x double> %1
}

define <1 x double> @test_vrnd_f64(<1 x double> %a) {
; CHECK-LABEL: test_vrnd_f64
; CHECK: frintz d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
  ret <1 x double> %1
}

define <1 x double> @test_vrndi_f64(<1 x double> %a) {
; CHECK-LABEL: test_vrndi_f64
; CHECK: frinti d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
  ret <1 x double> %1
}

declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
declare <1 x double> @llvm.rint.v1f64(<1 x double>)
declare <1 x double> @llvm.floor.v1f64(<1 x double>)
declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
declare <1 x double> @llvm.round.v1f64(<1 x double>)
declare <1 x double> @llvm.arm64.neon.frintn.v1f64(<1 x double>)
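
; Reciprocal and reciprocal-square-root estimates/steps and fsqrt on
; <1 x double> should likewise use the scalar d-register instructions.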
define <1 x double> @test_vrsqrte_f64(<1 x double> %a) {
; CHECK-LABEL: test_vrsqrte_f64
; CHECK: frsqrte d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.arm64.neon.frsqrte.v1f64(<1 x double> %a)
  ret <1 x double> %1
}

define <1 x double> @test_vrecpe_f64(<1 x double> %a) {
; CHECK-LABEL: test_vrecpe_f64
; CHECK: frecpe d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.arm64.neon.frecpe.v1f64(<1 x double> %a)
  ret <1 x double> %1
}

define <1 x double> @test_vsqrt_f64(<1 x double> %a) {
; CHECK-LABEL: test_vsqrt_f64
; CHECK: fsqrt d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
  ret <1 x double> %1
}

define <1 x double> @test_vrecps_f64(<1 x double> %a, <1 x double> %b) {
; CHECK-LABEL: test_vrecps_f64
; CHECK: frecps d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.arm64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
  ret <1 x double> %1
}

define <1 x double> @test_vrsqrts_f64(<1 x double> %a, <1 x double> %b) {
; CHECK-LABEL: test_vrsqrts_f64
; CHECK: frsqrts d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
  %1 = tail call <1 x double> @llvm.arm64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
  ret <1 x double> %1
}

declare <1 x double> @llvm.arm64.neon.frsqrts.v1f64(<1 x double>, <1 x double>)
declare <1 x double> @llvm.arm64.neon.frecps.v1f64(<1 x double>, <1 x double>)
declare <1 x double> @llvm.sqrt.v1f64(<1 x double>)
declare <1 x double> @llvm.arm64.neon.frecpe.v1f64(<1 x double>)
declare <1 x double> @llvm.arm64.neon.frsqrte.v1f64(<1 x double>)
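
; Long add-across-vector from <2 x i32>: the saddlv/uaddlv intrinsics are
; matched here as saddlp/uaddlp producing a .1d result.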
define i64 @test_vaddlv_s32(<2 x i32> %a) {
; CHECK-LABEL: test_vaddlv_s32
; CHECK: saddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
  %1 = tail call i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32> %a)
  ret i64 %1
}

define i64 @test_vaddlv_u32(<2 x i32> %a) {
; CHECK-LABEL: test_vaddlv_u32
; CHECK: uaddlp {{v[0-9]+}}.1d, {{v[0-9]+}}.2s
  %1 = tail call i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
  ret i64 %1
}

declare i64 @llvm.arm64.neon.saddlv.i64.v2i32(<2 x i32>)
declare i64 @llvm.arm64.neon.uaddlv.i64.v2i32(<2 x i32>)