; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
; arm64 has a separate copy of the parts that aren't pure intrinsic wrangling.

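; This file verifies that vector shifts by a constant immediate are selected
; to the expected AArch64 SIMD instructions, whether they are written as
; plain IR shifts of a constant splat or as calls to the NEON shift
; intrinsics.
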
define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
; CHECK: test_vshr_n_s8
; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
; CHECK: test_vshr_n_s16
; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
; CHECK: test_vshr_n_s32
; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vshrq_n_s8
; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vshrq_n_s16
; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vshrq_n_s32
; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vshrq_n_s64
; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
; CHECK: test_vshr_n_u8
; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
; CHECK: test_vshr_n_u16
; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
; CHECK: test_vshr_n_u32
; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vshrq_n_u8
; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vshrq_n_u16
; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vshrq_n_u32
; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vshrq_n_u64
; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

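; A shift right by an immediate whose result feeds an add should fuse into a
; single shift-right-accumulate instruction (ssra/usra).
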
define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_s8
; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_s16
; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_s32
; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_s8
; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_s16
; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_s32
; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_s64
; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_u8
; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_u16
; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_u32
; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_u8
; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_u16
; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_u32
; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_u64
; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

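; Rounding shifts have no plain IR equivalent, so they are written via the
; vsrshr/vurshr intrinsics and should select srshr/urshr.
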
define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) {
; CHECK: test_vrshr_n_s8
; CHECK: srshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %a, i32 3)
  ret <8 x i8> %vrshr_n
}

define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) {
; CHECK: test_vrshr_n_s16
; CHECK: srshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %a, i32 3)
  ret <4 x i16> %vrshr_n
}

define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) {
; CHECK: test_vrshr_n_s32
; CHECK: srshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %a, i32 3)
  ret <2 x i32> %vrshr_n
}

define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vrshrq_n_s8
; CHECK: srshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %a, i32 3)
  ret <16 x i8> %vrshr_n
}

define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vrshrq_n_s16
; CHECK: srshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %a, i32 3)
  ret <8 x i16> %vrshr_n
}

define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vrshrq_n_s32
; CHECK: srshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %a, i32 3)
  ret <4 x i32> %vrshr_n
}

define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vrshrq_n_s64
; CHECK: srshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %a, i32 3)
  ret <2 x i64> %vrshr_n
}

define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) {
; CHECK: test_vrshr_n_u8
; CHECK: urshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vrshr_n = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %a, i32 3)
  ret <8 x i8> %vrshr_n
}

define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) {
; CHECK: test_vrshr_n_u16
; CHECK: urshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vrshr_n = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %a, i32 3)
  ret <4 x i16> %vrshr_n
}

define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) {
; CHECK: test_vrshr_n_u32
; CHECK: urshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vrshr_n = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %a, i32 3)
  ret <2 x i32> %vrshr_n
}

define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vrshrq_n_u8
; CHECK: urshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vrshr_n = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %a, i32 3)
  ret <16 x i8> %vrshr_n
}

define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vrshrq_n_u16
; CHECK: urshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vrshr_n = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %a, i32 3)
  ret <8 x i16> %vrshr_n
}

define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vrshrq_n_u32
; CHECK: urshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vrshr_n = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %a, i32 3)
  ret <4 x i32> %vrshr_n
}

define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vrshrq_n_u64
; CHECK: urshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vrshr_n = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %a, i32 3)
  ret <2 x i64> %vrshr_n
}

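; As with the plain shifts, a rounding shift whose result feeds an add should
; fuse into srsra/ursra.
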
define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vrsra_n_s8
; CHECK: srsra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %1 = tail call <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8> %b, i32 3)
  %vrsra_n = add <8 x i8> %1, %a
  ret <8 x i8> %vrsra_n
}

define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vrsra_n_s16
; CHECK: srsra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %1 = tail call <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16> %b, i32 3)
  %vrsra_n = add <4 x i16> %1, %a
  ret <4 x i16> %vrsra_n
}

define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vrsra_n_s32
; CHECK: srsra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %1 = tail call <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32> %b, i32 3)
  %vrsra_n = add <2 x i32> %1, %a
  ret <2 x i32> %vrsra_n
}

define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vrsraq_n_s8
; CHECK: srsra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %1 = tail call <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8> %b, i32 3)
  %vrsra_n = add <16 x i8> %1, %a
  ret <16 x i8> %vrsra_n
}

define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vrsraq_n_s16
; CHECK: srsra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %1 = tail call <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16> %b, i32 3)
  %vrsra_n = add <8 x i16> %1, %a
  ret <8 x i16> %vrsra_n
}

define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vrsraq_n_s32
; CHECK: srsra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %1 = tail call <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32> %b, i32 3)
  %vrsra_n = add <4 x i32> %1, %a
  ret <4 x i32> %vrsra_n
}

define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vrsraq_n_s64
; CHECK: srsra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %1 = tail call <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64> %b, i32 3)
  %vrsra_n = add <2 x i64> %1, %a
  ret <2 x i64> %vrsra_n
}

define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vrsra_n_u8
; CHECK: ursra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %1 = tail call <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8> %b, i32 3)
  %vrsra_n = add <8 x i8> %1, %a
  ret <8 x i8> %vrsra_n
}

define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vrsra_n_u16
; CHECK: ursra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %1 = tail call <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16> %b, i32 3)
  %vrsra_n = add <4 x i16> %1, %a
  ret <4 x i16> %vrsra_n
}

define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vrsra_n_u32
; CHECK: ursra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %1 = tail call <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32> %b, i32 3)
  %vrsra_n = add <2 x i32> %1, %a
  ret <2 x i32> %vrsra_n
}

define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vrsraq_n_u8
; CHECK: ursra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %1 = tail call <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8> %b, i32 3)
  %vrsra_n = add <16 x i8> %1, %a
  ret <16 x i8> %vrsra_n
}

define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vrsraq_n_u16
; CHECK: ursra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %1 = tail call <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16> %b, i32 3)
  %vrsra_n = add <8 x i16> %1, %a
  ret <8 x i16> %vrsra_n
}

define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vrsraq_n_u32
; CHECK: ursra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %1 = tail call <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32> %b, i32 3)
  %vrsra_n = add <4 x i32> %1, %a
  ret <4 x i32> %vrsra_n
}

define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vrsraq_n_u64
; CHECK: ursra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %1 = tail call <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64> %b, i32 3)
  %vrsra_n = add <2 x i64> %1, %a
  ret <2 x i64> %vrsra_n
}

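; Shift-right-and-insert is only expressible through the vsri intrinsic and
; should select sri; the polynomial variants reuse the same integer types.
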
define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsri_n_s8
; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsri_n
}

define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsri_n_s16
; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3)
  ret <4 x i16> %vsri
}

define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsri_n_s32
; CHECK: sri {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsri = tail call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3)
  ret <2 x i32> %vsri
}

define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsriq_n_s8
; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsri_n
}

define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsriq_n_s16
; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
  ret <8 x i16> %vsri
}

define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsriq_n_s32
; CHECK: sri {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsri = tail call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3)
  ret <4 x i32> %vsri
}

define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsriq_n_s64
; CHECK: sri {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsri = tail call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3)
  ret <2 x i64> %vsri
}

define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsri_n_p8
; CHECK: sri {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsri_n = tail call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsri_n
}

define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsri_n_p16
; CHECK: sri {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
  %vsri = tail call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15)
  ret <4 x i16> %vsri
}

define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsriq_n_p8
; CHECK: sri {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsri_n = tail call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsri_n
}

define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsriq_n_p16
; CHECK: sri {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
  %vsri = tail call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15)
  ret <8 x i16> %vsri
}

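; Shift-left-and-insert, via the vsli intrinsic, should likewise select sli.
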
define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsli_n_s8
; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsli_n
}

define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsli_n_s16
; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 3)
  ret <4 x i16> %vsli
}

define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsli_n_s32
; CHECK: sli {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsli = tail call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %a, <2 x i32> %b, i32 3)
  ret <2 x i32> %vsli
}

define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsliq_n_s8
; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsli_n
}

define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsliq_n_s16
; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3)
  ret <8 x i16> %vsli
}

define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsliq_n_s32
; CHECK: sli {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsli = tail call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 3)
  ret <4 x i32> %vsli
}

define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsliq_n_s64
; CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsli = tail call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %a, <2 x i64> %b, i32 3)
  ret <2 x i64> %vsli
}

define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsli_n_p8
; CHECK: sli {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsli_n = tail call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3)
  ret <8 x i8> %vsli_n
}

define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsli_n_p16
; CHECK: sli {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #15
  %vsli = tail call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %a, <4 x i16> %b, i32 15)
  ret <4 x i16> %vsli
}

define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsliq_n_p8
; CHECK: sli {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsli_n = tail call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3)
  ret <16 x i8> %vsli_n
}

define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsliq_n_p16
; CHECK: sli {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #15
  %vsli = tail call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 15)
  ret <8 x i16> %vsli
}

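; Saturating shifts left are written with the shift amount as a constant
; splat operand of the vqshifts/vqshiftu intrinsics and should select the
; immediate forms of sqshl/uqshl.
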
define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) {
; CHECK: test_vqshl_n_s8
; CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vqshl = tail call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <8 x i8> %vqshl
}

define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) {
; CHECK: test_vqshl_n_s16
; CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
  ret <4 x i16> %vqshl
}

define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) {
; CHECK: test_vqshl_n_s32
; CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>)
  ret <2 x i32> %vqshl
}

define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
; CHECK: test_vqshlq_n_s8
; CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <16 x i8> %vqshl_n
}

define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
; CHECK: test_vqshlq_n_s16
; CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  ret <8 x i16> %vqshl
}

define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
; CHECK: test_vqshlq_n_s32
; CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %vqshl
}

define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) {
; CHECK: test_vqshlq_n_s64
; CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>)
  ret <2 x i64> %vqshl
}

define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) {
; CHECK: test_vqshl_n_u8
; CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vqshl_n = tail call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <8 x i8> %vqshl_n
}

define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) {
; CHECK: test_vqshl_n_u16
; CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vqshl = tail call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
  ret <4 x i16> %vqshl
}

define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) {
; CHECK: test_vqshl_n_u32
; CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vqshl = tail call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> <i32 3, i32 3>)
  ret <2 x i32> %vqshl
}

define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
; CHECK: test_vqshlq_n_u8
; CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vqshl_n = tail call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
  ret <16 x i8> %vqshl_n
}

define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
; CHECK: test_vqshlq_n_u16
; CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vqshl = tail call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  ret <8 x i16> %vqshl
}

define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
; CHECK: test_vqshlq_n_u32
; CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vqshl = tail call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %vqshl
}

define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) {
; CHECK: test_vqshlq_n_u64
; CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vqshl = tail call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> <i64 3, i64 3>)
  ret <2 x i64> %vqshl
}

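; The signed-to-unsigned saturating shift left uses the vsqshlu intrinsic and
; should select sqshlu.
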
define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) {
; CHECK: test_vqshlu_n_s8
; CHECK: sqshlu {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vqshlu = tail call <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8> %a, i32 3)
  ret <8 x i8> %vqshlu
}

define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) {
; CHECK: test_vqshlu_n_s16
; CHECK: sqshlu {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vqshlu = tail call <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16> %a, i32 3)
  ret <4 x i16> %vqshlu
}

define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) {
; CHECK: test_vqshlu_n_s32
; CHECK: sqshlu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vqshlu = tail call <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32> %a, i32 3)
  ret <2 x i32> %vqshlu
}

define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) {
; CHECK: test_vqshluq_n_s8
; CHECK: sqshlu {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vqshlu = tail call <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8> %a, i32 3)
  ret <16 x i8> %vqshlu
}

define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) {
; CHECK: test_vqshluq_n_s16
; CHECK: sqshlu {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vqshlu = tail call <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16> %a, i32 3)
  ret <8 x i16> %vqshlu
}

define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) {
; CHECK: test_vqshluq_n_s32
; CHECK: sqshlu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vqshlu = tail call <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32> %a, i32 3)
  ret <4 x i32> %vqshlu
}

define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) {
; CHECK: test_vqshluq_n_s64
; CHECK: sqshlu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vqshlu = tail call <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64> %a, i32 3)
  ret <2 x i64> %vqshlu
}

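; A shift right by an immediate followed by truncation to the half-width
; element type should select a narrowing shift (shrn). Arithmetic and logical
; shifts map to the same instruction because the truncation discards the bits
; in which they differ.
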
define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vshrn_n_s16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vshrn_n_s32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vshrn_n_s64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vshrn_n_u16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vshrn_n_u32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vshrn_n_u64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

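; The _high variants place the narrowed result in the upper half of a full
; 128-bit register. The concatenation is written as <1 x i64> bitcasts joined
; by a shufflevector and should select the second-half form (shrn2).
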
define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_s16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_s32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_s64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_u16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_u32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_u64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

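; The saturating and rounding narrowing shifts below go through dedicated
; intrinsics and should select the matching instruction (sqshrun, rshrn,
; sqrshrun, sqshrn, uqshrn, sqrshrn, uqrshrn); their _high variants use the
; same concatenation pattern as shrn2.
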
define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) {
; CHECK: test_vqshrun_n_s16
; CHECK: sqshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqshrun
}

define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) {
; CHECK: test_vqshrun_n_s32
; CHECK: sqshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqshrun
}

define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) {
; CHECK: test_vqshrun_n_s64
; CHECK: sqshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqshrun
}

define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrun_high_n_s16
; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrun_high_n_s32
; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrun_high_n_s64
; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vrshrn_n_s16
; CHECK: rshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vrshrn
}

define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vrshrn_n_s32
; CHECK: rshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vrshrn
}

define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vrshrn_n_s64
; CHECK: rshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vrshrn
}

define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vrshrn_high_n_s16
; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vrshrn_high_n_s32
; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vrshrn_high_n_s64
; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) {
; CHECK: test_vqrshrun_n_s16
; CHECK: sqrshrun {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqrshrun
}

define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) {
; CHECK: test_vqrshrun_n_s32
; CHECK: sqrshrun {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqrshrun
}

define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) {
; CHECK: test_vqrshrun_n_s64
; CHECK: sqrshrun {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqrshrun
}

define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrun_high_n_s16
; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrun_high_n_s32
; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrun_high_n_s64
; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vqshrn_n_s16
; CHECK: sqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqshrn
}

define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vqshrn_n_s32
; CHECK: sqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqshrn
}

define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vqshrn_n_s64
; CHECK: sqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqshrn
}

define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vqshrn_n_u16
; CHECK: uqshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqshrn
}

define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vqshrn_n_u32
; CHECK: uqshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqshrn
}

define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vqshrn_n_u64
; CHECK: uqshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqshrn
}

define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_s16
; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_s32
; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_s64
; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_u16
; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_u32
; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_u64
; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vqrshrn_n_s16
; CHECK: sqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqrshrn
}

define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vqrshrn_n_s32
; CHECK: sqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqrshrn
}

define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vqrshrn_n_s64
; CHECK: sqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqrshrn
}

define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vqrshrn_n_u16
; CHECK: uqrshrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %a, i32 3)
  ret <8 x i8> %vqrshrn
}

define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vqrshrn_n_u32
; CHECK: uqrshrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %a, i32 9)
  ret <4 x i16> %vqrshrn
}

define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vqrshrn_n_u64
; CHECK: uqrshrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %a, i32 19)
  ret <2 x i32> %vqrshrn
}

define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_s16
; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_s32
; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_s64
; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_u16
; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_u32
; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_u64
; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

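; Fixed-point conversions take the number of fractional bits as the
; immediate: scvtf/ucvtf for the int-to-float direction, fcvtzs/fcvtzu for
; float-to-int.
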
define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) {
; CHECK: test_vcvt_n_f32_s32
; CHECK: scvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
  ret <2 x float> %vcvt
}

define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) {
; CHECK: test_vcvtq_n_f32_s32
; CHECK: scvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
  ret <4 x float> %vcvt
}

define <2 x double> @test_vcvtq_n_f64_s64(<2 x i64> %a) {
; CHECK: test_vcvtq_n_f64_s64
; CHECK: scvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
  ret <2 x double> %vcvt
}

define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) {
; CHECK: test_vcvt_n_f32_u32
; CHECK: ucvtf {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %a, i32 31)
  ret <2 x float> %vcvt
}

define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) {
; CHECK: test_vcvtq_n_f32_u32
; CHECK: ucvtf {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %a, i32 31)
  ret <4 x float> %vcvt
}

define <2 x double> @test_vcvtq_n_f64_u64(<2 x i64> %a) {
; CHECK: test_vcvtq_n_f64_u64
; CHECK: ucvtf {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> %a, i32 50)
  ret <2 x double> %vcvt
}

define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) {
; CHECK: test_vcvt_n_s32_f32
; CHECK: fcvtzs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a, i32 31)
  ret <2 x i32> %vcvt
}

define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) {
; CHECK: test_vcvtq_n_s32_f32
; CHECK: fcvtzs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %a, i32 31)
  ret <4 x i32> %vcvt
}

define <2 x i64> @test_vcvtq_n_s64_f64(<2 x double> %a) {
; CHECK: test_vcvtq_n_s64_f64
; CHECK: fcvtzs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> %a, i32 50)
  ret <2 x i64> %vcvt
}

define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) {
; CHECK: test_vcvt_n_u32_f32
; CHECK: fcvtzu {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #31
  %vcvt = tail call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %a, i32 31)
  ret <2 x i32> %vcvt
}

define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) {
; CHECK: test_vcvtq_n_u32_f32
; CHECK: fcvtzu {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #31
  %vcvt = tail call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %a, i32 31)
  ret <4 x i32> %vcvt
}

define <2 x i64> @test_vcvtq_n_u64_f64(<2 x double> %a) {
; CHECK: test_vcvtq_n_u64_f64
; CHECK: fcvtzu {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #50
  %vcvt = tail call <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> %a, i32 50)
  ret <2 x i64> %vcvt
}

declare <8 x i8> @llvm.aarch64.neon.vsrshr.v8i8(<8 x i8>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsrshr.v4i16(<4 x i16>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsrshr.v2i32(<2 x i32>, i32)
declare <16 x i8> @llvm.aarch64.neon.vsrshr.v16i8(<16 x i8>, i32)
declare <8 x i16> @llvm.aarch64.neon.vsrshr.v8i16(<8 x i16>, i32)
declare <4 x i32> @llvm.aarch64.neon.vsrshr.v4i32(<4 x i32>, i32)
declare <2 x i64> @llvm.aarch64.neon.vsrshr.v2i64(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vurshr.v8i8(<8 x i8>, i32)
declare <4 x i16> @llvm.aarch64.neon.vurshr.v4i16(<4 x i16>, i32)
declare <2 x i32> @llvm.aarch64.neon.vurshr.v2i32(<2 x i32>, i32)
declare <16 x i8> @llvm.aarch64.neon.vurshr.v16i8(<16 x i8>, i32)
declare <8 x i16> @llvm.aarch64.neon.vurshr.v8i16(<8 x i16>, i32)
declare <4 x i32> @llvm.aarch64.neon.vurshr.v4i32(<4 x i32>, i32)
declare <2 x i64> @llvm.aarch64.neon.vurshr.v2i64(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8>, <8 x i8>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16>, <4 x i16>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32>, <2 x i32>, i32)
declare <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8>, <16 x i8>, i32)
declare <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16>, <8 x i16>, i32)
declare <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32>, <4 x i32>, i32)
declare <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64>, <2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32)
declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32)
declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32)
declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32)
declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsqshlu.v8i8(<8 x i8>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsqshlu.v4i16(<4 x i16>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsqshlu.v2i32(<2 x i32>, i32)
declare <16 x i8> @llvm.aarch64.neon.vsqshlu.v16i8(<16 x i8>, i32)
declare <8 x i16> @llvm.aarch64.neon.vsqshlu.v8i16(<8 x i16>, i32)
declare <4 x i32> @llvm.aarch64.neon.vsqshlu.v4i32(<4 x i32>, i32)
declare <2 x i64> @llvm.aarch64.neon.vsqshlu.v2i64(<2 x i64>, i32)
declare <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
declare <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32>, <2 x i32>)
declare <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <8 x i8> @llvm.aarch64.neon.vsqshrun.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsqshrun.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsqshrun.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vrshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsqrshrun.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsqrshrun.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsqrshrun.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsqshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsqshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsqshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vuqshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vuqshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vuqshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vsqrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vsqrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vsqrshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.vuqrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.vuqrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.vuqrshrn.v2i32(<2 x i64>, i32)
declare <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
declare <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
declare <2 x double> @llvm.arm.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)
declare <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
declare <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
declare <2 x double> @llvm.arm.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
declare <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
declare <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.arm.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
declare <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
declare <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.arm.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)

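; The <1 x i64> and <1 x double> conversions operate on a single d register
; and allow the maximum fractional-bit immediate of 64.
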
define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_s64_f64
; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_u64_f64
; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_s64
; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_u64
; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

declare <1 x i64> @llvm.arm.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
declare <1 x i64> @llvm.arm.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
declare <1 x double> @llvm.arm.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
declare <1 x double> @llvm.arm.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)