1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
3 ; arm64 already has these. Essentially just a copy/paste from Clang output from
; <16 x i8> load+store must select one ld1/st1 {.16b} pair.
; Fix: literal "x" moved inside the regex block — in "[x{{[0-9]+|sp}}]" the
; "sp" alternative could never match a plain [sp] operand.
define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) {
; CHECK-LABEL: test_ldst1_v16i8:
; CHECK: ld1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
; CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
  %tmp = load <16 x i8>* %ptr
  store <16 x i8> %tmp, <16 x i8>* %ptr2
; <8 x i16> load+store must select one ld1/st1 {.8h} pair.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) {
; CHECK-LABEL: test_ldst1_v8i16:
; CHECK: ld1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
  %tmp = load <8 x i16>* %ptr
  store <8 x i16> %tmp, <8 x i16>* %ptr2
; <4 x i32> load+store must select one ld1/st1 {.4s} pair.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) {
; CHECK-LABEL: test_ldst1_v4i32:
; CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
  %tmp = load <4 x i32>* %ptr
  store <4 x i32> %tmp, <4 x i32>* %ptr2
; <2 x i64> load+store must select one ld1/st1 {.2d} pair.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) {
; CHECK-LABEL: test_ldst1_v2i64:
; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
  %tmp = load <2 x i64>* %ptr
  store <2 x i64> %tmp, <2 x i64>* %ptr2
; <8 x i8> (64-bit) load+store must select one ld1/st1 {.8b} pair.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) {
; CHECK-LABEL: test_ldst1_v8i8:
; CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
; CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
  %tmp = load <8 x i8>* %ptr
  store <8 x i8> %tmp, <8 x i8>* %ptr2
; <4 x i16> (64-bit) load+store must select one ld1/st1 {.4h} pair.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) {
; CHECK-LABEL: test_ldst1_v4i16:
; CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
; CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
  %tmp = load <4 x i16>* %ptr
  store <4 x i16> %tmp, <4 x i16>* %ptr2
; <2 x i32> (64-bit) load+store must select one ld1/st1 {.2s} pair.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) {
; CHECK-LABEL: test_ldst1_v2i32:
; CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
  %tmp = load <2 x i32>* %ptr
  store <2 x i32> %tmp, <2 x i32>* %ptr2
; <1 x i64> load+store must select one ld1/st1 {.1d} pair.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define void @test_ldst1_v1i64(<1 x i64>* %ptr, <1 x i64>* %ptr2) {
; CHECK-LABEL: test_ldst1_v1i64:
; CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
  %tmp = load <1 x i64>* %ptr
  store <1 x i64> %tmp, <1 x i64>* %ptr2
; Aggregate return types mirroring arm_neon.h's intNxMxK_t / floatNxMxK_t
; structs: each is a single [K x vector] array, returned by the vldK tests.
%struct.int8x16x2_t = type { [2 x <16 x i8>] }
%struct.int16x8x2_t = type { [2 x <8 x i16>] }
%struct.int32x4x2_t = type { [2 x <4 x i32>] }
%struct.int64x2x2_t = type { [2 x <2 x i64>] }
%struct.float32x4x2_t = type { [2 x <4 x float>] }
%struct.float64x2x2_t = type { [2 x <2 x double>] }
%struct.int8x8x2_t = type { [2 x <8 x i8>] }
%struct.int16x4x2_t = type { [2 x <4 x i16>] }
%struct.int32x2x2_t = type { [2 x <2 x i32>] }
%struct.int64x1x2_t = type { [2 x <1 x i64>] }
%struct.float32x2x2_t = type { [2 x <2 x float>] }
%struct.float64x1x2_t = type { [2 x <1 x double>] }
%struct.int8x16x3_t = type { [3 x <16 x i8>] }
%struct.int16x8x3_t = type { [3 x <8 x i16>] }
%struct.int32x4x3_t = type { [3 x <4 x i32>] }
%struct.int64x2x3_t = type { [3 x <2 x i64>] }
%struct.float32x4x3_t = type { [3 x <4 x float>] }
%struct.float64x2x3_t = type { [3 x <2 x double>] }
%struct.int8x8x3_t = type { [3 x <8 x i8>] }
%struct.int16x4x3_t = type { [3 x <4 x i16>] }
%struct.int32x2x3_t = type { [3 x <2 x i32>] }
%struct.int64x1x3_t = type { [3 x <1 x i64>] }
%struct.float32x2x3_t = type { [3 x <2 x float>] }
%struct.float64x1x3_t = type { [3 x <1 x double>] }
%struct.int8x16x4_t = type { [4 x <16 x i8>] }
%struct.int16x8x4_t = type { [4 x <8 x i16>] }
%struct.int32x4x4_t = type { [4 x <4 x i32>] }
%struct.int64x2x4_t = type { [4 x <2 x i64>] }
%struct.float32x4x4_t = type { [4 x <4 x float>] }
%struct.float64x2x4_t = type { [4 x <2 x double>] }
%struct.int8x8x4_t = type { [4 x <8 x i8>] }
%struct.int16x4x4_t = type { [4 x <4 x i16>] }
%struct.int32x2x4_t = type { [4 x <2 x i32>] }
%struct.int64x1x4_t = type { [4 x <1 x i64>] }
%struct.float32x2x4_t = type { [4 x <2 x float>] }
%struct.float64x1x4_t = type { [4 x <1 x double>] }
; vld1q of i8 data must select ld1 {.16b}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <16 x i8> @test_vld1q_s8(i8* readonly %a) {
; CHECK-LABEL: test_vld1q_s8
; CHECK: ld1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
  %vld1 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %a, i32 1)
; vld1q of i16 data must select ld1 {.8h}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <8 x i16> @test_vld1q_s16(i16* readonly %a) {
; CHECK-LABEL: test_vld1q_s16
; CHECK: ld1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
  %1 = bitcast i16* %a to i8*
  %vld1 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %1, i32 2)
; vld1q of i32 data must select ld1 {.4s}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <4 x i32> @test_vld1q_s32(i32* readonly %a) {
; CHECK-LABEL: test_vld1q_s32
; CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
  %1 = bitcast i32* %a to i8*
  %vld1 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %1, i32 4)
; vld1q of i64 data must select ld1 {.2d}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <2 x i64> @test_vld1q_s64(i64* readonly %a) {
; CHECK-LABEL: test_vld1q_s64
; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
  %1 = bitcast i64* %a to i8*
  %vld1 = tail call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %1, i32 8)
; vld1q of float data must select ld1 {.4s}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <4 x float> @test_vld1q_f32(float* readonly %a) {
; CHECK-LABEL: test_vld1q_f32
; CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
  %1 = bitcast float* %a to i8*
  %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %1, i32 4)
  ret <4 x float> %vld1
; vld1q of double data must select ld1 {.2d}.  This test already scopes the
; address-operand regex as [{{x[0-9]+|sp}}] — the form that lets either an
; x-register or sp base match.
define <2 x double> @test_vld1q_f64(double* readonly %a) {
; CHECK-LABEL: test_vld1q_f64
; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
  %1 = bitcast double* %a to i8*
  %vld1 = tail call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %1, i32 8)
  ret <2 x double> %vld1
; 64-bit vld1 of i8 data must select ld1 {.8b}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <8 x i8> @test_vld1_s8(i8* readonly %a) {
; CHECK-LABEL: test_vld1_s8
; CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
; 64-bit vld1 of i16 data must select ld1 {.4h}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <4 x i16> @test_vld1_s16(i16* readonly %a) {
; CHECK-LABEL: test_vld1_s16
; CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
  %1 = bitcast i16* %a to i8*
  %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
; 64-bit vld1 of i32 data must select ld1 {.2s}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <2 x i32> @test_vld1_s32(i32* readonly %a) {
; CHECK-LABEL: test_vld1_s32
; CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
  %1 = bitcast i32* %a to i8*
  %vld1 = tail call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %1, i32 4)
; 64-bit vld1 of i64 data must select ld1 {.1d}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <1 x i64> @test_vld1_s64(i64* readonly %a) {
; CHECK-LABEL: test_vld1_s64
; CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
  %1 = bitcast i64* %a to i8*
  %vld1 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %1, i32 8)
; 64-bit vld1 of float data must select ld1 {.2s}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <2 x float> @test_vld1_f32(float* readonly %a) {
; CHECK-LABEL: test_vld1_f32
; CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
  %1 = bitcast float* %a to i8*
  %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %1, i32 4)
  ret <2 x float> %vld1
; 64-bit vld1 of double data must select ld1 {.1d}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <1 x double> @test_vld1_f64(double* readonly %a) {
; CHECK-LABEL: test_vld1_f64
; CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
  %1 = bitcast double* %a to i8*
  %vld1 = tail call <1 x double> @llvm.arm.neon.vld1.v1f64(i8* %1, i32 8)
  ret <1 x double> %vld1
; poly8 variant — same lowering as test_vld1_s8 (ld1 {.8b}).
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <8 x i8> @test_vld1_p8(i8* readonly %a) {
; CHECK-LABEL: test_vld1_p8
; CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
; poly16 variant — same lowering as test_vld1_s16 (ld1 {.4h}).
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define <4 x i16> @test_vld1_p16(i16* readonly %a) {
; CHECK-LABEL: test_vld1_p16
; CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
  %1 = bitcast i16* %a to i8*
  %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
; vld2q of i8 data must select a two-register ld2 {.16b, .16b}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int8x16x2_t @test_vld2q_s8(i8* readonly %a) {
; CHECK-LABEL: test_vld2q_s8
; CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
  %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %a, i32 1)
  %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2.fca.1.extract, 0, 1
  ret %struct.int8x16x2_t %.fca.0.1.insert
; vld2q of i16 data must select ld2 {.8h, .8h}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int16x8x2_t @test_vld2q_s16(i16* readonly %a) {
; CHECK-LABEL: test_vld2q_s16
; CHECK: ld2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
  %1 = bitcast i16* %a to i8*
  %vld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8* %1, i32 2)
  %vld2.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2.fca.1.extract, 0, 1
  ret %struct.int16x8x2_t %.fca.0.1.insert
; vld2q of i32 data must select ld2 {.4s, .4s}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int32x4x2_t @test_vld2q_s32(i32* readonly %a) {
; CHECK-LABEL: test_vld2q_s32
; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
  %1 = bitcast i32* %a to i8*
  %vld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %1, i32 4)
  %vld2.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2.fca.1.extract, 0, 1
  ret %struct.int32x4x2_t %.fca.0.1.insert
; vld2q of i64 data must select ld2 {.2d, .2d}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int64x2x2_t @test_vld2q_s64(i64* readonly %a) {
; CHECK-LABEL: test_vld2q_s64
; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
  %1 = bitcast i64* %a to i8*
  %vld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8* %1, i32 8)
  %vld2.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2.fca.1.extract, 0, 1
  ret %struct.int64x2x2_t %.fca.0.1.insert
; vld2q of float data must select ld2 {.4s, .4s}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.float32x4x2_t @test_vld2q_f32(float* readonly %a) {
; CHECK-LABEL: test_vld2q_f32
; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
  %1 = bitcast float* %a to i8*
  %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
  %vld2.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2.fca.1.extract, 0, 1
  ret %struct.float32x4x2_t %.fca.0.1.insert
; vld2q of double data must select ld2 {.2d, .2d}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.float64x2x2_t @test_vld2q_f64(double* readonly %a) {
; CHECK-LABEL: test_vld2q_f64
; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
  %1 = bitcast double* %a to i8*
  %vld2 = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8* %1, i32 8)
  %vld2.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2.fca.1.extract, 0, 1
  ret %struct.float64x2x2_t %.fca.0.1.insert
; 64-bit vld2 of i8 data must select ld2 {.8b, .8b}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int8x8x2_t @test_vld2_s8(i8* readonly %a) {
; CHECK-LABEL: test_vld2_s8
; CHECK: ld2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
  %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %a, i32 1)
  %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2.fca.1.extract, 0, 1
  ret %struct.int8x8x2_t %.fca.0.1.insert
; 64-bit vld2 of i16 data must select ld2 {.4h, .4h}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int16x4x2_t @test_vld2_s16(i16* readonly %a) {
; CHECK-LABEL: test_vld2_s16
; CHECK: ld2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
  %1 = bitcast i16* %a to i8*
  %vld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %1, i32 2)
  %vld2.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2.fca.1.extract, 0, 1
  ret %struct.int16x4x2_t %.fca.0.1.insert
; 64-bit vld2 of i32 data must select ld2 {.2s, .2s}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int32x2x2_t @test_vld2_s32(i32* readonly %a) {
; CHECK-LABEL: test_vld2_s32
; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
  %1 = bitcast i32* %a to i8*
  %vld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %1, i32 4)
  %vld2.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2.fca.1.extract, 0, 1
  ret %struct.int32x2x2_t %.fca.0.1.insert
; vld2 of <1 x i64>: per the existing CHECK this is expected to lower to a
; two-register ld1 {.1d, .1d} rather than an ld2.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int64x1x2_t @test_vld2_s64(i64* readonly %a) {
; CHECK-LABEL: test_vld2_s64
; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
  %1 = bitcast i64* %a to i8*
  %vld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %1, i32 8)
  %vld2.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2.fca.1.extract, 0, 1
  ret %struct.int64x1x2_t %.fca.0.1.insert
; 64-bit vld2 of float data must select ld2 {.2s, .2s}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.float32x2x2_t @test_vld2_f32(float* readonly %a) {
; CHECK-LABEL: test_vld2_f32
; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
  %1 = bitcast float* %a to i8*
  %vld2 = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %1, i32 4)
  %vld2.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2.fca.1.extract, 0, 1
  ret %struct.float32x2x2_t %.fca.0.1.insert
; vld2 of <1 x double>: per the existing CHECK this is expected to lower to a
; two-register ld1 {.1d, .1d} rather than an ld2.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.float64x1x2_t @test_vld2_f64(double* readonly %a) {
; CHECK-LABEL: test_vld2_f64
; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
  %1 = bitcast double* %a to i8*
  %vld2 = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %1, i32 8)
  %vld2.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 1
  %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2.fca.1.extract, 0, 1
  ret %struct.float64x1x2_t %.fca.0.1.insert
; vld3q of i8 data must select a three-register ld3 {.16b x3}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int8x16x3_t @test_vld3q_s8(i8* readonly %a) {
; CHECK-LABEL: test_vld3q_s8
; CHECK: ld3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
  %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %a, i32 1)
  %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3.fca.2.extract, 0, 2
  ret %struct.int8x16x3_t %.fca.0.2.insert
; vld3q of i16 data must select ld3 {.8h x3}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int16x8x3_t @test_vld3q_s16(i16* readonly %a) {
; CHECK-LABEL: test_vld3q_s16
; CHECK: ld3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
  %1 = bitcast i16* %a to i8*
  %vld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8* %1, i32 2)
  %vld3.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3.fca.2.extract, 0, 2
  ret %struct.int16x8x3_t %.fca.0.2.insert
; vld3q of i32 data must select ld3 {.4s x3}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int32x4x3_t @test_vld3q_s32(i32* readonly %a) {
; CHECK-LABEL: test_vld3q_s32
; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
  %1 = bitcast i32* %a to i8*
  %vld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %1, i32 4)
  %vld3.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3.fca.2.extract, 0, 2
  ret %struct.int32x4x3_t %.fca.0.2.insert
; vld3q of i64 data must select ld3 {.2d x3}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int64x2x3_t @test_vld3q_s64(i64* readonly %a) {
; CHECK-LABEL: test_vld3q_s64
; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
  %1 = bitcast i64* %a to i8*
  %vld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8* %1, i32 8)
  %vld3.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3.fca.2.extract, 0, 2
  ret %struct.int64x2x3_t %.fca.0.2.insert
; vld3q of float data must select ld3 {.4s x3}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.float32x4x3_t @test_vld3q_f32(float* readonly %a) {
; CHECK-LABEL: test_vld3q_f32
; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
  %1 = bitcast float* %a to i8*
  %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %1, i32 4)
  %vld3.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3.fca.2.extract, 0, 2
  ret %struct.float32x4x3_t %.fca.0.2.insert
; vld3q of double data must select ld3 {.2d x3}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.float64x2x3_t @test_vld3q_f64(double* readonly %a) {
; CHECK-LABEL: test_vld3q_f64
; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
  %1 = bitcast double* %a to i8*
  %vld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8* %1, i32 8)
  %vld3.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3.fca.2.extract, 0, 2
  ret %struct.float64x2x3_t %.fca.0.2.insert
; 64-bit vld3 of i8 data must select ld3 {.8b x3}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int8x8x3_t @test_vld3_s8(i8* readonly %a) {
; CHECK-LABEL: test_vld3_s8
; CHECK: ld3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
  %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %a, i32 1)
  %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3.fca.2.extract, 0, 2
  ret %struct.int8x8x3_t %.fca.0.2.insert
; 64-bit vld3 of i16 data must select ld3 {.4h x3}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int16x4x3_t @test_vld3_s16(i16* readonly %a) {
; CHECK-LABEL: test_vld3_s16
; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
  %1 = bitcast i16* %a to i8*
  %vld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %1, i32 2)
  %vld3.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3.fca.2.extract, 0, 2
  ret %struct.int16x4x3_t %.fca.0.2.insert
; 64-bit vld3 of i32 data must select ld3 {.2s x3}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int32x2x3_t @test_vld3_s32(i32* readonly %a) {
; CHECK-LABEL: test_vld3_s32
; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
  %1 = bitcast i32* %a to i8*
  %vld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8* %1, i32 4)
  %vld3.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3.fca.2.extract, 0, 2
  ret %struct.int32x2x3_t %.fca.0.2.insert
; vld3 of <1 x i64>: per the existing CHECK this is expected to lower to a
; three-register ld1 {.1d x3} rather than an ld3.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int64x1x3_t @test_vld3_s64(i64* readonly %a) {
; CHECK-LABEL: test_vld3_s64
; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
  %1 = bitcast i64* %a to i8*
  %vld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %1, i32 8)
  %vld3.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3.fca.2.extract, 0, 2
  ret %struct.int64x1x3_t %.fca.0.2.insert
; 64-bit vld3 of float data must select ld3 {.2s x3}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.float32x2x3_t @test_vld3_f32(float* readonly %a) {
; CHECK-LABEL: test_vld3_f32
; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
  %1 = bitcast float* %a to i8*
  %vld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8* %1, i32 4)
  %vld3.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3.fca.2.extract, 0, 2
  ret %struct.float32x2x3_t %.fca.0.2.insert
; vld3 of <1 x double>: per the existing CHECK this is expected to lower to a
; three-register ld1 {.1d x3} rather than an ld3.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.float64x1x3_t @test_vld3_f64(double* readonly %a) {
; CHECK-LABEL: test_vld3_f64
; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
  %1 = bitcast double* %a to i8*
  %vld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %1, i32 8)
  %vld3.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 2
  %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3.fca.2.extract, 0, 2
  ret %struct.float64x1x3_t %.fca.0.2.insert
; vld4q of i8 data must select a four-register ld4 {.16b x4}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int8x16x4_t @test_vld4q_s8(i8* readonly %a) {
; CHECK-LABEL: test_vld4q_s8
; CHECK: ld4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
  %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %a, i32 1)
  %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0
  %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
  %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
  %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
  %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4.fca.2.extract, 0, 2
  %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4.fca.3.extract, 0, 3
  ret %struct.int8x16x4_t %.fca.0.3.insert
; vld4q of i16 data must select ld4 {.8h x4}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int16x8x4_t @test_vld4q_s16(i16* readonly %a) {
; CHECK-LABEL: test_vld4q_s16
; CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
  %1 = bitcast i16* %a to i8*
  %vld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %1, i32 2)
  %vld4.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 0
  %vld4.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 1
  %vld4.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 2
  %vld4.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 3
  %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4.fca.2.extract, 0, 2
  %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4.fca.3.extract, 0, 3
  ret %struct.int16x8x4_t %.fca.0.3.insert
; vld4q of i32 data must select ld4 {.4s x4}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int32x4x4_t @test_vld4q_s32(i32* readonly %a) {
; CHECK-LABEL: test_vld4q_s32
; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
  %1 = bitcast i32* %a to i8*
  %vld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8* %1, i32 4)
  %vld4.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 0
  %vld4.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 1
  %vld4.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 2
  %vld4.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 3
  %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4.fca.2.extract, 0, 2
  %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4.fca.3.extract, 0, 3
  ret %struct.int32x4x4_t %.fca.0.3.insert
; vld4q of i64 data must select ld4 {.2d x4}.
; Fix: address regex rescoped so the "sp" base-register alternative is matchable.
define %struct.int64x2x4_t @test_vld4q_s64(i64* readonly %a) {
; CHECK-LABEL: test_vld4q_s64
; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
  %1 = bitcast i64* %a to i8*
  %vld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8* %1, i32 8)
  %vld4.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 0
  %vld4.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 1
  %vld4.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 2
  %vld4.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 3
  %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4.fca.0.extract, 0, 0
  %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4.fca.1.extract, 0, 1
  %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4.fca.2.extract, 0, 2
  %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4.fca.3.extract, 0, 3
  ret %struct.int64x2x4_t %.fca.0.3.insert
596 define %struct.float32x4x4_t @test_vld4q_f32(float* readonly %a) {
597 ; CHECK-LABEL: test_vld4q_f32
598 ; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}]
599 %1 = bitcast float* %a to i8*
600 %vld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4)
601 %vld4.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 0
602 %vld4.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 1
603 %vld4.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 2
604 %vld4.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 3
605 %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4.fca.0.extract, 0, 0
606 %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4.fca.1.extract, 0, 1
607 %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4.fca.2.extract, 0, 2
608 %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4.fca.3.extract, 0, 3
609 ret %struct.float32x4x4_t %.fca.0.3.insert
612 define %struct.float64x2x4_t @test_vld4q_f64(double* readonly %a) {
613 ; CHECK-LABEL: test_vld4q_f64
614 ; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}]
615 %1 = bitcast double* %a to i8*
616 %vld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8* %1, i32 8)
617 %vld4.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 0
618 %vld4.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 1
619 %vld4.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 2
620 %vld4.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 3
621 %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4.fca.0.extract, 0, 0
622 %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4.fca.1.extract, 0, 1
623 %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4.fca.2.extract, 0, 2
624 %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4.fca.3.extract, 0, 3
625 ret %struct.float64x2x4_t %.fca.0.3.insert
628 define %struct.int8x8x4_t @test_vld4_s8(i8* readonly %a) {
629 ; CHECK-LABEL: test_vld4_s8
630 ; CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}]
631 %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %a, i32 1)
632 %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0
633 %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
634 %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
635 %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
636 %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4.fca.0.extract, 0, 0
637 %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4.fca.1.extract, 0, 1
638 %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4.fca.2.extract, 0, 2
639 %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4.fca.3.extract, 0, 3
640 ret %struct.int8x8x4_t %.fca.0.3.insert
643 define %struct.int16x4x4_t @test_vld4_s16(i16* readonly %a) {
644 ; CHECK-LABEL: test_vld4_s16
645 ; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}]
646 %1 = bitcast i16* %a to i8*
647 %vld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %1, i32 2)
648 %vld4.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 0
649 %vld4.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 1
650 %vld4.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 2
651 %vld4.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 3
652 %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4.fca.0.extract, 0, 0
653 %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4.fca.1.extract, 0, 1
654 %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld4.fca.2.extract, 0, 2
655 %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4.fca.3.extract, 0, 3
656 ret %struct.int16x4x4_t %.fca.0.3.insert
659 define %struct.int32x2x4_t @test_vld4_s32(i32* readonly %a) {
660 ; CHECK-LABEL: test_vld4_s32
661 ; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
662 %1 = bitcast i32* %a to i8*
663 %vld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8* %1, i32 4)
664 %vld4.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 0
665 %vld4.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 1
666 %vld4.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 2
667 %vld4.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 3
668 %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4.fca.0.extract, 0, 0
669 %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4.fca.1.extract, 0, 1
670 %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4.fca.2.extract, 0, 2
671 %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4.fca.3.extract, 0, 3
672 ret %struct.int32x2x4_t %.fca.0.3.insert
675 define %struct.int64x1x4_t @test_vld4_s64(i64* readonly %a) {
676 ; CHECK-LABEL: test_vld4_s64
677 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
678 %1 = bitcast i64* %a to i8*
679 %vld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %1, i32 8)
680 %vld4.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 0
681 %vld4.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 1
682 %vld4.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 2
683 %vld4.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 3
684 %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4.fca.0.extract, 0, 0
685 %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4.fca.1.extract, 0, 1
686 %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4.fca.2.extract, 0, 2
687 %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4.fca.3.extract, 0, 3
688 ret %struct.int64x1x4_t %.fca.0.3.insert
691 define %struct.float32x2x4_t @test_vld4_f32(float* readonly %a) {
692 ; CHECK-LABEL: test_vld4_f32
693 ; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}]
694 %1 = bitcast float* %a to i8*
695 %vld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8* %1, i32 4)
696 %vld4.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 0
697 %vld4.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 1
698 %vld4.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 2
699 %vld4.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 3
700 %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4.fca.0.extract, 0, 0
701 %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4.fca.1.extract, 0, 1
702 %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4.fca.2.extract, 0, 2
703 %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4.fca.3.extract, 0, 3
704 ret %struct.float32x2x4_t %.fca.0.3.insert
707 define %struct.float64x1x4_t @test_vld4_f64(double* readonly %a) {
708 ; CHECK-LABEL: test_vld4_f64
709 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}]
710 %1 = bitcast double* %a to i8*
711 %vld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %1, i32 8)
712 %vld4.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 0
713 %vld4.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 1
714 %vld4.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 2
715 %vld4.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 3
716 %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4.fca.0.extract, 0, 0
717 %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4.fca.1.extract, 0, 1
718 %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4.fca.2.extract, 0, 2
719 %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4.fca.3.extract, 0, 3
720 ret %struct.float64x1x4_t %.fca.0.3.insert
; -----------------------------------------------------------------------------
; Declarations of the ARM NEON load intrinsics used by the tests above.
; Each takes (i8* address, i32 alignment-in-bytes); vldN returns a literal
; struct of N vectors of the named element type.
; -----------------------------------------------------------------------------
; vld1: single-vector loads.
723 declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32)
724 declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32)
725 declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32)
726 declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32)
727 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32)
728 declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32)
729 declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32)
730 declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32)
731 declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32)
732 declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32)
733 declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32)
734 declare <1 x double> @llvm.arm.neon.vld1.v1f64(i8*, i32)
; vld2: two-vector interleaved loads.
735 declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)
736 declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8*, i32)
737 declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32)
738 declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8*, i32)
739 declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32)
740 declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8*, i32)
741 declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32)
742 declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8*, i32)
743 declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32)
744 declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32)
745 declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32)
746 declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32)
; vld3: three-vector interleaved loads.
747 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32)
748 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8*, i32)
749 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32)
750 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8*, i32)
751 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32)
752 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8*, i32)
753 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32)
754 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
755 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8*, i32)
756 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32)
757 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8*, i32)
758 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32)
; vld4: four-vector interleaved loads.
759 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
760 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32)
761 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8*, i32)
762 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8*, i32)
763 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, i32)
764 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8*, i32)
765 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
766 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32)
767 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8*, i32)
768 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32)
769 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8*, i32)
770 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32)
; -----------------------------------------------------------------------------
; vst1 intrinsic tests: bitcast the element pointer to i8* (the i8 variants
; pass %a directly) and call @llvm.arm.neon.vst1.* with the vector argument
; and the element alignment in bytes. CHECK lines pin the st1 arrangement.
; NOTE(review): 'ret void' and closing '}' lines are not visible in this
; excerpt — presumably present in the full file; confirm before editing.
; -----------------------------------------------------------------------------
772 define void @test_vst1q_s8(i8* %a, <16 x i8> %b) {
773 ; CHECK-LABEL: test_vst1q_s8
774 ; CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
775 tail call void @llvm.arm.neon.vst1.v16i8(i8* %a, <16 x i8> %b, i32 1)
779 define void @test_vst1q_s16(i16* %a, <8 x i16> %b) {
780 ; CHECK-LABEL: test_vst1q_s16
781 ; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
782 %1 = bitcast i16* %a to i8*
783 tail call void @llvm.arm.neon.vst1.v8i16(i8* %1, <8 x i16> %b, i32 2)
787 define void @test_vst1q_s32(i32* %a, <4 x i32> %b) {
788 ; CHECK-LABEL: test_vst1q_s32
789 ; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
790 %1 = bitcast i32* %a to i8*
791 tail call void @llvm.arm.neon.vst1.v4i32(i8* %1, <4 x i32> %b, i32 4)
795 define void @test_vst1q_s64(i64* %a, <2 x i64> %b) {
796 ; CHECK-LABEL: test_vst1q_s64
797 ; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
798 %1 = bitcast i64* %a to i8*
799 tail call void @llvm.arm.neon.vst1.v2i64(i8* %1, <2 x i64> %b, i32 8)
803 define void @test_vst1q_f32(float* %a, <4 x float> %b) {
804 ; CHECK-LABEL: test_vst1q_f32
805 ; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
806 %1 = bitcast float* %a to i8*
807 tail call void @llvm.arm.neon.vst1.v4f32(i8* %1, <4 x float> %b, i32 4)
811 define void @test_vst1q_f64(double* %a, <2 x double> %b) {
812 ; CHECK-LABEL: test_vst1q_f64
813 ; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
814 %1 = bitcast double* %a to i8*
815 tail call void @llvm.arm.neon.vst1.v2f64(i8* %1, <2 x double> %b, i32 8)
; 64-bit ("d") register variants below.
819 define void @test_vst1_s8(i8* %a, <8 x i8> %b) {
820 ; CHECK-LABEL: test_vst1_s8
821 ; CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
822 tail call void @llvm.arm.neon.vst1.v8i8(i8* %a, <8 x i8> %b, i32 1)
826 define void @test_vst1_s16(i16* %a, <4 x i16> %b) {
827 ; CHECK-LABEL: test_vst1_s16
828 ; CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
829 %1 = bitcast i16* %a to i8*
830 tail call void @llvm.arm.neon.vst1.v4i16(i8* %1, <4 x i16> %b, i32 2)
834 define void @test_vst1_s32(i32* %a, <2 x i32> %b) {
835 ; CHECK-LABEL: test_vst1_s32
836 ; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
837 %1 = bitcast i32* %a to i8*
838 tail call void @llvm.arm.neon.vst1.v2i32(i8* %1, <2 x i32> %b, i32 4)
842 define void @test_vst1_s64(i64* %a, <1 x i64> %b) {
843 ; CHECK-LABEL: test_vst1_s64
844 ; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
845 %1 = bitcast i64* %a to i8*
846 tail call void @llvm.arm.neon.vst1.v1i64(i8* %1, <1 x i64> %b, i32 8)
850 define void @test_vst1_f32(float* %a, <2 x float> %b) {
851 ; CHECK-LABEL: test_vst1_f32
852 ; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
853 %1 = bitcast float* %a to i8*
854 tail call void @llvm.arm.neon.vst1.v2f32(i8* %1, <2 x float> %b, i32 4)
858 define void @test_vst1_f64(double* %a, <1 x double> %b) {
859 ; CHECK-LABEL: test_vst1_f64
860 ; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
861 %1 = bitcast double* %a to i8*
862 tail call void @llvm.arm.neon.vst1.v1f64(i8* %1, <1 x double> %b, i32 8)
; -----------------------------------------------------------------------------
; vst2 intrinsic tests: the [2 x <...>] coerced argument is unpacked with
; extractvalue and passed to @llvm.arm.neon.vst2.* (last i32 = alignment in
; bytes). CHECK lines pin st2 for multi-lane arrangements; the
; <1 x i64>/<1 x double> cases expect a two-register st1 {.1d,...} instead
; (see their CHECK lines).
; NOTE(review): 'ret void' and closing '}' lines are not visible in this
; excerpt — presumably present in the full file; confirm before editing.
; -----------------------------------------------------------------------------
866 define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) {
867 ; CHECK-LABEL: test_vst2q_s8
868 ; CHECK: st2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
869 %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0
870 %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1
871 tail call void @llvm.arm.neon.vst2.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 1)
875 define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) {
876 ; CHECK-LABEL: test_vst2q_s16
877 ; CHECK: st2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
878 %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0
879 %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1
880 %1 = bitcast i16* %a to i8*
881 tail call void @llvm.arm.neon.vst2.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 2)
885 define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) {
886 ; CHECK-LABEL: test_vst2q_s32
887 ; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
888 %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0
889 %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1
890 %1 = bitcast i32* %a to i8*
891 tail call void @llvm.arm.neon.vst2.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 4)
895 define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) {
896 ; CHECK-LABEL: test_vst2q_s64
897 ; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
898 %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0
899 %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1
900 %1 = bitcast i64* %a to i8*
901 tail call void @llvm.arm.neon.vst2.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 8)
905 define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) {
906 ; CHECK-LABEL: test_vst2q_f32
907 ; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
908 %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0
909 %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1
910 %1 = bitcast float* %a to i8*
911 tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 4)
915 define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) {
916 ; CHECK-LABEL: test_vst2q_f64
917 ; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
918 %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0
919 %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1
920 %1 = bitcast double* %a to i8*
921 tail call void @llvm.arm.neon.vst2.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 8)
; 64-bit ("d") register variants below.
925 define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) {
926 ; CHECK-LABEL: test_vst2_s8
927 ; CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
928 %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0
929 %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1
930 tail call void @llvm.arm.neon.vst2.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 1)
934 define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) {
935 ; CHECK-LABEL: test_vst2_s16
936 ; CHECK: st2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
937 %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0
938 %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1
939 %1 = bitcast i16* %a to i8*
940 tail call void @llvm.arm.neon.vst2.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 2)
944 define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) {
945 ; CHECK-LABEL: test_vst2_s32
946 ; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
947 %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0
948 %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1
949 %1 = bitcast i32* %a to i8*
950 tail call void @llvm.arm.neon.vst2.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 4)
; <1 x i64> x2: the CHECK expects a two-register st1 {.1d,...}, not st2.
954 define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) {
955 ; CHECK-LABEL: test_vst2_s64
956 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
957 %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0
958 %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1
959 %1 = bitcast i64* %a to i8*
960 tail call void @llvm.arm.neon.vst2.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 8)
964 define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) {
965 ; CHECK-LABEL: test_vst2_f32
966 ; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
967 %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0
968 %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1
969 %1 = bitcast float* %a to i8*
970 tail call void @llvm.arm.neon.vst2.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 4)
; <1 x double> x2: as above, the CHECK expects st1 {.1d,...}.
974 define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) {
975 ; CHECK-LABEL: test_vst2_f64
976 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
977 %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0
978 %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1
979 %1 = bitcast double* %a to i8*
980 tail call void @llvm.arm.neon.vst2.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 8)
; -----------------------------------------------------------------------------
; vst3 intrinsic tests: unpack the [3 x <...>] coerced argument with
; extractvalue and call @llvm.arm.neon.vst3.* (last i32 = alignment in bytes).
; CHECK lines pin st3 for multi-lane arrangements; the <1 x i64>/<1 x double>
; cases expect a three-register st1 {.1d,...} (see their CHECK lines).
; NOTE(review): 'ret void' and closing '}' lines are not visible in this
; excerpt — presumably present in the full file; confirm before editing.
; -----------------------------------------------------------------------------
984 define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) {
985 ; CHECK-LABEL: test_vst3q_s8
986 ; CHECK: st3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
987 %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0
988 %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1
989 %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2
990 tail call void @llvm.arm.neon.vst3.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 1)
994 define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) {
995 ; CHECK-LABEL: test_vst3q_s16
996 ; CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
997 %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0
998 %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1
999 %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2
1000 %1 = bitcast i16* %a to i8*
1001 tail call void @llvm.arm.neon.vst3.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 2)
1005 define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) {
1006 ; CHECK-LABEL: test_vst3q_s32
1007 ; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1008 %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0
1009 %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1
1010 %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2
1011 %1 = bitcast i32* %a to i8*
1012 tail call void @llvm.arm.neon.vst3.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 4)
1016 define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) {
1017 ; CHECK-LABEL: test_vst3q_s64
1018 ; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1019 %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0
1020 %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1
1021 %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2
1022 %1 = bitcast i64* %a to i8*
1023 tail call void @llvm.arm.neon.vst3.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 8)
1027 define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) {
1028 ; CHECK-LABEL: test_vst3q_f32
1029 ; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1030 %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0
1031 %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1
1032 %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2
1033 %1 = bitcast float* %a to i8*
1034 tail call void @llvm.arm.neon.vst3.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 4)
1038 define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) {
1039 ; CHECK-LABEL: test_vst3q_f64
1040 ; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1041 %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0
1042 %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1
1043 %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2
1044 %1 = bitcast double* %a to i8*
1045 tail call void @llvm.arm.neon.vst3.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 8)
; 64-bit ("d") register variants below.
1049 define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) {
1050 ; CHECK-LABEL: test_vst3_s8
1051 ; CHECK: st3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
1052 %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0
1053 %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1
1054 %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2
1055 tail call void @llvm.arm.neon.vst3.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 1)
1059 define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) {
1060 ; CHECK-LABEL: test_vst3_s16
1061 ; CHECK: st3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
1062 %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0
1063 %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1
1064 %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2
1065 %1 = bitcast i16* %a to i8*
1066 tail call void @llvm.arm.neon.vst3.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 2)
1070 define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) {
1071 ; CHECK-LABEL: test_vst3_s32
1072 ; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1073 %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0
1074 %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1
1075 %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2
1076 %1 = bitcast i32* %a to i8*
1077 tail call void @llvm.arm.neon.vst3.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 4)
; <1 x i64> x3: the CHECK expects a three-register st1 {.1d,...}, not st3.
1081 define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) {
1082 ; CHECK-LABEL: test_vst3_s64
1083 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1084 %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0
1085 %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1
1086 %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2
1087 %1 = bitcast i64* %a to i8*
1088 tail call void @llvm.arm.neon.vst3.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 8)
1092 define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) {
1093 ; CHECK-LABEL: test_vst3_f32
1094 ; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1095 %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0
1096 %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1
1097 %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2
1098 %1 = bitcast float* %a to i8*
1099 tail call void @llvm.arm.neon.vst3.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 4)
; <1 x double> x3: as above, the CHECK expects st1 {.1d,...}.
1103 define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) {
1104 ; CHECK-LABEL: test_vst3_f64
1105 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1106 %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0
1107 %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1
1108 %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2
1109 %1 = bitcast double* %a to i8*
1110 tail call void @llvm.arm.neon.vst3.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 8)
1114 define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) {
1115 ; CHECK-LABEL: test_vst4q_s8
1116 ; CHECK: st4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
1117 %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0
1118 %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1
1119 %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2
1120 %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3
1121 tail call void @llvm.arm.neon.vst4.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 1)
1125 define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) {
1126 ; CHECK-LABEL: test_vst4q_s16
1127 ; CHECK: st4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
1128 %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0
1129 %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1
1130 %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2
1131 %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3
1132 %1 = bitcast i16* %a to i8*
1133 tail call void @llvm.arm.neon.vst4.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 2)
1137 define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) {
1138 ; CHECK-LABEL: test_vst4q_s32
1139 ; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1140 %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0
1141 %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1
1142 %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2
1143 %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3
1144 %1 = bitcast i32* %a to i8*
1145 tail call void @llvm.arm.neon.vst4.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 4)
; Interleaved 4-register store of four <2 x i64> (align 8): must lower to a single st4 with .2d operands.
define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) {
; CHECK-LABEL: test_vst4q_s64
; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
%b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0
%b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1
%b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2
%b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3
%1 = bitcast i64* %a to i8*
tail call void @llvm.arm.neon.vst4.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 8)
; Interleaved 4-register store of four <4 x float> (align 4): must lower to a single st4 with .4s operands.
define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) {
; CHECK-LABEL: test_vst4q_f32
; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
%b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0
%b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1
%b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2
%b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3
%1 = bitcast float* %a to i8*
tail call void @llvm.arm.neon.vst4.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 4)
; Interleaved 4-register store of four <2 x double> (align 8): must lower to a single st4 with .2d operands.
define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) {
; CHECK-LABEL: test_vst4q_f64
; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
%b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0
%b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1
%b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2
%b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3
%1 = bitcast double* %a to i8*
tail call void @llvm.arm.neon.vst4.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 8)
; Interleaved 4-register store of four <8 x i8> (align 1, no pointer cast needed): expect st4 with .8b operands.
define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) {
; CHECK-LABEL: test_vst4_s8
; CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
%b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0
%b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1
%b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2
%b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3
tail call void @llvm.arm.neon.vst4.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 1)
; Interleaved 4-register store of four <4 x i16> (align 2): expect st4 with .4h operands.
define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) {
; CHECK-LABEL: test_vst4_s16
; CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
%b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0
%b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1
%b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2
%b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3
%1 = bitcast i16* %a to i8*
tail call void @llvm.arm.neon.vst4.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 2)
; Interleaved 4-register store of four <2 x i32> (align 4): expect st4 with .2s operands.
define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) {
; CHECK-LABEL: test_vst4_s32
; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
%b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0
%b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1
%b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2
%b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3
%1 = bitcast i32* %a to i8*
tail call void @llvm.arm.neon.vst4.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 4)
; vst4 of four <1 x i64>: the CHECK expects an st1 multi-register store with .1d
; operands rather than st4 (single-element vectors have nothing to interleave).
define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) {
; CHECK-LABEL: test_vst4_s64
; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
%b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0
%b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1
%b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2
%b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3
%1 = bitcast i64* %a to i8*
tail call void @llvm.arm.neon.vst4.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 8)
; Interleaved 4-register store of four <2 x float> (align 4): expect st4 with .2s operands.
define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) {
; CHECK-LABEL: test_vst4_f32
; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
%b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0
%b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1
%b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2
%b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3
%1 = bitcast float* %a to i8*
tail call void @llvm.arm.neon.vst4.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 4)
; vst4 of four <1 x double>: like the s64 case, the CHECK expects st1 {.1d x 4}
; instead of st4 (single-element vectors have nothing to interleave).
define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) {
; CHECK-LABEL: test_vst4_f64
; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
%b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0
%b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1
%b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2
%b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3
%1 = bitcast double* %a to i8*
tail call void @llvm.arm.neon.vst4.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 8)
; Declarations of the ARM NEON interleaved-store intrinsics exercised above.
; Trailing i32 operand is the alignment in bytes.

; vst1: single-register store, one data vector.
declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32)
declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32)
declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32)
declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32)
declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32)
declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32)
declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32)
declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32)
declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32)
declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32)
declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32)
declare void @llvm.arm.neon.vst1.v1f64(i8*, <1 x double>, i32)

; vst2: 2-way interleaved store, two data vectors.
declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
declare void @llvm.arm.neon.vst2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32)
declare void @llvm.arm.neon.vst2.v2f64(i8*, <2 x double>, <2 x double>, i32)
declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32)
declare void @llvm.arm.neon.vst2.v1f64(i8*, <1 x double>, <1 x double>, i32)

; vst3: 3-way interleaved store, three data vectors.
declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
declare void @llvm.arm.neon.vst3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
declare void @llvm.arm.neon.vst3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
declare void @llvm.arm.neon.vst3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)

; vst4: 4-way interleaved store, four data vectors.
declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
declare void @llvm.arm.neon.vst4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
declare void @llvm.arm.neon.vst4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
declare void @llvm.arm.neon.vst4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)
; Two-register non-interleaved load of <16 x i8> pairs: expect a single ld1 {.16b, .16b}.
define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) {
; CHECK-LABEL: test_vld1q_s8_x2
; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
%1 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1)
%2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
%3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
%4 = insertvalue %struct.int8x16x2_t undef, <16 x i8> %2, 0, 0
%5 = insertvalue %struct.int8x16x2_t %4, <16 x i8> %3, 0, 1
ret %struct.int8x16x2_t %5
; Two-register non-interleaved load of <8 x i16> pairs: expect a single ld1 {.8h, .8h}.
define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) {
; CHECK-LABEL: test_vld1q_s16_x2
; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
%1 = bitcast i16* %a to i8*
%2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %1, i32 2)
%3 = extractvalue { <8 x i16>, <8 x i16> } %2, 0
%4 = extractvalue { <8 x i16>, <8 x i16> } %2, 1
%5 = insertvalue %struct.int16x8x2_t undef, <8 x i16> %3, 0, 0
%6 = insertvalue %struct.int16x8x2_t %5, <8 x i16> %4, 0, 1
ret %struct.int16x8x2_t %6
; Two-register non-interleaved load of <4 x i32> pairs: expect a single ld1 {.4s, .4s}.
define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) {
; CHECK-LABEL: test_vld1q_s32_x2
; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
%1 = bitcast i32* %a to i8*
%2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8* %1, i32 4)
%3 = extractvalue { <4 x i32>, <4 x i32> } %2, 0
%4 = extractvalue { <4 x i32>, <4 x i32> } %2, 1
%5 = insertvalue %struct.int32x4x2_t undef, <4 x i32> %3, 0, 0
%6 = insertvalue %struct.int32x4x2_t %5, <4 x i32> %4, 0, 1
ret %struct.int32x4x2_t %6
; Two-register non-interleaved load of <2 x i64> pairs: expect a single ld1 {.2d, .2d}.
define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) {
; CHECK-LABEL: test_vld1q_s64_x2
; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
%1 = bitcast i64* %a to i8*
%2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8* %1, i32 8)
%3 = extractvalue { <2 x i64>, <2 x i64> } %2, 0
%4 = extractvalue { <2 x i64>, <2 x i64> } %2, 1
%5 = insertvalue %struct.int64x2x2_t undef, <2 x i64> %3, 0, 0
%6 = insertvalue %struct.int64x2x2_t %5, <2 x i64> %4, 0, 1
ret %struct.int64x2x2_t %6
; Two-register non-interleaved load of <4 x float> pairs: expect a single ld1 {.4s, .4s}.
define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) {
; CHECK-LABEL: test_vld1q_f32_x2
; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
%1 = bitcast float* %a to i8*
%2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8* %1, i32 4)
%3 = extractvalue { <4 x float>, <4 x float> } %2, 0
%4 = extractvalue { <4 x float>, <4 x float> } %2, 1
%5 = insertvalue %struct.float32x4x2_t undef, <4 x float> %3, 0, 0
%6 = insertvalue %struct.float32x4x2_t %5, <4 x float> %4, 0, 1
ret %struct.float32x4x2_t %6
; Two-register non-interleaved load of <2 x double> pairs: expect a single ld1 {.2d, .2d}.
define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) {
; CHECK-LABEL: test_vld1q_f64_x2
; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
%1 = bitcast double* %a to i8*
%2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8* %1, i32 8)
%3 = extractvalue { <2 x double>, <2 x double> } %2, 0
%4 = extractvalue { <2 x double>, <2 x double> } %2, 1
%5 = insertvalue %struct.float64x2x2_t undef, <2 x double> %3, 0, 0
%6 = insertvalue %struct.float64x2x2_t %5, <2 x double> %4, 0, 1
ret %struct.float64x2x2_t %6
; Two-register non-interleaved load of <8 x i8> pairs: expect a single ld1 {.8b, .8b}.
define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) {
; CHECK-LABEL: test_vld1_s8_x2
; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
%1 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8* %a, i32 1)
%2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0
%3 = extractvalue { <8 x i8>, <8 x i8> } %1, 1
%4 = insertvalue %struct.int8x8x2_t undef, <8 x i8> %2, 0, 0
%5 = insertvalue %struct.int8x8x2_t %4, <8 x i8> %3, 0, 1
ret %struct.int8x8x2_t %5
; Two-register non-interleaved load of <4 x i16> pairs: expect a single ld1 {.4h, .4h}.
define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) {
; CHECK-LABEL: test_vld1_s16_x2
; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
%1 = bitcast i16* %a to i8*
%2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8* %1, i32 2)
%3 = extractvalue { <4 x i16>, <4 x i16> } %2, 0
%4 = extractvalue { <4 x i16>, <4 x i16> } %2, 1
%5 = insertvalue %struct.int16x4x2_t undef, <4 x i16> %3, 0, 0
%6 = insertvalue %struct.int16x4x2_t %5, <4 x i16> %4, 0, 1
ret %struct.int16x4x2_t %6
; Two-register non-interleaved load of <2 x i32> pairs: expect a single ld1 {.2s, .2s}.
define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) {
; CHECK-LABEL: test_vld1_s32_x2
; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
%1 = bitcast i32* %a to i8*
%2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8* %1, i32 4)
%3 = extractvalue { <2 x i32>, <2 x i32> } %2, 0
%4 = extractvalue { <2 x i32>, <2 x i32> } %2, 1
%5 = insertvalue %struct.int32x2x2_t undef, <2 x i32> %3, 0, 0
%6 = insertvalue %struct.int32x2x2_t %5, <2 x i32> %4, 0, 1
ret %struct.int32x2x2_t %6
; Two-register non-interleaved load of <1 x i64> pairs: expect a single ld1 {.1d, .1d}.
define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) {
; CHECK-LABEL: test_vld1_s64_x2
; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
%1 = bitcast i64* %a to i8*
%2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8* %1, i32 8)
%3 = extractvalue { <1 x i64>, <1 x i64> } %2, 0
%4 = extractvalue { <1 x i64>, <1 x i64> } %2, 1
%5 = insertvalue %struct.int64x1x2_t undef, <1 x i64> %3, 0, 0
%6 = insertvalue %struct.int64x1x2_t %5, <1 x i64> %4, 0, 1
ret %struct.int64x1x2_t %6
; Two-register non-interleaved load of <2 x float> pairs: expect a single ld1 {.2s, .2s}.
define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) {
; CHECK-LABEL: test_vld1_f32_x2
; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
%1 = bitcast float* %a to i8*
%2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8* %1, i32 4)
%3 = extractvalue { <2 x float>, <2 x float> } %2, 0
%4 = extractvalue { <2 x float>, <2 x float> } %2, 1
%5 = insertvalue %struct.float32x2x2_t undef, <2 x float> %3, 0, 0
%6 = insertvalue %struct.float32x2x2_t %5, <2 x float> %4, 0, 1
ret %struct.float32x2x2_t %6
; Two-register non-interleaved load of <1 x double> pairs: expect a single ld1 {.1d, .1d}.
define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) {
; CHECK-LABEL: test_vld1_f64_x2
; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
%1 = bitcast double* %a to i8*
%2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8* %1, i32 8)
%3 = extractvalue { <1 x double>, <1 x double> } %2, 0
%4 = extractvalue { <1 x double>, <1 x double> } %2, 1
%5 = insertvalue %struct.float64x1x2_t undef, <1 x double> %3, 0, 0
%6 = insertvalue %struct.float64x1x2_t %5, <1 x double> %4, 0, 1
ret %struct.float64x1x2_t %6
; Three-register non-interleaved load of <16 x i8>: expect a single ld1 {.16b x 3}.
define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) {
; CHECK-LABEL: test_vld1q_s8_x3
; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
%1 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8* %a, i32 1)
%2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
%3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
%4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
%5 = insertvalue %struct.int8x16x3_t undef, <16 x i8> %2, 0, 0
%6 = insertvalue %struct.int8x16x3_t %5, <16 x i8> %3, 0, 1
%7 = insertvalue %struct.int8x16x3_t %6, <16 x i8> %4, 0, 2
ret %struct.int8x16x3_t %7
; Three-register non-interleaved load of <8 x i16>: expect a single ld1 {.8h x 3}.
define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) {
; CHECK-LABEL: test_vld1q_s16_x3
; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
%1 = bitcast i16* %a to i8*
%2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2)
%3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
%4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
%5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
%6 = insertvalue %struct.int16x8x3_t undef, <8 x i16> %3, 0, 0
%7 = insertvalue %struct.int16x8x3_t %6, <8 x i16> %4, 0, 1
%8 = insertvalue %struct.int16x8x3_t %7, <8 x i16> %5, 0, 2
ret %struct.int16x8x3_t %8
; Three-register non-interleaved load of <4 x i32>: expect a single ld1 {.4s x 3}.
define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) {
; CHECK-LABEL: test_vld1q_s32_x3
; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
%1 = bitcast i32* %a to i8*
%2 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8* %1, i32 4)
%3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
%4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
%5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
%6 = insertvalue %struct.int32x4x3_t undef, <4 x i32> %3, 0, 0
%7 = insertvalue %struct.int32x4x3_t %6, <4 x i32> %4, 0, 1
%8 = insertvalue %struct.int32x4x3_t %7, <4 x i32> %5, 0, 2
ret %struct.int32x4x3_t %8
; Three-register non-interleaved load of <2 x i64>: expect a single ld1 {.2d x 3}.
define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) {
; CHECK-LABEL: test_vld1q_s64_x3
; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
%1 = bitcast i64* %a to i8*
%2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8)
%3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
%4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
%5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
%6 = insertvalue %struct.int64x2x3_t undef, <2 x i64> %3, 0, 0
%7 = insertvalue %struct.int64x2x3_t %6, <2 x i64> %4, 0, 1
%8 = insertvalue %struct.int64x2x3_t %7, <2 x i64> %5, 0, 2
ret %struct.int64x2x3_t %8
; Three-register non-interleaved load of <4 x float>: expect a single ld1 {.4s x 3}.
define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) {
; CHECK-LABEL: test_vld1q_f32_x3
; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
%1 = bitcast float* %a to i8*
%2 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8* %1, i32 4)
%3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 0
%4 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 1
%5 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 2
%6 = insertvalue %struct.float32x4x3_t undef, <4 x float> %3, 0, 0
%7 = insertvalue %struct.float32x4x3_t %6, <4 x float> %4, 0, 1
%8 = insertvalue %struct.float32x4x3_t %7, <4 x float> %5, 0, 2
ret %struct.float32x4x3_t %8
; Three-register non-interleaved load of <2 x double>: expect a single ld1 {.2d x 3}.
define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) {
; CHECK-LABEL: test_vld1q_f64_x3
; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
%1 = bitcast double* %a to i8*
%2 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8* %1, i32 8)
%3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 0
%4 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 1
%5 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 2
%6 = insertvalue %struct.float64x2x3_t undef, <2 x double> %3, 0, 0
%7 = insertvalue %struct.float64x2x3_t %6, <2 x double> %4, 0, 1
%8 = insertvalue %struct.float64x2x3_t %7, <2 x double> %5, 0, 2
ret %struct.float64x2x3_t %8
; Three-register non-interleaved load of <8 x i8>: expect a single ld1 {.8b x 3}.
define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) {
; CHECK-LABEL: test_vld1_s8_x3
; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
%1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8* %a, i32 1)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
%3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
%4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
%5 = insertvalue %struct.int8x8x3_t undef, <8 x i8> %2, 0, 0
%6 = insertvalue %struct.int8x8x3_t %5, <8 x i8> %3, 0, 1
%7 = insertvalue %struct.int8x8x3_t %6, <8 x i8> %4, 0, 2
ret %struct.int8x8x3_t %7
; Three-register non-interleaved load of <4 x i16>: expect a single ld1 {.4h x 3}.
define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) {
; CHECK-LABEL: test_vld1_s16_x3
; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
%1 = bitcast i16* %a to i8*
%2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8* %1, i32 2)
%3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
%4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
%5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
%6 = insertvalue %struct.int16x4x3_t undef, <4 x i16> %3, 0, 0
%7 = insertvalue %struct.int16x4x3_t %6, <4 x i16> %4, 0, 1
%8 = insertvalue %struct.int16x4x3_t %7, <4 x i16> %5, 0, 2
ret %struct.int16x4x3_t %8
; Three-register non-interleaved load of <2 x i32>: expect a single ld1 {.2s x 3}.
define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) {
; NOTE(review): unlike the sibling tests, the bitcast precedes the CHECK lines here;
; harmless to FileCheck, but inconsistent with the rest of the file.
%1 = bitcast i32* %a to i8*
; CHECK-LABEL: test_vld1_s32_x3
; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
%2 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8* %1, i32 4)
%3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
%4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
%5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
%6 = insertvalue %struct.int32x2x3_t undef, <2 x i32> %3, 0, 0
%7 = insertvalue %struct.int32x2x3_t %6, <2 x i32> %4, 0, 1
%8 = insertvalue %struct.int32x2x3_t %7, <2 x i32> %5, 0, 2
ret %struct.int32x2x3_t %8
; Three-register non-interleaved load of <1 x i64>: expect a single ld1 {.1d x 3}.
define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) {
; CHECK-LABEL: test_vld1_s64_x3
; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
%1 = bitcast i64* %a to i8*
%2 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8* %1, i32 8)
%3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
%4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
%5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
%6 = insertvalue %struct.int64x1x3_t undef, <1 x i64> %3, 0, 0
%7 = insertvalue %struct.int64x1x3_t %6, <1 x i64> %4, 0, 1
%8 = insertvalue %struct.int64x1x3_t %7, <1 x i64> %5, 0, 2
ret %struct.int64x1x3_t %8
; Three-register non-interleaved load of <2 x float>: expect a single ld1 {.2s x 3}.
define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) {
; CHECK-LABEL: test_vld1_f32_x3
; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
%1 = bitcast float* %a to i8*
%2 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8* %1, i32 4)
%3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 0
%4 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 1
%5 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 2
%6 = insertvalue %struct.float32x2x3_t undef, <2 x float> %3, 0, 0
%7 = insertvalue %struct.float32x2x3_t %6, <2 x float> %4, 0, 1
%8 = insertvalue %struct.float32x2x3_t %7, <2 x float> %5, 0, 2
ret %struct.float32x2x3_t %8
; Three-register non-interleaved load of <1 x double>: expect a single ld1 {.1d x 3}.
define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) {
; CHECK-LABEL: test_vld1_f64_x3
; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
%1 = bitcast double* %a to i8*
%2 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8* %1, i32 8)
%3 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 0
%4 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 1
%5 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 2
%6 = insertvalue %struct.float64x1x3_t undef, <1 x double> %3, 0, 0
%7 = insertvalue %struct.float64x1x3_t %6, <1 x double> %4, 0, 1
%8 = insertvalue %struct.float64x1x3_t %7, <1 x double> %5, 0, 2
ret %struct.float64x1x3_t %8
; Four-register non-interleaved load of <16 x i8>: expect a single ld1 {.16b x 4}.
; NOTE(review): the second comment line below is NOT a CHECK directive, so the 4th
; register-list entry and the address operand are not verified by FileCheck.
define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) {
; CHECK-LABEL: test_vld1q_s8_x4
; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
; v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
%1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8* %a, i32 1)
%2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
%3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
%4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
%5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
%6 = insertvalue %struct.int8x16x4_t undef, <16 x i8> %2, 0, 0
%7 = insertvalue %struct.int8x16x4_t %6, <16 x i8> %3, 0, 1
%8 = insertvalue %struct.int8x16x4_t %7, <16 x i8> %4, 0, 2
%9 = insertvalue %struct.int8x16x4_t %8, <16 x i8> %5, 0, 3
ret %struct.int8x16x4_t %9
; Four-register non-interleaved load of <8 x i16>: expect a single ld1 {.8h x 4}.
define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) {
; CHECK-LABEL: test_vld1q_s16_x4
; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
; v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
%1 = bitcast i16* %a to i8*
%2 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8* %1, i32 2)
%3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
%4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
%5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
%6 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 3
%7 = insertvalue %struct.int16x8x4_t undef, <8 x i16> %3, 0, 0
%8 = insertvalue %struct.int16x8x4_t %7, <8 x i16> %4, 0, 1
%9 = insertvalue %struct.int16x8x4_t %8, <8 x i16> %5, 0, 2
%10 = insertvalue %struct.int16x8x4_t %9, <8 x i16> %6, 0, 3
ret %struct.int16x8x4_t %10
; Four-register non-interleaved load of <4 x i32>: expect a single ld1 {.4s x 4}.
define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) {
; CHECK-LABEL: test_vld1q_s32_x4
; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
; v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
%1 = bitcast i32* %a to i8*
%2 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8* %1, i32 4)
%3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
%4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
%5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
%6 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 3
%7 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %3, 0, 0
%8 = insertvalue %struct.int32x4x4_t %7, <4 x i32> %4, 0, 1
%9 = insertvalue %struct.int32x4x4_t %8, <4 x i32> %5, 0, 2
%10 = insertvalue %struct.int32x4x4_t %9, <4 x i32> %6, 0, 3
ret %struct.int32x4x4_t %10
; Four-register non-interleaved load of <2 x i64>: expect a single ld1 {.2d x 4}.
define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) {
; CHECK-LABEL: test_vld1q_s64_x4
; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
; v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
%1 = bitcast i64* %a to i8*
%2 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8* %1, i32 8)
%3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
%4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
%5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
%6 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 3
%7 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %3, 0, 0
%8 = insertvalue %struct.int64x2x4_t %7, <2 x i64> %4, 0, 1
%9 = insertvalue %struct.int64x2x4_t %8, <2 x i64> %5, 0, 2
%10 = insertvalue %struct.int64x2x4_t %9, <2 x i64> %6, 0, 3
ret %struct.int64x2x4_t %10
; Four-register non-interleaved load of <4 x float>: expect a single ld1 {.4s x 4}.
define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) {
; CHECK-LABEL: test_vld1q_f32_x4
; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
; v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
%1 = bitcast float* %a to i8*
%2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4)
%3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0
%4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 1
%5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 2
%6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 3
%7 = insertvalue %struct.float32x4x4_t undef, <4 x float> %3, 0, 0
%8 = insertvalue %struct.float32x4x4_t %7, <4 x float> %4, 0, 1
%9 = insertvalue %struct.float32x4x4_t %8, <4 x float> %5, 0, 2
%10 = insertvalue %struct.float32x4x4_t %9, <4 x float> %6, 0, 3
ret %struct.float32x4x4_t %10
; Four-register non-interleaved load of <2 x double>: expect a single ld1 {.2d x 4}.
; Fixed below: the continuation comment previously said ".4s" (copy-paste from the
; f32 test) where this 2 x double test means ".2d". The line is a plain comment, not
; a CHECK directive, so FileCheck behavior is unchanged either way.
define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) {
; CHECK-LABEL: test_vld1q_f64_x4
; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
; v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
%1 = bitcast double* %a to i8*
%2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8* %1, i32 8)
%3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0
%4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1
%5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2
%6 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3
%7 = insertvalue %struct.float64x2x4_t undef, <2 x double> %3, 0, 0
%8 = insertvalue %struct.float64x2x4_t %7, <2 x double> %4, 0, 1
%9 = insertvalue %struct.float64x2x4_t %8, <2 x double> %5, 0, 2
%10 = insertvalue %struct.float64x2x4_t %9, <2 x double> %6, 0, 3
ret %struct.float64x2x4_t %10
; Four-register non-interleaved load of <8 x i8>: expect a single ld1 {.8b x 4}.
define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a) {
; CHECK-LABEL: test_vld1_s8_x4
; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
; v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
%1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1)
%2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
%3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
%4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
%5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3
%6 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %2, 0, 0
%7 = insertvalue %struct.int8x8x4_t %6, <8 x i8> %3, 0, 1
%8 = insertvalue %struct.int8x8x4_t %7, <8 x i8> %4, 0, 2
%9 = insertvalue %struct.int8x8x4_t %8, <8 x i8> %5, 0, 3
ret %struct.int8x8x4_t %9
1745 define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) {
1746 ; CHECK-LABEL: test_vld1_s16_x4
1747 ; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
1748 ; v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
1749 %1 = bitcast i16* %a to i8*
1750 %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8* %1, i32 2)
1751 %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
1752 %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
1753 %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
1754 %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 3
1755 %7 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %3, 0, 0
1756 %8 = insertvalue %struct.int16x4x4_t %7, <4 x i16> %4, 0, 1
1757 %9 = insertvalue %struct.int16x4x4_t %8, <4 x i16> %5, 0, 2
1758 %10 = insertvalue %struct.int16x4x4_t %9, <4 x i16> %6, 0, 3
1759 ret %struct.int16x4x4_t %10
1762 define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) {
1763 ; CHECK-LABEL: test_vld1_s32_x4
1764 ; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
1765 ; v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1766 %1 = bitcast i32* %a to i8*
1767 %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8* %1, i32 4)
1768 %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
1769 %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
1770 %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
1771 %6 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3
1772 %7 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %3, 0, 0
1773 %8 = insertvalue %struct.int32x2x4_t %7, <2 x i32> %4, 0, 1
1774 %9 = insertvalue %struct.int32x2x4_t %8, <2 x i32> %5, 0, 2
1775 %10 = insertvalue %struct.int32x2x4_t %9, <2 x i32> %6, 0, 3
1776 ret %struct.int32x2x4_t %10
1779 define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) {
1780 ; CHECK-LABEL: test_vld1_s64_x4
1781 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
1782 ; v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1783 %1 = bitcast i64* %a to i8*
1784 %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8* %1, i32 8)
1785 %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
1786 %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
1787 %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
1788 %6 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 3
1789 %7 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %3, 0, 0
1790 %8 = insertvalue %struct.int64x1x4_t %7, <1 x i64> %4, 0, 1
1791 %9 = insertvalue %struct.int64x1x4_t %8, <1 x i64> %5, 0, 2
1792 %10 = insertvalue %struct.int64x1x4_t %9, <1 x i64> %6, 0, 3
1793 ret %struct.int64x1x4_t %10
1796 define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) {
1797 ; CHECK-LABEL: test_vld1_f32_x4
1798 ; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
1799 ; v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1800 %1 = bitcast float* %a to i8*
1801 %2 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8* %1, i32 4)
1802 %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 0
1803 %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 1
1804 %5 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 2
1805 %6 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 3
1806 %7 = insertvalue %struct.float32x2x4_t undef, <2 x float> %3, 0, 0
1807 %8 = insertvalue %struct.float32x2x4_t %7, <2 x float> %4, 0, 1
1808 %9 = insertvalue %struct.float32x2x4_t %8, <2 x float> %5, 0, 2
1809 %10 = insertvalue %struct.float32x2x4_t %9, <2 x float> %6, 0, 3
1810 ret %struct.float32x2x4_t %10
1814 define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) {
1815 ; CHECK-LABEL: test_vld1_f64_x4
1816 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
1817 ; v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1818 %1 = bitcast double* %a to i8*
1819 %2 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8* %1, i32 8)
1820 %3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 0
1821 %4 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 1
1822 %5 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 2
1823 %6 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 3
1824 %7 = insertvalue %struct.float64x1x4_t undef, <1 x double> %3, 0, 0
1825 %8 = insertvalue %struct.float64x1x4_t %7, <1 x double> %4, 0, 1
1826 %9 = insertvalue %struct.float64x1x4_t %8, <1 x double> %5, 0, 2
1827 %10 = insertvalue %struct.float64x1x4_t %9, <1 x double> %6, 0, 3
1828 ret %struct.float64x1x4_t %10
; vst1_x2 tests: each unpacks two vectors from an aggregate argument and
; stores them with @llvm.aarch64.neon.vst1x2.*.  Expected codegen is a
; single st1 with a two-register list.  Non-i8 element types bitcast the
; destination pointer to i8* and pass the element alignment as the last
; intrinsic operand.
1831 define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b) {
1832 ; CHECK-LABEL: test_vst1q_s8_x2
1833 ; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
1834 %1 = extractvalue [2 x <16 x i8>] %b, 0
1835 %2 = extractvalue [2 x <16 x i8>] %b, 1
1836 tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1)
1840 define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b) {
1841 ; CHECK-LABEL: test_vst1q_s16_x2
1842 ; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
1843 %1 = extractvalue [2 x <8 x i16>] %b, 0
1844 %2 = extractvalue [2 x <8 x i16>] %b, 1
1845 %3 = bitcast i16* %a to i8*
1846 tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2)
1850 define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b) {
1851 ; CHECK-LABEL: test_vst1q_s32_x2
1852 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1853 %1 = extractvalue [2 x <4 x i32>] %b, 0
1854 %2 = extractvalue [2 x <4 x i32>] %b, 1
1855 %3 = bitcast i32* %a to i8*
1856 tail call void @llvm.aarch64.neon.vst1x2.v4i32(i8* %3, <4 x i32> %1, <4 x i32> %2, i32 4)
1860 define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b) {
1861 ; CHECK-LABEL: test_vst1q_s64_x2
1862 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1863 %1 = extractvalue [2 x <2 x i64>] %b, 0
1864 %2 = extractvalue [2 x <2 x i64>] %b, 1
1865 %3 = bitcast i64* %a to i8*
1866 tail call void @llvm.aarch64.neon.vst1x2.v2i64(i8* %3, <2 x i64> %1, <2 x i64> %2, i32 8)
1870 define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b) {
1871 ; CHECK-LABEL: test_vst1q_f32_x2
1872 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1873 %1 = extractvalue [2 x <4 x float>] %b, 0
1874 %2 = extractvalue [2 x <4 x float>] %b, 1
1875 %3 = bitcast float* %a to i8*
1876 tail call void @llvm.aarch64.neon.vst1x2.v4f32(i8* %3, <4 x float> %1, <4 x float> %2, i32 4)
1881 define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b) {
1882 ; CHECK-LABEL: test_vst1q_f64_x2
1883 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1884 %1 = extractvalue [2 x <2 x double>] %b, 0
1885 %2 = extractvalue [2 x <2 x double>] %b, 1
1886 %3 = bitcast double* %a to i8*
1887 tail call void @llvm.aarch64.neon.vst1x2.v2f64(i8* %3, <2 x double> %1, <2 x double> %2, i32 8)
; D-register (64-bit vector) variants of the same x2 stores.
1891 define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b) {
1892 ; CHECK-LABEL: test_vst1_s8_x2
1893 ; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
1894 %1 = extractvalue [2 x <8 x i8>] %b, 0
1895 %2 = extractvalue [2 x <8 x i8>] %b, 1
1896 tail call void @llvm.aarch64.neon.vst1x2.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 1)
1900 define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b) {
1901 ; CHECK-LABEL: test_vst1_s16_x2
1902 ; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
1903 %1 = extractvalue [2 x <4 x i16>] %b, 0
1904 %2 = extractvalue [2 x <4 x i16>] %b, 1
1905 %3 = bitcast i16* %a to i8*
1906 tail call void @llvm.aarch64.neon.vst1x2.v4i16(i8* %3, <4 x i16> %1, <4 x i16> %2, i32 2)
1910 define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b) {
1911 ; CHECK-LABEL: test_vst1_s32_x2
1912 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1913 %1 = extractvalue [2 x <2 x i32>] %b, 0
1914 %2 = extractvalue [2 x <2 x i32>] %b, 1
1915 %3 = bitcast i32* %a to i8*
1916 tail call void @llvm.aarch64.neon.vst1x2.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 4)
1920 define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b) {
1921 ; CHECK-LABEL: test_vst1_s64_x2
1922 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1923 %1 = extractvalue [2 x <1 x i64>] %b, 0
1924 %2 = extractvalue [2 x <1 x i64>] %b, 1
1925 %3 = bitcast i64* %a to i8*
1926 tail call void @llvm.aarch64.neon.vst1x2.v1i64(i8* %3, <1 x i64> %1, <1 x i64> %2, i32 8)
1930 define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b) {
1931 ; CHECK-LABEL: test_vst1_f32_x2
1932 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1933 %1 = extractvalue [2 x <2 x float>] %b, 0
1934 %2 = extractvalue [2 x <2 x float>] %b, 1
1935 %3 = bitcast float* %a to i8*
1936 tail call void @llvm.aarch64.neon.vst1x2.v2f32(i8* %3, <2 x float> %1, <2 x float> %2, i32 4)
1940 define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b) {
1941 ; CHECK-LABEL: test_vst1_f64_x2
1942 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1943 %1 = extractvalue [2 x <1 x double>] %b, 0
1944 %2 = extractvalue [2 x <1 x double>] %b, 1
1945 %3 = bitcast double* %a to i8*
1946 tail call void @llvm.aarch64.neon.vst1x2.v1f64(i8* %3, <1 x double> %1, <1 x double> %2, i32 8)
; vst1_x3 tests: three-vector stores via @llvm.aarch64.neon.vst1x3.*.
; Expected codegen is a single st1 with a three-register list.  The address
; part of each expected pattern continues on a wrapped comment line that is
; not visible here.
1950 define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b) {
1951 ; CHECK-LABEL: test_vst1q_s8_x3
1952 ; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b},
1954 %1 = extractvalue [3 x <16 x i8>] %b, 0
1955 %2 = extractvalue [3 x <16 x i8>] %b, 1
1956 %3 = extractvalue [3 x <16 x i8>] %b, 2
1957 tail call void @llvm.aarch64.neon.vst1x3.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, i32 1)
1961 define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b) {
1962 ; CHECK-LABEL: test_vst1q_s16_x3
1963 ; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h},
1965 %1 = extractvalue [3 x <8 x i16>] %b, 0
1966 %2 = extractvalue [3 x <8 x i16>] %b, 1
1967 %3 = extractvalue [3 x <8 x i16>] %b, 2
1968 %4 = bitcast i16* %a to i8*
1969 tail call void @llvm.aarch64.neon.vst1x3.v8i16(i8* %4, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, i32 2)
1973 define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b) {
1974 ; CHECK-LABEL: test_vst1q_s32_x3
1975 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
1977 %1 = extractvalue [3 x <4 x i32>] %b, 0
1978 %2 = extractvalue [3 x <4 x i32>] %b, 1
1979 %3 = extractvalue [3 x <4 x i32>] %b, 2
1980 %4 = bitcast i32* %a to i8*
1981 tail call void @llvm.aarch64.neon.vst1x3.v4i32(i8* %4, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, i32 4)
1985 define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b) {
1986 ; CHECK-LABEL: test_vst1q_s64_x3
1987 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
1989 %1 = extractvalue [3 x <2 x i64>] %b, 0
1990 %2 = extractvalue [3 x <2 x i64>] %b, 1
1991 %3 = extractvalue [3 x <2 x i64>] %b, 2
1992 %4 = bitcast i64* %a to i8*
1993 tail call void @llvm.aarch64.neon.vst1x3.v2i64(i8* %4, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, i32 8)
1997 define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b) {
1998 ; CHECK-LABEL: test_vst1q_f32_x3
1999 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s},
2001 %1 = extractvalue [3 x <4 x float>] %b, 0
2002 %2 = extractvalue [3 x <4 x float>] %b, 1
2003 %3 = extractvalue [3 x <4 x float>] %b, 2
2004 %4 = bitcast float* %a to i8*
2005 tail call void @llvm.aarch64.neon.vst1x3.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 4)
2009 define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b) {
2010 ; CHECK-LABEL: test_vst1q_f64_x3
2011 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d},
2013 %1 = extractvalue [3 x <2 x double>] %b, 0
2014 %2 = extractvalue [3 x <2 x double>] %b, 1
2015 %3 = extractvalue [3 x <2 x double>] %b, 2
2016 %4 = bitcast double* %a to i8*
2017 tail call void @llvm.aarch64.neon.vst1x3.v2f64(i8* %4, <2 x double> %1, <2 x double> %2, <2 x double> %3, i32 8)
; D-register (64-bit vector) variants of the same x3 stores.
2021 define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b) {
2022 ; CHECK-LABEL: test_vst1_s8_x3
2023 ; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b},
2025 %1 = extractvalue [3 x <8 x i8>] %b, 0
2026 %2 = extractvalue [3 x <8 x i8>] %b, 1
2027 %3 = extractvalue [3 x <8 x i8>] %b, 2
2028 tail call void @llvm.aarch64.neon.vst1x3.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1)
2032 define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b) {
2033 ; CHECK-LABEL: test_vst1_s16_x3
2034 ; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h},
2036 %1 = extractvalue [3 x <4 x i16>] %b, 0
2037 %2 = extractvalue [3 x <4 x i16>] %b, 1
2038 %3 = extractvalue [3 x <4 x i16>] %b, 2
2039 %4 = bitcast i16* %a to i8*
2040 tail call void @llvm.aarch64.neon.vst1x3.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2)
2044 define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b) {
2045 ; CHECK-LABEL: test_vst1_s32_x3
2046 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
2048 %1 = extractvalue [3 x <2 x i32>] %b, 0
2049 %2 = extractvalue [3 x <2 x i32>] %b, 1
2050 %3 = extractvalue [3 x <2 x i32>] %b, 2
2051 %4 = bitcast i32* %a to i8*
2052 tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4)
2056 define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b) {
2057 ; CHECK-LABEL: test_vst1_s64_x3
2058 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
2060 %1 = extractvalue [3 x <1 x i64>] %b, 0
2061 %2 = extractvalue [3 x <1 x i64>] %b, 1
2062 %3 = extractvalue [3 x <1 x i64>] %b, 2
2063 %4 = bitcast i64* %a to i8*
2064 tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8)
2068 define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b) {
2069 ; CHECK-LABEL: test_vst1_f32_x3
2070 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s},
2072 %1 = extractvalue [3 x <2 x float>] %b, 0
2073 %2 = extractvalue [3 x <2 x float>] %b, 1
2074 %3 = extractvalue [3 x <2 x float>] %b, 2
2075 %4 = bitcast float* %a to i8*
2076 tail call void @llvm.aarch64.neon.vst1x3.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 4)
2080 define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b) {
2081 ; CHECK-LABEL: test_vst1_f64_x3
2082 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d},
2084 %1 = extractvalue [3 x <1 x double>] %b, 0
2085 %2 = extractvalue [3 x <1 x double>] %b, 1
2086 %3 = extractvalue [3 x <1 x double>] %b, 2
2087 %4 = bitcast double* %a to i8*
2088 tail call void @llvm.aarch64.neon.vst1x3.v1f64(i8* %4, <1 x double> %1, <1 x double> %2, <1 x double> %3, i32 8)
; vst1_x4 tests: four-vector stores via @llvm.aarch64.neon.vst1x4.*.
; Expected codegen is a single st1 with a four-register list.  As with the
; load tests, the wrapped portion of each expected pattern is a plain
; comment and only the prefixed line is matched.
2092 define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b) {
2093 ; CHECK-LABEL: test_vst1q_s8_x4
2094 ; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b,
2095 ; v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
2096 %1 = extractvalue [4 x <16 x i8>] %b, 0
2097 %2 = extractvalue [4 x <16 x i8>] %b, 1
2098 %3 = extractvalue [4 x <16 x i8>] %b, 2
2099 %4 = extractvalue [4 x <16 x i8>] %b, 3
2100 tail call void @llvm.aarch64.neon.vst1x4.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, <16 x i8> %4, i32 1)
2104 define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b) {
2105 ; CHECK-LABEL: test_vst1q_s16_x4
2106 ; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h,
2107 ; v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
2108 %1 = extractvalue [4 x <8 x i16>] %b, 0
2109 %2 = extractvalue [4 x <8 x i16>] %b, 1
2110 %3 = extractvalue [4 x <8 x i16>] %b, 2
2111 %4 = extractvalue [4 x <8 x i16>] %b, 3
2112 %5 = bitcast i16* %a to i8*
2113 tail call void @llvm.aarch64.neon.vst1x4.v8i16(i8* %5, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, <8 x i16> %4, i32 2)
2117 define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b) {
2118 ; CHECK-LABEL: test_vst1q_s32_x4
2119 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
2120 ; v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
2121 %1 = extractvalue [4 x <4 x i32>] %b, 0
2122 %2 = extractvalue [4 x <4 x i32>] %b, 1
2123 %3 = extractvalue [4 x <4 x i32>] %b, 2
2124 %4 = extractvalue [4 x <4 x i32>] %b, 3
2125 %5 = bitcast i32* %a to i8*
2126 tail call void @llvm.aarch64.neon.vst1x4.v4i32(i8* %5, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, i32 4)
2130 define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b) {
2131 ; CHECK-LABEL: test_vst1q_s64_x4
2132 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
2133 ; v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
2134 %1 = extractvalue [4 x <2 x i64>] %b, 0
2135 %2 = extractvalue [4 x <2 x i64>] %b, 1
2136 %3 = extractvalue [4 x <2 x i64>] %b, 2
2137 %4 = extractvalue [4 x <2 x i64>] %b, 3
2138 %5 = bitcast i64* %a to i8*
2139 tail call void @llvm.aarch64.neon.vst1x4.v2i64(i8* %5, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, <2 x i64> %4, i32 8)
2143 define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b) {
2144 ; CHECK-LABEL: test_vst1q_f32_x4
2145 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s,
2146 ; v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
2147 %1 = extractvalue [4 x <4 x float>] %b, 0
2148 %2 = extractvalue [4 x <4 x float>] %b, 1
2149 %3 = extractvalue [4 x <4 x float>] %b, 2
2150 %4 = extractvalue [4 x <4 x float>] %b, 3
2151 %5 = bitcast float* %a to i8*
2152 tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4)
2156 define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b) {
2157 ; CHECK-LABEL: test_vst1q_f64_x4
2158 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d,
2159 ; v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
2160 %1 = extractvalue [4 x <2 x double>] %b, 0
2161 %2 = extractvalue [4 x <2 x double>] %b, 1
2162 %3 = extractvalue [4 x <2 x double>] %b, 2
2163 %4 = extractvalue [4 x <2 x double>] %b, 3
2164 %5 = bitcast double* %a to i8*
2165 tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8)
; D-register (64-bit vector) variants of the same x4 stores.
2169 define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b) {
2170 ; CHECK-LABEL: test_vst1_s8_x4
2171 ; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b,
2172 ; v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
2173 %1 = extractvalue [4 x <8 x i8>] %b, 0
2174 %2 = extractvalue [4 x <8 x i8>] %b, 1
2175 %3 = extractvalue [4 x <8 x i8>] %b, 2
2176 %4 = extractvalue [4 x <8 x i8>] %b, 3
2177 tail call void @llvm.aarch64.neon.vst1x4.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %4, i32 1)
2181 define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b) {
2182 ; CHECK-LABEL: test_vst1_s16_x4
2183 ; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h,
2184 ; v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
2185 %1 = extractvalue [4 x <4 x i16>] %b, 0
2186 %2 = extractvalue [4 x <4 x i16>] %b, 1
2187 %3 = extractvalue [4 x <4 x i16>] %b, 2
2188 %4 = extractvalue [4 x <4 x i16>] %b, 3
2189 %5 = bitcast i16* %a to i8*
2190 tail call void @llvm.aarch64.neon.vst1x4.v4i16(i8* %5, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, <4 x i16> %4, i32 2)
2194 define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b) {
2195 ; CHECK-LABEL: test_vst1_s32_x4
2196 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
2197 ; v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
2198 %1 = extractvalue [4 x <2 x i32>] %b, 0
2199 %2 = extractvalue [4 x <2 x i32>] %b, 1
2200 %3 = extractvalue [4 x <2 x i32>] %b, 2
2201 %4 = extractvalue [4 x <2 x i32>] %b, 3
2202 %5 = bitcast i32* %a to i8*
2203 tail call void @llvm.aarch64.neon.vst1x4.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 4)
2207 define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b) {
2208 ; CHECK-LABEL: test_vst1_s64_x4
2209 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
2210 ; v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
2211 %1 = extractvalue [4 x <1 x i64>] %b, 0
2212 %2 = extractvalue [4 x <1 x i64>] %b, 1
2213 %3 = extractvalue [4 x <1 x i64>] %b, 2
2214 %4 = extractvalue [4 x <1 x i64>] %b, 3
2215 %5 = bitcast i64* %a to i8*
2216 tail call void @llvm.aarch64.neon.vst1x4.v1i64(i8* %5, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, <1 x i64> %4, i32 8)
2220 define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b) {
2221 ; CHECK-LABEL: test_vst1_f32_x4
2222 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s,
2223 ; v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
2224 %1 = extractvalue [4 x <2 x float>] %b, 0
2225 %2 = extractvalue [4 x <2 x float>] %b, 1
2226 %3 = extractvalue [4 x <2 x float>] %b, 2
2227 %4 = extractvalue [4 x <2 x float>] %b, 3
2228 %5 = bitcast float* %a to i8*
2229 tail call void @llvm.aarch64.neon.vst1x4.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 4)
2233 define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b) {
2234 ; CHECK-LABEL: test_vst1_f64_x4
2235 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d,
2236 ; v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
2237 %1 = extractvalue [4 x <1 x double>] %b, 0
2238 %2 = extractvalue [4 x <1 x double>] %b, 1
2239 %3 = extractvalue [4 x <1 x double>] %b, 2
2240 %4 = extractvalue [4 x <1 x double>] %b, 3
2241 %5 = bitcast double* %a to i8*
2242 tail call void @llvm.aarch64.neon.vst1x4.v1f64(i8* %5, <1 x double> %1, <1 x double> %2, <1 x double> %3, <1 x double> %4, i32 8)
; Declarations of the NEON multi-vector load/store intrinsics exercised
; above: vld1x2/vld1x3/vld1x4 return 2/3/4 vectors as a literal struct;
; vst1x2/vst1x3/vst1x4 take the destination pointer, 2/3/4 vectors, and an
; i32 alignment as the final operand.
2246 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32)
2247 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32)
2248 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8*, i32)
2249 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8*, i32)
2250 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8*, i32)
2251 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8*, i32)
2252 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8*, i32)
2253 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8*, i32)
2254 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8*, i32)
2255 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8*, i32)
2256 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8*, i32)
2257 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8*, i32)
2258 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8*, i32)
2259 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32)
2260 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8*, i32)
2261 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32)
2262 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8*, i32)
2263 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8*, i32)
2264 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8*, i32)
2265 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8*, i32)
2266 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8*, i32)
2267 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8*, i32)
2268 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8*, i32)
2269 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8*, i32)
2270 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8*, i32)
2271 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8*, i32)
2272 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8*, i32)
2273 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8*, i32)
2274 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32)
2275 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8*, i32)
2276 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32)
2277 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8*, i32)
2278 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8*, i32)
2279 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8*, i32)
2280 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8*, i32)
2281 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8*, i32)
2282 declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
2283 declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
2284 declare void @llvm.aarch64.neon.vst1x2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
2285 declare void @llvm.aarch64.neon.vst1x2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
2286 declare void @llvm.aarch64.neon.vst1x2.v4f32(i8*, <4 x float>, <4 x float>, i32)
2287 declare void @llvm.aarch64.neon.vst1x2.v2f64(i8*, <2 x double>, <2 x double>, i32)
2288 declare void @llvm.aarch64.neon.vst1x2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
2289 declare void @llvm.aarch64.neon.vst1x2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
2290 declare void @llvm.aarch64.neon.vst1x2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
2291 declare void @llvm.aarch64.neon.vst1x2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
2292 declare void @llvm.aarch64.neon.vst1x2.v2f32(i8*, <2 x float>, <2 x float>, i32)
2293 declare void @llvm.aarch64.neon.vst1x2.v1f64(i8*, <1 x double>, <1 x double>, i32)
2294 declare void @llvm.aarch64.neon.vst1x3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
2295 declare void @llvm.aarch64.neon.vst1x3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
2296 declare void @llvm.aarch64.neon.vst1x3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
2297 declare void @llvm.aarch64.neon.vst1x3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
2298 declare void @llvm.aarch64.neon.vst1x3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
2299 declare void @llvm.aarch64.neon.vst1x3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
2300 declare void @llvm.aarch64.neon.vst1x3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
2301 declare void @llvm.aarch64.neon.vst1x3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
2302 declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
2303 declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
2304 declare void @llvm.aarch64.neon.vst1x3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
2305 declare void @llvm.aarch64.neon.vst1x3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
2306 declare void @llvm.aarch64.neon.vst1x4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
2307 declare void @llvm.aarch64.neon.vst1x4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
2308 declare void @llvm.aarch64.neon.vst1x4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
2309 declare void @llvm.aarch64.neon.vst1x4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
2310 declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
2311 declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
2312 declare void @llvm.aarch64.neon.vst1x4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
2313 declare void @llvm.aarch64.neon.vst1x4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
2314 declare void @llvm.aarch64.neon.vst1x4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
2315 declare void @llvm.aarch64.neon.vst1x4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
2316 declare void @llvm.aarch64.neon.vst1x4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
2317 declare void @llvm.aarch64.neon.vst1x4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)