1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
; Plain <16 x i8> load/store -> ld1/st1 with the .16b arrangement; base register may be xN or sp.
3 define void @test_ldst1_v16i8(<16 x i8>* %ptr, <16 x i8>* %ptr2) {
4 ; CHECK-LABEL: test_ldst1_v16i8:
5 ; CHECK: ld1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
6 ; CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
7 %tmp = load <16 x i8>* %ptr
8 store <16 x i8> %tmp, <16 x i8>* %ptr2
; Plain <8 x i16> load/store -> ld1/st1 with the .8h arrangement; base register may be xN or sp.
12 define void @test_ldst1_v8i16(<8 x i16>* %ptr, <8 x i16>* %ptr2) {
13 ; CHECK-LABEL: test_ldst1_v8i16:
14 ; CHECK: ld1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
15 ; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
16 %tmp = load <8 x i16>* %ptr
17 store <8 x i16> %tmp, <8 x i16>* %ptr2
; Plain <4 x i32> load/store -> ld1/st1 with the .4s arrangement; base register may be xN or sp.
21 define void @test_ldst1_v4i32(<4 x i32>* %ptr, <4 x i32>* %ptr2) {
22 ; CHECK-LABEL: test_ldst1_v4i32:
23 ; CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
24 ; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
25 %tmp = load <4 x i32>* %ptr
26 store <4 x i32> %tmp, <4 x i32>* %ptr2
; Plain <2 x i64> load/store -> ld1/st1 with the .2d arrangement; base register may be xN or sp.
30 define void @test_ldst1_v2i64(<2 x i64>* %ptr, <2 x i64>* %ptr2) {
31 ; CHECK-LABEL: test_ldst1_v2i64:
32 ; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
33 ; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
34 %tmp = load <2 x i64>* %ptr
35 store <2 x i64> %tmp, <2 x i64>* %ptr2
; Plain <8 x i8> load/store -> ld1/st1 with the .8b arrangement; base register may be xN or sp.
39 define void @test_ldst1_v8i8(<8 x i8>* %ptr, <8 x i8>* %ptr2) {
40 ; CHECK-LABEL: test_ldst1_v8i8:
41 ; CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
42 ; CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
43 %tmp = load <8 x i8>* %ptr
44 store <8 x i8> %tmp, <8 x i8>* %ptr2
; Plain <4 x i16> load/store -> ld1/st1 with the .4h arrangement; base register may be xN or sp.
48 define void @test_ldst1_v4i16(<4 x i16>* %ptr, <4 x i16>* %ptr2) {
49 ; CHECK-LABEL: test_ldst1_v4i16:
50 ; CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
51 ; CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
52 %tmp = load <4 x i16>* %ptr
53 store <4 x i16> %tmp, <4 x i16>* %ptr2
; Plain <2 x i32> load/store -> ld1/st1 with the .2s arrangement; base register may be xN or sp.
57 define void @test_ldst1_v2i32(<2 x i32>* %ptr, <2 x i32>* %ptr2) {
58 ; CHECK-LABEL: test_ldst1_v2i32:
59 ; CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
60 ; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
61 %tmp = load <2 x i32>* %ptr
62 store <2 x i32> %tmp, <2 x i32>* %ptr2
; Plain <1 x i64> load/store -> ld1/st1 with the .1d arrangement; base register may be xN or sp.
66 define void @test_ldst1_v1i64(<1 x i64>* %ptr, <1 x i64>* %ptr2) {
67 ; CHECK-LABEL: test_ldst1_v1i64:
68 ; CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
69 ; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
70 %tmp = load <1 x i64>* %ptr
71 store <1 x i64> %tmp, <1 x i64>* %ptr2
; Aggregate types: each wraps a fixed-size array of 2, 3 or 4 identical vectors.
; The vld2/vld3/vld4 tests in this file use them as return types.
; --- two-vector aggregates ---
75 %struct.int8x16x2_t = type { [2 x <16 x i8>] }
76 %struct.int16x8x2_t = type { [2 x <8 x i16>] }
77 %struct.int32x4x2_t = type { [2 x <4 x i32>] }
78 %struct.int64x2x2_t = type { [2 x <2 x i64>] }
79 %struct.float32x4x2_t = type { [2 x <4 x float>] }
80 %struct.float64x2x2_t = type { [2 x <2 x double>] }
81 %struct.int8x8x2_t = type { [2 x <8 x i8>] }
82 %struct.int16x4x2_t = type { [2 x <4 x i16>] }
83 %struct.int32x2x2_t = type { [2 x <2 x i32>] }
84 %struct.int64x1x2_t = type { [2 x <1 x i64>] }
85 %struct.float32x2x2_t = type { [2 x <2 x float>] }
86 %struct.float64x1x2_t = type { [2 x <1 x double>] }
; --- three-vector aggregates ---
87 %struct.int8x16x3_t = type { [3 x <16 x i8>] }
88 %struct.int16x8x3_t = type { [3 x <8 x i16>] }
89 %struct.int32x4x3_t = type { [3 x <4 x i32>] }
90 %struct.int64x2x3_t = type { [3 x <2 x i64>] }
91 %struct.float32x4x3_t = type { [3 x <4 x float>] }
92 %struct.float64x2x3_t = type { [3 x <2 x double>] }
93 %struct.int8x8x3_t = type { [3 x <8 x i8>] }
94 %struct.int16x4x3_t = type { [3 x <4 x i16>] }
95 %struct.int32x2x3_t = type { [3 x <2 x i32>] }
96 %struct.int64x1x3_t = type { [3 x <1 x i64>] }
97 %struct.float32x2x3_t = type { [3 x <2 x float>] }
98 %struct.float64x1x3_t = type { [3 x <1 x double>] }
; --- four-vector aggregates ---
99 %struct.int8x16x4_t = type { [4 x <16 x i8>] }
100 %struct.int16x8x4_t = type { [4 x <8 x i16>] }
101 %struct.int32x4x4_t = type { [4 x <4 x i32>] }
102 %struct.int64x2x4_t = type { [4 x <2 x i64>] }
103 %struct.float32x4x4_t = type { [4 x <4 x float>] }
104 %struct.float64x2x4_t = type { [4 x <2 x double>] }
105 %struct.int8x8x4_t = type { [4 x <8 x i8>] }
106 %struct.int16x4x4_t = type { [4 x <4 x i16>] }
107 %struct.int32x2x4_t = type { [4 x <2 x i32>] }
108 %struct.int64x1x4_t = type { [4 x <1 x i64>] }
109 %struct.float32x2x4_t = type { [4 x <2 x float>] }
110 %struct.float64x1x4_t = type { [4 x <1 x double>] }
; vld1 intrinsic producing <16 x i8> -> single-register ld1 (.16b); base register may be xN or sp.
113 define <16 x i8> @test_vld1q_s8(i8* readonly %a) {
114 ; CHECK-LABEL: test_vld1q_s8:
115 ; CHECK: ld1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
116 %vld1 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %a, i32 1)
; vld1 intrinsic producing <8 x i16> -> single-register ld1 (.8h); base register may be xN or sp.
120 define <8 x i16> @test_vld1q_s16(i16* readonly %a) {
121 ; CHECK-LABEL: test_vld1q_s16:
122 ; CHECK: ld1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
123 %1 = bitcast i16* %a to i8*
124 %vld1 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %1, i32 2)
; vld1 intrinsic producing <4 x i32> -> single-register ld1 (.4s); base register may be xN or sp.
128 define <4 x i32> @test_vld1q_s32(i32* readonly %a) {
129 ; CHECK-LABEL: test_vld1q_s32:
130 ; CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
131 %1 = bitcast i32* %a to i8*
132 %vld1 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %1, i32 4)
; vld1 intrinsic producing <2 x i64> -> single-register ld1 (.2d); base register may be xN or sp.
136 define <2 x i64> @test_vld1q_s64(i64* readonly %a) {
137 ; CHECK-LABEL: test_vld1q_s64:
138 ; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
139 %1 = bitcast i64* %a to i8*
140 %vld1 = tail call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %1, i32 8)
; vld1 intrinsic producing <4 x float> -> single-register ld1 (.4s); base register may be xN or sp.
144 define <4 x float> @test_vld1q_f32(float* readonly %a) {
145 ; CHECK-LABEL: test_vld1q_f32:
146 ; CHECK: ld1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
147 %1 = bitcast float* %a to i8*
148 %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %1, i32 4)
149 ret <4 x float> %vld1
; vld1 intrinsic producing <2 x double> -> single-register ld1 (.2d); base register may be xN or sp.
152 define <2 x double> @test_vld1q_f64(double* readonly %a) {
153 ; CHECK-LABEL: test_vld1q_f64:
154 ; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
155 %1 = bitcast double* %a to i8*
156 %vld1 = tail call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %1, i32 8)
157 ret <2 x double> %vld1
; vld1 intrinsic producing <8 x i8> -> single-register ld1 (.8b); base register may be xN or sp.
160 define <8 x i8> @test_vld1_s8(i8* readonly %a) {
161 ; CHECK-LABEL: test_vld1_s8:
162 ; CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
163 %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
; vld1 intrinsic producing <4 x i16> -> single-register ld1 (.4h); base register may be xN or sp.
167 define <4 x i16> @test_vld1_s16(i16* readonly %a) {
168 ; CHECK-LABEL: test_vld1_s16:
169 ; CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
170 %1 = bitcast i16* %a to i8*
171 %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
; vld1 intrinsic producing <2 x i32> -> single-register ld1 (.2s); base register may be xN or sp.
175 define <2 x i32> @test_vld1_s32(i32* readonly %a) {
176 ; CHECK-LABEL: test_vld1_s32:
177 ; CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
178 %1 = bitcast i32* %a to i8*
179 %vld1 = tail call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %1, i32 4)
; vld1 intrinsic producing <1 x i64> -> single-register ld1 (.1d); base register may be xN or sp.
183 define <1 x i64> @test_vld1_s64(i64* readonly %a) {
184 ; CHECK-LABEL: test_vld1_s64:
185 ; CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
186 %1 = bitcast i64* %a to i8*
187 %vld1 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %1, i32 8)
; vld1 intrinsic producing <2 x float> -> single-register ld1 (.2s); base register may be xN or sp.
191 define <2 x float> @test_vld1_f32(float* readonly %a) {
192 ; CHECK-LABEL: test_vld1_f32:
193 ; CHECK: ld1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
194 %1 = bitcast float* %a to i8*
195 %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %1, i32 4)
196 ret <2 x float> %vld1
; vld1 intrinsic producing <1 x double> -> single-register ld1 (.1d); base register may be xN or sp.
199 define <1 x double> @test_vld1_f64(double* readonly %a) {
200 ; CHECK-LABEL: test_vld1_f64:
201 ; CHECK: ld1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
202 %1 = bitcast double* %a to i8*
203 %vld1 = tail call <1 x double> @llvm.arm.neon.vld1.v1f64(i8* %1, i32 8)
204 ret <1 x double> %vld1
; Polynomial-named variant: same v8i8 vld1 intrinsic -> single-register ld1 (.8b); base may be xN or sp.
207 define <8 x i8> @test_vld1_p8(i8* readonly %a) {
208 ; CHECK-LABEL: test_vld1_p8:
209 ; CHECK: ld1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
210 %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1)
; Polynomial-named variant: same v4i16 vld1 intrinsic -> single-register ld1 (.4h); base may be xN or sp.
214 define <4 x i16> @test_vld1_p16(i16* readonly %a) {
215 ; CHECK-LABEL: test_vld1_p16:
216 ; CHECK: ld1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
217 %1 = bitcast i16* %a to i8*
218 %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2)
; vld2 of two <16 x i8> vectors -> ld2 with a two-register .16b list; base register may be xN or sp.
; The two results are repacked into %struct.int8x16x2_t for the return.
222 define %struct.int8x16x2_t @test_vld2q_s8(i8* readonly %a) {
223 ; CHECK-LABEL: test_vld2q_s8:
224 ; CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
225 %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %a, i32 1)
226 %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
227 %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
228 %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2.fca.0.extract, 0, 0
229 %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2.fca.1.extract, 0, 1
230 ret %struct.int8x16x2_t %.fca.0.1.insert
; vld2 of two <8 x i16> vectors -> ld2 with a two-register .8h list; base register may be xN or sp.
; The two results are repacked into %struct.int16x8x2_t for the return.
233 define %struct.int16x8x2_t @test_vld2q_s16(i16* readonly %a) {
234 ; CHECK-LABEL: test_vld2q_s16:
235 ; CHECK: ld2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
236 %1 = bitcast i16* %a to i8*
237 %vld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8* %1, i32 2)
238 %vld2.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 0
239 %vld2.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 1
240 %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2.fca.0.extract, 0, 0
241 %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2.fca.1.extract, 0, 1
242 ret %struct.int16x8x2_t %.fca.0.1.insert
; vld2 of two <4 x i32> vectors -> ld2 with a two-register .4s list; base register may be xN or sp.
; The two results are repacked into %struct.int32x4x2_t for the return.
245 define %struct.int32x4x2_t @test_vld2q_s32(i32* readonly %a) {
246 ; CHECK-LABEL: test_vld2q_s32:
247 ; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
248 %1 = bitcast i32* %a to i8*
249 %vld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %1, i32 4)
250 %vld2.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0
251 %vld2.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1
252 %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2.fca.0.extract, 0, 0
253 %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2.fca.1.extract, 0, 1
254 ret %struct.int32x4x2_t %.fca.0.1.insert
; vld2 of two <2 x i64> vectors -> ld2 with a two-register .2d list; base register may be xN or sp.
; The two results are repacked into %struct.int64x2x2_t for the return.
257 define %struct.int64x2x2_t @test_vld2q_s64(i64* readonly %a) {
258 ; CHECK-LABEL: test_vld2q_s64:
259 ; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
260 %1 = bitcast i64* %a to i8*
261 %vld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8* %1, i32 8)
262 %vld2.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 0
263 %vld2.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 1
264 %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2.fca.0.extract, 0, 0
265 %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2.fca.1.extract, 0, 1
266 ret %struct.int64x2x2_t %.fca.0.1.insert
; vld2 of two <4 x float> vectors -> ld2 with a two-register .4s list; base register may be xN or sp.
; The two results are repacked into %struct.float32x4x2_t for the return.
269 define %struct.float32x4x2_t @test_vld2q_f32(float* readonly %a) {
270 ; CHECK-LABEL: test_vld2q_f32:
271 ; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
272 %1 = bitcast float* %a to i8*
273 %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
274 %vld2.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 0
275 %vld2.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 1
276 %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2.fca.0.extract, 0, 0
277 %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2.fca.1.extract, 0, 1
278 ret %struct.float32x4x2_t %.fca.0.1.insert
; vld2 of two <2 x double> vectors -> ld2 with a two-register .2d list; base register may be xN or sp.
; The two results are repacked into %struct.float64x2x2_t for the return.
281 define %struct.float64x2x2_t @test_vld2q_f64(double* readonly %a) {
282 ; CHECK-LABEL: test_vld2q_f64:
283 ; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
284 %1 = bitcast double* %a to i8*
285 %vld2 = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8* %1, i32 8)
286 %vld2.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 0
287 %vld2.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 1
288 %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2.fca.0.extract, 0, 0
289 %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2.fca.1.extract, 0, 1
290 ret %struct.float64x2x2_t %.fca.0.1.insert
; vld2 of two <8 x i8> vectors -> ld2 with a two-register .8b list; base register may be xN or sp.
; The two results are repacked into %struct.int8x8x2_t for the return.
293 define %struct.int8x8x2_t @test_vld2_s8(i8* readonly %a) {
294 ; CHECK-LABEL: test_vld2_s8:
295 ; CHECK: ld2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
296 %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %a, i32 1)
297 %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0
298 %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
299 %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2.fca.0.extract, 0, 0
300 %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2.fca.1.extract, 0, 1
301 ret %struct.int8x8x2_t %.fca.0.1.insert
; vld2 of two <4 x i16> vectors -> ld2 with a two-register .4h list; base register may be xN or sp.
; The two results are repacked into %struct.int16x4x2_t for the return.
304 define %struct.int16x4x2_t @test_vld2_s16(i16* readonly %a) {
305 ; CHECK-LABEL: test_vld2_s16:
306 ; CHECK: ld2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
307 %1 = bitcast i16* %a to i8*
308 %vld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %1, i32 2)
309 %vld2.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 0
310 %vld2.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 1
311 %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2.fca.0.extract, 0, 0
312 %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2.fca.1.extract, 0, 1
313 ret %struct.int16x4x2_t %.fca.0.1.insert
; vld2 of two <2 x i32> vectors -> ld2 with a two-register .2s list; base register may be xN or sp.
; The two results are repacked into %struct.int32x2x2_t for the return.
316 define %struct.int32x2x2_t @test_vld2_s32(i32* readonly %a) {
317 ; CHECK-LABEL: test_vld2_s32:
318 ; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
319 %1 = bitcast i32* %a to i8*
320 %vld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %1, i32 4)
321 %vld2.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 0
322 %vld2.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 1
323 %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2.fca.0.extract, 0, 0
324 %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2.fca.1.extract, 0, 1
325 ret %struct.int32x2x2_t %.fca.0.1.insert
; vld2 of two <1 x i64> vectors: the expected instruction here is ld1 with a two-register .1d list
; (not ld2); base register may be xN or sp. Results are repacked into %struct.int64x1x2_t.
328 define %struct.int64x1x2_t @test_vld2_s64(i64* readonly %a) {
329 ; CHECK-LABEL: test_vld2_s64:
330 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
331 %1 = bitcast i64* %a to i8*
332 %vld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %1, i32 8)
333 %vld2.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 0
334 %vld2.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 1
335 %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2.fca.0.extract, 0, 0
336 %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2.fca.1.extract, 0, 1
337 ret %struct.int64x1x2_t %.fca.0.1.insert
; vld2 of two <2 x float> vectors -> ld2 with a two-register .2s list; base register may be xN or sp.
; The two results are repacked into %struct.float32x2x2_t for the return.
340 define %struct.float32x2x2_t @test_vld2_f32(float* readonly %a) {
341 ; CHECK-LABEL: test_vld2_f32:
342 ; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
343 %1 = bitcast float* %a to i8*
344 %vld2 = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %1, i32 4)
345 %vld2.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 0
346 %vld2.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 1
347 %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2.fca.0.extract, 0, 0
348 %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2.fca.1.extract, 0, 1
349 ret %struct.float32x2x2_t %.fca.0.1.insert
; vld2 of two <1 x double> vectors: the expected instruction here is ld1 with a two-register .1d list
; (not ld2); base register may be xN or sp. Results are repacked into %struct.float64x1x2_t.
352 define %struct.float64x1x2_t @test_vld2_f64(double* readonly %a) {
353 ; CHECK-LABEL: test_vld2_f64:
354 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
355 %1 = bitcast double* %a to i8*
356 %vld2 = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %1, i32 8)
357 %vld2.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 0
358 %vld2.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 1
359 %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2.fca.0.extract, 0, 0
360 %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2.fca.1.extract, 0, 1
361 ret %struct.float64x1x2_t %.fca.0.1.insert
; vld3 of three <16 x i8> vectors -> ld3 with a three-register .16b list; base register may be xN or sp.
; The three results are repacked into %struct.int8x16x3_t for the return.
364 define %struct.int8x16x3_t @test_vld3q_s8(i8* readonly %a) {
365 ; CHECK-LABEL: test_vld3q_s8:
366 ; CHECK: ld3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
367 %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %a, i32 1)
368 %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0
369 %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
370 %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
371 %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3.fca.0.extract, 0, 0
372 %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3.fca.1.extract, 0, 1
373 %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3.fca.2.extract, 0, 2
374 ret %struct.int8x16x3_t %.fca.0.2.insert
; vld3 of three <8 x i16> vectors -> ld3 with a three-register .8h list; base register may be xN or sp.
; The three results are repacked into %struct.int16x8x3_t for the return.
377 define %struct.int16x8x3_t @test_vld3q_s16(i16* readonly %a) {
378 ; CHECK-LABEL: test_vld3q_s16:
379 ; CHECK: ld3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
380 %1 = bitcast i16* %a to i8*
381 %vld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8* %1, i32 2)
382 %vld3.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 0
383 %vld3.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 1
384 %vld3.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 2
385 %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3.fca.0.extract, 0, 0
386 %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3.fca.1.extract, 0, 1
387 %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3.fca.2.extract, 0, 2
388 ret %struct.int16x8x3_t %.fca.0.2.insert
; vld3 of three <4 x i32> vectors -> ld3 with a three-register .4s list; base register may be xN or sp.
; The three results are repacked into %struct.int32x4x3_t for the return.
391 define %struct.int32x4x3_t @test_vld3q_s32(i32* readonly %a) {
392 ; CHECK-LABEL: test_vld3q_s32:
393 ; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
394 %1 = bitcast i32* %a to i8*
395 %vld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %1, i32 4)
396 %vld3.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 0
397 %vld3.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 1
398 %vld3.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 2
399 %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3.fca.0.extract, 0, 0
400 %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3.fca.1.extract, 0, 1
401 %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3.fca.2.extract, 0, 2
402 ret %struct.int32x4x3_t %.fca.0.2.insert
; vld3 of three <2 x i64> vectors -> ld3 with a three-register .2d list; base register may be xN or sp.
; The three results are repacked into %struct.int64x2x3_t for the return.
405 define %struct.int64x2x3_t @test_vld3q_s64(i64* readonly %a) {
406 ; CHECK-LABEL: test_vld3q_s64:
407 ; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
408 %1 = bitcast i64* %a to i8*
409 %vld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8* %1, i32 8)
410 %vld3.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 0
411 %vld3.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 1
412 %vld3.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 2
413 %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3.fca.0.extract, 0, 0
414 %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3.fca.1.extract, 0, 1
415 %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3.fca.2.extract, 0, 2
416 ret %struct.int64x2x3_t %.fca.0.2.insert
; vld3 of three <4 x float> vectors -> ld3 with a three-register .4s list; base register may be xN or sp.
; The three results are repacked into %struct.float32x4x3_t for the return.
419 define %struct.float32x4x3_t @test_vld3q_f32(float* readonly %a) {
420 ; CHECK-LABEL: test_vld3q_f32:
421 ; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
422 %1 = bitcast float* %a to i8*
423 %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %1, i32 4)
424 %vld3.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0
425 %vld3.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 1
426 %vld3.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 2
427 %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3.fca.0.extract, 0, 0
428 %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3.fca.1.extract, 0, 1
429 %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3.fca.2.extract, 0, 2
430 ret %struct.float32x4x3_t %.fca.0.2.insert
; vld3 of three <2 x double> vectors -> ld3 with a three-register .2d list; base register may be xN or sp.
; The three results are repacked into %struct.float64x2x3_t for the return.
433 define %struct.float64x2x3_t @test_vld3q_f64(double* readonly %a) {
434 ; CHECK-LABEL: test_vld3q_f64:
435 ; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
436 %1 = bitcast double* %a to i8*
437 %vld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8* %1, i32 8)
438 %vld3.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 0
439 %vld3.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 1
440 %vld3.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 2
441 %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3.fca.0.extract, 0, 0
442 %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3.fca.1.extract, 0, 1
443 %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3.fca.2.extract, 0, 2
444 ret %struct.float64x2x3_t %.fca.0.2.insert
; vld3 of three <8 x i8> vectors -> ld3 with a three-register .8b list; base register may be xN or sp.
; The three results are repacked into %struct.int8x8x3_t for the return.
447 define %struct.int8x8x3_t @test_vld3_s8(i8* readonly %a) {
448 ; CHECK-LABEL: test_vld3_s8:
449 ; CHECK: ld3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
450 %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %a, i32 1)
451 %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0
452 %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
453 %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
454 %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3.fca.0.extract, 0, 0
455 %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3.fca.1.extract, 0, 1
456 %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3.fca.2.extract, 0, 2
457 ret %struct.int8x8x3_t %.fca.0.2.insert
; vld3 of three <4 x i16> vectors -> ld3 with a three-register .4h list; base register may be xN or sp.
; The three results are repacked into %struct.int16x4x3_t for the return.
460 define %struct.int16x4x3_t @test_vld3_s16(i16* readonly %a) {
461 ; CHECK-LABEL: test_vld3_s16:
462 ; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
463 %1 = bitcast i16* %a to i8*
464 %vld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %1, i32 2)
465 %vld3.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 0
466 %vld3.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 1
467 %vld3.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 2
468 %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3.fca.0.extract, 0, 0
469 %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3.fca.1.extract, 0, 1
470 %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3.fca.2.extract, 0, 2
471 ret %struct.int16x4x3_t %.fca.0.2.insert
; vld3 of three <2 x i32> vectors -> ld3 with a three-register .2s list; base register may be xN or sp.
; The three results are repacked into %struct.int32x2x3_t for the return.
474 define %struct.int32x2x3_t @test_vld3_s32(i32* readonly %a) {
475 ; CHECK-LABEL: test_vld3_s32:
476 ; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
477 %1 = bitcast i32* %a to i8*
478 %vld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8* %1, i32 4)
479 %vld3.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 0
480 %vld3.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 1
481 %vld3.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 2
482 %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3.fca.0.extract, 0, 0
483 %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3.fca.1.extract, 0, 1
484 %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3.fca.2.extract, 0, 2
485 ret %struct.int32x2x3_t %.fca.0.2.insert
; vld3 of three <1 x i64> vectors: the expected instruction here is ld1 with a three-register .1d list
; (not ld3); base register may be xN or sp. Results are repacked into %struct.int64x1x3_t.
488 define %struct.int64x1x3_t @test_vld3_s64(i64* readonly %a) {
489 ; CHECK-LABEL: test_vld3_s64:
490 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
491 %1 = bitcast i64* %a to i8*
492 %vld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %1, i32 8)
493 %vld3.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 0
494 %vld3.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 1
495 %vld3.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 2
496 %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3.fca.0.extract, 0, 0
497 %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3.fca.1.extract, 0, 1
498 %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3.fca.2.extract, 0, 2
499 ret %struct.int64x1x3_t %.fca.0.2.insert
; vld3 of three <2 x float> vectors -> ld3 with a three-register .2s list; base register may be xN or sp.
; The three results are repacked into %struct.float32x2x3_t for the return.
502 define %struct.float32x2x3_t @test_vld3_f32(float* readonly %a) {
503 ; CHECK-LABEL: test_vld3_f32:
504 ; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
505 %1 = bitcast float* %a to i8*
506 %vld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8* %1, i32 4)
507 %vld3.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 0
508 %vld3.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 1
509 %vld3.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 2
510 %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3.fca.0.extract, 0, 0
511 %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3.fca.1.extract, 0, 1
512 %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3.fca.2.extract, 0, 2
513 ret %struct.float32x2x3_t %.fca.0.2.insert
; vld3 of three <1 x double> vectors: the expected instruction here is ld1 with a three-register .1d list
; (not ld3); base register may be xN or sp. Results are repacked into %struct.float64x1x3_t.
516 define %struct.float64x1x3_t @test_vld3_f64(double* readonly %a) {
517 ; CHECK-LABEL: test_vld3_f64:
518 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
519 %1 = bitcast double* %a to i8*
520 %vld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %1, i32 8)
521 %vld3.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 0
522 %vld3.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 1
523 %vld3.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 2
524 %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3.fca.0.extract, 0, 0
525 %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3.fca.1.extract, 0, 1
526 %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3.fca.2.extract, 0, 2
527 ret %struct.float64x1x3_t %.fca.0.2.insert
; vld4 of four <16 x i8> vectors -> ld4 with a four-register .16b list; base register may be xN or sp.
; The four results are repacked into %struct.int8x16x4_t for the return.
530 define %struct.int8x16x4_t @test_vld4q_s8(i8* readonly %a) {
531 ; CHECK-LABEL: test_vld4q_s8:
532 ; CHECK: ld4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
533 %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %a, i32 1)
534 %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0
535 %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
536 %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
537 %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
538 %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4.fca.0.extract, 0, 0
539 %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4.fca.1.extract, 0, 1
540 %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4.fca.2.extract, 0, 2
541 %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4.fca.3.extract, 0, 3
542 ret %struct.int8x16x4_t %.fca.0.3.insert
; vld4 of four <8 x i16> vectors -> ld4 with a four-register .8h list; base register may be xN or sp.
; The four results are repacked into %struct.int16x8x4_t for the return.
545 define %struct.int16x8x4_t @test_vld4q_s16(i16* readonly %a) {
546 ; CHECK-LABEL: test_vld4q_s16:
547 ; CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
548 %1 = bitcast i16* %a to i8*
549 %vld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %1, i32 2)
550 %vld4.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 0
551 %vld4.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 1
552 %vld4.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 2
553 %vld4.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 3
554 %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4.fca.0.extract, 0, 0
555 %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4.fca.1.extract, 0, 1
556 %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4.fca.2.extract, 0, 2
557 %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4.fca.3.extract, 0, 3
558 ret %struct.int16x8x4_t %.fca.0.3.insert
; vld4 of four <4 x i32> vectors -> ld4 with a four-register .4s list; base register may be xN or sp.
; The four results are repacked into %struct.int32x4x4_t for the return.
561 define %struct.int32x4x4_t @test_vld4q_s32(i32* readonly %a) {
562 ; CHECK-LABEL: test_vld4q_s32:
563 ; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
564 %1 = bitcast i32* %a to i8*
565 %vld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8* %1, i32 4)
566 %vld4.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 0
567 %vld4.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 1
568 %vld4.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 2
569 %vld4.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 3
570 %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4.fca.0.extract, 0, 0
571 %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4.fca.1.extract, 0, 1
572 %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4.fca.2.extract, 0, 2
573 %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4.fca.3.extract, 0, 3
574 ret %struct.int32x4x4_t %.fca.0.3.insert
; vld4 of four <2 x i64> vectors -> ld4 with a four-register .2d list; base register may be xN or sp.
; The four results are repacked into %struct.int64x2x4_t for the return.
577 define %struct.int64x2x4_t @test_vld4q_s64(i64* readonly %a) {
578 ; CHECK-LABEL: test_vld4q_s64:
579 ; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
580 %1 = bitcast i64* %a to i8*
581 %vld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8* %1, i32 8)
582 %vld4.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 0
583 %vld4.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 1
584 %vld4.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 2
585 %vld4.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 3
586 %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4.fca.0.extract, 0, 0
587 %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4.fca.1.extract, 0, 1
588 %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4.fca.2.extract, 0, 2
589 %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4.fca.3.extract, 0, 3
590 ret %struct.int64x2x4_t %.fca.0.3.insert
; vld4 of four <4 x float> vectors -> ld4 with a four-register .4s list; base register may be xN or sp.
; The four results are repacked into %struct.float32x4x4_t for the return.
593 define %struct.float32x4x4_t @test_vld4q_f32(float* readonly %a) {
594 ; CHECK-LABEL: test_vld4q_f32:
595 ; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
596 %1 = bitcast float* %a to i8*
597 %vld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4)
598 %vld4.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 0
599 %vld4.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 1
600 %vld4.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 2
601 %vld4.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 3
602 %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4.fca.0.extract, 0, 0
603 %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4.fca.1.extract, 0, 1
604 %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4.fca.2.extract, 0, 2
605 %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4.fca.3.extract, 0, 3
606 ret %struct.float32x4x4_t %.fca.0.3.insert
; vld4 of <2 x double> via @llvm.arm.neon.vld4.v2f64 must select one ld4 on four .2d registers;
; the four results are repacked into %struct.float64x2x4_t. The base register may be xN or sp,
; so the 'x' sits inside the regex alternation; the label pattern ends in ':' to anchor on the label line.
609 define %struct.float64x2x4_t @test_vld4q_f64(double* readonly %a) {
610 ; CHECK-LABEL: test_vld4q_f64:
611 ; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
612 %1 = bitcast double* %a to i8*
613 %vld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8* %1, i32 8)
614 %vld4.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 0
615 %vld4.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 1
616 %vld4.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 2
617 %vld4.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 3
618 %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4.fca.0.extract, 0, 0
619 %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4.fca.1.extract, 0, 1
620 %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4.fca.2.extract, 0, 2
621 %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4.fca.3.extract, 0, 3
622 ret %struct.float64x2x4_t %.fca.0.3.insert
; vld4 of <8 x i8> via @llvm.arm.neon.vld4.v8i8 must select one ld4 on four .8b registers;
; the four results are repacked into %struct.int8x8x4_t. The base register may be xN or sp,
; so the 'x' sits inside the regex alternation; the label pattern ends in ':' to anchor on the label line.
625 define %struct.int8x8x4_t @test_vld4_s8(i8* readonly %a) {
626 ; CHECK-LABEL: test_vld4_s8:
627 ; CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
628 %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %a, i32 1)
629 %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0
630 %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
631 %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
632 %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
633 %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4.fca.0.extract, 0, 0
634 %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4.fca.1.extract, 0, 1
635 %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4.fca.2.extract, 0, 2
636 %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4.fca.3.extract, 0, 3
637 ret %struct.int8x8x4_t %.fca.0.3.insert
; vld4 of <4 x i16> via @llvm.arm.neon.vld4.v4i16 must select one ld4 on four .4h registers;
; the four results are repacked into %struct.int16x4x4_t. The base register may be xN or sp,
; so the 'x' sits inside the regex alternation; the label pattern ends in ':' to anchor on the label line.
640 define %struct.int16x4x4_t @test_vld4_s16(i16* readonly %a) {
641 ; CHECK-LABEL: test_vld4_s16:
642 ; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
643 %1 = bitcast i16* %a to i8*
644 %vld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %1, i32 2)
645 %vld4.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 0
646 %vld4.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 1
647 %vld4.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 2
648 %vld4.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 3
649 %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4.fca.0.extract, 0, 0
650 %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4.fca.1.extract, 0, 1
651 %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld4.fca.2.extract, 0, 2
652 %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4.fca.3.extract, 0, 3
653 ret %struct.int16x4x4_t %.fca.0.3.insert
; vld4 of <2 x i32> via @llvm.arm.neon.vld4.v2i32 must select one ld4 on four .2s registers;
; the four results are repacked into %struct.int32x2x4_t. The base register may be xN or sp,
; so the 'x' sits inside the regex alternation; the label pattern ends in ':' to anchor on the label line.
656 define %struct.int32x2x4_t @test_vld4_s32(i32* readonly %a) {
657 ; CHECK-LABEL: test_vld4_s32:
658 ; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
659 %1 = bitcast i32* %a to i8*
660 %vld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8* %1, i32 4)
661 %vld4.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 0
662 %vld4.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 1
663 %vld4.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 2
664 %vld4.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 3
665 %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4.fca.0.extract, 0, 0
666 %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4.fca.1.extract, 0, 1
667 %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4.fca.2.extract, 0, 2
668 %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4.fca.3.extract, 0, 3
669 ret %struct.int32x2x4_t %.fca.0.3.insert
; vld4 of <1 x i64> via @llvm.arm.neon.vld4.v1i64: the expected instruction is ld1 (not ld4) on four
; .1d registers; the four results are repacked into %struct.int64x1x4_t. The base register may be xN
; or sp, so the 'x' sits inside the regex alternation; the label pattern ends in ':' to anchor on the label line.
672 define %struct.int64x1x4_t @test_vld4_s64(i64* readonly %a) {
673 ; CHECK-LABEL: test_vld4_s64:
674 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
675 %1 = bitcast i64* %a to i8*
676 %vld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %1, i32 8)
677 %vld4.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 0
678 %vld4.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 1
679 %vld4.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 2
680 %vld4.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 3
681 %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4.fca.0.extract, 0, 0
682 %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4.fca.1.extract, 0, 1
683 %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4.fca.2.extract, 0, 2
684 %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4.fca.3.extract, 0, 3
685 ret %struct.int64x1x4_t %.fca.0.3.insert
; vld4 of <2 x float> via @llvm.arm.neon.vld4.v2f32 must select one ld4 on four .2s registers;
; the four results are repacked into %struct.float32x2x4_t. The base register may be xN or sp,
; so the 'x' sits inside the regex alternation; the label pattern ends in ':' to anchor on the label line.
688 define %struct.float32x2x4_t @test_vld4_f32(float* readonly %a) {
689 ; CHECK-LABEL: test_vld4_f32:
690 ; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
691 %1 = bitcast float* %a to i8*
692 %vld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8* %1, i32 4)
693 %vld4.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 0
694 %vld4.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 1
695 %vld4.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 2
696 %vld4.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 3
697 %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4.fca.0.extract, 0, 0
698 %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4.fca.1.extract, 0, 1
699 %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4.fca.2.extract, 0, 2
700 %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4.fca.3.extract, 0, 3
701 ret %struct.float32x2x4_t %.fca.0.3.insert
; vld4 of <1 x double> via @llvm.arm.neon.vld4.v1f64: the expected instruction is ld1 (not ld4) on four
; .1d registers; the four results are repacked into %struct.float64x1x4_t. The base register may be xN
; or sp, so the 'x' sits inside the regex alternation; the label pattern ends in ':' to anchor on the label line.
704 define %struct.float64x1x4_t @test_vld4_f64(double* readonly %a) {
705 ; CHECK-LABEL: test_vld4_f64:
706 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
707 %1 = bitcast double* %a to i8*
708 %vld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %1, i32 8)
709 %vld4.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 0
710 %vld4.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 1
711 %vld4.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 2
712 %vld4.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 3
713 %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4.fca.0.extract, 0, 0
714 %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4.fca.1.extract, 0, 1
715 %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4.fca.2.extract, 0, 2
716 %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4.fca.3.extract, 0, 3
717 ret %struct.float64x1x4_t %.fca.0.3.insert
; Declarations of the @llvm.arm.neon.vld1..vld4 intrinsics, one per vector type.
; Every intrinsic takes (i8*, i32); the trailing i32 is presumably the alignment operand (TODO confirm).

; vld1: returns a single vector.
720 declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32)
721 declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32)
722 declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32)
723 declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32)
724 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32)
725 declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32)
726 declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32)
727 declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32)
728 declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32)
729 declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32)
730 declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32)
731 declare <1 x double> @llvm.arm.neon.vld1.v1f64(i8*, i32)
; vld2: returns a literal struct of two vectors.
732 declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)
733 declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8*, i32)
734 declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32)
735 declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8*, i32)
736 declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32)
737 declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8*, i32)
738 declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32)
739 declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8*, i32)
740 declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32)
741 declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32)
742 declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32)
743 declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32)
; vld3: returns a literal struct of three vectors.
744 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32)
745 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8*, i32)
746 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32)
747 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8*, i32)
748 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32)
749 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8*, i32)
750 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32)
751 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32)
752 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8*, i32)
753 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32)
754 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8*, i32)
755 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32)
; vld4: returns a literal struct of four vectors.
756 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
757 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32)
758 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8*, i32)
759 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8*, i32)
760 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, i32)
761 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8*, i32)
762 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)
763 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32)
764 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8*, i32)
765 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32)
766 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8*, i32)
767 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32)
; vst1 of <16 x i8> via @llvm.arm.neon.vst1.v16i8 must select st1 on one .16b register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
769 define void @test_vst1q_s8(i8* %a, <16 x i8> %b) {
770 ; CHECK-LABEL: test_vst1q_s8:
771 ; CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
772 tail call void @llvm.arm.neon.vst1.v16i8(i8* %a, <16 x i8> %b, i32 1)
; vst1 of <8 x i16> via @llvm.arm.neon.vst1.v8i16 must select st1 on one .8h register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
776 define void @test_vst1q_s16(i16* %a, <8 x i16> %b) {
777 ; CHECK-LABEL: test_vst1q_s16:
778 ; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
779 %1 = bitcast i16* %a to i8*
780 tail call void @llvm.arm.neon.vst1.v8i16(i8* %1, <8 x i16> %b, i32 2)
; vst1 of <4 x i32> via @llvm.arm.neon.vst1.v4i32 must select st1 on one .4s register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
784 define void @test_vst1q_s32(i32* %a, <4 x i32> %b) {
785 ; CHECK-LABEL: test_vst1q_s32:
786 ; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
787 %1 = bitcast i32* %a to i8*
788 tail call void @llvm.arm.neon.vst1.v4i32(i8* %1, <4 x i32> %b, i32 4)
; vst1 of <2 x i64> via @llvm.arm.neon.vst1.v2i64 must select st1 on one .2d register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
792 define void @test_vst1q_s64(i64* %a, <2 x i64> %b) {
793 ; CHECK-LABEL: test_vst1q_s64:
794 ; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
795 %1 = bitcast i64* %a to i8*
796 tail call void @llvm.arm.neon.vst1.v2i64(i8* %1, <2 x i64> %b, i32 8)
; vst1 of <4 x float> via @llvm.arm.neon.vst1.v4f32 must select st1 on one .4s register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
800 define void @test_vst1q_f32(float* %a, <4 x float> %b) {
801 ; CHECK-LABEL: test_vst1q_f32:
802 ; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
803 %1 = bitcast float* %a to i8*
804 tail call void @llvm.arm.neon.vst1.v4f32(i8* %1, <4 x float> %b, i32 4)
; vst1 of <2 x double> via @llvm.arm.neon.vst1.v2f64 must select st1 on one .2d register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
808 define void @test_vst1q_f64(double* %a, <2 x double> %b) {
809 ; CHECK-LABEL: test_vst1q_f64:
810 ; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
811 %1 = bitcast double* %a to i8*
812 tail call void @llvm.arm.neon.vst1.v2f64(i8* %1, <2 x double> %b, i32 8)
; vst1 of <8 x i8> via @llvm.arm.neon.vst1.v8i8 must select st1 on one .8b register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
816 define void @test_vst1_s8(i8* %a, <8 x i8> %b) {
817 ; CHECK-LABEL: test_vst1_s8:
818 ; CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
819 tail call void @llvm.arm.neon.vst1.v8i8(i8* %a, <8 x i8> %b, i32 1)
; vst1 of <4 x i16> via @llvm.arm.neon.vst1.v4i16 must select st1 on one .4h register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
823 define void @test_vst1_s16(i16* %a, <4 x i16> %b) {
824 ; CHECK-LABEL: test_vst1_s16:
825 ; CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
826 %1 = bitcast i16* %a to i8*
827 tail call void @llvm.arm.neon.vst1.v4i16(i8* %1, <4 x i16> %b, i32 2)
; vst1 of <2 x i32> via @llvm.arm.neon.vst1.v2i32 must select st1 on one .2s register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
831 define void @test_vst1_s32(i32* %a, <2 x i32> %b) {
832 ; CHECK-LABEL: test_vst1_s32:
833 ; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
834 %1 = bitcast i32* %a to i8*
835 tail call void @llvm.arm.neon.vst1.v2i32(i8* %1, <2 x i32> %b, i32 4)
; vst1 of <1 x i64> via @llvm.arm.neon.vst1.v1i64 must select st1 on one .1d register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
839 define void @test_vst1_s64(i64* %a, <1 x i64> %b) {
840 ; CHECK-LABEL: test_vst1_s64:
841 ; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
842 %1 = bitcast i64* %a to i8*
843 tail call void @llvm.arm.neon.vst1.v1i64(i8* %1, <1 x i64> %b, i32 8)
; vst1 of <2 x float> via @llvm.arm.neon.vst1.v2f32 must select st1 on one .2s register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
847 define void @test_vst1_f32(float* %a, <2 x float> %b) {
848 ; CHECK-LABEL: test_vst1_f32:
849 ; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
850 %1 = bitcast float* %a to i8*
851 tail call void @llvm.arm.neon.vst1.v2f32(i8* %1, <2 x float> %b, i32 4)
; vst1 of <1 x double> via @llvm.arm.neon.vst1.v1f64 must select st1 on one .1d register.
; The label pattern carries a trailing ':' so it anchors on the function's label line.
855 define void @test_vst1_f64(double* %a, <1 x double> %b) {
856 ; CHECK-LABEL: test_vst1_f64:
857 ; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
858 %1 = bitcast double* %a to i8*
859 tail call void @llvm.arm.neon.vst1.v1f64(i8* %1, <1 x double> %b, i32 8)
; vst2 of two <16 x i8> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v16i8 must select
; st2 on two .16b registers. The label pattern carries a trailing ':' so it anchors on the label line.
863 define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) {
864 ; CHECK-LABEL: test_vst2q_s8:
865 ; CHECK: st2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
866 %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0
867 %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1
868 tail call void @llvm.arm.neon.vst2.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 1)
; vst2 of two <8 x i16> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v8i16 must select
; st2 on two .8h registers. The label pattern carries a trailing ':' so it anchors on the label line.
872 define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) {
873 ; CHECK-LABEL: test_vst2q_s16:
874 ; CHECK: st2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
875 %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0
876 %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1
877 %1 = bitcast i16* %a to i8*
878 tail call void @llvm.arm.neon.vst2.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 2)
; vst2 of two <4 x i32> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v4i32 must select
; st2 on two .4s registers. The label pattern carries a trailing ':' so it anchors on the label line.
882 define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) {
883 ; CHECK-LABEL: test_vst2q_s32:
884 ; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
885 %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0
886 %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1
887 %1 = bitcast i32* %a to i8*
888 tail call void @llvm.arm.neon.vst2.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 4)
; vst2 of two <2 x i64> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v2i64 must select
; st2 on two .2d registers. The label pattern carries a trailing ':' so it anchors on the label line.
892 define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) {
893 ; CHECK-LABEL: test_vst2q_s64:
894 ; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
895 %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0
896 %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1
897 %1 = bitcast i64* %a to i8*
898 tail call void @llvm.arm.neon.vst2.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 8)
; vst2 of two <4 x float> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v4f32 must select
; st2 on two .4s registers. The label pattern carries a trailing ':' so it anchors on the label line.
902 define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) {
903 ; CHECK-LABEL: test_vst2q_f32:
904 ; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
905 %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0
906 %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1
907 %1 = bitcast float* %a to i8*
908 tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 4)
; vst2 of two <2 x double> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v2f64 must select
; st2 on two .2d registers. The label pattern carries a trailing ':' so it anchors on the label line.
912 define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) {
913 ; CHECK-LABEL: test_vst2q_f64:
914 ; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
915 %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0
916 %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1
917 %1 = bitcast double* %a to i8*
918 tail call void @llvm.arm.neon.vst2.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 8)
; vst2 of two <8 x i8> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v8i8 must select
; st2 on two .8b registers. The label pattern carries a trailing ':' so it anchors on the label line.
922 define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) {
923 ; CHECK-LABEL: test_vst2_s8:
924 ; CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
925 %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0
926 %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1
927 tail call void @llvm.arm.neon.vst2.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 1)
; vst2 of two <4 x i16> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v4i16 must select
; st2 on two .4h registers. The label pattern carries a trailing ':' so it anchors on the label line.
931 define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) {
932 ; CHECK-LABEL: test_vst2_s16:
933 ; CHECK: st2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
934 %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0
935 %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1
936 %1 = bitcast i16* %a to i8*
937 tail call void @llvm.arm.neon.vst2.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 2)
; vst2 of two <2 x i32> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v2i32 must select
; st2 on two .2s registers. The label pattern carries a trailing ':' so it anchors on the label line.
941 define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) {
942 ; CHECK-LABEL: test_vst2_s32:
943 ; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
944 %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0
945 %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1
946 %1 = bitcast i32* %a to i8*
947 tail call void @llvm.arm.neon.vst2.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 4)
; vst2 of two <1 x i64> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v1i64: the expected
; instruction is st1 (not st2) on two .1d registers. The label pattern carries a trailing ':' so it
; anchors on the label line.
951 define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) {
952 ; CHECK-LABEL: test_vst2_s64:
953 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
954 %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0
955 %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1
956 %1 = bitcast i64* %a to i8*
957 tail call void @llvm.arm.neon.vst2.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 8)
; vst2 of two <2 x float> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v2f32 must select
; st2 on two .2s registers. The label pattern carries a trailing ':' so it anchors on the label line.
961 define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) {
962 ; CHECK-LABEL: test_vst2_f32:
963 ; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
964 %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0
965 %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1
966 %1 = bitcast float* %a to i8*
967 tail call void @llvm.arm.neon.vst2.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 4)
; vst2 of two <1 x double> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst2.v1f64: the expected
; instruction is st1 (not st2) on two .1d registers. The label pattern carries a trailing ':' so it
; anchors on the label line.
971 define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) {
972 ; CHECK-LABEL: test_vst2_f64:
973 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
974 %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0
975 %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1
976 %1 = bitcast double* %a to i8*
977 tail call void @llvm.arm.neon.vst2.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 8)
; vst3 of three <16 x i8> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v16i8 must select
; st3 on three .16b registers. The label pattern carries a trailing ':' so it anchors on the label line.
981 define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) {
982 ; CHECK-LABEL: test_vst3q_s8:
983 ; CHECK: st3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
984 %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0
985 %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1
986 %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2
987 tail call void @llvm.arm.neon.vst3.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 1)
; vst3 of three <8 x i16> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v8i16 must select
; st3 on three .8h registers. The label pattern carries a trailing ':' so it anchors on the label line.
991 define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) {
992 ; CHECK-LABEL: test_vst3q_s16:
993 ; CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
994 %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0
995 %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1
996 %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2
997 %1 = bitcast i16* %a to i8*
998 tail call void @llvm.arm.neon.vst3.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 2)
; vst3 of three <4 x i32> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v4i32 must select
; st3 on three .4s registers. The label pattern carries a trailing ':' so it anchors on the label line.
1002 define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) {
1003 ; CHECK-LABEL: test_vst3q_s32:
1004 ; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1005 %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0
1006 %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1
1007 %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2
1008 %1 = bitcast i32* %a to i8*
1009 tail call void @llvm.arm.neon.vst3.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 4)
; vst3 of three <2 x i64> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v2i64 must select
; st3 on three .2d registers. The label pattern carries a trailing ':' so it anchors on the label line.
1013 define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) {
1014 ; CHECK-LABEL: test_vst3q_s64:
1015 ; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1016 %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0
1017 %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1
1018 %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2
1019 %1 = bitcast i64* %a to i8*
1020 tail call void @llvm.arm.neon.vst3.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 8)
; vst3 of three <4 x float> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v4f32 must select
; st3 on three .4s registers. The label pattern carries a trailing ':' so it anchors on the label line.
1024 define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) {
1025 ; CHECK-LABEL: test_vst3q_f32:
1026 ; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1027 %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0
1028 %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1
1029 %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2
1030 %1 = bitcast float* %a to i8*
1031 tail call void @llvm.arm.neon.vst3.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 4)
; vst3 of three <2 x double> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v2f64 must select
; st3 on three .2d registers. The label pattern carries a trailing ':' so it anchors on the label line.
1035 define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) {
1036 ; CHECK-LABEL: test_vst3q_f64:
1037 ; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1038 %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0
1039 %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1
1040 %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2
1041 %1 = bitcast double* %a to i8*
1042 tail call void @llvm.arm.neon.vst3.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 8)
; vst3 of three <8 x i8> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v8i8 must select
; st3 on three .8b registers. The label pattern carries a trailing ':' so it anchors on the label line.
1046 define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) {
1047 ; CHECK-LABEL: test_vst3_s8:
1048 ; CHECK: st3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
1049 %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0
1050 %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1
1051 %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2
1052 tail call void @llvm.arm.neon.vst3.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 1)
; vst3 of three <4 x i16> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v4i16 must select
; st3 on three .4h registers. The label pattern carries a trailing ':' so it anchors on the label line.
1056 define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) {
1057 ; CHECK-LABEL: test_vst3_s16:
1058 ; CHECK: st3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
1059 %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0
1060 %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1
1061 %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2
1062 %1 = bitcast i16* %a to i8*
1063 tail call void @llvm.arm.neon.vst3.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 2)
; vst3 of three <2 x i32> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v2i32 must select
; st3 on three .2s registers. The label pattern carries a trailing ':' so it anchors on the label line.
1067 define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) {
1068 ; CHECK-LABEL: test_vst3_s32:
1069 ; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1070 %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0
1071 %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1
1072 %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2
1073 %1 = bitcast i32* %a to i8*
1074 tail call void @llvm.arm.neon.vst3.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 4)
; vst3 of three <1 x i64> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v1i64: the expected
; instruction is st1 (not st3) on three .1d registers. The label pattern carries a trailing ':' so it
; anchors on the label line.
1078 define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) {
1079 ; CHECK-LABEL: test_vst3_s64:
1080 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1081 %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0
1082 %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1
1083 %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2
1084 %1 = bitcast i64* %a to i8*
1085 tail call void @llvm.arm.neon.vst3.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 8)
; vst3 of three <2 x float> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v2f32 must select
; st3 on three .2s registers. The label pattern carries a trailing ':' so it anchors on the label line.
1089 define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) {
1090 ; CHECK-LABEL: test_vst3_f32:
1091 ; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1092 %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0
1093 %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1
1094 %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2
1095 %1 = bitcast float* %a to i8*
1096 tail call void @llvm.arm.neon.vst3.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 4)
; vst3 of three <1 x double> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst3.v1f64: the expected
; instruction is st1 (not st3) on three .1d registers. The label pattern carries a trailing ':' so it
; anchors on the label line.
1100 define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) {
1101 ; CHECK-LABEL: test_vst3_f64:
1102 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1103 %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0
1104 %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1
1105 %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2
1106 %1 = bitcast double* %a to i8*
1107 tail call void @llvm.arm.neon.vst3.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 8)
; vst4 of four <16 x i8> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst4.v16i8 must select
; st4 on four .16b registers. The label pattern carries a trailing ':' so it anchors on the label line.
1111 define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) {
1112 ; CHECK-LABEL: test_vst4q_s8:
1113 ; CHECK: st4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
1114 %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0
1115 %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1
1116 %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2
1117 %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3
1118 tail call void @llvm.arm.neon.vst4.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 1)
; vst4 of four <8 x i16> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst4.v8i16 must select
; st4 on four .8h registers. The label pattern carries a trailing ':' so it anchors on the label line.
1122 define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) {
1123 ; CHECK-LABEL: test_vst4q_s16:
1124 ; CHECK: st4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
1125 %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0
1126 %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1
1127 %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2
1128 %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3
1129 %1 = bitcast i16* %a to i8*
1130 tail call void @llvm.arm.neon.vst4.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 2)
; vst4 of four <4 x i32> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst4.v4i32 must select
; st4 on four .4s registers. The label pattern carries a trailing ':' so it anchors on the label line.
1134 define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) {
1135 ; CHECK-LABEL: test_vst4q_s32:
1136 ; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1137 %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0
1138 %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1
1139 %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2
1140 %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3
1141 %1 = bitcast i32* %a to i8*
1142 tail call void @llvm.arm.neon.vst4.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 4)
; vst4 of four <2 x i64> vectors (unpacked from %b.coerce) via @llvm.arm.neon.vst4.v2i64 must select
; st4 on four .2d registers. The label pattern carries a trailing ':' so it anchors on the label line.
1146 define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) {
1147 ; CHECK-LABEL: test_vst4q_s64:
1148 ; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1149 %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0
1150 %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1
1151 %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2
1152 %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3
1153 %1 = bitcast i64* %a to i8*
1154 tail call void @llvm.arm.neon.vst4.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 8)
1158 define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) {
; Splits %b.coerce into its four <4 x float> members and hands them to the
; vst4.v4f32 intrinsic; a four-register .4s st4 is expected.
1159 ; CHECK-LABEL: test_vst4q_f32
1160 ; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1161 %vec0 = extractvalue [4 x <4 x float>] %b.coerce, 0
1162 %vec1 = extractvalue [4 x <4 x float>] %b.coerce, 1
1163 %vec2 = extractvalue [4 x <4 x float>] %b.coerce, 2
1164 %vec3 = extractvalue [4 x <4 x float>] %b.coerce, 3
; The intrinsic takes an i8* destination, so the typed pointer is cast first.
1165 %dst = bitcast float* %a to i8*
1166 tail call void @llvm.arm.neon.vst4.v4f32(i8* %dst, <4 x float> %vec0, <4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, i32 4)
1170 define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) {
; Splits %b.coerce into its four <2 x double> members and hands them to the
; vst4.v2f64 intrinsic; a four-register .2d st4 is expected.
1171 ; CHECK-LABEL: test_vst4q_f64
1172 ; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1173 %vec0 = extractvalue [4 x <2 x double>] %b.coerce, 0
1174 %vec1 = extractvalue [4 x <2 x double>] %b.coerce, 1
1175 %vec2 = extractvalue [4 x <2 x double>] %b.coerce, 2
1176 %vec3 = extractvalue [4 x <2 x double>] %b.coerce, 3
; The intrinsic takes an i8* destination, so the typed pointer is cast first.
1177 %dst = bitcast double* %a to i8*
1178 tail call void @llvm.arm.neon.vst4.v2f64(i8* %dst, <2 x double> %vec0, <2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, i32 8)
1182 define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) {
; Splits %b.coerce into its four <8 x i8> members and hands them to the
; vst4.v8i8 intrinsic; %a is already i8*, so no pointer cast is needed.
; A four-register .8b st4 is expected.
1183 ; CHECK-LABEL: test_vst4_s8
1184 ; CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
1185 %vec0 = extractvalue [4 x <8 x i8>] %b.coerce, 0
1186 %vec1 = extractvalue [4 x <8 x i8>] %b.coerce, 1
1187 %vec2 = extractvalue [4 x <8 x i8>] %b.coerce, 2
1188 %vec3 = extractvalue [4 x <8 x i8>] %b.coerce, 3
1189 tail call void @llvm.arm.neon.vst4.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, <8 x i8> %vec3, i32 1)
1193 define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) {
; Splits %b.coerce into its four <4 x i16> members and hands them to the
; vst4.v4i16 intrinsic; a four-register .4h st4 is expected.
1194 ; CHECK-LABEL: test_vst4_s16
1195 ; CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
1196 %vec0 = extractvalue [4 x <4 x i16>] %b.coerce, 0
1197 %vec1 = extractvalue [4 x <4 x i16>] %b.coerce, 1
1198 %vec2 = extractvalue [4 x <4 x i16>] %b.coerce, 2
1199 %vec3 = extractvalue [4 x <4 x i16>] %b.coerce, 3
; The intrinsic takes an i8* destination, so the typed pointer is cast first.
1200 %dst = bitcast i16* %a to i8*
1201 tail call void @llvm.arm.neon.vst4.v4i16(i8* %dst, <4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, <4 x i16> %vec3, i32 2)
1205 define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) {
; Splits %b.coerce into its four <2 x i32> members and hands them to the
; vst4.v2i32 intrinsic; a four-register .2s st4 is expected.
1206 ; CHECK-LABEL: test_vst4_s32
1207 ; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1208 %vec0 = extractvalue [4 x <2 x i32>] %b.coerce, 0
1209 %vec1 = extractvalue [4 x <2 x i32>] %b.coerce, 1
1210 %vec2 = extractvalue [4 x <2 x i32>] %b.coerce, 2
1211 %vec3 = extractvalue [4 x <2 x i32>] %b.coerce, 3
; The intrinsic takes an i8* destination, so the typed pointer is cast first.
1212 %dst = bitcast i32* %a to i8*
1213 tail call void @llvm.arm.neon.vst4.v2i32(i8* %dst, <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, <2 x i32> %vec3, i32 4)
1217 define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) {
; Splits %b.coerce into its four <1 x i64> members and hands them to the
; vst4.v1i64 intrinsic. Note the expected instruction here is a
; four-register .1d st1, not st4.
1218 ; CHECK-LABEL: test_vst4_s64
1219 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1220 %vec0 = extractvalue [4 x <1 x i64>] %b.coerce, 0
1221 %vec1 = extractvalue [4 x <1 x i64>] %b.coerce, 1
1222 %vec2 = extractvalue [4 x <1 x i64>] %b.coerce, 2
1223 %vec3 = extractvalue [4 x <1 x i64>] %b.coerce, 3
; The intrinsic takes an i8* destination, so the typed pointer is cast first.
1224 %dst = bitcast i64* %a to i8*
1225 tail call void @llvm.arm.neon.vst4.v1i64(i8* %dst, <1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, <1 x i64> %vec3, i32 8)
1229 define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) {
; Splits %b.coerce into its four <2 x float> members and hands them to the
; vst4.v2f32 intrinsic; a four-register .2s st4 is expected.
1230 ; CHECK-LABEL: test_vst4_f32
1231 ; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1232 %vec0 = extractvalue [4 x <2 x float>] %b.coerce, 0
1233 %vec1 = extractvalue [4 x <2 x float>] %b.coerce, 1
1234 %vec2 = extractvalue [4 x <2 x float>] %b.coerce, 2
1235 %vec3 = extractvalue [4 x <2 x float>] %b.coerce, 3
; The intrinsic takes an i8* destination, so the typed pointer is cast first.
1236 %dst = bitcast float* %a to i8*
1237 tail call void @llvm.arm.neon.vst4.v2f32(i8* %dst, <2 x float> %vec0, <2 x float> %vec1, <2 x float> %vec2, <2 x float> %vec3, i32 4)
1241 define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) {
; Splits %b.coerce into its four <1 x double> members and hands them to the
; vst4.v1f64 intrinsic. Note the expected instruction here is a
; four-register .1d st1, not st4.
1242 ; CHECK-LABEL: test_vst4_f64
1243 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1244 %vec0 = extractvalue [4 x <1 x double>] %b.coerce, 0
1245 %vec1 = extractvalue [4 x <1 x double>] %b.coerce, 1
1246 %vec2 = extractvalue [4 x <1 x double>] %b.coerce, 2
1247 %vec3 = extractvalue [4 x <1 x double>] %b.coerce, 3
; The intrinsic takes an i8* destination, so the typed pointer is cast first.
1248 %dst = bitcast double* %a to i8*
1249 tail call void @llvm.arm.neon.vst4.v1f64(i8* %dst, <1 x double> %vec0, <1 x double> %vec1, <1 x double> %vec2, <1 x double> %vec3, i32 8)
; Declarations of the llvm.arm.neon.vstN store intrinsics, one per element
; count (1-4 vectors) and vector type. Every signature has the same shape:
; an i8* pointer, N vectors of the suffixed type, and a trailing i32 operand.

; vst1: one vector per call.
1253 declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32)
1254 declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32)
1255 declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32)
1256 declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32)
1257 declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32)
1258 declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32)
1259 declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32)
1260 declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32)
1261 declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32)
1262 declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32)
1263 declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32)
1264 declare void @llvm.arm.neon.vst1.v1f64(i8*, <1 x double>, i32)
; vst2: two vectors per call.
1265 declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
1266 declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
1267 declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
1268 declare void @llvm.arm.neon.vst2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
1269 declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32)
1270 declare void @llvm.arm.neon.vst2.v2f64(i8*, <2 x double>, <2 x double>, i32)
1271 declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
1272 declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
1273 declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
1274 declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
1275 declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32)
1276 declare void @llvm.arm.neon.vst2.v1f64(i8*, <1 x double>, <1 x double>, i32)
; vst3: three vectors per call.
1277 declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
1278 declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
1279 declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
1280 declare void @llvm.arm.neon.vst3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
1281 declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
1282 declare void @llvm.arm.neon.vst3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
1283 declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
1284 declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
1285 declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
1286 declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
1287 declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
1288 declare void @llvm.arm.neon.vst3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
; vst4: four vectors per call.
1289 declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
1290 declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
1291 declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
1292 declare void @llvm.arm.neon.vst4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
1293 declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
1294 declare void @llvm.arm.neon.vst4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
1295 declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
1296 declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
1297 declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
1298 declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
1299 declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
1300 declare void @llvm.arm.neon.vst4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)
1302 define %struct.int8x16x2_t @test_vld1q_s8_x2(i8* %a) {
; Loads two <16 x i8> vectors through the vld1x2.v16i8 intrinsic and repacks
; them into %struct.int8x16x2_t; a two-register .16b ld1 is expected.
1303 ; CHECK-LABEL: test_vld1q_s8_x2
1304 ; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
1305 %pair = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8* %a, i32 1)
1306 %vec0 = extractvalue { <16 x i8>, <16 x i8> } %pair, 0
1307 %vec1 = extractvalue { <16 x i8>, <16 x i8> } %pair, 1
1308 %ret0 = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vec0, 0, 0
1309 %ret1 = insertvalue %struct.int8x16x2_t %ret0, <16 x i8> %vec1, 0, 1
1310 ret %struct.int8x16x2_t %ret1
1313 define %struct.int16x8x2_t @test_vld1q_s16_x2(i16* %a) {
; Loads two <8 x i16> vectors through the vld1x2.v8i16 intrinsic and repacks
; them into %struct.int16x8x2_t; a two-register .8h ld1 is expected.
1314 ; CHECK-LABEL: test_vld1q_s16_x2
1315 ; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
1316 %src = bitcast i16* %a to i8*
1317 %pair = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8* %src, i32 2)
1318 %vec0 = extractvalue { <8 x i16>, <8 x i16> } %pair, 0
1319 %vec1 = extractvalue { <8 x i16>, <8 x i16> } %pair, 1
1320 %ret0 = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vec0, 0, 0
1321 %ret1 = insertvalue %struct.int16x8x2_t %ret0, <8 x i16> %vec1, 0, 1
1322 ret %struct.int16x8x2_t %ret1
1325 define %struct.int32x4x2_t @test_vld1q_s32_x2(i32* %a) {
; Loads two <4 x i32> vectors through the vld1x2.v4i32 intrinsic and repacks
; them into %struct.int32x4x2_t; a two-register .4s ld1 is expected.
1326 ; CHECK-LABEL: test_vld1q_s32_x2
1327 ; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1328 %src = bitcast i32* %a to i8*
1329 %pair = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8* %src, i32 4)
1330 %vec0 = extractvalue { <4 x i32>, <4 x i32> } %pair, 0
1331 %vec1 = extractvalue { <4 x i32>, <4 x i32> } %pair, 1
1332 %ret0 = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vec0, 0, 0
1333 %ret1 = insertvalue %struct.int32x4x2_t %ret0, <4 x i32> %vec1, 0, 1
1334 ret %struct.int32x4x2_t %ret1
1337 define %struct.int64x2x2_t @test_vld1q_s64_x2(i64* %a) {
; Loads two <2 x i64> vectors through the vld1x2.v2i64 intrinsic and repacks
; them into %struct.int64x2x2_t; a two-register .2d ld1 is expected.
1338 ; CHECK-LABEL: test_vld1q_s64_x2
1339 ; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1340 %src = bitcast i64* %a to i8*
1341 %pair = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8* %src, i32 8)
1342 %vec0 = extractvalue { <2 x i64>, <2 x i64> } %pair, 0
1343 %vec1 = extractvalue { <2 x i64>, <2 x i64> } %pair, 1
1344 %ret0 = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vec0, 0, 0
1345 %ret1 = insertvalue %struct.int64x2x2_t %ret0, <2 x i64> %vec1, 0, 1
1346 ret %struct.int64x2x2_t %ret1
1349 define %struct.float32x4x2_t @test_vld1q_f32_x2(float* %a) {
; Loads two <4 x float> vectors through the vld1x2.v4f32 intrinsic and repacks
; them into %struct.float32x4x2_t; a two-register .4s ld1 is expected.
1350 ; CHECK-LABEL: test_vld1q_f32_x2
1351 ; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1352 %src = bitcast float* %a to i8*
1353 %pair = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8* %src, i32 4)
1354 %vec0 = extractvalue { <4 x float>, <4 x float> } %pair, 0
1355 %vec1 = extractvalue { <4 x float>, <4 x float> } %pair, 1
1356 %ret0 = insertvalue %struct.float32x4x2_t undef, <4 x float> %vec0, 0, 0
1357 %ret1 = insertvalue %struct.float32x4x2_t %ret0, <4 x float> %vec1, 0, 1
1358 ret %struct.float32x4x2_t %ret1
1362 define %struct.float64x2x2_t @test_vld1q_f64_x2(double* %a) {
; Loads two <2 x double> vectors through the vld1x2.v2f64 intrinsic and repacks
; them into %struct.float64x2x2_t; a two-register .2d ld1 is expected.
1363 ; CHECK-LABEL: test_vld1q_f64_x2
1364 ; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1365 %src = bitcast double* %a to i8*
1366 %pair = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8* %src, i32 8)
1367 %vec0 = extractvalue { <2 x double>, <2 x double> } %pair, 0
1368 %vec1 = extractvalue { <2 x double>, <2 x double> } %pair, 1
1369 %ret0 = insertvalue %struct.float64x2x2_t undef, <2 x double> %vec0, 0, 0
1370 %ret1 = insertvalue %struct.float64x2x2_t %ret0, <2 x double> %vec1, 0, 1
1371 ret %struct.float64x2x2_t %ret1
1374 define %struct.int8x8x2_t @test_vld1_s8_x2(i8* %a) {
; Loads two <8 x i8> vectors through the vld1x2.v8i8 intrinsic and repacks
; them into %struct.int8x8x2_t; a two-register .8b ld1 is expected.
1375 ; CHECK-LABEL: test_vld1_s8_x2
1376 ; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
1377 %pair = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8* %a, i32 1)
1378 %vec0 = extractvalue { <8 x i8>, <8 x i8> } %pair, 0
1379 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %pair, 1
1380 %ret0 = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vec0, 0, 0
1381 %ret1 = insertvalue %struct.int8x8x2_t %ret0, <8 x i8> %vec1, 0, 1
1382 ret %struct.int8x8x2_t %ret1
1385 define %struct.int16x4x2_t @test_vld1_s16_x2(i16* %a) {
; Loads two <4 x i16> vectors through the vld1x2.v4i16 intrinsic and repacks
; them into %struct.int16x4x2_t; a two-register .4h ld1 is expected.
1386 ; CHECK-LABEL: test_vld1_s16_x2
1387 ; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
1388 %src = bitcast i16* %a to i8*
1389 %pair = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8* %src, i32 2)
1390 %vec0 = extractvalue { <4 x i16>, <4 x i16> } %pair, 0
1391 %vec1 = extractvalue { <4 x i16>, <4 x i16> } %pair, 1
1392 %ret0 = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vec0, 0, 0
1393 %ret1 = insertvalue %struct.int16x4x2_t %ret0, <4 x i16> %vec1, 0, 1
1394 ret %struct.int16x4x2_t %ret1
1397 define %struct.int32x2x2_t @test_vld1_s32_x2(i32* %a) {
; Loads two <2 x i32> vectors through the vld1x2.v2i32 intrinsic and repacks
; them into %struct.int32x2x2_t; a two-register .2s ld1 is expected.
1398 ; CHECK-LABEL: test_vld1_s32_x2
1399 ; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1400 %src = bitcast i32* %a to i8*
1401 %pair = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8* %src, i32 4)
1402 %vec0 = extractvalue { <2 x i32>, <2 x i32> } %pair, 0
1403 %vec1 = extractvalue { <2 x i32>, <2 x i32> } %pair, 1
1404 %ret0 = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vec0, 0, 0
1405 %ret1 = insertvalue %struct.int32x2x2_t %ret0, <2 x i32> %vec1, 0, 1
1406 ret %struct.int32x2x2_t %ret1
1409 define %struct.int64x1x2_t @test_vld1_s64_x2(i64* %a) {
; Loads two <1 x i64> vectors through the vld1x2.v1i64 intrinsic and repacks
; them into %struct.int64x1x2_t; a two-register .1d ld1 is expected.
1410 ; CHECK-LABEL: test_vld1_s64_x2
1411 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1412 %src = bitcast i64* %a to i8*
1413 %pair = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8* %src, i32 8)
1414 %vec0 = extractvalue { <1 x i64>, <1 x i64> } %pair, 0
1415 %vec1 = extractvalue { <1 x i64>, <1 x i64> } %pair, 1
1416 %ret0 = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vec0, 0, 0
1417 %ret1 = insertvalue %struct.int64x1x2_t %ret0, <1 x i64> %vec1, 0, 1
1418 ret %struct.int64x1x2_t %ret1
1421 define %struct.float32x2x2_t @test_vld1_f32_x2(float* %a) {
; Loads two <2 x float> vectors through the vld1x2.v2f32 intrinsic and repacks
; them into %struct.float32x2x2_t; a two-register .2s ld1 is expected.
1422 ; CHECK-LABEL: test_vld1_f32_x2
1423 ; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1424 %src = bitcast float* %a to i8*
1425 %pair = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8* %src, i32 4)
1426 %vec0 = extractvalue { <2 x float>, <2 x float> } %pair, 0
1427 %vec1 = extractvalue { <2 x float>, <2 x float> } %pair, 1
1428 %ret0 = insertvalue %struct.float32x2x2_t undef, <2 x float> %vec0, 0, 0
1429 %ret1 = insertvalue %struct.float32x2x2_t %ret0, <2 x float> %vec1, 0, 1
1430 ret %struct.float32x2x2_t %ret1
1433 define %struct.float64x1x2_t @test_vld1_f64_x2(double* %a) {
; Loads two <1 x double> vectors through the vld1x2.v1f64 intrinsic and repacks
; them into %struct.float64x1x2_t; a two-register .1d ld1 is expected.
1434 ; CHECK-LABEL: test_vld1_f64_x2
1435 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1436 %src = bitcast double* %a to i8*
1437 %pair = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8* %src, i32 8)
1438 %vec0 = extractvalue { <1 x double>, <1 x double> } %pair, 0
1439 %vec1 = extractvalue { <1 x double>, <1 x double> } %pair, 1
1440 %ret0 = insertvalue %struct.float64x1x2_t undef, <1 x double> %vec0, 0, 0
1441 %ret1 = insertvalue %struct.float64x1x2_t %ret0, <1 x double> %vec1, 0, 1
1442 ret %struct.float64x1x2_t %ret1
1445 define %struct.int8x16x3_t @test_vld1q_s8_x3(i8* %a) {
; Loads three <16 x i8> vectors through the vld1x3.v16i8 intrinsic and repacks
; them into %struct.int8x16x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1446 ; CHECK-LABEL: test_vld1q_s8_x3
1447 ; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
1449 %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8* %a, i32 1)
1450 %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
1451 %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
1452 %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
1453 %5 = insertvalue %struct.int8x16x3_t undef, <16 x i8> %2, 0, 0
1454 %6 = insertvalue %struct.int8x16x3_t %5, <16 x i8> %3, 0, 1
1455 %7 = insertvalue %struct.int8x16x3_t %6, <16 x i8> %4, 0, 2
1456 ret %struct.int8x16x3_t %7
1459 define %struct.int16x8x3_t @test_vld1q_s16_x3(i16* %a) {
; Loads three <8 x i16> vectors through the vld1x3.v8i16 intrinsic and repacks
; them into %struct.int16x8x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1460 ; CHECK-LABEL: test_vld1q_s16_x3
1461 ; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
1463 %1 = bitcast i16* %a to i8*
1464 %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8* %1, i32 2)
1465 %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
1466 %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
1467 %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
1468 %6 = insertvalue %struct.int16x8x3_t undef, <8 x i16> %3, 0, 0
1469 %7 = insertvalue %struct.int16x8x3_t %6, <8 x i16> %4, 0, 1
1470 %8 = insertvalue %struct.int16x8x3_t %7, <8 x i16> %5, 0, 2
1471 ret %struct.int16x8x3_t %8
1474 define %struct.int32x4x3_t @test_vld1q_s32_x3(i32* %a) {
; Loads three <4 x i32> vectors through the vld1x3.v4i32 intrinsic and repacks
; them into %struct.int32x4x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1475 ; CHECK-LABEL: test_vld1q_s32_x3
1476 ; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1478 %1 = bitcast i32* %a to i8*
1479 %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8* %1, i32 4)
1480 %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
1481 %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
1482 %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
1483 %6 = insertvalue %struct.int32x4x3_t undef, <4 x i32> %3, 0, 0
1484 %7 = insertvalue %struct.int32x4x3_t %6, <4 x i32> %4, 0, 1
1485 %8 = insertvalue %struct.int32x4x3_t %7, <4 x i32> %5, 0, 2
1486 ret %struct.int32x4x3_t %8
1489 define %struct.int64x2x3_t @test_vld1q_s64_x3(i64* %a) {
; Loads three <2 x i64> vectors through the vld1x3.v2i64 intrinsic and repacks
; them into %struct.int64x2x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1490 ; CHECK-LABEL: test_vld1q_s64_x3
1491 ; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1493 %1 = bitcast i64* %a to i8*
1494 %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8* %1, i32 8)
1495 %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
1496 %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
1497 %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
1498 %6 = insertvalue %struct.int64x2x3_t undef, <2 x i64> %3, 0, 0
1499 %7 = insertvalue %struct.int64x2x3_t %6, <2 x i64> %4, 0, 1
1500 %8 = insertvalue %struct.int64x2x3_t %7, <2 x i64> %5, 0, 2
1501 ret %struct.int64x2x3_t %8
1504 define %struct.float32x4x3_t @test_vld1q_f32_x3(float* %a) {
; Loads three <4 x float> vectors through the vld1x3.v4f32 intrinsic and
; repacks them into %struct.float32x4x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1505 ; CHECK-LABEL: test_vld1q_f32_x3
1506 ; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1508 %1 = bitcast float* %a to i8*
1509 %2 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8* %1, i32 4)
1510 %3 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 0
1511 %4 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 1
1512 %5 = extractvalue { <4 x float>, <4 x float>, <4 x float> } %2, 2
1513 %6 = insertvalue %struct.float32x4x3_t undef, <4 x float> %3, 0, 0
1514 %7 = insertvalue %struct.float32x4x3_t %6, <4 x float> %4, 0, 1
1515 %8 = insertvalue %struct.float32x4x3_t %7, <4 x float> %5, 0, 2
1516 ret %struct.float32x4x3_t %8
1520 define %struct.float64x2x3_t @test_vld1q_f64_x3(double* %a) {
; Loads three <2 x double> vectors through the vld1x3.v2f64 intrinsic and
; repacks them into %struct.float64x2x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1521 ; CHECK-LABEL: test_vld1q_f64_x3
1522 ; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1524 %1 = bitcast double* %a to i8*
1525 %2 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8* %1, i32 8)
1526 %3 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 0
1527 %4 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 1
1528 %5 = extractvalue { <2 x double>, <2 x double>, <2 x double> } %2, 2
1529 %6 = insertvalue %struct.float64x2x3_t undef, <2 x double> %3, 0, 0
1530 %7 = insertvalue %struct.float64x2x3_t %6, <2 x double> %4, 0, 1
1531 %8 = insertvalue %struct.float64x2x3_t %7, <2 x double> %5, 0, 2
1532 ret %struct.float64x2x3_t %8
1535 define %struct.int8x8x3_t @test_vld1_s8_x3(i8* %a) {
; Loads three <8 x i8> vectors through the vld1x3.v8i8 intrinsic and repacks
; them into %struct.int8x8x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1536 ; CHECK-LABEL: test_vld1_s8_x3
1537 ; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
1539 %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8* %a, i32 1)
1540 %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
1541 %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
1542 %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
1543 %5 = insertvalue %struct.int8x8x3_t undef, <8 x i8> %2, 0, 0
1544 %6 = insertvalue %struct.int8x8x3_t %5, <8 x i8> %3, 0, 1
1545 %7 = insertvalue %struct.int8x8x3_t %6, <8 x i8> %4, 0, 2
1546 ret %struct.int8x8x3_t %7
1549 define %struct.int16x4x3_t @test_vld1_s16_x3(i16* %a) {
; Loads three <4 x i16> vectors through the vld1x3.v4i16 intrinsic and repacks
; them into %struct.int16x4x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1550 ; CHECK-LABEL: test_vld1_s16_x3
1551 ; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
1553 %1 = bitcast i16* %a to i8*
1554 %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8* %1, i32 2)
1555 %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
1556 %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
1557 %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
1558 %6 = insertvalue %struct.int16x4x3_t undef, <4 x i16> %3, 0, 0
1559 %7 = insertvalue %struct.int16x4x3_t %6, <4 x i16> %4, 0, 1
1560 %8 = insertvalue %struct.int16x4x3_t %7, <4 x i16> %5, 0, 2
1561 ret %struct.int16x4x3_t %8
1564 define %struct.int32x2x3_t @test_vld1_s32_x3(i32* %a) {
; Loads three <2 x i32> vectors through the vld1x3.v2i32 intrinsic and repacks
; them into %struct.int32x2x3_t. The FileCheck directives now lead the body,
; as in the sibling tests, and the pattern covers the whole instruction,
; including the base-address operand.
1566 ; CHECK-LABEL: test_vld1_s32_x3
1567 ; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1565 %1 = bitcast i32* %a to i8*
1569 %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8* %1, i32 4)
1570 %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
1571 %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
1572 %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
1573 %6 = insertvalue %struct.int32x2x3_t undef, <2 x i32> %3, 0, 0
1574 %7 = insertvalue %struct.int32x2x3_t %6, <2 x i32> %4, 0, 1
1575 %8 = insertvalue %struct.int32x2x3_t %7, <2 x i32> %5, 0, 2
1576 ret %struct.int32x2x3_t %8
1579 define %struct.int64x1x3_t @test_vld1_s64_x3(i64* %a) {
; Loads three <1 x i64> vectors through the vld1x3.v1i64 intrinsic and repacks
; them into %struct.int64x1x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1580 ; CHECK-LABEL: test_vld1_s64_x3
1581 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1583 %1 = bitcast i64* %a to i8*
1584 %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8* %1, i32 8)
1585 %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
1586 %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
1587 %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
1588 %6 = insertvalue %struct.int64x1x3_t undef, <1 x i64> %3, 0, 0
1589 %7 = insertvalue %struct.int64x1x3_t %6, <1 x i64> %4, 0, 1
1590 %8 = insertvalue %struct.int64x1x3_t %7, <1 x i64> %5, 0, 2
1591 ret %struct.int64x1x3_t %8
1594 define %struct.float32x2x3_t @test_vld1_f32_x3(float* %a) {
; Loads three <2 x float> vectors through the vld1x3.v2f32 intrinsic and
; repacks them into %struct.float32x2x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1595 ; CHECK-LABEL: test_vld1_f32_x3
1596 ; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1598 %1 = bitcast float* %a to i8*
1599 %2 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8* %1, i32 4)
1600 %3 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 0
1601 %4 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 1
1602 %5 = extractvalue { <2 x float>, <2 x float>, <2 x float> } %2, 2
1603 %6 = insertvalue %struct.float32x2x3_t undef, <2 x float> %3, 0, 0
1604 %7 = insertvalue %struct.float32x2x3_t %6, <2 x float> %4, 0, 1
1605 %8 = insertvalue %struct.float32x2x3_t %7, <2 x float> %5, 0, 2
1606 ret %struct.float32x2x3_t %8
1610 define %struct.float64x1x3_t @test_vld1_f64_x3(double* %a) {
; Loads three <1 x double> vectors through the vld1x3.v1f64 intrinsic and
; repacks them into %struct.float64x1x3_t. The pattern below covers the whole
; instruction, including the base-address operand.
1611 ; CHECK-LABEL: test_vld1_f64_x3
1612 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1614 %1 = bitcast double* %a to i8*
1615 %2 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8* %1, i32 8)
1616 %3 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 0
1617 %4 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 1
1618 %5 = extractvalue { <1 x double>, <1 x double>, <1 x double> } %2, 2
1619 %6 = insertvalue %struct.float64x1x3_t undef, <1 x double> %3, 0, 0
1620 %7 = insertvalue %struct.float64x1x3_t %6, <1 x double> %4, 0, 1
1621 %8 = insertvalue %struct.float64x1x3_t %7, <1 x double> %5, 0, 2
1622 ret %struct.float64x1x3_t %8
1625 define %struct.int8x16x4_t @test_vld1q_s8_x4(i8* %a) {
; Loads four <16 x i8> vectors through the vld1x4.v16i8 intrinsic and repacks
; them into %struct.int8x16x4_t. The pattern is kept on one directive line so
; that FileCheck verifies all four registers and the base address.
1626 ; CHECK-LABEL: test_vld1q_s8_x4
1627 ; CHECK: ld1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
1629 %1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8* %a, i32 1)
1630 %2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
1631 %3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
1632 %4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
1633 %5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
1634 %6 = insertvalue %struct.int8x16x4_t undef, <16 x i8> %2, 0, 0
1635 %7 = insertvalue %struct.int8x16x4_t %6, <16 x i8> %3, 0, 1
1636 %8 = insertvalue %struct.int8x16x4_t %7, <16 x i8> %4, 0, 2
1637 %9 = insertvalue %struct.int8x16x4_t %8, <16 x i8> %5, 0, 3
1638 ret %struct.int8x16x4_t %9
1641 define %struct.int16x8x4_t @test_vld1q_s16_x4(i16* %a) {
; Loads four <8 x i16> vectors through the vld1x4.v8i16 intrinsic and repacks
; them into %struct.int16x8x4_t. The pattern is kept on one directive line so
; that FileCheck verifies all four registers and the base address.
1642 ; CHECK-LABEL: test_vld1q_s16_x4
1643 ; CHECK: ld1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
1645 %1 = bitcast i16* %a to i8*
1646 %2 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8* %1, i32 2)
1647 %3 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 0
1648 %4 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 1
1649 %5 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 2
1650 %6 = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %2, 3
1651 %7 = insertvalue %struct.int16x8x4_t undef, <8 x i16> %3, 0, 0
1652 %8 = insertvalue %struct.int16x8x4_t %7, <8 x i16> %4, 0, 1
1653 %9 = insertvalue %struct.int16x8x4_t %8, <8 x i16> %5, 0, 2
1654 %10 = insertvalue %struct.int16x8x4_t %9, <8 x i16> %6, 0, 3
1655 ret %struct.int16x8x4_t %10
1658 define %struct.int32x4x4_t @test_vld1q_s32_x4(i32* %a) {
; Loads four <4 x i32> vectors through the vld1x4.v4i32 intrinsic and repacks
; them into %struct.int32x4x4_t. The pattern is kept on one directive line so
; that FileCheck verifies all four registers and the base address.
1659 ; CHECK-LABEL: test_vld1q_s32_x4
1660 ; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1662 %1 = bitcast i32* %a to i8*
1663 %2 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8* %1, i32 4)
1664 %3 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 0
1665 %4 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 1
1666 %5 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 2
1667 %6 = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %2, 3
1668 %7 = insertvalue %struct.int32x4x4_t undef, <4 x i32> %3, 0, 0
1669 %8 = insertvalue %struct.int32x4x4_t %7, <4 x i32> %4, 0, 1
1670 %9 = insertvalue %struct.int32x4x4_t %8, <4 x i32> %5, 0, 2
1671 %10 = insertvalue %struct.int32x4x4_t %9, <4 x i32> %6, 0, 3
1672 ret %struct.int32x4x4_t %10
1675 define %struct.int64x2x4_t @test_vld1q_s64_x4(i64* %a) {
; Loads four <2 x i64> vectors through the vld1x4.v2i64 intrinsic and repacks
; them into %struct.int64x2x4_t. The pattern is kept on one directive line so
; that FileCheck verifies all four registers and the base address.
1676 ; CHECK-LABEL: test_vld1q_s64_x4
1677 ; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1679 %1 = bitcast i64* %a to i8*
1680 %2 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8* %1, i32 8)
1681 %3 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 0
1682 %4 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 1
1683 %5 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 2
1684 %6 = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %2, 3
1685 %7 = insertvalue %struct.int64x2x4_t undef, <2 x i64> %3, 0, 0
1686 %8 = insertvalue %struct.int64x2x4_t %7, <2 x i64> %4, 0, 1
1687 %9 = insertvalue %struct.int64x2x4_t %8, <2 x i64> %5, 0, 2
1688 %10 = insertvalue %struct.int64x2x4_t %9, <2 x i64> %6, 0, 3
1689 ret %struct.int64x2x4_t %10
1692 define %struct.float32x4x4_t @test_vld1q_f32_x4(float* %a) {
; Loads four <4 x float> vectors through the vld1x4.v4f32 intrinsic and
; repacks them into %struct.float32x4x4_t. The pattern is kept on one
; directive line so that FileCheck verifies all four registers and the base
; address.
1693 ; CHECK-LABEL: test_vld1q_f32_x4
1694 ; CHECK: ld1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1696 %1 = bitcast float* %a to i8*
1697 %2 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8* %1, i32 4)
1698 %3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 0
1699 %4 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 1
1700 %5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 2
1701 %6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %2, 3
1702 %7 = insertvalue %struct.float32x4x4_t undef, <4 x float> %3, 0, 0
1703 %8 = insertvalue %struct.float32x4x4_t %7, <4 x float> %4, 0, 1
1704 %9 = insertvalue %struct.float32x4x4_t %8, <4 x float> %5, 0, 2
1705 %10 = insertvalue %struct.float32x4x4_t %9, <4 x float> %6, 0, 3
1706 ret %struct.float32x4x4_t %10
1709 define %struct.float64x2x4_t @test_vld1q_f64_x4(double* %a) {
; Loads four <2 x double> vectors through the vld1x4.v2f64 intrinsic and
; repacks them into %struct.float64x2x4_t. The pattern is kept on one
; directive line so that FileCheck verifies all four registers and the base
; address; every register uses the .2d arrangement that matches <2 x double>.
1710 ; CHECK-LABEL: test_vld1q_f64_x4
1711 ; CHECK: ld1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1713 %1 = bitcast double* %a to i8*
1714 %2 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8* %1, i32 8)
1715 %3 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 0
1716 %4 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 1
1717 %5 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 2
1718 %6 = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %2, 3
1719 %7 = insertvalue %struct.float64x2x4_t undef, <2 x double> %3, 0, 0
1720 %8 = insertvalue %struct.float64x2x4_t %7, <2 x double> %4, 0, 1
1721 %9 = insertvalue %struct.float64x2x4_t %8, <2 x double> %5, 0, 2
1722 %10 = insertvalue %struct.float64x2x4_t %9, <2 x double> %6, 0, 3
1723 ret %struct.float64x2x4_t %10
1726 define %struct.int8x8x4_t @test_vld1_s8_x4(i8* %a) {
; Loads four <8 x i8> vectors through the vld1x4.v8i8 intrinsic and repacks
; them into %struct.int8x8x4_t. The pattern is kept on one directive line so
; that FileCheck verifies all four registers and the base address.
1727 ; CHECK-LABEL: test_vld1_s8_x4
1728 ; CHECK: ld1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
1730 %1 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8* %a, i32 1)
1731 %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0
1732 %3 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 1
1733 %4 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 2
1734 %5 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 3
1735 %6 = insertvalue %struct.int8x8x4_t undef, <8 x i8> %2, 0, 0
1736 %7 = insertvalue %struct.int8x8x4_t %6, <8 x i8> %3, 0, 1
1737 %8 = insertvalue %struct.int8x8x4_t %7, <8 x i8> %4, 0, 2
1738 %9 = insertvalue %struct.int8x8x4_t %8, <8 x i8> %5, 0, 3
1739 ret %struct.int8x8x4_t %9
; Checks that @llvm.aarch64.neon.vld1x4.v4i16 lowers to an ld1 with a
; four-register .4h list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched. The four results are repacked into
; %struct.int16x4x4_t for the return.
1742 define %struct.int16x4x4_t @test_vld1_s16_x4(i16* %a) {
1743 ; CHECK-LABEL: test_vld1_s16_x4
1744 ; CHECK: ld1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
1746 %1 = bitcast i16* %a to i8*
1747 %2 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8* %1, i32 2)
1748 %3 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 0
1749 %4 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 1
1750 %5 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 2
1751 %6 = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %2, 3
1752 %7 = insertvalue %struct.int16x4x4_t undef, <4 x i16> %3, 0, 0
1753 %8 = insertvalue %struct.int16x4x4_t %7, <4 x i16> %4, 0, 1
1754 %9 = insertvalue %struct.int16x4x4_t %8, <4 x i16> %5, 0, 2
1755 %10 = insertvalue %struct.int16x4x4_t %9, <4 x i16> %6, 0, 3
1756 ret %struct.int16x4x4_t %10
; Checks that @llvm.aarch64.neon.vld1x4.v2i32 lowers to an ld1 with a
; four-register .2s list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched. The four results are repacked into
; %struct.int32x2x4_t for the return.
1759 define %struct.int32x2x4_t @test_vld1_s32_x4(i32* %a) {
1760 ; CHECK-LABEL: test_vld1_s32_x4
1761 ; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1763 %1 = bitcast i32* %a to i8*
1764 %2 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8* %1, i32 4)
1765 %3 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 0
1766 %4 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 1
1767 %5 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 2
1768 %6 = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %2, 3
1769 %7 = insertvalue %struct.int32x2x4_t undef, <2 x i32> %3, 0, 0
1770 %8 = insertvalue %struct.int32x2x4_t %7, <2 x i32> %4, 0, 1
1771 %9 = insertvalue %struct.int32x2x4_t %8, <2 x i32> %5, 0, 2
1772 %10 = insertvalue %struct.int32x2x4_t %9, <2 x i32> %6, 0, 3
1773 ret %struct.int32x2x4_t %10
; Checks that @llvm.aarch64.neon.vld1x4.v1i64 lowers to an ld1 with a
; four-register .1d list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched. The four results are repacked into
; %struct.int64x1x4_t for the return.
1776 define %struct.int64x1x4_t @test_vld1_s64_x4(i64* %a) {
1777 ; CHECK-LABEL: test_vld1_s64_x4
1778 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1780 %1 = bitcast i64* %a to i8*
1781 %2 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8* %1, i32 8)
1782 %3 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 0
1783 %4 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 1
1784 %5 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 2
1785 %6 = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %2, 3
1786 %7 = insertvalue %struct.int64x1x4_t undef, <1 x i64> %3, 0, 0
1787 %8 = insertvalue %struct.int64x1x4_t %7, <1 x i64> %4, 0, 1
1788 %9 = insertvalue %struct.int64x1x4_t %8, <1 x i64> %5, 0, 2
1789 %10 = insertvalue %struct.int64x1x4_t %9, <1 x i64> %6, 0, 3
1790 ret %struct.int64x1x4_t %10
; Checks that @llvm.aarch64.neon.vld1x4.v2f32 lowers to an ld1 with a
; four-register .2s list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched. The four results are repacked into
; %struct.float32x2x4_t for the return.
1793 define %struct.float32x2x4_t @test_vld1_f32_x4(float* %a) {
1794 ; CHECK-LABEL: test_vld1_f32_x4
1795 ; CHECK: ld1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1797 %1 = bitcast float* %a to i8*
1798 %2 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8* %1, i32 4)
1799 %3 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 0
1800 %4 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 1
1801 %5 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 2
1802 %6 = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %2, 3
1803 %7 = insertvalue %struct.float32x2x4_t undef, <2 x float> %3, 0, 0
1804 %8 = insertvalue %struct.float32x2x4_t %7, <2 x float> %4, 0, 1
1805 %9 = insertvalue %struct.float32x2x4_t %8, <2 x float> %5, 0, 2
1806 %10 = insertvalue %struct.float32x2x4_t %9, <2 x float> %6, 0, 3
1807 ret %struct.float32x2x4_t %10
; Checks that @llvm.aarch64.neon.vld1x4.v1f64 lowers to an ld1 with a
; four-register .1d list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched. The four results are repacked into
; %struct.float64x1x4_t for the return.
1811 define %struct.float64x1x4_t @test_vld1_f64_x4(double* %a) {
1812 ; CHECK-LABEL: test_vld1_f64_x4
1813 ; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1815 %1 = bitcast double* %a to i8*
1816 %2 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8* %1, i32 8)
1817 %3 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 0
1818 %4 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 1
1819 %5 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 2
1820 %6 = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %2, 3
1821 %7 = insertvalue %struct.float64x1x4_t undef, <1 x double> %3, 0, 0
1822 %8 = insertvalue %struct.float64x1x4_t %7, <1 x double> %4, 0, 1
1823 %9 = insertvalue %struct.float64x1x4_t %8, <1 x double> %5, 0, 2
1824 %10 = insertvalue %struct.float64x1x4_t %9, <1 x double> %6, 0, 3
1825 ret %struct.float64x1x4_t %10
; Checks that @llvm.aarch64.neon.vst1x2.v16i8 lowers to an st1 with a
; two-register .16b list and an x-register or sp base address. The two
; vectors of %b are extracted and passed to the intrinsic as separate operands.
1828 define void @test_vst1q_s8_x2(i8* %a, [2 x <16 x i8>] %b) {
1829 ; CHECK-LABEL: test_vst1q_s8_x2
1830 ; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
1831 %1 = extractvalue [2 x <16 x i8>] %b, 0
1832 %2 = extractvalue [2 x <16 x i8>] %b, 1
1833 tail call void @llvm.aarch64.neon.vst1x2.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, i32 1)
; Checks that @llvm.aarch64.neon.vst1x2.v8i16 lowers to an st1 with a
; two-register .8h list and an x-register or sp base address. The two
; vectors of %b are extracted and passed as separate operands; %a is cast to
; i8* because the intrinsic's pointer operand is declared as i8*.
1837 define void @test_vst1q_s16_x2(i16* %a, [2 x <8 x i16>] %b) {
1838 ; CHECK-LABEL: test_vst1q_s16_x2
1839 ; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
1840 %1 = extractvalue [2 x <8 x i16>] %b, 0
1841 %2 = extractvalue [2 x <8 x i16>] %b, 1
1842 %3 = bitcast i16* %a to i8*
1843 tail call void @llvm.aarch64.neon.vst1x2.v8i16(i8* %3, <8 x i16> %1, <8 x i16> %2, i32 2)
; Checks that @llvm.aarch64.neon.vst1x2.v4i32 lowers to an st1 with a
; two-register .4s list and an x-register or sp base address. The two
; vectors of %b are extracted and passed as separate operands; %a is cast to
; i8* because the intrinsic's pointer operand is declared as i8*.
1847 define void @test_vst1q_s32_x2(i32* %a, [2 x <4 x i32>] %b) {
1848 ; CHECK-LABEL: test_vst1q_s32_x2
1849 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1850 %1 = extractvalue [2 x <4 x i32>] %b, 0
1851 %2 = extractvalue [2 x <4 x i32>] %b, 1
1852 %3 = bitcast i32* %a to i8*
1853 tail call void @llvm.aarch64.neon.vst1x2.v4i32(i8* %3, <4 x i32> %1, <4 x i32> %2, i32 4)
; Checks that @llvm.aarch64.neon.vst1x2.v2i64 lowers to an st1 with a
; two-register .2d list and an x-register or sp base address. The two
; vectors of %b are extracted and passed as separate operands; %a is cast to
; i8* because the intrinsic's pointer operand is declared as i8*.
1857 define void @test_vst1q_s64_x2(i64* %a, [2 x <2 x i64>] %b) {
1858 ; CHECK-LABEL: test_vst1q_s64_x2
1859 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1860 %1 = extractvalue [2 x <2 x i64>] %b, 0
1861 %2 = extractvalue [2 x <2 x i64>] %b, 1
1862 %3 = bitcast i64* %a to i8*
1863 tail call void @llvm.aarch64.neon.vst1x2.v2i64(i8* %3, <2 x i64> %1, <2 x i64> %2, i32 8)
; Checks that @llvm.aarch64.neon.vst1x2.v4f32 lowers to an st1 with a
; two-register .4s list and an x-register or sp base address. The two
; vectors of %b are extracted and passed as separate operands; %a is cast to
; i8* because the intrinsic's pointer operand is declared as i8*.
1867 define void @test_vst1q_f32_x2(float* %a, [2 x <4 x float>] %b) {
1868 ; CHECK-LABEL: test_vst1q_f32_x2
1869 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1870 %1 = extractvalue [2 x <4 x float>] %b, 0
1871 %2 = extractvalue [2 x <4 x float>] %b, 1
1872 %3 = bitcast float* %a to i8*
1873 tail call void @llvm.aarch64.neon.vst1x2.v4f32(i8* %3, <4 x float> %1, <4 x float> %2, i32 4)
; Checks that @llvm.aarch64.neon.vst1x2.v2f64 lowers to an st1 with a
; two-register .2d list and an x-register or sp base address. The two
; vectors of %b are extracted and passed as separate operands; %a is cast to
; i8* because the intrinsic's pointer operand is declared as i8*.
1878 define void @test_vst1q_f64_x2(double* %a, [2 x <2 x double>] %b) {
1879 ; CHECK-LABEL: test_vst1q_f64_x2
1880 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1881 %1 = extractvalue [2 x <2 x double>] %b, 0
1882 %2 = extractvalue [2 x <2 x double>] %b, 1
1883 %3 = bitcast double* %a to i8*
1884 tail call void @llvm.aarch64.neon.vst1x2.v2f64(i8* %3, <2 x double> %1, <2 x double> %2, i32 8)
; Checks that @llvm.aarch64.neon.vst1x2.v8i8 lowers to an st1 with a
; two-register .8b list and an x-register or sp base address. The two
; vectors of %b are extracted and passed to the intrinsic as separate operands.
1888 define void @test_vst1_s8_x2(i8* %a, [2 x <8 x i8>] %b) {
1889 ; CHECK-LABEL: test_vst1_s8_x2
1890 ; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
1891 %1 = extractvalue [2 x <8 x i8>] %b, 0
1892 %2 = extractvalue [2 x <8 x i8>] %b, 1
1893 tail call void @llvm.aarch64.neon.vst1x2.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, i32 1)
; Checks that @llvm.aarch64.neon.vst1x2.v4i16 lowers to an st1 with a
; two-register .4h list and an x-register or sp base address. The two
; vectors of %b are extracted and passed as separate operands; %a is cast to
; i8* because the intrinsic's pointer operand is declared as i8*.
1897 define void @test_vst1_s16_x2(i16* %a, [2 x <4 x i16>] %b) {
1898 ; CHECK-LABEL: test_vst1_s16_x2
1899 ; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
1900 %1 = extractvalue [2 x <4 x i16>] %b, 0
1901 %2 = extractvalue [2 x <4 x i16>] %b, 1
1902 %3 = bitcast i16* %a to i8*
1903 tail call void @llvm.aarch64.neon.vst1x2.v4i16(i8* %3, <4 x i16> %1, <4 x i16> %2, i32 2)
; Checks that @llvm.aarch64.neon.vst1x2.v2i32 lowers to an st1 with a
; two-register .2s list and an x-register or sp base address. The two
; vectors of %b are extracted and passed as separate operands; %a is cast to
; i8* because the intrinsic's pointer operand is declared as i8*.
1907 define void @test_vst1_s32_x2(i32* %a, [2 x <2 x i32>] %b) {
1908 ; CHECK-LABEL: test_vst1_s32_x2
1909 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1910 %1 = extractvalue [2 x <2 x i32>] %b, 0
1911 %2 = extractvalue [2 x <2 x i32>] %b, 1
1912 %3 = bitcast i32* %a to i8*
1913 tail call void @llvm.aarch64.neon.vst1x2.v2i32(i8* %3, <2 x i32> %1, <2 x i32> %2, i32 4)
; Checks that @llvm.aarch64.neon.vst1x2.v1i64 lowers to an st1 with a
; two-register .1d list and an x-register or sp base address. The two
; vectors of %b are extracted and passed as separate operands; %a is cast to
; i8* because the intrinsic's pointer operand is declared as i8*.
1917 define void @test_vst1_s64_x2(i64* %a, [2 x <1 x i64>] %b) {
1918 ; CHECK-LABEL: test_vst1_s64_x2
1919 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1920 %1 = extractvalue [2 x <1 x i64>] %b, 0
1921 %2 = extractvalue [2 x <1 x i64>] %b, 1
1922 %3 = bitcast i64* %a to i8*
1923 tail call void @llvm.aarch64.neon.vst1x2.v1i64(i8* %3, <1 x i64> %1, <1 x i64> %2, i32 8)
; Checks that @llvm.aarch64.neon.vst1x2.v2f32 lowers to an st1 with a
; two-register .2s list and an x-register or sp base address. The two
; vectors of %b are extracted and passed as separate operands; %a is cast to
; i8* because the intrinsic's pointer operand is declared as i8*.
1927 define void @test_vst1_f32_x2(float* %a, [2 x <2 x float>] %b) {
1928 ; CHECK-LABEL: test_vst1_f32_x2
1929 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
1930 %1 = extractvalue [2 x <2 x float>] %b, 0
1931 %2 = extractvalue [2 x <2 x float>] %b, 1
1932 %3 = bitcast float* %a to i8*
1933 tail call void @llvm.aarch64.neon.vst1x2.v2f32(i8* %3, <2 x float> %1, <2 x float> %2, i32 4)
; Checks that @llvm.aarch64.neon.vst1x2.v1f64 lowers to an st1 with a
; two-register .1d list and an x-register or sp base address. The two
; vectors of %b are extracted and passed as separate operands; %a is cast to
; i8* because the intrinsic's pointer operand is declared as i8*.
1937 define void @test_vst1_f64_x2(double* %a, [2 x <1 x double>] %b) {
1938 ; CHECK-LABEL: test_vst1_f64_x2
1939 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
1940 %1 = extractvalue [2 x <1 x double>] %b, 0
1941 %2 = extractvalue [2 x <1 x double>] %b, 1
1942 %3 = bitcast double* %a to i8*
1943 tail call void @llvm.aarch64.neon.vst1x2.v1f64(i8* %3, <1 x double> %1, <1 x double> %2, i32 8)
; Checks that @llvm.aarch64.neon.vst1x3.v16i8 lowers to an st1 with a
; three-register .16b list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
1947 define void @test_vst1q_s8_x3(i8* %a, [3 x <16 x i8>] %b) {
1948 ; CHECK-LABEL: test_vst1q_s8_x3
1949 ; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
1951 %1 = extractvalue [3 x <16 x i8>] %b, 0
1952 %2 = extractvalue [3 x <16 x i8>] %b, 1
1953 %3 = extractvalue [3 x <16 x i8>] %b, 2
1954 tail call void @llvm.aarch64.neon.vst1x3.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, i32 1)
; Checks that @llvm.aarch64.neon.vst1x3.v8i16 lowers to an st1 with a
; three-register .8h list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
1958 define void @test_vst1q_s16_x3(i16* %a, [3 x <8 x i16>] %b) {
1959 ; CHECK-LABEL: test_vst1q_s16_x3
1960 ; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
1962 %1 = extractvalue [3 x <8 x i16>] %b, 0
1963 %2 = extractvalue [3 x <8 x i16>] %b, 1
1964 %3 = extractvalue [3 x <8 x i16>] %b, 2
1965 %4 = bitcast i16* %a to i8*
1966 tail call void @llvm.aarch64.neon.vst1x3.v8i16(i8* %4, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, i32 2)
; Checks that @llvm.aarch64.neon.vst1x3.v4i32 lowers to an st1 with a
; three-register .4s list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
1970 define void @test_vst1q_s32_x3(i32* %a, [3 x <4 x i32>] %b) {
1971 ; CHECK-LABEL: test_vst1q_s32_x3
1972 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1974 %1 = extractvalue [3 x <4 x i32>] %b, 0
1975 %2 = extractvalue [3 x <4 x i32>] %b, 1
1976 %3 = extractvalue [3 x <4 x i32>] %b, 2
1977 %4 = bitcast i32* %a to i8*
1978 tail call void @llvm.aarch64.neon.vst1x3.v4i32(i8* %4, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, i32 4)
; Checks that @llvm.aarch64.neon.vst1x3.v2i64 lowers to an st1 with a
; three-register .2d list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
1982 define void @test_vst1q_s64_x3(i64* %a, [3 x <2 x i64>] %b) {
1983 ; CHECK-LABEL: test_vst1q_s64_x3
1984 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
1986 %1 = extractvalue [3 x <2 x i64>] %b, 0
1987 %2 = extractvalue [3 x <2 x i64>] %b, 1
1988 %3 = extractvalue [3 x <2 x i64>] %b, 2
1989 %4 = bitcast i64* %a to i8*
1990 tail call void @llvm.aarch64.neon.vst1x3.v2i64(i8* %4, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, i32 8)
; Checks that @llvm.aarch64.neon.vst1x3.v4f32 lowers to an st1 with a
; three-register .4s list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
1994 define void @test_vst1q_f32_x3(float* %a, [3 x <4 x float>] %b) {
1995 ; CHECK-LABEL: test_vst1q_f32_x3
1996 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
1998 %1 = extractvalue [3 x <4 x float>] %b, 0
1999 %2 = extractvalue [3 x <4 x float>] %b, 1
2000 %3 = extractvalue [3 x <4 x float>] %b, 2
2001 %4 = bitcast float* %a to i8*
2002 tail call void @llvm.aarch64.neon.vst1x3.v4f32(i8* %4, <4 x float> %1, <4 x float> %2, <4 x float> %3, i32 4)
; Checks that @llvm.aarch64.neon.vst1x3.v2f64 lowers to an st1 with a
; three-register .2d list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
2006 define void @test_vst1q_f64_x3(double* %a, [3 x <2 x double>] %b) {
2007 ; CHECK-LABEL: test_vst1q_f64_x3
2008 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
2010 %1 = extractvalue [3 x <2 x double>] %b, 0
2011 %2 = extractvalue [3 x <2 x double>] %b, 1
2012 %3 = extractvalue [3 x <2 x double>] %b, 2
2013 %4 = bitcast double* %a to i8*
2014 tail call void @llvm.aarch64.neon.vst1x3.v2f64(i8* %4, <2 x double> %1, <2 x double> %2, <2 x double> %3, i32 8)
; Checks that @llvm.aarch64.neon.vst1x3.v8i8 lowers to an st1 with a
; three-register .8b list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
2018 define void @test_vst1_s8_x3(i8* %a, [3 x <8 x i8>] %b) {
2019 ; CHECK-LABEL: test_vst1_s8_x3
2020 ; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
2022 %1 = extractvalue [3 x <8 x i8>] %b, 0
2023 %2 = extractvalue [3 x <8 x i8>] %b, 1
2024 %3 = extractvalue [3 x <8 x i8>] %b, 2
2025 tail call void @llvm.aarch64.neon.vst1x3.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, i32 1)
; Checks that @llvm.aarch64.neon.vst1x3.v4i16 lowers to an st1 with a
; three-register .4h list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
2029 define void @test_vst1_s16_x3(i16* %a, [3 x <4 x i16>] %b) {
2030 ; CHECK-LABEL: test_vst1_s16_x3
2031 ; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
2033 %1 = extractvalue [3 x <4 x i16>] %b, 0
2034 %2 = extractvalue [3 x <4 x i16>] %b, 1
2035 %3 = extractvalue [3 x <4 x i16>] %b, 2
2036 %4 = bitcast i16* %a to i8*
2037 tail call void @llvm.aarch64.neon.vst1x3.v4i16(i8* %4, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, i32 2)
; Checks that @llvm.aarch64.neon.vst1x3.v2i32 lowers to an st1 with a
; three-register .2s list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
2041 define void @test_vst1_s32_x3(i32* %a, [3 x <2 x i32>] %b) {
2042 ; CHECK-LABEL: test_vst1_s32_x3
2043 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
2045 %1 = extractvalue [3 x <2 x i32>] %b, 0
2046 %2 = extractvalue [3 x <2 x i32>] %b, 1
2047 %3 = extractvalue [3 x <2 x i32>] %b, 2
2048 %4 = bitcast i32* %a to i8*
2049 tail call void @llvm.aarch64.neon.vst1x3.v2i32(i8* %4, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, i32 4)
; Checks that @llvm.aarch64.neon.vst1x3.v1i64 lowers to an st1 with a
; three-register .1d list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
2053 define void @test_vst1_s64_x3(i64* %a, [3 x <1 x i64>] %b) {
2054 ; CHECK-LABEL: test_vst1_s64_x3
2055 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
2057 %1 = extractvalue [3 x <1 x i64>] %b, 0
2058 %2 = extractvalue [3 x <1 x i64>] %b, 1
2059 %3 = extractvalue [3 x <1 x i64>] %b, 2
2060 %4 = bitcast i64* %a to i8*
2061 tail call void @llvm.aarch64.neon.vst1x3.v1i64(i8* %4, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, i32 8)
; Checks that @llvm.aarch64.neon.vst1x3.v2f32 lowers to an st1 with a
; three-register .2s list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
2065 define void @test_vst1_f32_x3(float* %a, [3 x <2 x float>] %b) {
2066 ; CHECK-LABEL: test_vst1_f32_x3
2067 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
2069 %1 = extractvalue [3 x <2 x float>] %b, 0
2070 %2 = extractvalue [3 x <2 x float>] %b, 1
2071 %3 = extractvalue [3 x <2 x float>] %b, 2
2072 %4 = bitcast float* %a to i8*
2073 tail call void @llvm.aarch64.neon.vst1x3.v2f32(i8* %4, <2 x float> %1, <2 x float> %2, <2 x float> %3, i32 4)
; Checks that @llvm.aarch64.neon.vst1x3.v1f64 lowers to an st1 with a
; three-register .1d list. The pattern also pins the x-register or sp base
; address so the complete operand list is verified on one directive line.
2077 define void @test_vst1_f64_x3(double* %a, [3 x <1 x double>] %b) {
2078 ; CHECK-LABEL: test_vst1_f64_x3
2079 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
2081 %1 = extractvalue [3 x <1 x double>] %b, 0
2082 %2 = extractvalue [3 x <1 x double>] %b, 1
2083 %3 = extractvalue [3 x <1 x double>] %b, 2
2084 %4 = bitcast double* %a to i8*
2085 tail call void @llvm.aarch64.neon.vst1x3.v1f64(i8* %4, <1 x double> %1, <1 x double> %2, <1 x double> %3, i32 8)
; Checks that @llvm.aarch64.neon.vst1x4.v16i8 lowers to an st1 with a
; four-register .16b list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2089 define void @test_vst1q_s8_x4(i8* %a, [4 x <16 x i8>] %b) {
2090 ; CHECK-LABEL: test_vst1q_s8_x4
2091 ; CHECK: st1 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}]
2093 %1 = extractvalue [4 x <16 x i8>] %b, 0
2094 %2 = extractvalue [4 x <16 x i8>] %b, 1
2095 %3 = extractvalue [4 x <16 x i8>] %b, 2
2096 %4 = extractvalue [4 x <16 x i8>] %b, 3
2097 tail call void @llvm.aarch64.neon.vst1x4.v16i8(i8* %a, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3, <16 x i8> %4, i32 1)
; Checks that @llvm.aarch64.neon.vst1x4.v8i16 lowers to an st1 with a
; four-register .8h list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2101 define void @test_vst1q_s16_x4(i16* %a, [4 x <8 x i16>] %b) {
2102 ; CHECK-LABEL: test_vst1q_s16_x4
2103 ; CHECK: st1 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}]
2105 %1 = extractvalue [4 x <8 x i16>] %b, 0
2106 %2 = extractvalue [4 x <8 x i16>] %b, 1
2107 %3 = extractvalue [4 x <8 x i16>] %b, 2
2108 %4 = extractvalue [4 x <8 x i16>] %b, 3
2109 %5 = bitcast i16* %a to i8*
2110 tail call void @llvm.aarch64.neon.vst1x4.v8i16(i8* %5, <8 x i16> %1, <8 x i16> %2, <8 x i16> %3, <8 x i16> %4, i32 2)
; Checks that @llvm.aarch64.neon.vst1x4.v4i32 lowers to an st1 with a
; four-register .4s list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2114 define void @test_vst1q_s32_x4(i32* %a, [4 x <4 x i32>] %b) {
2115 ; CHECK-LABEL: test_vst1q_s32_x4
2116 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
2118 %1 = extractvalue [4 x <4 x i32>] %b, 0
2119 %2 = extractvalue [4 x <4 x i32>] %b, 1
2120 %3 = extractvalue [4 x <4 x i32>] %b, 2
2121 %4 = extractvalue [4 x <4 x i32>] %b, 3
2122 %5 = bitcast i32* %a to i8*
2123 tail call void @llvm.aarch64.neon.vst1x4.v4i32(i8* %5, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, i32 4)
; Checks that @llvm.aarch64.neon.vst1x4.v2i64 lowers to an st1 with a
; four-register .2d list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2127 define void @test_vst1q_s64_x4(i64* %a, [4 x <2 x i64>] %b) {
2128 ; CHECK-LABEL: test_vst1q_s64_x4
2129 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
2131 %1 = extractvalue [4 x <2 x i64>] %b, 0
2132 %2 = extractvalue [4 x <2 x i64>] %b, 1
2133 %3 = extractvalue [4 x <2 x i64>] %b, 2
2134 %4 = extractvalue [4 x <2 x i64>] %b, 3
2135 %5 = bitcast i64* %a to i8*
2136 tail call void @llvm.aarch64.neon.vst1x4.v2i64(i8* %5, <2 x i64> %1, <2 x i64> %2, <2 x i64> %3, <2 x i64> %4, i32 8)
; Checks that @llvm.aarch64.neon.vst1x4.v4f32 lowers to an st1 with a
; four-register .4s list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2140 define void @test_vst1q_f32_x4(float* %a, [4 x <4 x float>] %b) {
2141 ; CHECK-LABEL: test_vst1q_f32_x4
2142 ; CHECK: st1 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}]
2144 %1 = extractvalue [4 x <4 x float>] %b, 0
2145 %2 = extractvalue [4 x <4 x float>] %b, 1
2146 %3 = extractvalue [4 x <4 x float>] %b, 2
2147 %4 = extractvalue [4 x <4 x float>] %b, 3
2148 %5 = bitcast float* %a to i8*
2149 tail call void @llvm.aarch64.neon.vst1x4.v4f32(i8* %5, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, i32 4)
; Checks that @llvm.aarch64.neon.vst1x4.v2f64 lowers to an st1 with a
; four-register .2d list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2153 define void @test_vst1q_f64_x4(double* %a, [4 x <2 x double>] %b) {
2154 ; CHECK-LABEL: test_vst1q_f64_x4
2155 ; CHECK: st1 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}]
2157 %1 = extractvalue [4 x <2 x double>] %b, 0
2158 %2 = extractvalue [4 x <2 x double>] %b, 1
2159 %3 = extractvalue [4 x <2 x double>] %b, 2
2160 %4 = extractvalue [4 x <2 x double>] %b, 3
2161 %5 = bitcast double* %a to i8*
2162 tail call void @llvm.aarch64.neon.vst1x4.v2f64(i8* %5, <2 x double> %1, <2 x double> %2, <2 x double> %3, <2 x double> %4, i32 8)
; Checks that @llvm.aarch64.neon.vst1x4.v8i8 lowers to an st1 with a
; four-register .8b list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2166 define void @test_vst1_s8_x4(i8* %a, [4 x <8 x i8>] %b) {
2167 ; CHECK-LABEL: test_vst1_s8_x4
2168 ; CHECK: st1 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}]
2170 %1 = extractvalue [4 x <8 x i8>] %b, 0
2171 %2 = extractvalue [4 x <8 x i8>] %b, 1
2172 %3 = extractvalue [4 x <8 x i8>] %b, 2
2173 %4 = extractvalue [4 x <8 x i8>] %b, 3
2174 tail call void @llvm.aarch64.neon.vst1x4.v8i8(i8* %a, <8 x i8> %1, <8 x i8> %2, <8 x i8> %3, <8 x i8> %4, i32 1)
; Checks that @llvm.aarch64.neon.vst1x4.v4i16 lowers to an st1 with a
; four-register .4h list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2178 define void @test_vst1_s16_x4(i16* %a, [4 x <4 x i16>] %b) {
2179 ; CHECK-LABEL: test_vst1_s16_x4
2180 ; CHECK: st1 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}]
2182 %1 = extractvalue [4 x <4 x i16>] %b, 0
2183 %2 = extractvalue [4 x <4 x i16>] %b, 1
2184 %3 = extractvalue [4 x <4 x i16>] %b, 2
2185 %4 = extractvalue [4 x <4 x i16>] %b, 3
2186 %5 = bitcast i16* %a to i8*
2187 tail call void @llvm.aarch64.neon.vst1x4.v4i16(i8* %5, <4 x i16> %1, <4 x i16> %2, <4 x i16> %3, <4 x i16> %4, i32 2)
; Checks that @llvm.aarch64.neon.vst1x4.v2i32 lowers to an st1 with a
; four-register .2s list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2191 define void @test_vst1_s32_x4(i32* %a, [4 x <2 x i32>] %b) {
2192 ; CHECK-LABEL: test_vst1_s32_x4
2193 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
2195 %1 = extractvalue [4 x <2 x i32>] %b, 0
2196 %2 = extractvalue [4 x <2 x i32>] %b, 1
2197 %3 = extractvalue [4 x <2 x i32>] %b, 2
2198 %4 = extractvalue [4 x <2 x i32>] %b, 3
2199 %5 = bitcast i32* %a to i8*
2200 tail call void @llvm.aarch64.neon.vst1x4.v2i32(i8* %5, <2 x i32> %1, <2 x i32> %2, <2 x i32> %3, <2 x i32> %4, i32 4)
; Checks that @llvm.aarch64.neon.vst1x4.v1i64 lowers to an st1 with a
; four-register .1d list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2204 define void @test_vst1_s64_x4(i64* %a, [4 x <1 x i64>] %b) {
2205 ; CHECK-LABEL: test_vst1_s64_x4
2206 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
2208 %1 = extractvalue [4 x <1 x i64>] %b, 0
2209 %2 = extractvalue [4 x <1 x i64>] %b, 1
2210 %3 = extractvalue [4 x <1 x i64>] %b, 2
2211 %4 = extractvalue [4 x <1 x i64>] %b, 3
2212 %5 = bitcast i64* %a to i8*
2213 tail call void @llvm.aarch64.neon.vst1x4.v1i64(i8* %5, <1 x i64> %1, <1 x i64> %2, <1 x i64> %3, <1 x i64> %4, i32 8)
; Checks that @llvm.aarch64.neon.vst1x4.v2f32 lowers to an st1 with a
; four-register .2s list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2217 define void @test_vst1_f32_x4(float* %a, [4 x <2 x float>] %b) {
2218 ; CHECK-LABEL: test_vst1_f32_x4
2219 ; CHECK: st1 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}]
2221 %1 = extractvalue [4 x <2 x float>] %b, 0
2222 %2 = extractvalue [4 x <2 x float>] %b, 1
2223 %3 = extractvalue [4 x <2 x float>] %b, 2
2224 %4 = extractvalue [4 x <2 x float>] %b, 3
2225 %5 = bitcast float* %a to i8*
2226 tail call void @llvm.aarch64.neon.vst1x4.v2f32(i8* %5, <2 x float> %1, <2 x float> %2, <2 x float> %3, <2 x float> %4, i32 4)
; Checks that @llvm.aarch64.neon.vst1x4.v1f64 lowers to an st1 with a
; four-register .1d list and an x-register or sp base address. The whole
; instruction is matched by one FileCheck directive line, because a
; continuation line without the directive prefix would be a plain comment and
; would never be matched.
2230 define void @test_vst1_f64_x4(double* %a, [4 x <1 x double>] %b) {
2231 ; CHECK-LABEL: test_vst1_f64_x4
2232 ; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}]
2234 %1 = extractvalue [4 x <1 x double>] %b, 0
2235 %2 = extractvalue [4 x <1 x double>] %b, 1
2236 %3 = extractvalue [4 x <1 x double>] %b, 2
2237 %4 = extractvalue [4 x <1 x double>] %b, 3
2238 %5 = bitcast double* %a to i8*
2239 tail call void @llvm.aarch64.neon.vst1x4.v1f64(i8* %5, <1 x double> %1, <1 x double> %2, <1 x double> %3, <1 x double> %4, i32 8)
2243 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x2.v16i8(i8*, i32)
2244 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x2.v8i16(i8*, i32)
2245 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x2.v4i32(i8*, i32)
2246 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x2.v2i64(i8*, i32)
2247 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x2.v4f32(i8*, i32)
2248 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x2.v2f64(i8*, i32)
2249 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x2.v8i8(i8*, i32)
2250 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x2.v4i16(i8*, i32)
2251 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x2.v2i32(i8*, i32)
2252 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x2.v1i64(i8*, i32)
2253 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x2.v2f32(i8*, i32)
2254 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x2.v1f64(i8*, i32)
2255 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x3.v16i8(i8*, i32)
2256 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x3.v8i16(i8*, i32)
2257 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x3.v4i32(i8*, i32)
2258 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x3.v2i64(i8*, i32)
2259 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x3.v4f32(i8*, i32)
2260 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x3.v2f64(i8*, i32)
2261 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x3.v8i8(i8*, i32)
2262 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x3.v4i16(i8*, i32)
2263 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x3.v2i32(i8*, i32)
2264 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x3.v1i64(i8*, i32)
2265 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x3.v2f32(i8*, i32)
2266 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x3.v1f64(i8*, i32)
2267 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.vld1x4.v16i8(i8*, i32)
2268 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.vld1x4.v8i16(i8*, i32)
2269 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.vld1x4.v4i32(i8*, i32)
2270 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.vld1x4.v2i64(i8*, i32)
2271 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.vld1x4.v4f32(i8*, i32)
2272 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.vld1x4.v2f64(i8*, i32)
2273 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.vld1x4.v8i8(i8*, i32)
2274 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.vld1x4.v4i16(i8*, i32)
2275 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.vld1x4.v2i32(i8*, i32)
2276 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.vld1x4.v1i64(i8*, i32)
2277 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.vld1x4.v2f32(i8*, i32)
2278 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.vld1x4.v1f64(i8*, i32)
2279 declare void @llvm.aarch64.neon.vst1x2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
2280 declare void @llvm.aarch64.neon.vst1x2.v8i16(i8*, <8 x i16>, <8 x i16>, i32)
2281 declare void @llvm.aarch64.neon.vst1x2.v4i32(i8*, <4 x i32>, <4 x i32>, i32)
2282 declare void @llvm.aarch64.neon.vst1x2.v2i64(i8*, <2 x i64>, <2 x i64>, i32)
2283 declare void @llvm.aarch64.neon.vst1x2.v4f32(i8*, <4 x float>, <4 x float>, i32)
2284 declare void @llvm.aarch64.neon.vst1x2.v2f64(i8*, <2 x double>, <2 x double>, i32)
2285 declare void @llvm.aarch64.neon.vst1x2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)
2286 declare void @llvm.aarch64.neon.vst1x2.v4i16(i8*, <4 x i16>, <4 x i16>, i32)
2287 declare void @llvm.aarch64.neon.vst1x2.v2i32(i8*, <2 x i32>, <2 x i32>, i32)
2288 declare void @llvm.aarch64.neon.vst1x2.v1i64(i8*, <1 x i64>, <1 x i64>, i32)
2289 declare void @llvm.aarch64.neon.vst1x2.v2f32(i8*, <2 x float>, <2 x float>, i32)
2290 declare void @llvm.aarch64.neon.vst1x2.v1f64(i8*, <1 x double>, <1 x double>, i32)
2291 declare void @llvm.aarch64.neon.vst1x3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
2292 declare void @llvm.aarch64.neon.vst1x3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32)
2293 declare void @llvm.aarch64.neon.vst1x3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32)
2294 declare void @llvm.aarch64.neon.vst1x3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32)
2295 declare void @llvm.aarch64.neon.vst1x3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32)
2296 declare void @llvm.aarch64.neon.vst1x3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32)
2297 declare void @llvm.aarch64.neon.vst1x3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
2298 declare void @llvm.aarch64.neon.vst1x3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32)
2299 declare void @llvm.aarch64.neon.vst1x3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32)
2300 declare void @llvm.aarch64.neon.vst1x3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32)
2301 declare void @llvm.aarch64.neon.vst1x3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32)
2302 declare void @llvm.aarch64.neon.vst1x3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32)
2303 declare void @llvm.aarch64.neon.vst1x4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
2304 declare void @llvm.aarch64.neon.vst1x4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32)
2305 declare void @llvm.aarch64.neon.vst1x4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32)
2306 declare void @llvm.aarch64.neon.vst1x4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32)
2307 declare void @llvm.aarch64.neon.vst1x4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32)
2308 declare void @llvm.aarch64.neon.vst1x4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32)
2309 declare void @llvm.aarch64.neon.vst1x4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
2310 declare void @llvm.aarch64.neon.vst1x4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32)
2311 declare void @llvm.aarch64.neon.vst1x4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32)
2312 declare void @llvm.aarch64.neon.vst1x4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32)
2313 declare void @llvm.aarch64.neon.vst1x4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32)
2314 declare void @llvm.aarch64.neon.vst1x4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)