1 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -o - | FileCheck %s
2 ; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -mattr=+neon -o - | FileCheck %s
3 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -aarch64-no-strict-align -mattr=+neon -o - | FileCheck %s
4 ; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -aarch64-no-strict-align -mattr=+neon -o - | FileCheck %s
5 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -aarch64-strict-align -mattr=+neon -o - | FileCheck %s
6 ; RUN: llc < %s -mtriple=aarch64_be-none-linux-gnu -aarch64-strict-align -mattr=+neon -o - | FileCheck %s --check-prefix=BE-STRICT-ALIGN
8 ;; Check element-aligned 128-bit vector load/store - integer
;; Loads one 128-bit integer vector of each element width (i8, i16, i32, i64)
;; from the %head.* pointers, stores each one to the matching %tail.* pointer,
;; and returns the <16 x i8> value. Every access is aligned only to its element
;; size (1, 2, 4 and 8 bytes respectively).
;; - The default-prefix directives expect one ld1 and one st1 per vector, with
;;   the arrangement matching the element type and the addresses in x0-x7.
;; - The directives under the BE-STRICT-ALIGN prefix (selected only by the
;;   aarch64_be + -aarch64-strict-align invocation at the top of this file)
;;   expect the scalar mnemonics ldrb, ldrh, ldr, ldr and then strb, strh, str,
;;   str, in that order.
9 define <16 x i8> @qwordint (<16 x i8>* %head.v16i8, <8 x i16>* %head.v8i16, <4 x i32>* %head.v4i32, <2 x i64>* %head.v2i64,
10 <16 x i8>* %tail.v16i8, <8 x i16>* %tail.v8i16, <4 x i32>* %tail.v4i32, <2 x i64>* %tail.v2i64) {
11 ; CHECK-LABEL: qwordint
12 ; CHECK: ld1 {v0.16b}, [x0]
13 ; CHECK: ld1 {v1.8h}, [x1]
14 ; CHECK: ld1 {v2.4s}, [x2]
15 ; CHECK: ld1 {v3.2d}, [x3]
16 ; CHECK: st1 {v0.16b}, [x4]
17 ; CHECK: st1 {v1.8h}, [x5]
18 ; CHECK: st1 {v2.4s}, [x6]
19 ; CHECK: st1 {v3.2d}, [x7]
20 ; BE-STRICT-ALIGN-LABEL: qwordint
21 ; BE-STRICT-ALIGN: ldrb
22 ; BE-STRICT-ALIGN: ldrh
23 ; BE-STRICT-ALIGN: ldr
24 ; BE-STRICT-ALIGN: ldr
25 ; BE-STRICT-ALIGN: strb
26 ; BE-STRICT-ALIGN: strh
27 ; BE-STRICT-ALIGN: str
28 ; BE-STRICT-ALIGN: str
;; The align operand of each access equals the element size, not the 16-byte
;; vector size.
30 %val.v16i8 = load <16 x i8>* %head.v16i8, align 1
31 %val.v8i16 = load <8 x i16>* %head.v8i16, align 2
32 %val.v4i32 = load <4 x i32>* %head.v4i32, align 4
33 %val.v2i64 = load <2 x i64>* %head.v2i64, align 8
34 store <16 x i8> %val.v16i8, <16 x i8>* %tail.v16i8, align 1
35 store <8 x i16> %val.v8i16, <8 x i16>* %tail.v8i16, align 2
36 store <4 x i32> %val.v4i32, <4 x i32>* %tail.v4i32, align 4
37 store <2 x i64> %val.v2i64, <2 x i64>* %tail.v2i64, align 8
38 ret <16 x i8> %val.v16i8
41 ;; Check element-aligned 128-bit vector load/store - floating point
;; Loads a <4 x float> (align 4) and a <2 x double> (align 8) from the %head.*
;; pointers, stores each one to the matching %tail.* pointer with the same
;; alignment, and returns the <4 x float> value.
;; - The default-prefix directives expect ld1/st1 with .4s and .2d
;;   arrangements and the addresses in x0-x3.
;; - The directives under the BE-STRICT-ALIGN prefix expect ldr, ldr and then
;;   str, str, in that order.
42 define <4 x float> @qwordfloat (<4 x float>* %head.v4f32, <2 x double>* %head.v2f64,
43 <4 x float>* %tail.v4f32, <2 x double>* %tail.v2f64) {
44 ; CHECK-LABEL: qwordfloat
45 ; CHECK: ld1 {v0.4s}, [x0]
46 ; CHECK: ld1 {v1.2d}, [x1]
47 ; CHECK: st1 {v0.4s}, [x2]
48 ; CHECK: st1 {v1.2d}, [x3]
49 ; BE-STRICT-ALIGN-LABEL: qwordfloat
50 ; BE-STRICT-ALIGN: ldr
51 ; BE-STRICT-ALIGN: ldr
52 ; BE-STRICT-ALIGN: str
53 ; BE-STRICT-ALIGN: str
;; The align operand of each access equals the element size, not the 16-byte
;; vector size.
55 %val.v4f32 = load <4 x float>* %head.v4f32, align 4
56 %val.v2f64 = load <2 x double>* %head.v2f64, align 8
57 store <4 x float> %val.v4f32, <4 x float>* %tail.v4f32, align 4
58 store <2 x double> %val.v2f64, <2 x double>* %tail.v2f64, align 8
59 ret <4 x float> %val.v4f32
62 ;; Check element-aligned 64-bit vector load/store - integer
;; Loads one 64-bit integer vector of each element width (i8, i16, i32, i64)
;; from the %head.* pointers, stores each one to the matching %tail.* pointer,
;; and returns the <8 x i8> value. Every access is aligned only to its element
;; size (1, 2, 4 and 8 bytes respectively).
;; - The default-prefix directives expect one ld1 and one st1 per vector, with
;;   .8b/.4h/.2s/.1d arrangements and the addresses in x0-x7.
;; - The directives under the BE-STRICT-ALIGN prefix expect ldrb, ldrh, ldr for
;;   the first three loads and strb, strh, str for the first three stores. The
;;   <1 x i64> access, whose align 8 equals the full vector size, is still
;;   expected as ld1/st1 with the .1d arrangement.
63 define <8 x i8> @dwordint (<8 x i8>* %head.v8i8, <4 x i16>* %head.v4i16, <2 x i32>* %head.v2i32, <1 x i64>* %head.v1i64,
64 <8 x i8>* %tail.v8i8, <4 x i16>* %tail.v4i16, <2 x i32>* %tail.v2i32, <1 x i64>* %tail.v1i64) {
65 ; CHECK-LABEL: dwordint
66 ; CHECK: ld1 {v0.8b}, [x0]
67 ; CHECK: ld1 {v1.4h}, [x1]
68 ; CHECK: ld1 {v2.2s}, [x2]
69 ; CHECK: ld1 {v3.1d}, [x3]
70 ; CHECK: st1 {v0.8b}, [x4]
71 ; CHECK: st1 {v1.4h}, [x5]
72 ; CHECK: st1 {v2.2s}, [x6]
73 ; CHECK: st1 {v3.1d}, [x7]
74 ; BE-STRICT-ALIGN-LABEL: dwordint
75 ; BE-STRICT-ALIGN: ldrb
76 ; BE-STRICT-ALIGN: ldrh
77 ; BE-STRICT-ALIGN: ldr
78 ; BE-STRICT-ALIGN: ld1 {v1.1d}, [x3]
79 ; BE-STRICT-ALIGN: strb
80 ; BE-STRICT-ALIGN: strh
81 ; BE-STRICT-ALIGN: str
82 ; BE-STRICT-ALIGN: st1 {v1.1d}, [x7]
;; The align operand of each access equals the element size; only the
;; <1 x i64> access is aligned to the full 8-byte vector size.
84 %val.v8i8 = load <8 x i8>* %head.v8i8, align 1
85 %val.v4i16 = load <4 x i16>* %head.v4i16, align 2
86 %val.v2i32 = load <2 x i32>* %head.v2i32, align 4
87 %val.v1i64 = load <1 x i64>* %head.v1i64, align 8
88 store <8 x i8> %val.v8i8, <8 x i8>* %tail.v8i8 , align 1
89 store <4 x i16> %val.v4i16, <4 x i16>* %tail.v4i16, align 2
90 store <2 x i32> %val.v2i32, <2 x i32>* %tail.v2i32, align 4
91 store <1 x i64> %val.v1i64, <1 x i64>* %tail.v1i64, align 8
92 ret <8 x i8> %val.v8i8
95 ;; Check element-aligned 64-bit vector load/store - floating point
;; Loads a <2 x float> (align 4) and a <1 x double> (align 8) from the %head.*
;; pointers, stores each one to the matching %tail.* pointer with the same
;; alignment, and returns the <2 x float> value.
;; - The default-prefix directives expect ld1/st1 with .2s and .1d
;;   arrangements and the addresses in x0-x3.
;; - The directives under the BE-STRICT-ALIGN prefix expect ldr/str for the
;;   <2 x float> access, while the <1 x double> access (align 8, the full
;;   vector size) is still expected as ld1/st1 with the .1d arrangement.
96 define <2 x float> @dwordfloat (<2 x float>* %head.v2f32, <1 x double>* %head.v1f64,
97 <2 x float>* %tail.v2f32, <1 x double>* %tail.v1f64) {
98 ; CHECK-LABEL: dwordfloat
99 ; CHECK: ld1 {v0.2s}, [x0]
100 ; CHECK: ld1 {v1.1d}, [x1]
101 ; CHECK: st1 {v0.2s}, [x2]
102 ; CHECK: st1 {v1.1d}, [x3]
103 ; BE-STRICT-ALIGN-LABEL: dwordfloat
104 ; BE-STRICT-ALIGN: ldr
105 ; BE-STRICT-ALIGN: ld1 {v1.1d}, [x1]
106 ; BE-STRICT-ALIGN: str
107 ; BE-STRICT-ALIGN: st1 {v1.1d}, [x3]
;; The align operand of each access equals the element size; only the
;; <1 x double> access is aligned to the full 8-byte vector size.
109 %val.v2f32 = load <2 x float>* %head.v2f32, align 4
110 %val.v1f64 = load <1 x double>* %head.v1f64, align 8
111 store <2 x float> %val.v2f32, <2 x float>* %tail.v2f32, align 4
112 store <1 x double> %val.v1f64, <1 x double>* %tail.v1f64, align 8
113 ret <2 x float> %val.v2f32
116 ;; Check load/store of 128-bit vectors with less-than 16-byte alignment
;; Unlike the element-aligned tests, the vector type is fixed at <2 x i64> and
;; only the align operand varies: 1, 2, 4 and 8 bytes for the byte, half, word
;; and dword pointer pairs. Each loaded value is stored to the matching %tail.*
;; pointer with the same alignment, and the align 1 value is returned.
;; - The default-prefix directives expect every access as ld1/st1 with the .2d
;;   arrangement and the addresses in x0-x7.
;; - The directives under the BE-STRICT-ALIGN prefix list three load patterns
;;   (ldrb, ldrh, ldr) and three store patterns (strb, strh, str) for the four
;;   loads and four stores.
;;   NOTE(review): no separate pattern is listed for the align 8 access under
;;   that prefix - confirm whether this is intentional.
117 define <2 x i64> @align2vi64 (<2 x i64>* %head.byte, <2 x i64>* %head.half, <2 x i64>* %head.word, <2 x i64>* %head.dword,
118 <2 x i64>* %tail.byte, <2 x i64>* %tail.half, <2 x i64>* %tail.word, <2 x i64>* %tail.dword) {
119 ; CHECK-LABEL: align2vi64
120 ; CHECK: ld1 {v0.2d}, [x0]
121 ; CHECK: ld1 {v1.2d}, [x1]
122 ; CHECK: ld1 {v2.2d}, [x2]
123 ; CHECK: ld1 {v3.2d}, [x3]
124 ; CHECK: st1 {v0.2d}, [x4]
125 ; CHECK: st1 {v1.2d}, [x5]
126 ; CHECK: st1 {v2.2d}, [x6]
127 ; CHECK: st1 {v3.2d}, [x7]
128 ; BE-STRICT-ALIGN-LABEL: align2vi64
129 ; BE-STRICT-ALIGN: ldrb
130 ; BE-STRICT-ALIGN: ldrh
131 ; BE-STRICT-ALIGN: ldr
132 ; BE-STRICT-ALIGN: strb
133 ; BE-STRICT-ALIGN: strh
134 ; BE-STRICT-ALIGN: str
;; Same vector type throughout; the pointer name suffix mirrors the align
;; operand (byte = 1, half = 2, word = 4, dword = 8).
136 %val.byte = load <2 x i64>* %head.byte, align 1
137 %val.half = load <2 x i64>* %head.half, align 2
138 %val.word = load <2 x i64>* %head.word, align 4
139 %val.dword = load <2 x i64>* %head.dword, align 8
140 store <2 x i64> %val.byte, <2 x i64>* %tail.byte, align 1
141 store <2 x i64> %val.half, <2 x i64>* %tail.half, align 2
142 store <2 x i64> %val.word, <2 x i64>* %tail.word, align 4
143 store <2 x i64> %val.dword, <2 x i64>* %tail.dword, align 8
144 ret <2 x i64> %val.byte
147 ;; Check load/store of 64-bit vectors with less-than 8-byte alignment
;; The vector type is fixed at <2 x float> and only the align operand varies:
;; 1, 2 and 4 bytes for the byte, half and word pointer pairs. Each loaded
;; value is stored to the matching %tail.* pointer with the same alignment, and
;; the align 1 value is returned.
;; %head.dword and %tail.dword are declared but never used in the body.
;; NOTE(review): presumably they are omitted because an 8-byte-aligned access
;; would not be under-aligned for a 64-bit vector - confirm.
;; - The default-prefix directives expect ld1/st1 with the .2s arrangement,
;;   loads from x0-x2 and stores to x4-x6 (x3 and x7 hold the unused pointers).
;; - The directives under the BE-STRICT-ALIGN prefix expect ldrb, ldrh, ldr and
;;   then strb, strh, str, in that order.
148 define <2 x float> @align2vf32 (<2 x float>* %head.byte, <2 x float>* %head.half, <2 x float>* %head.word, <2 x float>* %head.dword,
149 <2 x float>* %tail.byte, <2 x float>* %tail.half, <2 x float>* %tail.word, <2 x float>* %tail.dword) {
150 ; CHECK-LABEL: align2vf32
151 ; CHECK: ld1 {v0.2s}, [x0]
152 ; CHECK: ld1 {v1.2s}, [x1]
153 ; CHECK: ld1 {v2.2s}, [x2]
154 ; CHECK: st1 {v0.2s}, [x4]
155 ; CHECK: st1 {v1.2s}, [x5]
156 ; CHECK: st1 {v2.2s}, [x6]
157 ; BE-STRICT-ALIGN-LABEL: align2vf32
158 ; BE-STRICT-ALIGN: ldrb
159 ; BE-STRICT-ALIGN: ldrh
160 ; BE-STRICT-ALIGN: ldr
161 ; BE-STRICT-ALIGN: strb
162 ; BE-STRICT-ALIGN: strh
163 ; BE-STRICT-ALIGN: str
;; Same vector type throughout; the pointer name suffix mirrors the align
;; operand (byte = 1, half = 2, word = 4).
165 %val.byte = load <2 x float>* %head.byte, align 1
166 %val.half = load <2 x float>* %head.half, align 2
167 %val.word = load <2 x float>* %head.word, align 4
168 store <2 x float> %val.byte, <2 x float>* %tail.byte, align 1
169 store <2 x float> %val.half, <2 x float>* %tail.half, align 2
170 store <2 x float> %val.word, <2 x float>* %tail.word, align 4
171 ret <2 x float> %val.byte