1 ; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
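; Feeds the result of each NEON structured-load intrinsic (vld2/vld3/vld4)
; straight into the matching structured-store intrinsic (vst2/vst3/vst4) and
; checks the ld2/ld3/ld4 and st2/st3/st4 instructions that come out.
; Duplicates existing arm64 tests.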
; test_ldstq_4v: loop that runs %count times (the entry guard skips it when
; %count is 0). Each iteration loads four <16 x i8> vectors from %io with
; llvm.arm.neon.vld4.v16i8 and writes the same four vectors back to %io with
; llvm.arm.neon.vst4.v16i8. The expected output is an ld4/st4 pair on v0-v3
; in the .16b arrangement, both addressed through x0.
define void @test_ldstq_4v(i8* noalias %io, i32 %count) {
; CHECK-LABEL: test_ldstq_4v
; CHECK: ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
; CHECK: st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0]
; The entry block is labelled explicitly because the phi in while.body names
; %entry as an incoming block.
entry:
  %tobool62 = icmp eq i32 %count, 0
  br i1 %tobool62, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; Remaining iteration count: %count on first entry, %dec on the back edge.
  %count.addr.063 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
  %dec = add i32 %count.addr.063, -1
  %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %io, i32 1)
  %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0
  %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1
  %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2
  %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3
  tail call void @llvm.arm.neon.vst4.v16i8(i8* %io, <16 x i8> %vld4.fca.0.extract, <16 x i8> %vld4.fca.1.extract, <16 x i8> %vld4.fca.2.extract, <16 x i8> %vld4.fca.3.extract, i32 1)
  %tobool = icmp eq i32 %dec, 0
  br i1 %tobool, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}

; NOTE(review): the trailing i32 operand (passed as 1 above) is presumably the
; alignment argument of these intrinsics -- confirm against their definition.
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)

declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32)
; test_ldstq_3v: loop that runs %count times (the entry guard skips it when
; %count is 0). Each iteration loads three <16 x i8> vectors from %io with
; llvm.arm.neon.vld3.v16i8 and writes the same three vectors back to %io with
; llvm.arm.neon.vst3.v16i8. The expected output is an ld3/st3 pair on v0-v2
; in the .16b arrangement, both addressed through x0.
define void @test_ldstq_3v(i8* noalias %io, i32 %count) {
; CHECK-LABEL: test_ldstq_3v
; CHECK: ld3 {v0.16b, v1.16b, v2.16b}, [x0]
; CHECK: st3 {v0.16b, v1.16b, v2.16b}, [x0]
; The entry block is labelled explicitly because the phi in while.body names
; %entry as an incoming block.
entry:
  %tobool47 = icmp eq i32 %count, 0
  br i1 %tobool47, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; Remaining iteration count: %count on first entry, %dec on the back edge.
  %count.addr.048 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
  %dec = add i32 %count.addr.048, -1
  %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %io, i32 1)
  %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2
  tail call void @llvm.arm.neon.vst3.v16i8(i8* %io, <16 x i8> %vld3.fca.0.extract, <16 x i8> %vld3.fca.1.extract, <16 x i8> %vld3.fca.2.extract, i32 1)
  %tobool = icmp eq i32 %dec, 0
  br i1 %tobool, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}

; NOTE(review): the trailing i32 operand (passed as 1 above) is presumably the
; alignment argument of these intrinsics -- confirm against their definition.
declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32)

declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32)
; test_ldstq_2v: loop that runs %count times (the entry guard skips it when
; %count is 0). Each iteration loads two <16 x i8> vectors from %io with
; llvm.arm.neon.vld2.v16i8 and writes the same two vectors back to %io with
; llvm.arm.neon.vst2.v16i8. The expected output is an ld2/st2 pair on v0-v1
; in the .16b arrangement, both addressed through x0.
define void @test_ldstq_2v(i8* noalias %io, i32 %count) {
; CHECK-LABEL: test_ldstq_2v
; CHECK: ld2 {v0.16b, v1.16b}, [x0]
; CHECK: st2 {v0.16b, v1.16b}, [x0]
; The entry block is labelled explicitly because the phi in while.body names
; %entry as an incoming block.
entry:
  %tobool22 = icmp eq i32 %count, 0
  br i1 %tobool22, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; Remaining iteration count: %count on first entry, %dec on the back edge.
  %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
  %dec = add i32 %count.addr.023, -1
  %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %io, i32 1)
  %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1
  tail call void @llvm.arm.neon.vst2.v16i8(i8* %io, <16 x i8> %vld2.fca.0.extract, <16 x i8> %vld2.fca.1.extract, i32 1)
  %tobool = icmp eq i32 %dec, 0
  br i1 %tobool, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}

; NOTE(review): the trailing i32 operand (passed as 1 above) is presumably the
; alignment argument of these intrinsics -- confirm against their definition.
declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32)

declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32)
; test_ldst_4v: loop that runs %count times (the entry guard skips it when
; %count is 0). Each iteration loads four <8 x i8> vectors from %io with
; llvm.arm.neon.vld4.v8i8 and writes the same four vectors back to %io with
; llvm.arm.neon.vst4.v8i8. The expected output is an ld4/st4 pair on v0-v3
; in the .8b arrangement, both addressed through x0.
define void @test_ldst_4v(i8* noalias %io, i32 %count) {
; CHECK-LABEL: test_ldst_4v
; CHECK: ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
; CHECK: st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0]
; The entry block is labelled explicitly because the phi in while.body names
; %entry as an incoming block.
entry:
  %tobool42 = icmp eq i32 %count, 0
  br i1 %tobool42, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; Remaining iteration count: %count on first entry, %dec on the back edge.
  %count.addr.043 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
  %dec = add i32 %count.addr.043, -1
  %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %io, i32 1)
  %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0
  %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1
  %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2
  %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3
  tail call void @llvm.arm.neon.vst4.v8i8(i8* %io, <8 x i8> %vld4.fca.0.extract, <8 x i8> %vld4.fca.1.extract, <8 x i8> %vld4.fca.2.extract, <8 x i8> %vld4.fca.3.extract, i32 1)
  %tobool = icmp eq i32 %dec, 0
  br i1 %tobool, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}

; NOTE(review): the trailing i32 operand (passed as 1 above) is presumably the
; alignment argument of these intrinsics -- confirm against their definition.
declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32)

declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32)
; test_ldst_3v: loop that runs %count times (the entry guard skips it when
; %count is 0). Each iteration loads three <8 x i8> vectors from %io with
; llvm.arm.neon.vld3.v8i8 and writes the same three vectors back to %io with
; llvm.arm.neon.vst3.v8i8. The expected output is an ld3/st3 pair on v0-v2
; in the .8b arrangement, both addressed through x0.
define void @test_ldst_3v(i8* noalias %io, i32 %count) {
; CHECK-LABEL: test_ldst_3v
; CHECK: ld3 {v0.8b, v1.8b, v2.8b}, [x0]
; CHECK: st3 {v0.8b, v1.8b, v2.8b}, [x0]
; The entry block is labelled explicitly because the phi in while.body names
; %entry as an incoming block.
entry:
  %tobool32 = icmp eq i32 %count, 0
  br i1 %tobool32, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; Remaining iteration count: %count on first entry, %dec on the back edge.
  %count.addr.033 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
  %dec = add i32 %count.addr.033, -1
  %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %io, i32 1)
  %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0
  %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1
  %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2
  tail call void @llvm.arm.neon.vst3.v8i8(i8* %io, <8 x i8> %vld3.fca.0.extract, <8 x i8> %vld3.fca.1.extract, <8 x i8> %vld3.fca.2.extract, i32 1)
  %tobool = icmp eq i32 %dec, 0
  br i1 %tobool, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}

; NOTE(review): the trailing i32 operand (passed as 1 above) is presumably the
; alignment argument of these intrinsics -- confirm against their definition.
declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32)

declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
; test_ldst_2v: loop that runs %count times (the entry guard skips it when
; %count is 0). Each iteration loads two <8 x i8> vectors from %io with
; llvm.arm.neon.vld2.v8i8 and writes the same two vectors back to %io with
; llvm.arm.neon.vst2.v8i8. The expected output is an ld2/st2 pair on v0-v1
; in the .8b arrangement, both addressed through x0.
define void @test_ldst_2v(i8* noalias %io, i32 %count) {
; CHECK-LABEL: test_ldst_2v
; CHECK: ld2 {v0.8b, v1.8b}, [x0]
; CHECK: st2 {v0.8b, v1.8b}, [x0]
; The entry block is labelled explicitly because the phi in while.body names
; %entry as an incoming block.
entry:
  %tobool22 = icmp eq i32 %count, 0
  br i1 %tobool22, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; Remaining iteration count: %count on first entry, %dec on the back edge.
  %count.addr.023 = phi i32 [ %dec, %while.body ], [ %count, %entry ]
  %dec = add i32 %count.addr.023, -1
  %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %io, i32 1)
  %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0
  %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1
  tail call void @llvm.arm.neon.vst2.v8i8(i8* %io, <8 x i8> %vld2.fca.0.extract, <8 x i8> %vld2.fca.1.extract, i32 1)
  %tobool = icmp eq i32 %dec, 0
  br i1 %tobool, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}

; NOTE(review): the trailing i32 operand (passed as 1 above) is presumably the
; alignment argument of these intrinsics -- confirm against their definition.
declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32)

declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32)