Implement AArch64 NEON instruction set AdvSIMD (table).
[oota-llvm.git] / include / llvm / IR / IntrinsicsAArch64.td
1 //===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines all of the AArch64-specific intrinsics.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //===----------------------------------------------------------------------===//
15 // Advanced SIMD (NEON)
16
let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".

// Vector Absolute Compare (Floating Point)
// Lane-wise |a| cmp |b| on v2f64; result is a v2i64 per-lane comparison
// mask (presumably lowered to FACGE/FACGT -- confirm against the patterns).
def int_aarch64_neon_vacgeq :
  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
def int_aarch64_neon_vacgtq :
  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
24
// NOTE: Neon_2Arg_Intrinsic is a shared helper class defined outside this
// section (two operands of the overloaded result type, IntrNoMem).

// Vector maxNum (Floating Point) -- IEEE 754-2008 maxNum NaN handling.
def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;

// Vector minNum (Floating Point)
def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;

// Vector Pairwise maxNum (Floating Point)
def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;

// Vector Pairwise minNum (Floating Point)
def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;

// Vector Multiply Extended (Floating Point)
def int_aarch64_neon_vmulx : Neon_2Arg_Intrinsic;
39
// Helper classes for shift-by-immediate intrinsics ("N" variants): the
// trailing i32 operand is the immediate shift amount.

// One vector operand plus immediate.
class Neon_N2V_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem]>;
// Two vector operands plus immediate (e.g. shift-and-insert, which also
// reads the destination register).
class Neon_N3V_Intrinsic
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem]>;
// Narrowing shift: the source vector has extended (double-width) elements
// relative to the result type.
class Neon_N2V_Narrow_Intrinsic
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMExtendedElementVectorType<0>, llvm_i32_ty],
              [IntrNoMem]>;
51
// Vector rounding shift right by immediate (Signed)
def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic;
// Vector rounding shift right by immediate (Unsigned)
def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic;
// Vector saturating shift left by immediate (Signed input, Unsigned result)
def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic;

// Vector shift right/left and insert (immediate); first operand is the
// value whose bits are preserved where not overwritten.
def int_aarch64_neon_vsri : Neon_N3V_Intrinsic;
def int_aarch64_neon_vsli : Neon_N3V_Intrinsic;

// Narrowing shifts right by immediate; name mnemonics: s/u = signed/
// unsigned, q = saturating, r = rounding, "un" suffix = signed input
// with unsigned saturation.
def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;
67
// Vector across-lanes reductions.
// Result and source vector types are independently overloaded; the result
// is presumably a one-element vector holding the reduced value -- confirm
// against the selection patterns.
class Neon_Across_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Across-lanes reduction fixed to a v4f32 source.
class Neon_2Arg_Across_Float_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [llvm_v4f32_ty], [IntrNoMem]>;

def int_aarch64_neon_saddlv : Neon_Across_Intrinsic;  // signed add long
def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic;  // unsigned add long
def int_aarch64_neon_smaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_umaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_sminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_uminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vaddv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vmaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vmaxnmv : Neon_Across_Intrinsic;  // maxNum NaN handling
def int_aarch64_neon_vminnmv : Neon_Across_Intrinsic;  // minNum NaN handling
86
// Vector Table Lookup.
// Overload 0 is the result/index vector type; overload 1 is the table
// vector type. The final operand is the index vector; the preceding 1-4
// operands are the table registers.
def int_aarch64_neon_vtbl1 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbl2 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<0>],
            [IntrNoMem]>;

def int_aarch64_neon_vtbl3 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
            LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbl4 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
            LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;

// Vector Table Extension.
// Some elements of the destination vector may not be updated, so the original
// value of that vector is passed as the first argument.  The next 1-4
// arguments after that are the table; the final argument is the index vector.
def int_aarch64_neon_vtbx1 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
            [IntrNoMem]>;

def int_aarch64_neon_vtbx2 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
            LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbx3 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
            LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbx4 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty,  LLVMMatchType<1>,
            LLVMMatchType<1>,  LLVMMatchType<1>, LLVMMatchType<0>],
            [IntrNoMem]>;
131
// Scalar Add (64-bit "d" register modeled as v1i64; s/u name suffix is
// signed/unsigned).
def int_aarch64_neon_vaddds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vadddu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Add (Signed, Unsigned)
def int_aarch64_neon_vqadds : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqaddu : Neon_2Arg_Intrinsic;

// Scalar Sub
def int_aarch64_neon_vsubds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vsubdu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Sub (Signed, Unsigned)
def int_aarch64_neon_vqsubs : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqsubu : Neon_2Arg_Intrinsic;
151
// Scalar Shift
// Scalar Shift Left (by register; second operand holds the shift amount)
def int_aarch64_neon_vshlds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vshldu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Shift Left
def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;

// Scalar Rounding Shift Left
def int_aarch64_neon_vrshlds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vrshldu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Rounding Shift Left
def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;
172
// Scalar Reduce Pairwise Add: fold the two lanes of the source into a
// one-element result vector.
def int_aarch64_neon_vpadd :
  Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>;
def int_aarch64_neon_vpfadd :
  Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfaddq :
  Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;

// Scalar Reduce Pairwise Floating Point Max/Min ("q" suffix = f64 variant).
def int_aarch64_neon_vpmax :
  Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vpmaxq :
  Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_aarch64_neon_vpmin :
  Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vpminq :
  Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;

// Scalar Reduce Pairwise Floating Point Maxnm/Minnm (IEEE maxNum/minNum).
def int_aarch64_neon_vpfmaxnm :
  Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfmaxnmq :
  Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfminnm :
  Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfminnmq :
  Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
200
// Scalar Signed Integer Convert To Floating-point
def int_aarch64_neon_vcvtf32_s32 :
  Intrinsic<[llvm_v1f32_ty], [llvm_v1i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_s64 :
  Intrinsic<[llvm_v1f64_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Unsigned Integer Convert To Floating-point
def int_aarch64_neon_vcvtf32_u32 :
  Intrinsic<[llvm_v1f32_ty], [llvm_v1i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_u64 :
  Intrinsic<[llvm_v1f64_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Floating-point Reciprocal Exponent
def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic;
215
// Scalar compare: result, LHS, and RHS vector types are each independently
// overloaded; the result is a per-lane comparison mask.
class Neon_Cmp_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty],
              [IntrNoMem]>;

// Scalar Compare Equal
def int_aarch64_neon_vceq : Neon_Cmp_Intrinsic;

// Scalar Compare Greater-Than or Equal (vcge signed, vchs unsigned
// higher-or-same)
def int_aarch64_neon_vcge : Neon_Cmp_Intrinsic;
def int_aarch64_neon_vchs : Neon_Cmp_Intrinsic;

// Scalar Compare Less-Than or Equal
// NOTE(review): the "z" suffix suggests a compare against zero, with the
// second operand being the zero vector -- confirm against the patterns.
def int_aarch64_neon_vclez : Neon_Cmp_Intrinsic;

// Scalar Compare Less-Than (see zero-compare note above for the "z" suffix)
def int_aarch64_neon_vcltz : Neon_Cmp_Intrinsic;

// Scalar Compare Greater-Than (vcgt signed, vchi unsigned higher)
def int_aarch64_neon_vcgt : Neon_Cmp_Intrinsic;
def int_aarch64_neon_vchi : Neon_Cmp_Intrinsic;

// Scalar Compare Bitwise Test Bits (nonzero AND of the operands)
def int_aarch64_neon_vtstd : Neon_Cmp_Intrinsic;

// Scalar Floating-point Absolute Compare Greater Than Or Equal
def int_aarch64_neon_vcage : Neon_Cmp_Intrinsic;

// Scalar Floating-point Absolute Compare Greater Than
def int_aarch64_neon_vcagt : Neon_Cmp_Intrinsic;

// Scalar Signed Saturating Accumulated of Unsigned Value
def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic;

// Scalar Unsigned Saturating Accumulated of Signed Value
def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;
251
// Scalar Absolute Value (64-bit integer)
def int_aarch64_neon_vabs :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Negate Value (64-bit integer)
def int_aarch64_neon_vneg :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Signed Saturating Doubling Multiply-Add Long
// (Long/3-arg helper classes are defined outside this section.)
def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;

// Signed Saturating Doubling Multiply-Subtract Long
def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;

// Signed Saturating Doubling Multiply Long
def int_aarch64_neon_vqdmull : Neon_2Arg_Long_Intrinsic;
268
// 64-bit scalar shift by immediate; the trailing i32 is the shift amount.
class Neon_2Arg_ShiftImm_Intrinsic
  : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;

// As above with an accumulator as the first operand.
class Neon_3Arg_ShiftImm_Intrinsic
  : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty],
              [IntrNoMem]>;

// Scalar Shift Right (Immediate); s/u suffix = signed/unsigned
def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;

// Scalar Shift Right and Accumulate (Immediate)
def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic;

// Scalar Rounding Shift Right and Accumulate (Immediate)
def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic;

// Scalar Shift Left (Immediate)
def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic;

// Scalar Saturating Shift Left (Immediate); overloaded element types,
// unlike the v1i64-only defs above
def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic;
def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;
294
// Fixed-point converts: the trailing i32 immediate is the number of
// fractional bits.

// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
def int_aarch64_neon_vcvtf32_n_s32 :
  Intrinsic<[llvm_v1f32_ty], [llvm_v1i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_n_s64 :
  Intrinsic<[llvm_v1f64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
def int_aarch64_neon_vcvtf32_n_u32 :
  Intrinsic<[llvm_v1f32_ty], [llvm_v1i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_n_u64 :
  Intrinsic<[llvm_v1f64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
def int_aarch64_neon_vcvts_n_s32_f32 :
  Intrinsic<[llvm_v1i32_ty], [llvm_v1f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtd_n_s64_f64 :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1f64_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
def int_aarch64_neon_vcvts_n_u32_f32 :
  Intrinsic<[llvm_v1i32_ty], [llvm_v1f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtd_n_u64_f64 :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1f64_ty, llvm_i32_ty], [IntrNoMem]>;
318
// SHA-1 hash-update intrinsics: (hash abcd : v4i32, hash e : v1i32,
// message schedule : v4i32) -> updated hash (v4i32).
class Neon_SHA_Intrinsic
  : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v1i32_ty, llvm_v4i32_ty],
              [IntrNoMem]>;

def int_aarch64_neon_sha1c : Neon_SHA_Intrinsic;  // choose function
def int_aarch64_neon_sha1m : Neon_SHA_Intrinsic;  // majority function
def int_aarch64_neon_sha1p : Neon_SHA_Intrinsic;  // parity function
}