Implement the newly added ACLE functions for ld1/st1 with 2/3/4 vectors.
[oota-llvm.git] / include / llvm / IR / IntrinsicsAArch64.td
1 //===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines all of the AArch64-specific intrinsics.
11 //
12 //===----------------------------------------------------------------------===//
13
14 //===----------------------------------------------------------------------===//
15 // Advanced SIMD (NEON)
16
17 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
18
19 // Vector Absolute Compare (Floating Point)
// Quad-register-only forms: |a| >= |b| and |a| > |b| on 2 x f64, producing
// an all-ones/all-zeros 2 x i64 lane mask (FACGE/FACGT).
20 def int_aarch64_neon_vacgeq :
21   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
22 def int_aarch64_neon_vacgtq :
23   Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
24
25 // Vector saturating accumulate
// suqadd: signed saturating accumulate of unsigned value; usqadd: unsigned
// saturating accumulate of signed value (see the scalar vuqadd/vsqadd
// comments below for the same naming).
26 def int_aarch64_neon_suqadd : Neon_2Arg_Intrinsic;
27 def int_aarch64_neon_usqadd : Neon_2Arg_Intrinsic;
28
29 // Vector Bitwise reverse
30 def int_aarch64_neon_rbit : Neon_1Arg_Intrinsic;
31
32 // Vector extract and narrow
33 def int_aarch64_neon_xtn : 
34   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
35
36 // Vector floating-point convert
37 def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
38 def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
// fcvtxn: floating-point convert and narrow, rounding to odd (FCVTXN).
39 def int_aarch64_neon_fcvtxn :
40   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
// Float-to-integer conversions with a directed rounding mode
// (FCVT{N,P,M,A}{S,U}): n = to nearest, ties to even; p = toward +Inf;
// m = toward -Inf; a = to nearest, ties away from zero.  The trailing
// s/u selects a signed/unsigned integer result.
41 def int_aarch64_neon_fcvtns : 
42   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
43 def int_aarch64_neon_fcvtnu :
44   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
45 def int_aarch64_neon_fcvtps :
46   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
47 def int_aarch64_neon_fcvtpu :
48   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
49 def int_aarch64_neon_fcvtms :
50   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
51 def int_aarch64_neon_fcvtmu :
52   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
53 def int_aarch64_neon_fcvtas :
54   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
55 def int_aarch64_neon_fcvtau :
56   Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
57
58 // Vector maxNum (Floating Point)
59 def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
60
61 // Vector minNum (Floating Point)
62 def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;
63
64 // Vector Pairwise maxNum (Floating Point)
65 def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;
66
67 // Vector Pairwise minNum (Floating Point)
68 def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;
69
70 // Vector Multiply Extended and Scalar Multiply Extended (Floating Point)
// NOTE(review): unlike the neighbouring definitions this one carries no
// [IntrNoMem] property -- confirm the omission is intentional.
71 def int_aarch64_neon_vmulx  :
72   Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>;
73
// Shift-by-immediate helper classes.  In each, the trailing llvm_i32_ty
// operand carries the immediate shift amount.
74 class Neon_N2V_Intrinsic
75   : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty],
76               [IntrNoMem]>;
// Three-operand form for instructions that also read the destination
// register (e.g. SRI/SLI): (dest, src, immediate).
77 class Neon_N3V_Intrinsic
78   : Intrinsic<[llvm_anyvector_ty],
79               [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
80               [IntrNoMem]>;
// Narrowing form: the source vector's elements are twice as wide as the
// result's elements.
81 class Neon_N2V_Narrow_Intrinsic
82   : Intrinsic<[llvm_anyvector_ty],
83               [LLVMExtendedElementVectorType<0>, llvm_i32_ty],
84               [IntrNoMem]>;
85
86 // Vector rounding shift right by immediate (signed srshr, unsigned urshr)
// and vector signed saturating shift left unsigned by immediate (sqshlu).
87 def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic;
88 def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic;
89 def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic;
90
// Shift right/left and insert, by immediate.
91 def int_aarch64_neon_vsri : Neon_N3V_Intrinsic;
92 def int_aarch64_neon_vsli : Neon_N3V_Intrinsic;
93
// Narrowing shift right by immediate, in plain, rounding, saturating and
// combined rounding+saturating variants (signed, unsigned, and
// signed-to-unsigned "un" forms).
94 def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic;
95 def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic;
96 def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic;
97 def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
98 def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
99 def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
100 def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;
101
102 // Vector across
// Across-lanes reductions: a single result lane computed from all lanes of
// the source vector.
103 class Neon_Across_Intrinsic
104   : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
105
// NOTE(review): fixed v4f32 source and, despite the "2Arg" in the name,
// only a single input operand; no def in this section of the file uses
// this class -- confirm it is referenced elsewhere.
106 class Neon_2Arg_Across_Float_Intrinsic
107   : Intrinsic<[llvm_anyvector_ty], [llvm_v4f32_ty], [IntrNoMem]>;
108
// Long add (saddlv/uaddlv), integer max/min (s/u), and generic
// add/max/min/maxnm/minnm across-lanes reductions.
109 def int_aarch64_neon_saddlv : Neon_Across_Intrinsic;
110 def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic;
111 def int_aarch64_neon_smaxv  : Neon_Across_Intrinsic;
112 def int_aarch64_neon_umaxv  : Neon_Across_Intrinsic;
113 def int_aarch64_neon_sminv  : Neon_Across_Intrinsic;
114 def int_aarch64_neon_uminv  : Neon_Across_Intrinsic;
115 def int_aarch64_neon_vaddv  : Neon_Across_Intrinsic;
116 def int_aarch64_neon_vmaxv  : Neon_Across_Intrinsic;
117 def int_aarch64_neon_vminv  : Neon_Across_Intrinsic;
118 def int_aarch64_neon_vmaxnmv : Neon_Across_Intrinsic;
119 def int_aarch64_neon_vminnmv : Neon_Across_Intrinsic;
120
121 // Vector Table Lookup.
// Overloaded type <0> is the result/index vector type and type <1> is the
// table vector type; vtblN takes N table vectors followed by the index
// vector as the last operand.
122 def int_aarch64_neon_vtbl1 :
123   Intrinsic<[llvm_anyvector_ty],
124             [llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
125
126 def int_aarch64_neon_vtbl2 :
127   Intrinsic<[llvm_anyvector_ty],
128             [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<0>],
129             [IntrNoMem]>;
130
131 def int_aarch64_neon_vtbl3 :
132   Intrinsic<[llvm_anyvector_ty],
133             [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
134             LLVMMatchType<0>], [IntrNoMem]>;
135
136 def int_aarch64_neon_vtbl4 :
137   Intrinsic<[llvm_anyvector_ty],
138             [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
139             LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;
140
141 // Vector Table Extension.
142 // Some elements of the destination vector may not be updated, so the original
143 // value of that vector is passed as the first argument.  The next 1-4
144 // arguments after that are the table.
// As with vtblN above, type <0> is the result/index vector type, type <1>
// the table vector type; the index vector is the last operand.
145 def int_aarch64_neon_vtbx1 :
146   Intrinsic<[llvm_anyvector_ty],
147             [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
148             [IntrNoMem]>;
149
150 def int_aarch64_neon_vtbx2 :
151   Intrinsic<[llvm_anyvector_ty],
152             [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
153             LLVMMatchType<0>], [IntrNoMem]>;
154
155 def int_aarch64_neon_vtbx3 :
156   Intrinsic<[llvm_anyvector_ty],
157             [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
158             LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;
159
160 def int_aarch64_neon_vtbx4 :
161   Intrinsic<[llvm_anyvector_ty],
162             [LLVMMatchType<0>, llvm_anyvector_ty,  LLVMMatchType<1>,
163             LLVMMatchType<1>,  LLVMMatchType<1>, LLVMMatchType<0>],
164             [IntrNoMem]>;
165
166 // Vector Load/store
// Multi-vector contiguous load/store (LD1/ST1 with 2, 3 or 4 registers).
// Loads return N vectors of the overloaded type; stores take the pointer
// first, then the N data vectors.  The trailing llvm_i32_ty presumably
// carries the alignment, matching the ARM vldN/vstN intrinsic convention
// -- TODO confirm against the lowering code.
167 def int_aarch64_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
168                                         [llvm_ptr_ty, llvm_i32_ty],
169                                         [IntrReadArgMem]>;
170 def int_aarch64_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
171                                          LLVMMatchType<0>],
172                                         [llvm_ptr_ty, llvm_i32_ty],
173                                         [IntrReadArgMem]>;
174 def int_aarch64_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
175                                          LLVMMatchType<0>, LLVMMatchType<0>],
176                                         [llvm_ptr_ty, llvm_i32_ty],
177                                         [IntrReadArgMem]>;
178
// NOTE(review): the stores are marked IntrReadWriteArgMem (conservative)
// while the loads above are read-only IntrReadArgMem.
179 def int_aarch64_neon_vst1x2 : Intrinsic<[],
180                                         [llvm_ptr_ty, llvm_anyvector_ty,
181                                          LLVMMatchType<0>, llvm_i32_ty],
182                                         [IntrReadWriteArgMem]>;
183 def int_aarch64_neon_vst1x3 : Intrinsic<[],
184                                         [llvm_ptr_ty, llvm_anyvector_ty,
185                                          LLVMMatchType<0>, LLVMMatchType<0>,
186                                          llvm_i32_ty], [IntrReadWriteArgMem]>;
187 def int_aarch64_neon_vst1x4 : Intrinsic<[],
188                                         [llvm_ptr_ty, llvm_anyvector_ty,
189                                          LLVMMatchType<0>, LLVMMatchType<0>,
190                                          LLVMMatchType<0>, llvm_i32_ty],
191                                         [IntrReadWriteArgMem]>;
192
193 // Scalar Add
// "ds"/"du" suffix: signed/unsigned doubleword (v1i64) operands.
194 def int_aarch64_neon_vaddds :
195   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
196 def int_aarch64_neon_vadddu :
197   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
198
199 // Scalar Saturating Add (Signed, Unsigned)
200 def int_aarch64_neon_vqadds : Neon_2Arg_Intrinsic;
201 def int_aarch64_neon_vqaddu : Neon_2Arg_Intrinsic;
202
203 // Scalar Sub
204 def int_aarch64_neon_vsubds :
205   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
206 def int_aarch64_neon_vsubdu :
207   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
208
209 // Scalar Saturating Sub (Signed, Unsigned)
210 def int_aarch64_neon_vqsubs : Neon_2Arg_Intrinsic;
211 def int_aarch64_neon_vqsubu : Neon_2Arg_Intrinsic;
212
213 // Scalar Shift
214 // Scalar Shift Left
// The second operand is a (possibly negative) shift count in a register,
// not an immediate.
215 def int_aarch64_neon_vshlds :
216   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
217 def int_aarch64_neon_vshldu :
218   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
219
220 // Scalar Saturating Shift Left
221 def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
222 def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;
223
224 // Scalar Rounding Shift Left
225 def int_aarch64_neon_vrshlds :
226   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
227 def int_aarch64_neon_vrshldu :
228   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
229
230 // Scalar Saturating Rounding Shift Left
231 def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
232 def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;
233
234 // Scalar Reduce Pairwise Add.
// Sums the two lanes of the source into a single-lane result.
235 def int_aarch64_neon_vpadd :
236   Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>;
237 def int_aarch64_neon_vpfadd :
238   Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
239 def int_aarch64_neon_vpfaddq :
240   Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
241
242 // Scalar Reduce Pairwise Floating Point Max/Min.
243 def int_aarch64_neon_vpmax :
244   Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
245 def int_aarch64_neon_vpmaxq :
246   Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
247 def int_aarch64_neon_vpmin :
248   Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
249 def int_aarch64_neon_vpminq :
250   Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
251
252 // Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
253 def int_aarch64_neon_vpfmaxnm :
254   Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
255 def int_aarch64_neon_vpfmaxnmq :
256   Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
257 def int_aarch64_neon_vpfminnm :
258   Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
259 def int_aarch64_neon_vpfminnmq :
260   Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
261
262 // Scalar Signed Integer Convert To Floating-point
263 def int_aarch64_neon_vcvtf32_s32 :
264   Intrinsic<[llvm_float_ty], [llvm_v1i32_ty], [IntrNoMem]>;
265 def int_aarch64_neon_vcvtf64_s64 :
266   Intrinsic<[llvm_double_ty], [llvm_v1i64_ty], [IntrNoMem]>;
267
268 // Scalar Unsigned Integer Convert To Floating-point
269 def int_aarch64_neon_vcvtf32_u32 :
270   Intrinsic<[llvm_float_ty], [llvm_v1i32_ty], [IntrNoMem]>;
271 def int_aarch64_neon_vcvtf64_u64 :
272   Intrinsic<[llvm_double_ty], [llvm_v1i64_ty], [IntrNoMem]>;
273
274 // Scalar Floating-point Reciprocal Exponent
275 def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic;
276
// Generic two-operand compare: lanes of the result are all-ones when the
// comparison holds and all-zeros otherwise.
277 class Neon_Cmp_Intrinsic
278   : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty],
279               [IntrNoMem]>;
280
281 // Scalar Compare Equal
282 def int_aarch64_neon_vceq : Neon_Cmp_Intrinsic;
283
284 // Scalar Compare Greater-Than or Equal
// vcge: signed; vchs: unsigned ("higher or same", CMHS).
285 def int_aarch64_neon_vcge : Neon_Cmp_Intrinsic;
286 def int_aarch64_neon_vchs : Neon_Cmp_Intrinsic;
287
288 // Scalar Compare Less-Than or Equal
// NOTE(review): despite the "z" (zero) suffix, vclez/vcltz use the
// two-operand compare class -- confirm the second operand is the zero
// vector at call sites.
289 def int_aarch64_neon_vclez : Neon_Cmp_Intrinsic;
290
291 // Scalar Compare Less-Than
292 def int_aarch64_neon_vcltz : Neon_Cmp_Intrinsic;
293
294 // Scalar Compare Greater-Than
// vcgt: signed; vchi: unsigned ("higher", CMHI).
295 def int_aarch64_neon_vcgt : Neon_Cmp_Intrinsic;
296 def int_aarch64_neon_vchi : Neon_Cmp_Intrinsic;
297
298 // Scalar Compare Bitwise Test Bits
299 def int_aarch64_neon_vtstd : Neon_Cmp_Intrinsic;
300
301 // Scalar Floating-point Absolute Compare Greater Than Or Equal
302 def int_aarch64_neon_vcage : Neon_Cmp_Intrinsic;
303
304 // Scalar Floating-point Absolute Compare Greater Than
305 def int_aarch64_neon_vcagt : Neon_Cmp_Intrinsic;
306
307 // Scalar Signed Saturating Accumulated of Unsigned Value
308 def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic;
309
310 // Scalar Unsigned Saturating Accumulated of Signed Value
311 def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;
312
313 // Scalar Absolute Value
314 def int_aarch64_neon_vabs :
315   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
316
317 // Scalar Negate Value
318 def int_aarch64_neon_vneg :
319   Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;
320
321 // Signed Saturating Doubling Multiply-Add Long
322 def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;
323
324 // Signed Saturating Doubling Multiply-Subtract Long
325 def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;
326
327 // Signed Saturating Doubling Multiply Long
328 def int_aarch64_neon_vqdmull : Neon_2Arg_Long_Intrinsic;
329
// Scalar (v1i64) shift-by-immediate helpers; the trailing llvm_i32_ty
// operand is the immediate shift amount.
330 class Neon_2Arg_ShiftImm_Intrinsic
331   : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;
332
// Accumulating form: (accumulator, operand, immediate).
333 class Neon_3Arg_ShiftImm_Intrinsic
334   : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty],
335               [IntrNoMem]>;
336
337 // Scalar Shift Right (Immediate)
338 def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
339 def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;
340
341 // Scalar Shift Right and Accumulate (Immediate)
342 def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
343 def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
344
345 // Scalar Rounding Shift Right and Accumulate (Immediate)
346 def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
347 def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
348
349 // Scalar Shift Left (Immediate)
350 def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic;
351
352 // Scalar Saturating Shift Left (Immediate)
353 def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic;
354 def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;
355
356 // Scalar Signed Saturating Shift Left Unsigned (Immediate)
357 def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic;
358
359 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
// In the fixed-point "_n" conversions below, the llvm_i32_ty operand is
// the immediate number of fraction bits.
360 def int_aarch64_neon_vcvtf32_n_s32 :
361   Intrinsic<[llvm_float_ty], [llvm_v1i32_ty, llvm_i32_ty], [IntrNoMem]>;
362 def int_aarch64_neon_vcvtf64_n_s64 :
363   Intrinsic<[llvm_double_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;
364
365 // Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
366 def int_aarch64_neon_vcvtf32_n_u32 :
367   Intrinsic<[llvm_float_ty], [llvm_v1i32_ty, llvm_i32_ty], [IntrNoMem]>;
368 def int_aarch64_neon_vcvtf64_n_u64 :
369   Intrinsic<[llvm_double_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;
370
371 // Scalar Floating-point Convert To Signed Fixed-point (Immediate)
372 def int_aarch64_neon_vcvts_n_s32_f32 :
373   Intrinsic<[llvm_v1i32_ty], [llvm_v1f32_ty, llvm_i32_ty], [IntrNoMem]>;
374 def int_aarch64_neon_vcvtd_n_s64_f64 :
375   Intrinsic<[llvm_v1i64_ty], [llvm_v1f64_ty, llvm_i32_ty], [IntrNoMem]>;
376
377 // Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
378 def int_aarch64_neon_vcvts_n_u32_f32 :
379   Intrinsic<[llvm_v1i32_ty], [llvm_v1f32_ty, llvm_i32_ty], [IntrNoMem]>;
380 def int_aarch64_neon_vcvtd_n_u64_f64 :
381   Intrinsic<[llvm_v1i64_ty], [llvm_v1f64_ty, llvm_i32_ty], [IntrNoMem]>;
382
// SHA-1 hash update: operands are presumably the 4 x i32 hash state, the
// single "e" element (v1i32) and a 4 x i32 message-schedule chunk --
// confirm operand roles against the instruction selection patterns.
383 class Neon_SHA_Intrinsic
384   : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v1i32_ty, llvm_v4i32_ty],
385               [IntrNoMem]>;
386
// SHA1C/SHA1M/SHA1P: hash update using the choose/majority/parity
// functions of the respective SHA-1 rounds.
387 def int_aarch64_neon_sha1c : Neon_SHA_Intrinsic;
388 def int_aarch64_neon_sha1m : Neon_SHA_Intrinsic;
389 def int_aarch64_neon_sha1p : Neon_SHA_Intrinsic;
390 }