[AArch64 NEON] Support poly128_t and implement relevant intrinsic.
include/llvm/IR/IntrinsicsAArch64.td
//===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the AArch64-specific intrinsics.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Advanced SIMD (NEON)

let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".

// Vector Absolute Compare (Floating Point)
def int_aarch64_neon_vacgeq :
  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
def int_aarch64_neon_vacgtq :
  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;

// Vector saturating accumulate
def int_aarch64_neon_suqadd : Neon_2Arg_Intrinsic;
def int_aarch64_neon_usqadd : Neon_2Arg_Intrinsic;

// Vector Bitwise reverse
def int_aarch64_neon_rbit : Neon_1Arg_Intrinsic;

// Vector extract and narrow
def int_aarch64_neon_xtn :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Vector floating-point convert
def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
def int_aarch64_neon_fcvtxn :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtns :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtnu :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtps :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtpu :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtms :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtmu :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtas :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtau :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtzs :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtzu :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Vector maxNum (Floating Point)
def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;

// Vector minNum (Floating Point)
def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;

// Vector Pairwise maxNum (Floating Point)
def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;

// Vector Pairwise minNum (Floating Point)
def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;

// Vector Multiply Extended and Scalar Multiply Extended (Floating Point)
def int_aarch64_neon_vmulx  :
  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>;

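// Shift-by-immediate intrinsic classes: the trailing llvm_i32_ty operand is
// the immediate shift amount.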
class Neon_N2V_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem]>;
class Neon_N3V_Intrinsic
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem]>;
class Neon_N2V_Narrow_Intrinsic
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMExtendedElementVectorType<0>, llvm_i32_ty],
              [IntrNoMem]>;

// Vector rounding shift right by immediate (Signed and Unsigned)
def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic;
def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic;
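// Vector signed saturating shift left unsigned by immediate (SQSHLU)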
def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic;

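// Vector shift right and insert / shift left and insert (SRI/SLI, immediate)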
def int_aarch64_neon_vsri : Neon_N3V_Intrinsic;
def int_aarch64_neon_vsli : Neon_N3V_Intrinsic;

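// Vector narrowing shift right by immediate, with saturating and rounding
// variants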
def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;

// Vector across
class Neon_Across_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

def int_aarch64_neon_saddlv : Neon_Across_Intrinsic;
def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic;
def int_aarch64_neon_smaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_umaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_sminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_uminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vaddv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vmaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vmaxnmv : Neon_Across_Intrinsic;
def int_aarch64_neon_vminnmv : Neon_Across_Intrinsic;

// Vector Table Lookup.
def int_aarch64_neon_vtbl1 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbl2 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<0>],
            [IntrNoMem]>;

def int_aarch64_neon_vtbl3 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
            LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbl4 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
            LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;

// Vector Table Extension.
// Some elements of the destination vector may not be updated, so the original
// value of that vector is passed as the first argument.  The next 1-4
// arguments after that are the table.
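// For example, in a vtbx1 operation any lane whose index is out of range for
// the table keeps the value of the corresponding lane of the pass-through
// operand, matching the behaviour of the TBX instruction.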
def int_aarch64_neon_vtbx1 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
            [IntrNoMem]>;

def int_aarch64_neon_vtbx2 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
            LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbx3 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
            LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbx4 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
            LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<0>],
            [IntrNoMem]>;

// Vector Load/store
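// The vld1xN/vst1xN intrinsics model the LD1/ST1 forms that load or store two
// to four consecutive registers.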
def int_aarch64_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                                        [llvm_ptr_ty, llvm_i32_ty],
                                        [IntrReadArgMem]>;
def int_aarch64_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                         LLVMMatchType<0>],
                                        [llvm_ptr_ty, llvm_i32_ty],
                                        [IntrReadArgMem]>;
def int_aarch64_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                         LLVMMatchType<0>, LLVMMatchType<0>],
                                        [llvm_ptr_ty, llvm_i32_ty],
                                        [IntrReadArgMem]>;

def int_aarch64_neon_vst1x2 : Intrinsic<[],
                                        [llvm_ptr_ty, llvm_anyvector_ty,
                                         LLVMMatchType<0>, llvm_i32_ty],
                                        [IntrReadWriteArgMem]>;
def int_aarch64_neon_vst1x3 : Intrinsic<[],
                                        [llvm_ptr_ty, llvm_anyvector_ty,
                                         LLVMMatchType<0>, LLVMMatchType<0>,
                                         llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_aarch64_neon_vst1x4 : Intrinsic<[],
                                        [llvm_ptr_ty, llvm_anyvector_ty,
                                         LLVMMatchType<0>, LLVMMatchType<0>,
                                         LLVMMatchType<0>, llvm_i32_ty],
                                        [IntrReadWriteArgMem]>;

// Scalar Add
def int_aarch64_neon_vaddds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vadddu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Sub
def int_aarch64_neon_vsubds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vsubdu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Shift
// Scalar Shift Left
def int_aarch64_neon_vshlds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vshldu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Shift Left
def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;

// Scalar Rounding Shift Left
def int_aarch64_neon_vrshlds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vrshldu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Rounding Shift Left
def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;

// Scalar Reduce Pairwise Add.
def int_aarch64_neon_vpadd :
  Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfadd :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Reduce Pairwise Floating Point Max/Min.
def int_aarch64_neon_vpmax :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpmin :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
def int_aarch64_neon_vpfmaxnm :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfminnm :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Signed Integer Convert To Floating-point
def int_aarch64_neon_vcvtf32_s32 :
  Intrinsic<[llvm_float_ty], [llvm_v1i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_s64 :
  Intrinsic<[llvm_double_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Unsigned Integer Convert To Floating-point
def int_aarch64_neon_vcvtf32_u32 :
  Intrinsic<[llvm_float_ty], [llvm_v1i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_u64 :
  Intrinsic<[llvm_double_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Floating-point Reciprocal Exponent
def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic;

class Neon_Cmp_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty],
              [IntrNoMem]>;

// Scalar Compare Equal
def int_aarch64_neon_vceq : Neon_Cmp_Intrinsic;

// Scalar Compare Greater-Than or Equal
def int_aarch64_neon_vcge : Neon_Cmp_Intrinsic;
def int_aarch64_neon_vchs : Neon_Cmp_Intrinsic;

// Scalar Compare Less-Than or Equal To Zero
def int_aarch64_neon_vclez : Neon_Cmp_Intrinsic;

// Scalar Compare Less-Than Zero
def int_aarch64_neon_vcltz : Neon_Cmp_Intrinsic;

// Scalar Compare Greater-Than
def int_aarch64_neon_vcgt : Neon_Cmp_Intrinsic;
def int_aarch64_neon_vchi : Neon_Cmp_Intrinsic;

// Scalar Compare Bitwise Test Bits
def int_aarch64_neon_vtstd : Neon_Cmp_Intrinsic;

// Scalar Floating-point Absolute Compare Greater Than Or Equal
def int_aarch64_neon_vcage : Neon_Cmp_Intrinsic;

// Scalar Floating-point Absolute Compare Greater Than
def int_aarch64_neon_vcagt : Neon_Cmp_Intrinsic;

// Scalar Signed Saturating Accumulate of Unsigned Value
def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic;

// Scalar Unsigned Saturating Accumulate of Signed Value
def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;

// Scalar Absolute Value
def int_aarch64_neon_vabs :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Absolute Difference
def int_aarch64_neon_vabd : Neon_2Arg_Intrinsic;

// Scalar Negate Value
def int_aarch64_neon_vneg :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Signed Saturating Doubling Multiply-Add Long
def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;

// Signed Saturating Doubling Multiply-Subtract Long
def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;

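// Polynomial Multiply Long: multiplies two poly64 values and returns the
// poly128_t product as a <16 x i8> vector.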
def int_aarch64_neon_vmull_p64 :
  Intrinsic<[llvm_v16i8_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

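// Scalar shift-by-immediate intrinsic classes; the llvm_i32_ty operand is the
// immediate shift amount.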
class Neon_2Arg_ShiftImm_Intrinsic
  : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;

class Neon_3Arg_ShiftImm_Intrinsic
  : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty],
              [IntrNoMem]>;

// Scalar Shift Right (Immediate)
def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;

// Scalar Shift Right and Accumulate (Immediate)
def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic;

// Scalar Rounding Shift Right and Accumulate (Immediate)
def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic;

// Scalar Shift Left (Immediate)
def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic;

// Scalar Saturating Shift Left (Immediate)
def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic;
def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;

// Scalar Signed Saturating Shift Left Unsigned (Immediate)
def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic;

// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
def int_aarch64_neon_vcvtf32_n_s32 :
  Intrinsic<[llvm_float_ty], [llvm_v1i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_n_s64 :
  Intrinsic<[llvm_double_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
def int_aarch64_neon_vcvtf32_n_u32 :
  Intrinsic<[llvm_float_ty], [llvm_v1i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_n_u64 :
  Intrinsic<[llvm_double_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
def int_aarch64_neon_vcvts_n_s32_f32 :
  Intrinsic<[llvm_v1i32_ty], [llvm_v1f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtd_n_s64_f64 :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1f64_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
def int_aarch64_neon_vcvts_n_u32_f32 :
  Intrinsic<[llvm_v1i32_ty], [llvm_v1f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtd_n_u64_f64 :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1f64_ty, llvm_i32_ty], [IntrNoMem]>;

class Neon_SHA_Intrinsic
  : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v1i32_ty, llvm_v4i32_ty],
              [IntrNoMem]>;

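// SHA-1 hash update operations: choose (SHA1C), majority (SHA1M) and
// parity (SHA1P)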
def int_aarch64_neon_sha1c : Neon_SHA_Intrinsic;
def int_aarch64_neon_sha1m : Neon_SHA_Intrinsic;
def int_aarch64_neon_sha1p : Neon_SHA_Intrinsic;
}