//===- IntrinsicsAArch64.td - Defines AArch64 intrinsics -----------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the AArch64-specific intrinsics.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Advanced SIMD (NEON)

let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".

// Vector Absolute Compare (Floating Point)
def int_aarch64_neon_vacgeq :
  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
def int_aarch64_neon_vacgtq :
  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
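
// Illustrative note: a def named int_aarch64_neon_vacgeq becomes the IR
// intrinsic @llvm.aarch64.neon.vacgeq (underscores map to dots, per the
// TargetPrefix comment above). A call to this non-overloaded absolute-compare
// form might look like the sketch below, assuming the usual all-ones /
// all-zeros per-lane mask convention for compare results:
//   %mask = call <2 x i64> @llvm.aarch64.neon.vacgeq(<2 x double> %a,
//                                                    <2 x double> %b)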

// Vector saturating accumulate
def int_aarch64_neon_suqadd : Neon_2Arg_Intrinsic;
def int_aarch64_neon_usqadd : Neon_2Arg_Intrinsic;

// Vector Bitwise reverse
def int_aarch64_neon_rbit : Neon_1Arg_Intrinsic;

// Vector extract and narrow
def int_aarch64_neon_xtn :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Vector floating-point convert
def int_aarch64_neon_frintn : Neon_1Arg_Intrinsic;
def int_aarch64_neon_fsqrt : Neon_1Arg_Intrinsic;
def int_aarch64_neon_fcvtxn :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtzs :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_fcvtzu :
  Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Vector maxNum (Floating Point)
def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;

// Vector minNum (Floating Point)
def int_aarch64_neon_vminnm : Neon_2Arg_Intrinsic;

// Vector Pairwise maxNum (Floating Point)
def int_aarch64_neon_vpmaxnm : Neon_2Arg_Intrinsic;

// Vector Pairwise minNum (Floating Point)
def int_aarch64_neon_vpminnm : Neon_2Arg_Intrinsic;

// Vector Multiply Extended and Scalar Multiply Extended (Floating Point)
def int_aarch64_neon_vmulx  :
  Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>;

class Neon_N2V_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem]>;
class Neon_N3V_Intrinsic
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
              [IntrNoMem]>;
class Neon_N2V_Narrow_Intrinsic
  : Intrinsic<[llvm_anyvector_ty],
              [LLVMExtendedElementVectorType<0>, llvm_i32_ty],
              [IntrNoMem]>;
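
// Shape notes (an informal sketch, not normative): in these N2V/N3V classes
// the trailing llvm_i32_ty operand carries the shift immediate, and in
// Neon_N2V_Narrow_Intrinsic the source vector has elements twice as wide as
// the overloaded result type. For example, a rounding-shift-right-narrow call
// producing <8 x i8> from <8 x i16> might be written as below (the overload
// suffix shown follows the usual intrinsic name mangling and is illustrative):
//   %narrowed = call <8 x i8> @llvm.aarch64.neon.vrshrn.v8i8(<8 x i16> %v,
//                                                            i32 3)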

// Vector rounding shift right by immediate (Signed)
def int_aarch64_neon_vsrshr : Neon_N2V_Intrinsic;
def int_aarch64_neon_vurshr : Neon_N2V_Intrinsic;
def int_aarch64_neon_vsqshlu : Neon_N2V_Intrinsic;

def int_aarch64_neon_vsri : Neon_N3V_Intrinsic;
def int_aarch64_neon_vsli : Neon_N3V_Intrinsic;

def int_aarch64_neon_vsqshrun : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vrshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqrshrun : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;

// Vector across
class Neon_Across_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>;

def int_aarch64_neon_saddlv : Neon_Across_Intrinsic;
def int_aarch64_neon_uaddlv : Neon_Across_Intrinsic;
def int_aarch64_neon_smaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_umaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_sminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_uminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vaddv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vmaxv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vminv  : Neon_Across_Intrinsic;
def int_aarch64_neon_vmaxnmv : Neon_Across_Intrinsic;
def int_aarch64_neon_vminnmv : Neon_Across_Intrinsic;

// Vector Table Lookup.
def int_aarch64_neon_vtbl1 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbl2 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<0>],
            [IntrNoMem]>;

def int_aarch64_neon_vtbl3 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
            LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbl4 :
  Intrinsic<[llvm_anyvector_ty],
            [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
            LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;

// Vector Table Extension.
// Some elements of the destination vector may not be updated, so the original
// value of that vector is passed as the first argument.  The next 1-4
// arguments after that are the table.
def int_aarch64_neon_vtbx1 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
            [IntrNoMem]>;

def int_aarch64_neon_vtbx2 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
            LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbx3 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
            LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;

def int_aarch64_neon_vtbx4 :
  Intrinsic<[llvm_anyvector_ty],
            [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
            LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<0>],
            [IntrNoMem]>;
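
// Table-extension usage sketch (illustrative; the concrete vector types and
// the name-mangling suffix depend on the overloads chosen at the call site):
// lanes whose table index is out of range keep the value of the first operand,
// which is why the original destination vector is passed in explicitly.
//   %r = call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %orig,
//                                                          <16 x i8> %table,
//                                                          <8 x i8> %index)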

// Vector Load/store
def int_aarch64_neon_vld1x2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                                        [llvm_ptr_ty, llvm_i32_ty],
                                        [IntrReadArgMem]>;
def int_aarch64_neon_vld1x3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                         LLVMMatchType<0>],
                                        [llvm_ptr_ty, llvm_i32_ty],
                                        [IntrReadArgMem]>;
def int_aarch64_neon_vld1x4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                         LLVMMatchType<0>, LLVMMatchType<0>],
                                        [llvm_ptr_ty, llvm_i32_ty],
                                        [IntrReadArgMem]>;

def int_aarch64_neon_vst1x2 : Intrinsic<[],
                                        [llvm_ptr_ty, llvm_anyvector_ty,
                                         LLVMMatchType<0>, llvm_i32_ty],
                                        [IntrReadWriteArgMem]>;
def int_aarch64_neon_vst1x3 : Intrinsic<[],
                                        [llvm_ptr_ty, llvm_anyvector_ty,
                                         LLVMMatchType<0>, LLVMMatchType<0>,
                                         llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_aarch64_neon_vst1x4 : Intrinsic<[],
                                        [llvm_ptr_ty, llvm_anyvector_ty,
                                         LLVMMatchType<0>, LLVMMatchType<0>,
                                         LLVMMatchType<0>, llvm_i32_ty],
                                        [IntrReadWriteArgMem]>;
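
// Usage sketch for the multi-vector loads/stores (assumptions, not guarantees
// from this file alone: the trailing llvm_i32_ty is taken to be an alignment
// hint, following the long-standing NEON load/store intrinsic convention, and
// the overload suffix shown is illustrative):
//   %pair = call { <4 x i32>, <4 x i32> }
//           @llvm.aarch64.neon.vld1x2.v4i32(i8* %ptr, i32 16)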

// Scalar Add
def int_aarch64_neon_vaddds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vadddu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;


// Scalar Sub
def int_aarch64_neon_vsubds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vsubdu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;


// Scalar Shift
// Scalar Shift Left
def int_aarch64_neon_vshlds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vshldu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Shift Left
def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;

// Scalar Rounding Shift Left
def int_aarch64_neon_vrshlds :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vrshldu :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Saturating Rounding Shift Left
def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;

// Scalar Reduce Pairwise Add.
def int_aarch64_neon_vpadd :
  Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfadd :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Reduce Pairwise Floating Point Max/Min.
def int_aarch64_neon_vpmax :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpmin :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
def int_aarch64_neon_vpfmaxnm :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfminnm :
  Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>;

// Scalar Signed Integer Convert To Floating-point
def int_aarch64_neon_vcvtf32_s32 :
  Intrinsic<[llvm_float_ty], [llvm_v1i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_s64 :
  Intrinsic<[llvm_double_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Unsigned Integer Convert To Floating-point
def int_aarch64_neon_vcvtf32_u32 :
  Intrinsic<[llvm_float_ty], [llvm_v1i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_u64 :
  Intrinsic<[llvm_double_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Floating-point Reciprocal Exponent
def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic;

class Neon_Cmp_Intrinsic
  : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty],
              [IntrNoMem]>;

// Scalar Compare Equal
def int_aarch64_neon_vceq : Neon_Cmp_Intrinsic;

// Scalar Compare Greater-Than or Equal
def int_aarch64_neon_vcge : Neon_Cmp_Intrinsic;
def int_aarch64_neon_vchs : Neon_Cmp_Intrinsic;

// Scalar Compare Less-Than or Equal
def int_aarch64_neon_vclez : Neon_Cmp_Intrinsic;

// Scalar Compare Less-Than
def int_aarch64_neon_vcltz : Neon_Cmp_Intrinsic;

// Scalar Compare Greater-Than
def int_aarch64_neon_vcgt : Neon_Cmp_Intrinsic;
def int_aarch64_neon_vchi : Neon_Cmp_Intrinsic;

// Scalar Compare Bitwise Test Bits
def int_aarch64_neon_vtstd : Neon_Cmp_Intrinsic;

// Scalar Floating-point Absolute Compare Greater Than Or Equal
def int_aarch64_neon_vcage : Neon_Cmp_Intrinsic;

// Scalar Floating-point Absolute Compare Greater Than
def int_aarch64_neon_vcagt : Neon_Cmp_Intrinsic;

// Scalar Signed Saturating Accumulate of Unsigned Value
def int_aarch64_neon_vuqadd : Neon_2Arg_Intrinsic;

// Scalar Unsigned Saturating Accumulate of Signed Value
def int_aarch64_neon_vsqadd : Neon_2Arg_Intrinsic;

// Scalar Absolute Value
def int_aarch64_neon_vabs :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Scalar Absolute Difference
def int_aarch64_neon_vabd : Neon_2Arg_Intrinsic;

// Scalar Negate Value
def int_aarch64_neon_vneg :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty], [IntrNoMem]>;

// Signed Saturating Doubling Multiply-Add Long
def int_aarch64_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;

// Signed Saturating Doubling Multiply-Subtract Long
def int_aarch64_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;

def int_aarch64_neon_vmull_p64 :
  Intrinsic<[llvm_v16i8_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

class Neon_2Arg_ShiftImm_Intrinsic
  : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;

class Neon_3Arg_ShiftImm_Intrinsic
  : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty, llvm_i32_ty],
              [IntrNoMem]>;

// Scalar Shift Right (Immediate)
def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;

// Scalar Shift Right and Accumulate (Immediate)
def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic;

// Scalar Rounding Shift Right and Accumulate (Immediate)
def int_aarch64_neon_vrsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
def int_aarch64_neon_vrsradu_n : Neon_3Arg_ShiftImm_Intrinsic;

// Scalar Shift Left (Immediate)
def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic;

// Scalar Saturating Shift Left (Immediate)
def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic;
def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;

// Scalar Signed Saturating Shift Left Unsigned (Immediate)
def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic;

// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
def int_aarch64_neon_vcvtf32_n_s32 :
  Intrinsic<[llvm_float_ty], [llvm_v1i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_n_s64 :
  Intrinsic<[llvm_double_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
def int_aarch64_neon_vcvtf32_n_u32 :
  Intrinsic<[llvm_float_ty], [llvm_v1i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtf64_n_u64 :
  Intrinsic<[llvm_double_ty], [llvm_v1i64_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
def int_aarch64_neon_vcvts_n_s32_f32 :
  Intrinsic<[llvm_v1i32_ty], [llvm_v1f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtd_n_s64_f64 :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1f64_ty, llvm_i32_ty], [IntrNoMem]>;

// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
def int_aarch64_neon_vcvts_n_u32_f32 :
  Intrinsic<[llvm_v1i32_ty], [llvm_v1f32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_aarch64_neon_vcvtd_n_u64_f64 :
  Intrinsic<[llvm_v1i64_ty], [llvm_v1f64_ty, llvm_i32_ty], [IntrNoMem]>;
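
// Note on the fixed-point ("_n") conversions above: the trailing llvm_i32_ty
// operand is understood here as the fraction-bit count, i.e. the immediate of
// the fixed-point forms of SCVTF/UCVTF and FCVTZS/FCVTZU; this is a
// descriptive assumption and is not enforced by the definitions themselves.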

class Neon_SHA_Intrinsic
  : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v1i32_ty, llvm_v4i32_ty],
              [IntrNoMem]>;

def int_aarch64_neon_sha1c : Neon_SHA_Intrinsic;
def int_aarch64_neon_sha1m : Neon_SHA_Intrinsic;
def int_aarch64_neon_sha1p : Neon_SHA_Intrinsic;
}